Add transformers v4.40 models to config, update changelog
gsarti committed Apr 30, 2024
1 parent 8de5e29 commit 04dde30
Showing 2 changed files with 33 additions and 1 deletion.
20 changes: 20 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,23 @@
# Changelog

*This file contains a high-level description of changes that were merged into the Inseq main branch since the last release. Refer to the [releases page](https://github.com/inseq-team/inseq/releases) for an exhaustive overview of changes introduced at each release.*

## 🚀 Features

- Added new models `DbrxForCausalLM`, `OlmoForCausalLM`, `Phi3ForCausalLM`, `Qwen2MoeForCausalLM` to model config.
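  A minimal sketch of attributing one of the newly supported architectures, assuming the standard `inseq.load_model` API; the checkpoint name is used purely as an illustration:

  ```python
  import inseq

  # Attribution for one of the newly supported architectures (OlmoForCausalLM);
  # the checkpoint name is illustrative.
  model = inseq.load_model("allenai/OLMo-1B-hf", "attention")
  out = model.attribute("The capital of France is")
  out.show()
  ```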

## 🔧 Fixes and Refactoring

- Fix the issue in the attention implementation from [#268](https://github.com/inseq-team/inseq/issues/268) where non-terminal positions in the tensor were set to `nan` if they were 0s ([#269](https://github.com/inseq-team/inseq/pull/269)).

- Fix the pad token not being set in cases where it is not specified by default by the loaded model (e.g. for Qwen models) ([#269](https://github.com/inseq-team/inseq/pull/269)).

- Fix the bug reported in [#266](https://github.com/inseq-team/inseq/issues/266) that made `value_zeroing` unusable with SDPA attention. This enables using the method on models that use SDPA attention by default (e.g. `GemmaForCausalLM`) without passing `model_kwargs={'attn_implementation': 'eager'}` ([#267](https://github.com/inseq-team/inseq/pull/267)).
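  A minimal sketch of the now-working call, assuming the usual `inseq` attribution API (`google/gemma-2b` is used only as an illustrative SDPA-default checkpoint):

  ```python
  import inseq

  # value_zeroing now works with the default SDPA attention implementation,
  # so no model_kwargs={"attn_implementation": "eager"} override is needed.
  model = inseq.load_model("google/gemma-2b", "value_zeroing")
  out = model.attribute("Paris is the capital of")
  out.show()
  ```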

## 📝 Documentation and Tutorials

*No changes*

## 💥 Breaking Changes

*No changes*
14 changes: 13 additions & 1 deletion inseq/models/model_config.yaml
@@ -11,6 +11,9 @@ CodeGenForCausalLM:
CohereForCausalLM:
self_attention_module: "self_attn"
value_vector: "value_states"
DbrxForCausalLM:
self_attention_module: "attn"
value_vector: "value_states"
FalconForCausalLM:
self_attention_module: "self_attention"
value_vector: "value_layer"
@@ -44,6 +47,9 @@ MixtralForCausalLM:
MptForCausalLM:
self_attention_module: "attn"
value_vector: "value_states"
OlmoForCausalLM:
self_attention_module: "self_attn"
value_vector: "value_states"
OpenAIGPTLMHeadModel:
self_attention_module: "attn"
value_vector: "value"
@@ -53,9 +59,15 @@ OPTForCausalLM:
PhiForCausalLM:
self_attention_module: "self_attn"
value_vector: "value_states"
Phi3ForCausalLM:
self_attention_module: "self_attn"
value_vector: "value_states"
Qwen2ForCausalLM:
self_attention_module: "self_attn"
value_vector: "value_states"
Qwen2MoeForCausalLM:
self_attention_module: "self_attn"
value_vector: "value_states"
StableLmForCausalLM:
self_attention_module: "self_attn"
value_vector: "value_states"
@@ -114,4 +126,4 @@ T5ForConditionalGeneration:
UMT5ForConditionalGeneration:
self_attention_module: "SelfAttention"
cross_attention_module: "EncDecAttention"
value_vector: "value_states"
value_vector: "value_states"
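
For context, each entry above maps a `transformers` architecture to the attribute name of its per-layer self-attention module and to the name of the value tensor computed inside that module, which methods like `value_zeroing` rely on to locate the values. A hypothetical sanity check of the new `OlmoForCausalLM` entry (the checkpoint name is illustrative):

```python
from transformers import AutoModelForCausalLM

# Each Olmo decoder layer should expose its attention block under the attribute
# named in the config above ("self_attn"); "value_states" refers to the value
# tensor computed inside that module's forward pass.
model = AutoModelForCausalLM.from_pretrained("allenai/OLMo-1B-hf")
print(type(model.model.layers[0].self_attn).__name__)
```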
