From 9dde9e1b71ba45709ea7f396268706de5a85ce53 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Thu, 17 Jul 2025 09:47:45 -0400 Subject: [PATCH] misc fixes 202507 (#2937) [skip ci] * misc fixes 202507 * manually handle attn class for llama4 --- codecov.yml | 1 + src/axolotl/monkeypatch/lora_kernels.py | 5 +++++ src/axolotl/utils/data/shared.py | 4 ++-- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/codecov.yml b/codecov.yml index 2741b1758..28921f9be 100644 --- a/codecov.yml +++ b/codecov.yml @@ -22,6 +22,7 @@ coverage: only_pulls: true flags: null paths: null + informational: true patch: default: # basic diff --git a/src/axolotl/monkeypatch/lora_kernels.py b/src/axolotl/monkeypatch/lora_kernels.py index 4702ad19d..48bc10c0b 100644 --- a/src/axolotl/monkeypatch/lora_kernels.py +++ b/src/axolotl/monkeypatch/lora_kernels.py @@ -151,6 +151,11 @@ def get_attention_cls_from_config(cfg: DictDefault) -> Type[nn.Module]: return MllamaTextSelfAttention + if model_type == "llama4": + from transformers.models.llama4.modeling_llama4 import Llama4TextAttention + + return Llama4TextAttention + try: # Dynamically import the module and attention class module_path = f"transformers.models.{model_type}.modeling_{model_type}" diff --git a/src/axolotl/utils/data/shared.py b/src/axolotl/utils/data/shared.py index c3c70545c..c30459d5b 100644 --- a/src/axolotl/utils/data/shared.py +++ b/src/axolotl/utils/data/shared.py @@ -460,13 +460,13 @@ def load_preprocessed_dataset(cfg: DictDefault, dataset_hash: str) -> Dataset | ): LOG.info( f"Loading prepared dataset from disk at {prepared_ds_path}...", - main_process_only=False, + main_process_only=True, ) return load_from_disk(str(prepared_ds_path)) LOG.info( f"Unable to find prepared dataset in {prepared_ds_path}", - main_process_only=False, + main_process_only=True, ) return None