Compare commits


11 Commits

Author       SHA1        Message                          Date
bursteratom  60c98a4353  stuff                            2024-12-13 15:44:51 -05:00
bursteratom  c760d2b815  test accelerator                 2024-12-12 12:29:35 -05:00
bursteratom  2014f58181  set os environ RANK              2024-12-11 11:45:07 -05:00
bursteratom  b5f9dd44f2  set os environ RANK              2024-12-11 11:40:20 -05:00
bursteratom  b17b1aada7  initialise process group for tp  2024-12-11 11:37:21 -05:00
bursteratom  85381b6b15  initialise process group for tp  2024-12-11 11:35:16 -05:00
bursteratom  acde081321  test lora tp                     2024-12-11 11:19:34 -05:00
bursteratom  e4c68a0cbc  test lora tp                     2024-12-11 11:11:52 -05:00
bursteratom  3855f5c3d3  tp example tp auto               2024-12-11 11:03:39 -05:00
bursteratom  5dd566dc63  tp example                       2024-12-11 11:01:23 -05:00
bursteratom  42389c1f78  enable tensor parallel           2024-12-11 10:38:14 -05:00
14 changed files with 147 additions and 13 deletions
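The commits above wire axolotl's config schema and model loading up to the tensor-parallel plan support exposed by recent transformers releases. As rough context only (not part of this diff), the transformers API that the new tensor_parallel: 'auto' option leans on looks like the sketch below; the GPU count and script name in the launch comment are assumptions:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Same base model as the example configs added in this compare.
model_id = "NousResearch/Meta-Llama-3.1-8B"

# tp_plan="auto" asks transformers to apply the model's built-in tensor-parallel
# sharding plan; it only takes effect for architectures whose supports_tp_plan
# property is True and expects a torchrun-style multi-process launch.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    tp_plan="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Launched with something like: torchrun --nproc-per-node 4 tp_smoke_test.py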

View File

@@ -44,11 +44,6 @@ jobs:
          python-version: ${{ matrix.python_version }}
          cache: 'pip' # caching pip dependencies
      - name: upgrade pip
        run: |
          pip3 install --upgrade pip
          pip3 install --upgrade packaging setuptools wheel
      - name: Install PyTorch
        run: |
          pip3 install torch==${{ matrix.pytorch_version }} --index-url https://download.pytorch.org/whl/cpu

View File

@@ -8,8 +8,3 @@ pytest -v --durations=10 -n8 --ignore=tests/e2e/ --ignore=tests/patched/ /worksp
pytest -v --durations=10 -n1 --dist loadfile /workspace/axolotl/tests/e2e/patched/
pytest -v --durations=10 -n1 --dist loadfile /workspace/axolotl/tests/e2e/integrations/
pytest -v --durations=10 --ignore=tests/e2e/patched/ --ignore=tests/e2e/multigpu/ --ignore=tests/e2e/integrations/ /workspace/axolotl/tests/e2e/
tests=$(pytest --collect-only -q tests/e2e/each)
for t in $tests; do
  pytest $t
done

View File

@@ -0,0 +1,58 @@
base_model: NousResearch/Meta-Llama-3.1-8B
load_in_8bit: false
load_in_4bit: false
strict: false
datasets:
  - path: tatsu-lab/alpaca
    type: alpaca
dataset_prepared_path: last_run_prepared
val_set_size: 0.05
output_dir: ./outputs/out
sequence_len: 8192
sample_packing: true
pad_to_sequence_len: true
wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:
gradient_accumulation_steps: 8
micro_batch_size: 1
num_epochs: 1
optimizer: paged_adamw_8bit
lr_scheduler: cosine
learning_rate: 2e-5
train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false
tensor_parallel: 'auto'
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
early_stopping_patience:
resume_from_checkpoint:
logging_steps: 1
xformers_attention:
flash_attention: true
warmup_steps: 100
evals_per_epoch: 2
eval_table_size:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens:
  pad_token: <|end_of_text|>
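The new example config above is plain YAML; below is a minimal sketch of reading such a file and acting on the tensor_parallel switch. The file path is assumed, since the compare view does not show the new file's name, and the real axolotl loader does far more validation:

import os
import yaml

# Hypothetical path, for illustration only.
with open("examples/llama-3/fft-8b-tp.yaml", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

# Mirrors the post_loading_set_env() change further down in this compare.
if cfg.get("tensor_parallel") == "auto":
    os.environ["ACCELERATE_USE_TP"] = "true"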

View File

@@ -0,0 +1,73 @@
base_model: NousResearch/Meta-Llama-3.1-8B
model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer
load_in_8bit: true
load_in_4bit: false
strict: false
datasets:
  - path: mhenrichsen/alpaca_2k_test
    type: alpaca
dataset_prepared_path:
val_set_size: 0.05
output_dir: ./outputs/lora-out
sequence_len: 4096
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true
adapter: lora
lora_model_dir:
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
lora_target_linear: true
lora_fan_in_fan_out:
lora_modules_to_save:
  - embed_tokens
  - lm_head
wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:
gradient_accumulation_steps: 4
micro_batch_size: 2
num_epochs: 4
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false
tensor_parallel: 'auto'
gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true
s2_attention:
warmup_steps: 10
evals_per_epoch: 4
eval_table_size:
eval_max_new_tokens: 128
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens:
  pad_token: <|end_of_text|>
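For orientation only, the LoRA keys in the config above map roughly onto the following peft configuration; axolotl builds its own LoraConfig internally, so this is a sketch rather than the code path touched by this compare:

from peft import LoraConfig

lora_config = LoraConfig(
    r=32,                                         # lora_r
    lora_alpha=16,                                # lora_alpha
    lora_dropout=0.05,                            # lora_dropout
    target_modules="all-linear",                  # approximates lora_target_linear: true
    modules_to_save=["embed_tokens", "lm_head"],  # lora_modules_to_save
    task_type="CAUSAL_LM",
)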

View File

@@ -12,7 +12,7 @@ liger-kernel==0.4.2
packaging==23.2
peft==0.14.0
transformers==4.47.0
transformers>=4.46.3
tokenizers>=0.20.1
accelerate==1.2.0
datasets==3.1.0

View File

@@ -1319,6 +1319,10 @@ class TrainerBuilderBase(abc.ABC):
        if hasattr(model, "add_model_tags"):
            model.add_model_tags(["axolotl"])

        if self.cfg.tensor_parallel == "auto" and self.model.supports_tp_plan:
            os.environ["ACCELERATE_USE_TP"] = "true"
            # self.model =

    @property
    def model_ref(self):
        return self._model_ref
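supports_tp_plan, checked above before setting ACCELERATE_USE_TP, is a property on transformers models. A quick standalone way to see whether a checkpoint defines a built-in TP plan, as a sketch that is not part of this change:

import torch
from transformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained("NousResearch/Meta-Llama-3.1-8B")
# Instantiating on the meta device avoids allocating real weights just to inspect the plan.
with torch.device("meta"):
    model = AutoModelForCausalLM.from_config(config)
print(model.supports_tp_plan)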

View File

@@ -66,7 +66,10 @@ class EvalFirstStepCallback(
        control: TrainerControl,
        **kwargs,
    ):
        if args.eval_strategy == IntervalStrategy.STEPS and state.global_step == 1:
        if (
            args.evaluation_strategy == IntervalStrategy.STEPS
            and state.global_step == 1
        ):
            control.should_evaluate = True
        return control

View File

@@ -393,7 +393,7 @@ class ModelInputConfig(BaseModel):
        default=None, json_schema_extra={"description": "transformers processor class"}
    )
    trust_remote_code: Optional[bool] = None
    tensor_parallel: Optional[Union[Literal["auto"], bool]] = "auto"
    model_kwargs: Optional[Dict[str, Any]] = None

    @field_validator("trust_remote_code")
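The new tensor_parallel field accepts the literal "auto" or a boolean and defaults to "auto". A standalone pydantic sketch of that behaviour, separate from the real ModelInputConfig:

from typing import Literal, Optional, Union

from pydantic import BaseModel

class TensorParallelSettings(BaseModel):
    tensor_parallel: Optional[Union[Literal["auto"], bool]] = "auto"

print(TensorParallelSettings().tensor_parallel)                       # "auto" by default
print(TensorParallelSettings(tensor_parallel=False).tensor_parallel)  # an explicit boolean is also accepted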

View File

@@ -1187,9 +1187,15 @@ class ModelLoader:
        gc.collect()
        torch.cuda.empty_cache()

        self.post_loading_set_env()

        # TODO resume_from_checkpoint handling
        return self.model, lora_config

    def post_loading_set_env(self):
        if self.cfg.tensor_parallel == "auto" and self.model.supports_tp_plan:
            os.environ["ACCELERATE_USE_TP"] = "true"


def load_model(
    cfg: DictDefault,