diff --git a/.nojekyll b/.nojekyll index 8fc3cab38..b505fa798 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -ce7842d3 \ No newline at end of file +e3a927da \ No newline at end of file diff --git a/FAQS.html b/FAQS.html index f830a41e7..b8e57147a 100644 --- a/FAQS.html +++ b/FAQS.html @@ -2,7 +2,7 @@
- + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/TODO.html b/TODO.html index fe087edb8..043afc751 100644 --- a/TODO.html +++ b/TODO.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/amd_hpc.html b/docs/amd_hpc.html index 43f28314c..6729e593b 100644 --- a/docs/amd_hpc.html +++ b/docs/amd_hpc.html @@ -2,7 +2,7 @@ - + @@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.args.html b/docs/api/cli.args.html index 5c7ccf150..45326f855 100644 --- a/docs/api/cli.args.html +++ b/docs/api/cli.args.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.checks.html b/docs/api/cli.checks.html index 3a399516b..81b7a2db1 100644 --- a/docs/api/cli.checks.html +++ b/docs/api/cli.checks.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.cloud.base.html b/docs/api/cli.cloud.base.html index d84d7361c..8c7084221 100644 --- a/docs/api/cli.cloud.base.html +++ b/docs/api/cli.cloud.base.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.cloud.modal_.html b/docs/api/cli.cloud.modal_.html index ef7344cde..70ada4697 100644 --- a/docs/api/cli.cloud.modal_.html +++ b/docs/api/cli.cloud.modal_.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.config.html b/docs/api/cli.config.html index 699a9c317..a99253e7e 100644 --- a/docs/api/cli.config.html +++ b/docs/api/cli.config.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.evaluate.html b/docs/api/cli.evaluate.html index bd7172685..4bcf4396f 100644 --- a/docs/api/cli.evaluate.html +++ b/docs/api/cli.evaluate.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.inference.html b/docs/api/cli.inference.html index 179b702fd..a2e44c3a9 100644 --- a/docs/api/cli.inference.html +++ b/docs/api/cli.inference.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.main.html b/docs/api/cli.main.html index ed8aaab61..ad902b8de 100644 --- a/docs/api/cli.main.html +++ b/docs/api/cli.main.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.merge_lora.html b/docs/api/cli.merge_lora.html index 2db0ca75e..64d3e26c8 100644 --- a/docs/api/cli.merge_lora.html +++ b/docs/api/cli.merge_lora.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.merge_sharded_fsdp_weights.html b/docs/api/cli.merge_sharded_fsdp_weights.html index 259201127..e30111985 100644 --- a/docs/api/cli.merge_sharded_fsdp_weights.html +++ b/docs/api/cli.merge_sharded_fsdp_weights.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.preprocess.html b/docs/api/cli.preprocess.html index 7be534fa3..ea78d1c3a 100644 --- a/docs/api/cli.preprocess.html +++ b/docs/api/cli.preprocess.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.quantize.html b/docs/api/cli.quantize.html index 19d629c57..ece804f0e 100644 --- a/docs/api/cli.quantize.html +++ b/docs/api/cli.quantize.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.sweeps.html b/docs/api/cli.sweeps.html index 85006fc13..7774d319f 100644 --- a/docs/api/cli.sweeps.html +++ b/docs/api/cli.sweeps.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.train.html b/docs/api/cli.train.html index 9aeca8838..696c29151 100644 --- a/docs/api/cli.train.html +++ b/docs/api/cli.train.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.utils.html b/docs/api/cli.utils.html index 8e465ee35..a1c20b2e0 100644 --- a/docs/api/cli.utils.html +++ b/docs/api/cli.utils.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.vllm_serve.html b/docs/api/cli.vllm_serve.html index 390efa71c..9b4756035 100644 --- a/docs/api/cli.vllm_serve.html +++ b/docs/api/cli.vllm_serve.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/common.architectures.html b/docs/api/common.architectures.html index 16f5c5968..2387cbdce 100644 --- a/docs/api/common.architectures.html +++ b/docs/api/common.architectures.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/common.const.html b/docs/api/common.const.html index abce5b674..26823cc80 100644 --- a/docs/api/common.const.html +++ b/docs/api/common.const.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/common.datasets.html b/docs/api/common.datasets.html index fcea0706f..f7348888d 100644 --- a/docs/api/common.datasets.html +++ b/docs/api/common.datasets.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/convert.html b/docs/api/convert.html index e12228a5b..3085bb207 100644 --- a/docs/api/convert.html +++ b/docs/api/convert.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.builders.base.html b/docs/api/core.builders.base.html index e8e66d364..887ef2037 100644 --- a/docs/api/core.builders.base.html +++ b/docs/api/core.builders.base.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.builders.causal.html b/docs/api/core.builders.causal.html index 2ed028e94..943afe52d 100644 --- a/docs/api/core.builders.causal.html +++ b/docs/api/core.builders.causal.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.builders.rl.html b/docs/api/core.builders.rl.html index 0057eae22..a4908556c 100644 --- a/docs/api/core.builders.rl.html +++ b/docs/api/core.builders.rl.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.chat.format.chatml.html b/docs/api/core.chat.format.chatml.html index 611ecd3da..3adbc037a 100644 --- a/docs/api/core.chat.format.chatml.html +++ b/docs/api/core.chat.format.chatml.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/core.chat.format.llama3x.html b/docs/api/core.chat.format.llama3x.html index 96dbb7a21..57a26f889 100644 --- a/docs/api/core.chat.format.llama3x.html +++ b/docs/api/core.chat.format.llama3x.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/core.chat.format.shared.html b/docs/api/core.chat.format.shared.html index b2853426b..bf92033e0 100644 --- a/docs/api/core.chat.format.shared.html +++ b/docs/api/core.chat.format.shared.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/core.chat.messages.html b/docs/api/core.chat.messages.html index 41c1796c3..e5fda6d2e 100644 --- a/docs/api/core.chat.messages.html +++ b/docs/api/core.chat.messages.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.datasets.chat.html b/docs/api/core.datasets.chat.html index e5adfdb9f..2a7bf4af9 100644 --- a/docs/api/core.datasets.chat.html +++ b/docs/api/core.datasets.chat.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.datasets.transforms.chat_builder.html b/docs/api/core.datasets.transforms.chat_builder.html index 938c416b8..0f5e8297e 100644 --- a/docs/api/core.datasets.transforms.chat_builder.html +++ b/docs/api/core.datasets.transforms.chat_builder.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.base.html b/docs/api/core.trainers.base.html index d89770446..541c9fa51 100644 --- a/docs/api/core.trainers.base.html +++ b/docs/api/core.trainers.base.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.dpo.trainer.html b/docs/api/core.trainers.dpo.trainer.html index 1c6925099..514549c33 100644 --- a/docs/api/core.trainers.dpo.trainer.html +++ b/docs/api/core.trainers.dpo.trainer.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.grpo.sampler.html b/docs/api/core.trainers.grpo.sampler.html index c6f989026..103dc7b9b 100644 --- a/docs/api/core.trainers.grpo.sampler.html +++ b/docs/api/core.trainers.grpo.sampler.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.grpo.trainer.html b/docs/api/core.trainers.grpo.trainer.html index 38949c3ec..aa0bcecfc 100644 --- a/docs/api/core.trainers.grpo.trainer.html +++ b/docs/api/core.trainers.grpo.trainer.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.mamba.html b/docs/api/core.trainers.mamba.html index a6a9393d3..13a503d08 100644 --- a/docs/api/core.trainers.mamba.html +++ b/docs/api/core.trainers.mamba.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.mixins.optimizer.html b/docs/api/core.trainers.mixins.optimizer.html index 5a1e2d1e8..efdcaa905 100644 --- a/docs/api/core.trainers.mixins.optimizer.html +++ b/docs/api/core.trainers.mixins.optimizer.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.mixins.rng_state_loader.html b/docs/api/core.trainers.mixins.rng_state_loader.html index 12a6edceb..8e754b6ec 100644 --- a/docs/api/core.trainers.mixins.rng_state_loader.html +++ b/docs/api/core.trainers.mixins.rng_state_loader.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.mixins.scheduler.html b/docs/api/core.trainers.mixins.scheduler.html index 70fc85389..64b74ae7b 100644 --- a/docs/api/core.trainers.mixins.scheduler.html +++ b/docs/api/core.trainers.mixins.scheduler.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.relora.html b/docs/api/core.trainers.relora.html index 8ca9b12b2..bad4b4c7a 100644 --- a/docs/api/core.trainers.relora.html +++ b/docs/api/core.trainers.relora.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.trl.html b/docs/api/core.trainers.trl.html index 2f6145b8e..6ca87adf9 100644 --- a/docs/api/core.trainers.trl.html +++ b/docs/api/core.trainers.trl.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.utils.html b/docs/api/core.trainers.utils.html index e58ef1e3b..fbf337991 100644 --- a/docs/api/core.trainers.utils.html +++ b/docs/api/core.trainers.utils.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/core.training_args.html b/docs/api/core.training_args.html index f35ea760a..e2c507b72 100644 --- a/docs/api/core.training_args.html +++ b/docs/api/core.training_args.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + @@ -477,7 +477,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});core.training_args.AxolotlCPOConfig(
- model_type=None,
- lr_quadratic_warmup=False,
- pretraining=False,
- sample_packing=False,
- sample_packing_sequentially=False,
- multipack_real_batches=False,
- eval_sample_packing=None,
- sample_packing_efficiency=1.0,
- sample_packing_bin_size=200,
- sample_packing_group_size=100000,
- max_seq_length=2048,
- dataset_num_proc=None,
- relora_steps=None,
- relora_warmup_steps=None,
- relora_anneal_steps=None,
- relora_prune_ratio=0.9,
- bench_split='eval',
- bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
- do_bench_eval=False,
- do_causal_lm_eval=False,
- max_bench_samples=None,
- bench_source_max_len=2048,
- dataloader_prefetch_factor=None,
- cosine_min_lr_ratio=None,
- cosine_constant_lr_ratio=None,
- loraplus_lr_ratio=None,
- loraplus_lr_embedding=1e-06,
- embedding_lr_scale=None,
- lr_groups=None,
- embedding_lr=None,
- qlora=False,
- orpo_alpha=None,
- lisa_n_layers=None,
- lisa_step_interval=None,
- lisa_layers_attribute=None,
- curriculum_sampling=None,
- alternate_lr_scheduler_type=None,
- chat_template=None,
- kd_ce_alpha=None,
- kd_alpha=1.0,
- kd_temperature=1.0,
- kd_zscore_base_temp=None,
- kd_top_k_before_softmax=None,
- adam_beta3=None,
- adam_epsilon2=None,
- image_size=None,
- image_resize_algorithm=None,
- simpo_gamma=None,
-)core.training_args.AxolotlCPOConfig(simpo_gamma=None)CPO config for CPO training
core.training_args.AxolotlKTOConfig(
- model_type=None,
- lr_quadratic_warmup=False,
- pretraining=False,
- sample_packing=False,
- sample_packing_sequentially=False,
- multipack_real_batches=False,
- eval_sample_packing=None,
- sample_packing_efficiency=1.0,
- sample_packing_bin_size=200,
- sample_packing_group_size=100000,
- max_seq_length=2048,
- dataset_num_proc=None,
- relora_steps=None,
- relora_warmup_steps=None,
- relora_anneal_steps=None,
- relora_prune_ratio=0.9,
- bench_split='eval',
- bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
- do_bench_eval=False,
- do_causal_lm_eval=False,
- max_bench_samples=None,
- bench_source_max_len=2048,
- dataloader_prefetch_factor=None,
- cosine_min_lr_ratio=None,
- cosine_constant_lr_ratio=None,
- loraplus_lr_ratio=None,
- loraplus_lr_embedding=1e-06,
- embedding_lr_scale=None,
- lr_groups=None,
- embedding_lr=None,
- qlora=False,
- orpo_alpha=None,
- lisa_n_layers=None,
- lisa_step_interval=None,
- lisa_layers_attribute=None,
- curriculum_sampling=None,
- alternate_lr_scheduler_type=None,
- chat_template=None,
- kd_ce_alpha=None,
- kd_alpha=1.0,
- kd_temperature=1.0,
- kd_zscore_base_temp=None,
- kd_top_k_before_softmax=None,
- adam_beta3=None,
- adam_epsilon2=None,
- image_size=None,
- image_resize_algorithm=None,
-)core.training_args.AxolotlKTOConfig()KTO config for KTO training
core.training_args.AxolotlORPOConfig(
- model_type=None,
- lr_quadratic_warmup=False,
- pretraining=False,
- sample_packing=False,
- sample_packing_sequentially=False,
- multipack_real_batches=False,
- eval_sample_packing=None,
- sample_packing_efficiency=1.0,
- sample_packing_bin_size=200,
- sample_packing_group_size=100000,
- max_seq_length=2048,
- dataset_num_proc=None,
- relora_steps=None,
- relora_warmup_steps=None,
- relora_anneal_steps=None,
- relora_prune_ratio=0.9,
- bench_split='eval',
- bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
- do_bench_eval=False,
- do_causal_lm_eval=False,
- max_bench_samples=None,
- bench_source_max_len=2048,
- dataloader_prefetch_factor=None,
- cosine_min_lr_ratio=None,
- cosine_constant_lr_ratio=None,
- loraplus_lr_ratio=None,
- loraplus_lr_embedding=1e-06,
- embedding_lr_scale=None,
- lr_groups=None,
- embedding_lr=None,
- qlora=False,
- orpo_alpha=None,
- lisa_n_layers=None,
- lisa_step_interval=None,
- lisa_layers_attribute=None,
- curriculum_sampling=None,
- alternate_lr_scheduler_type=None,
- chat_template=None,
- kd_ce_alpha=None,
- kd_alpha=1.0,
- kd_temperature=1.0,
- kd_zscore_base_temp=None,
- kd_top_k_before_softmax=None,
- adam_beta3=None,
- adam_epsilon2=None,
- image_size=None,
- image_resize_algorithm=None,
-)core.training_args.AxolotlORPOConfig()ORPO config for ORPO training
core.training_args.AxolotlPRMConfig(
- model_type=None,
- lr_quadratic_warmup=False,
- pretraining=False,
- sample_packing=False,
- sample_packing_sequentially=False,
- multipack_real_batches=False,
- eval_sample_packing=None,
- sample_packing_efficiency=1.0,
- sample_packing_bin_size=200,
- sample_packing_group_size=100000,
- max_seq_length=2048,
- dataset_num_proc=None,
- relora_steps=None,
- relora_warmup_steps=None,
- relora_anneal_steps=None,
- relora_prune_ratio=0.9,
- bench_split='eval',
- bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
- do_bench_eval=False,
- do_causal_lm_eval=False,
- max_bench_samples=None,
- bench_source_max_len=2048,
- dataloader_prefetch_factor=None,
- cosine_min_lr_ratio=None,
- cosine_constant_lr_ratio=None,
- loraplus_lr_ratio=None,
- loraplus_lr_embedding=1e-06,
- embedding_lr_scale=None,
- lr_groups=None,
- embedding_lr=None,
- qlora=False,
- orpo_alpha=None,
- lisa_n_layers=None,
- lisa_step_interval=None,
- lisa_layers_attribute=None,
- curriculum_sampling=None,
- alternate_lr_scheduler_type=None,
- chat_template=None,
- kd_ce_alpha=None,
- kd_alpha=1.0,
- kd_temperature=1.0,
- kd_zscore_base_temp=None,
- kd_top_k_before_softmax=None,
- adam_beta3=None,
- adam_epsilon2=None,
- image_size=None,
- image_resize_algorithm=None,
-)core.training_args.AxolotlPRMConfig()PRM config for PRM training
core.training_args.AxolotlRewardConfig(
- model_type=None,
- lr_quadratic_warmup=False,
- pretraining=False,
- sample_packing=False,
- sample_packing_sequentially=False,
- multipack_real_batches=False,
- eval_sample_packing=None,
- sample_packing_efficiency=1.0,
- sample_packing_bin_size=200,
- sample_packing_group_size=100000,
- max_seq_length=2048,
- dataset_num_proc=None,
- relora_steps=None,
- relora_warmup_steps=None,
- relora_anneal_steps=None,
- relora_prune_ratio=0.9,
- bench_split='eval',
- bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
- do_bench_eval=False,
- do_causal_lm_eval=False,
- max_bench_samples=None,
- bench_source_max_len=2048,
- dataloader_prefetch_factor=None,
- cosine_min_lr_ratio=None,
- cosine_constant_lr_ratio=None,
- loraplus_lr_ratio=None,
- loraplus_lr_embedding=1e-06,
- embedding_lr_scale=None,
- lr_groups=None,
- embedding_lr=None,
- qlora=False,
- orpo_alpha=None,
- lisa_n_layers=None,
- lisa_step_interval=None,
- lisa_layers_attribute=None,
- curriculum_sampling=None,
- alternate_lr_scheduler_type=None,
- chat_template=None,
- kd_ce_alpha=None,
- kd_alpha=1.0,
- kd_temperature=1.0,
- kd_zscore_base_temp=None,
- kd_top_k_before_softmax=None,
- adam_beta3=None,
- adam_epsilon2=None,
- image_size=None,
- image_resize_algorithm=None,
-)core.training_args.AxolotlRewardConfig()Reward config for Reward training
core.training_args.AxolotlTrainingArguments(
- model_type=None,
- lr_quadratic_warmup=False,
- pretraining=False,
- sample_packing=False,
- sample_packing_sequentially=False,
- multipack_real_batches=False,
- eval_sample_packing=None,
- sample_packing_efficiency=1.0,
- sample_packing_bin_size=200,
- sample_packing_group_size=100000,
- max_seq_length=2048,
- dataset_num_proc=None,
- relora_steps=None,
- relora_warmup_steps=None,
- relora_anneal_steps=None,
- relora_prune_ratio=0.9,
- bench_split='eval',
- bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
- do_bench_eval=False,
- do_causal_lm_eval=False,
- max_bench_samples=None,
- bench_source_max_len=2048,
- dataloader_prefetch_factor=None,
- cosine_min_lr_ratio=None,
- cosine_constant_lr_ratio=None,
- loraplus_lr_ratio=None,
- loraplus_lr_embedding=1e-06,
- embedding_lr_scale=None,
- lr_groups=None,
- embedding_lr=None,
- qlora=False,
- orpo_alpha=None,
- lisa_n_layers=None,
- lisa_step_interval=None,
- lisa_layers_attribute=None,
- curriculum_sampling=None,
- alternate_lr_scheduler_type=None,
- chat_template=None,
- kd_ce_alpha=None,
- kd_alpha=1.0,
- kd_temperature=1.0,
- kd_zscore_base_temp=None,
- kd_top_k_before_softmax=None,
- adam_beta3=None,
- adam_epsilon2=None,
- image_size=None,
- image_resize_algorithm=None,
-)core.training_args.AxolotlTrainingArguments()Training arguments for Causal trainer
This code is duplicated due to HF TrainingArguments not setting output_dir with a default value so it can’t be used as a mixin.
-core.training_args.AxolotlTrainingMixins(
- model_type=None,
- lr_quadratic_warmup=False,
- pretraining=False,
- sample_packing=False,
- sample_packing_sequentially=False,
- multipack_real_batches=False,
- eval_sample_packing=None,
- sample_packing_efficiency=1.0,
- sample_packing_bin_size=200,
- sample_packing_group_size=100000,
- max_seq_length=2048,
- dataset_num_proc=None,
- relora_steps=None,
- relora_warmup_steps=None,
- relora_anneal_steps=None,
- relora_prune_ratio=0.9,
- bench_split='eval',
- bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
- do_bench_eval=False,
- do_causal_lm_eval=False,
- max_bench_samples=None,
- bench_source_max_len=2048,
- dataloader_prefetch_factor=None,
- cosine_min_lr_ratio=None,
- cosine_constant_lr_ratio=None,
- loraplus_lr_ratio=None,
- loraplus_lr_embedding=1e-06,
- embedding_lr_scale=None,
- lr_groups=None,
- embedding_lr=None,
- qlora=False,
- orpo_alpha=None,
- lisa_n_layers=None,
- lisa_step_interval=None,
- lisa_layers_attribute=None,
- curriculum_sampling=None,
- alternate_lr_scheduler_type=None,
- chat_template=None,
- kd_ce_alpha=None,
- kd_alpha=1.0,
- kd_temperature=1.0,
- kd_zscore_base_temp=None,
- kd_top_k_before_softmax=None,
- adam_beta3=None,
- adam_epsilon2=None,
- image_size=None,
- image_resize_algorithm=None,
-)Mixin class for the Axolotl training args.
integrations.base.BasePlugin.get_collator_cls_and_kwargs(cfg, is_eval=False)Returns a custom class for the collator.
+| Name | +Type | +Description | +Default | +
|---|---|---|---|
| cfg | +DictDefault | +The global axolotl configuration. | +required | +
| is_eval | +bool | +Whether this is an eval split. | +False |
+
| Name | +Type | +Description | +
|---|---|---|
| class | ++ | The class for the collator. | +
integrations.base.BasePlugin.get_input_args()integrations.base.BasePlugin.get_input_args()Returns a pydantic model for the plugin’s input arguments.
integrations.base.BasePlugin.get_trainer_cls(cfg)integrations.base.BasePlugin.get_trainer_cls(cfg)Returns a custom class for the trainer.
-integrations.base.BasePlugin.get_training_args(cfg)Returns custom training arguments to set on TrainingArgs.
+| Name | +Type | +Description | +Default | +
|---|---|---|---|
| cfg | +DictDefault | +The global axolotl configuration. | +required | +
| Name | +Type | +Description | +
|---|---|---|
| object | ++ | dict containing the training arguments. | +
integrations.base.BasePlugin.get_training_args_mixin()Returns a dataclass model for the plugin’s training arguments.
+integrations.base.BasePlugin.load_datasets(cfg, preprocess=False)integrations.base.BasePlugin.load_datasets(cfg, preprocess=False)Loads and preprocesses the dataset for training.
-| Name | -Type | -Description | -Default | -
|---|---|---|---|
| cfg | -DictDefault | -The configuration for the plugin. | -required | -
| model | -PreTrainedModel | PeftModel | -The loaded model. | -required | -
integrations.base.BasePlugin.post_model_build(cfg, model)Performs actions after the model is built/loaded, but before any adapters are applied.
-| Name | -Type | -Description | -Default | -
|---|---|---|---|
| cfg | -DictDefault | -The configuration for the plugin. | -required | -
integrations.base.BasePlugin.post_model_load(cfg, model)Performs actions after the model is loaded.
integrations.base.BasePlugin.post_train(cfg, model)Performs actions after training is complete.
+integrations.base.BasePlugin.post_model_build(cfg, model)Performs actions after the model is built/loaded, but before any adapters are applied.
| Name | +Type | +Description | +Default | +
|---|---|---|---|
| cfg | +DictDefault | +The configuration for the plugin. | +required | +
integrations.base.BasePlugin.post_model_load(cfg, model)Performs actions after the model is loaded.
+| Name | +Type | +Description | +Default | +
|---|---|---|---|
| cfg | +DictDefault | +The configuration for the plugin. | +required | +
| model | +PreTrainedModel | PeftModel | +The loaded model. | +required | +
integrations.base.BasePlugin.post_train(cfg, model)Performs actions after training is complete.
+Calls the create_optimizer method of all registered plugins and returns |
|
| get_collator_cls_and_kwargs | +Calls the get_collator_cls_and_kwargs method of all registered plugins and returns the first non-None collator class. | +
| get_input_args | Returns a list of Pydantic classes for all registered plugins’ input arguments.’ |
| get_instance | Returns the singleton instance of PluginManager. If the instance doesn’t |
| get_trainer_cls | Calls the get_trainer_cls method of all registered plugins and returns the |
| get_training_args | +Calls the get_training_args method of all registered plugins and returns the combined training arguments. | +
| get_training_args_mixin | +Returns a list of dataclasses for all registered plugins’ training args mixins’ | +
| load_datasets | Calls the load_datasets method of each registered plugin. |
| post_lora_load | Calls the post_lora_load method of all registered plugins. |
| post_model_build | Calls the post_model_build method of all registered plugins after the |
| post_model_load | Calls the post_model_load method of all registered plugins after the model |
| post_train | Calls the post_train method of all registered plugins. |
| post_train_unload | Calls the post_train_unload method of all registered plugins. |
| post_trainer_create | Calls the post_trainer_create method of all registered plugins. |
| pre_lora_load | Calls the pre_lora_load method of all registered plugins. |
| pre_model_load | Calls the pre_model_load method of all registered plugins. |
| register | Registers a new plugin by its name. |
integrations.base.PluginManager.add_callbacks_post_trainer(cfg, trainer)integrations.base.PluginManager.add_callbacks_post_trainer(cfg, trainer)Calls the add_callbacks_post_trainer method of all registered plugins.
integrations.base.PluginManager.get_collator_cls_and_kwargs(cfg, is_eval=False)Calls the get_collator_cls_and_kwargs method of all registered plugins and returns the first non-None collator class.
+Parameters: +cfg (dict): The configuration for the plugins. +is_eval (bool): Whether this is an eval split.
+Returns: +object: The collator class, or None if none was found.
+integrations.base.PluginManager.get_input_args()integrations.base.PluginManager.get_input_args()Returns a list of Pydantic classes for all registered plugins’ input arguments.’
-integrations.base.PluginManager.get_training_args(cfg)Calls the get_training_args method of all registered plugins and returns the combined training arguments.
+Parameters: +cfg (dict): The configuration for the plugins.
+Returns: +object: The training arguments
+integrations.base.PluginManager.get_training_args_mixin()Returns a list of dataclasses for all registered plugins’ training args mixins’
+Returns: +list[str]: A list of dataclsses
+integrations.base.PluginManager.load_datasets(cfg, preprocess=False)integrations.base.PluginManager.load_datasets(cfg, preprocess=False)Calls the load_datasets method of each registered plugin.
-| Name | -Type | -Description | -Default | -
|---|---|---|---|
| cfg | -DictDefault | -The configuration for the plugins. | -required | -
| model | -PreTrainedModel | PeftModel | -The loaded model. | -required | -
integrations.base.PluginManager.post_model_build(cfg, model)Calls the post_model_build method of all registered plugins after the
-model has been built / loaded, but before any adapters have been applied.
| Name | -Type | -Description | -Default | -
|---|---|---|---|
| cfg | -DictDefault | -The configuration for the plugins. | -required | -
| model | -PreTrainedModel | -The loaded model. | -required | -
integrations.base.PluginManager.post_model_load(cfg, model)Calls the post_model_load method of all registered plugins after the model
-has been loaded inclusive of any adapters.
integrations.base.PluginManager.post_train(cfg, model)Calls the post_train method of all registered plugins.
+integrations.base.PluginManager.post_model_build(cfg, model)Calls the post_model_build method of all registered plugins after the
+model has been built / loaded, but before any adapters have been applied.
| Name | +Type | +Description | +Default | +
|---|---|---|---|
| cfg | +DictDefault | +The configuration for the plugins. | +required | +
| model | +PreTrainedModel | +The loaded model. | +required | +
integrations.base.PluginManager.post_model_load(cfg, model)Calls the post_model_load method of all registered plugins after the model
+has been loaded inclusive of any adapters.
| Name | +Type | +Description | +Default | +
|---|---|---|---|
| cfg | +DictDefault | +The configuration for the plugins. | +required | +
| model | +PreTrainedModel | PeftModel | +The loaded model. | +required | +
integrations.base.PluginManager.post_train(cfg, model)Calls the post_train method of all registered plugins.
+integrations.base.load_plugin(plugin_name)integrations.base.load_plugin(plugin_name)Loads a plugin based on the given plugin name.
The plugin name should be in the format “module_name.class_name”. This function splits the plugin name into module and class, imports the module, retrieves the class from the module, and creates an instance of the class.
-integrations.kd.trainer.AxolotlKDTrainer(
- *_args,
- bench_data_collator=None,
- eval_data_collator=None,
- dataset_tags=None,
- **kwargs,
-)integrations.kd.trainer.AxolotlKDTrainer(*args, **kwargs)Custom trainer subclass for Knowledge Distillation (KD)
| - | tuple[HFRLTrainerBuilder | HFCausalTrainerBuilder, PeftModel | PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None] | +tuple['HFRLTrainerBuilder' | 'HFCausalTrainerBuilder', PeftModel | PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None] | Tuple of: - Trainer (Causal or RLHF) - Model - Tokenizer - PEFT config - Processor |