Compare commits
5 Commits
enable_tp
...
pretrain-d
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9eaae5925a | ||
|
|
d000851eeb | ||
|
|
effc4dc409 | ||
|
|
02629c7cdf | ||
|
|
78a4aa86d6 |
5
.github/workflows/tests-nightly.yml
vendored
5
.github/workflows/tests-nightly.yml
vendored
@@ -44,6 +44,11 @@ jobs:
|
|||||||
python-version: ${{ matrix.python_version }}
|
python-version: ${{ matrix.python_version }}
|
||||||
cache: 'pip' # caching pip dependencies
|
cache: 'pip' # caching pip dependencies
|
||||||
|
|
||||||
|
- name: upgrade pip
|
||||||
|
run: |
|
||||||
|
pip3 install --upgrade pip
|
||||||
|
pip3 install --upgrade packaging setuptools wheel
|
||||||
|
|
||||||
- name: Install PyTorch
|
- name: Install PyTorch
|
||||||
run: |
|
run: |
|
||||||
pip3 install torch==${{ matrix.pytorch_version }} --index-url https://download.pytorch.org/whl/cpu
|
pip3 install torch==${{ matrix.pytorch_version }} --index-url https://download.pytorch.org/whl/cpu
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ liger-kernel==0.4.2
|
|||||||
|
|
||||||
packaging==23.2
|
packaging==23.2
|
||||||
peft==0.14.0
|
peft==0.14.0
|
||||||
transformers>=4.46.3
|
transformers==4.47.0
|
||||||
tokenizers>=0.20.1
|
tokenizers>=0.20.1
|
||||||
accelerate==1.2.0
|
accelerate==1.2.0
|
||||||
datasets==3.1.0
|
datasets==3.1.0
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ class PretrainTokenizationStrategy(PromptTokenizingStrategy):
|
|||||||
seq + [self.tokenizer.eos_token_id] for seq in res["input_ids"]
|
seq + [self.tokenizer.eos_token_id] for seq in res["input_ids"]
|
||||||
]
|
]
|
||||||
res["attention_mask"] = [seq + [1] for seq in res["attention_mask"]]
|
res["attention_mask"] = [seq + [1] for seq in res["attention_mask"]]
|
||||||
|
res["labels"] = res["input_ids"].copy()
|
||||||
|
|
||||||
return res
|
return res
|
||||||
|
|
||||||
@@ -49,12 +50,16 @@ class PretrainTokenizationStrategy(PromptTokenizingStrategy):
|
|||||||
|
|
||||||
|
|
||||||
def load(tokenizer, cfg):
|
def load(tokenizer, cfg):
|
||||||
|
if cfg.pretraining_dataset:
|
||||||
|
cfg_ds = cfg.pretraining_dataset
|
||||||
|
else:
|
||||||
|
cfg_ds = cfg.datasets
|
||||||
strat = PretrainTokenizationStrategy(
|
strat = PretrainTokenizationStrategy(
|
||||||
PretrainTokenizer(),
|
PretrainTokenizer(),
|
||||||
tokenizer,
|
tokenizer,
|
||||||
cfg.train_on_inputs,
|
cfg.train_on_inputs,
|
||||||
cfg.sequence_len,
|
cfg.sequence_len,
|
||||||
text_column=cfg.pretraining_dataset[0]["text_column"] or "text",
|
text_column=cfg_ds[0]["text_column"] or "text",
|
||||||
max_length=cfg.sequence_len * 64,
|
max_length=cfg.sequence_len * 64,
|
||||||
)
|
)
|
||||||
return strat
|
return strat
|
||||||
|
|||||||
@@ -66,10 +66,7 @@ class EvalFirstStepCallback(
|
|||||||
control: TrainerControl,
|
control: TrainerControl,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
if (
|
if args.eval_strategy == IntervalStrategy.STEPS and state.global_step == 1:
|
||||||
args.evaluation_strategy == IntervalStrategy.STEPS
|
|
||||||
and state.global_step == 1
|
|
||||||
):
|
|
||||||
control.should_evaluate = True
|
control.should_evaluate = True
|
||||||
return control
|
return control
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user