Compare commits
1 Commit
pretrain-d...base-model

| Author | SHA1 | Date |
|---|---|---|
|  | d657ff9c94 |  |
````diff
@@ -478,7 +478,7 @@ See [examples](examples) for quick start. It is recommended to duplicate and mod
 - model
 
 ```yaml
-base_model: ./llama-7b-hf # local or huggingface repo
+base_model: ./llama-7b-hf/ # local or huggingface repo
 ```
 Note: The code will load the right architecture.
````
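The note about loading the right architecture refers to the usual Hugging Face pattern: whether `base_model` is a local directory (with or without the trailing slash) or a Hub repo id, the model class is resolved from the checkpoint's `config.json`. A minimal sketch of that pattern, assuming the stock `transformers` AutoClass API rather than this repo's actual loading code:

```python
from transformers import AutoConfig, AutoModelForCausalLM

# base_model may be a local directory ("./llama-7b-hf" or "./llama-7b-hf/")
# or a Hugging Face Hub repo id; both resolve the same way.
base_model = "./llama-7b-hf/"

# config.json carries the architecture name (e.g. "LlamaForCausalLM"),
# so the right model class is picked without hard-coding it.
config = AutoConfig.from_pretrained(base_model)
print(config.architectures)  # e.g. ["LlamaForCausalLM"]

model = AutoModelForCausalLM.from_pretrained(base_model, config=config)
```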
```diff
@@ -12,7 +12,7 @@ liger-kernel==0.4.2
 packaging==23.2
 peft==0.14.0
-transformers==4.47.0
+transformers>=4.46.3
 tokenizers>=0.20.1
 accelerate==1.2.0
 datasets==3.1.0
```
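The pin on `transformers` is loosened from an exact release to a floor: any version at or above 4.46.3 now satisfies the requirement. A small illustrative check of that constraint (this snippet is not part of the repo; it just reuses the `packaging` library already listed above):

```python
from importlib.metadata import version
from packaging.specifiers import SpecifierSet

# ">=4.46.3" accepts 4.46.3, 4.47.0, and anything newer,
# whereas "==4.47.0" accepted exactly one release.
spec = SpecifierSet(">=4.46.3")
installed = version("transformers")
print(installed, "ok" if installed in spec else "too old")
```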
```diff
@@ -41,7 +41,6 @@ class PretrainTokenizationStrategy(PromptTokenizingStrategy):
         res["input_ids"] = [
             seq + [self.tokenizer.eos_token_id] for seq in res["input_ids"]
         ]
         res["attention_mask"] = [seq + [1] for seq in res["attention_mask"]]
-        res["labels"] = res["input_ids"].copy()
 
         return res
```
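This hunk appends an EOS token to every tokenized chunk and extends the attention mask to match, while dropping the per-chunk labels copy from this step. A minimal, self-contained sketch of the append pattern, using a stand-in `eos_token_id` and a hand-written batch instead of a real tokenizer:

```python
# Stand-in batch-tokenization output: a dict of parallel lists,
# shaped like what a Hugging Face tokenizer returns for a batch.
eos_token_id = 2  # illustrative; the real value comes from the tokenizer

res = {
    "input_ids": [[1, 15043, 3186], [1, 22172]],
    "attention_mask": [[1, 1, 1], [1, 1]],
}

# Append EOS to each sequence and keep the attention mask aligned,
# mirroring the two lines kept in the diff above.
res["input_ids"] = [seq + [eos_token_id] for seq in res["input_ids"]]
res["attention_mask"] = [seq + [1] for seq in res["attention_mask"]]

assert all(
    len(ids) == len(mask)
    for ids, mask in zip(res["input_ids"], res["attention_mask"])
)
```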
```diff
@@ -50,16 +49,12 @@ class PretrainTokenizationStrategy(PromptTokenizingStrategy):
 
 
 def load(tokenizer, cfg):
-    if cfg.pretraining_dataset:
-        cfg_ds = cfg.pretraining_dataset
-    else:
-        cfg_ds = cfg.datasets
     strat = PretrainTokenizationStrategy(
         PretrainTokenizer(),
         tokenizer,
         cfg.train_on_inputs,
         cfg.sequence_len,
-        text_column=cfg_ds[0]["text_column"] or "text",
+        text_column=cfg.pretraining_dataset[0]["text_column"] or "text",
         max_length=cfg.sequence_len * 64,
     )
     return strat
```
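With the if/else gone, `load` reads the text column directly from `cfg.pretraining_dataset`, falling back to `"text"` when the entry leaves it unset. A sketch of the values involved, using a stand-in `cfg` object (the repo's real config type may differ):

```python
from types import SimpleNamespace

# Stand-in config mirroring only the fields load() touches.
cfg = SimpleNamespace(
    pretraining_dataset=[{"text_column": None}],  # unset -> fallback
    train_on_inputs=False,
    sequence_len=2048,
)

# The `or "text"` in the diff above supplies the default column name.
text_column = cfg.pretraining_dataset[0]["text_column"] or "text"
max_length = cfg.sequence_len * 64  # room for many packed chunks per call
print(text_column, max_length)  # text 131072
```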