Compare commits
9 Commits
shared-pre
...
print_venv
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
454eea049f | ||
|
|
5a961ecadf | ||
|
|
b37ddf9778 | ||
|
|
bf38e507fb | ||
|
|
d00bd99279 | ||
|
|
2b41bfe9eb | ||
|
|
5bbbd599b4 | ||
|
|
26c782183d | ||
|
|
8065fed126 |
@@ -22,9 +22,11 @@ RUN apt-get update \
|
|||||||
&& mkdir /root/.conda \
|
&& mkdir /root/.conda \
|
||||||
&& bash Miniconda3-latest-Linux-x86_64.sh -b \
|
&& bash Miniconda3-latest-Linux-x86_64.sh -b \
|
||||||
&& rm -f Miniconda3-latest-Linux-x86_64.sh \
|
&& rm -f Miniconda3-latest-Linux-x86_64.sh \
|
||||||
&& conda create -n "py${PYTHON_VERSION}" python="${PYTHON_VERSION}"
|
&& conda create -n "axolotl-py${PYTHON_VERSION}" python="${PYTHON_VERSION}" \
|
||||||
|
&& conda init bash \
|
||||||
|
&& echo "conda activate axolotl-py${PYTHON_VERSION}" >> ~/.bashrc
|
||||||
|
|
||||||
ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}"
|
ENV PATH="/root/miniconda3/envs/axolotl-py${PYTHON_VERSION}/bin:${PATH}"
|
||||||
|
|
||||||
WORKDIR /workspace
|
WORKDIR /workspace
|
||||||
|
|
||||||
|
|||||||
@@ -22,9 +22,11 @@ RUN apt-get update \
|
|||||||
&& mkdir /root/.conda \
|
&& mkdir /root/.conda \
|
||||||
&& bash Miniconda3-latest-Linux-x86_64.sh -b \
|
&& bash Miniconda3-latest-Linux-x86_64.sh -b \
|
||||||
&& rm -f Miniconda3-latest-Linux-x86_64.sh \
|
&& rm -f Miniconda3-latest-Linux-x86_64.sh \
|
||||||
&& conda create -n "py${PYTHON_VERSION}" python="${PYTHON_VERSION}"
|
&& conda create -n "axolotl-py${PYTHON_VERSION}" python="${PYTHON_VERSION}" \
|
||||||
|
&& conda init bash \
|
||||||
|
&& echo "conda activate axolotl-py${PYTHON_VERSION}" >> ~/.bashrc
|
||||||
|
|
||||||
ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}"
|
ENV PATH="/root/miniconda3/envs/axolotl-py${PYTHON_VERSION}/bin:${PATH}"
|
||||||
|
|
||||||
WORKDIR /workspace
|
WORKDIR /workspace
|
||||||
|
|
||||||
|
|||||||
@@ -22,9 +22,11 @@ RUN apt-get update \
|
|||||||
&& mkdir /root/.conda \
|
&& mkdir /root/.conda \
|
||||||
&& bash Miniconda3-latest-Linux-x86_64.sh -b \
|
&& bash Miniconda3-latest-Linux-x86_64.sh -b \
|
||||||
&& rm -f Miniconda3-latest-Linux-x86_64.sh \
|
&& rm -f Miniconda3-latest-Linux-x86_64.sh \
|
||||||
&& conda create -n "py${PYTHON_VERSION}" python="${PYTHON_VERSION}"
|
&& conda create -n "axolotl-py${PYTHON_VERSION}" python="${PYTHON_VERSION}" \
|
||||||
|
&& conda init bash \
|
||||||
|
&& echo "conda activate axolotl-py${PYTHON_VERSION}" >> ~/.bashrc
|
||||||
|
|
||||||
ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}"
|
ENV PATH="/root/miniconda3/envs/axolotl-py${PYTHON_VERSION}/bin:${PATH}"
|
||||||
|
|
||||||
WORKDIR /workspace
|
WORKDIR /workspace
|
||||||
|
|
||||||
|
|||||||
@@ -51,6 +51,10 @@ description: Frequently asked questions
|
|||||||
> pad_token: "..."
|
> pad_token: "..."
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
|
**Q: `IterableDataset error` or `KeyError: 'input_ids'` when using the `preprocess` CLI**
|
||||||
|
|
||||||
|
> A: This happens when the `preprocess` CLI is used with `pretraining_dataset:` (which causes the `IterableDataset error`) or with `skip_prepare_dataset: true` (which causes the `KeyError: 'input_ids'`). Please use the `axolotl train` CLI directly instead, as these datasets are prepared on demand.
|
||||||
|
|
||||||
### Chat templates
|
### Chat templates
|
||||||
|
|
||||||
**Q: `jinja2.exceptions.UndefinedError: 'dict object' has no attribute 'content' / 'role' / ____`**
|
**Q: `jinja2.exceptions.UndefinedError: 'dict object' has no attribute 'content' / 'role' / ____`**
|
||||||
|
|||||||
@@ -35,6 +35,12 @@ def do_preprocess(cfg: DictDefault, cli_args: PreprocessCliArgs) -> None:
|
|||||||
check_accelerate_default_config()
|
check_accelerate_default_config()
|
||||||
check_user_token()
|
check_user_token()
|
||||||
|
|
||||||
|
for key in ["skip_prepare_dataset", "pretraining_dataset"]:
|
||||||
|
if cfg.get("key"):
|
||||||
|
raise ValueError(
|
||||||
|
f"You have set `{key}:`. `preprocess` is not needed. Run the `axolotl train` CLI directly instead."
|
||||||
|
)
|
||||||
|
|
||||||
if not cfg.dataset_prepared_path:
|
if not cfg.dataset_prepared_path:
|
||||||
msg = (
|
msg = (
|
||||||
Fore.RED
|
Fore.RED
|
||||||
|
|||||||
@@ -526,8 +526,9 @@ def merge_datasets(datasets: list[Dataset], cfg: DictDefault) -> Dataset:
|
|||||||
if len(datasets) == 1:
|
if len(datasets) == 1:
|
||||||
ds = datasets[0]
|
ds = datasets[0]
|
||||||
|
|
||||||
# Do not shuffle if curriculum sampling is enabled
|
# Do not shuffle if curriculum sampling is enabled or
|
||||||
if cfg.curriculum_sampling:
|
# shuffle_merged_datasets is disabled
|
||||||
|
if cfg.curriculum_sampling or not cfg.shuffle_merged_datasets:
|
||||||
return ds
|
return ds
|
||||||
|
|
||||||
return ds.shuffle(seed=cfg.seed)
|
return ds.shuffle(seed=cfg.seed)
|
||||||
|
|||||||
@@ -609,6 +609,9 @@ def prepare_opinionated_env(cfg):
|
|||||||
if cfg.qlora_sharded_model_loading:
|
if cfg.qlora_sharded_model_loading:
|
||||||
# model loading is forked after the tokenizer
|
# model loading is forked after the tokenizer
|
||||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||||
|
if cfg.sample_packing:
|
||||||
|
# multipack parallel packing sampler defaults to using fork
|
||||||
|
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||||
|
|
||||||
|
|
||||||
def setup_trainer(
|
def setup_trainer(
|
||||||
|
|||||||
Reference in New Issue
Block a user