From 4396f8c3a2f0846d498045479cfcc742b263a173 Mon Sep 17 00:00:00 2001 From: Quarto GHA Workflow Runner Date: Mon, 9 Dec 2024 19:04:11 +0000 Subject: [PATCH] Built site for gh-pages --- .nojekyll | 2 +- docs/dataset-formats/index.html | 10 +- docs/rlhf.html | 33 ++- index.html | 410 +++++++++++++++++--------------- search.json | 6 +- sitemap.xml | 56 ++--- 6 files changed, 275 insertions(+), 242 deletions(-) diff --git a/.nojekyll b/.nojekyll index 3c0ef2ac9..02b3dc3b4 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -9efd6963 \ No newline at end of file +7c6a53ca \ No newline at end of file diff --git a/docs/dataset-formats/index.html b/docs/dataset-formats/index.html index 805189389..1592d1745 100644 --- a/docs/dataset-formats/index.html +++ b/docs/dataset-formats/index.html @@ -363,7 +363,7 @@ Description - + Pre-training @@ -371,7 +371,7 @@ Description Data format for a pre-training completion task. - + Instruction Tuning @@ -379,7 +379,7 @@ Description Instruction tuning formats for supervised fine-tuning. - + Conversation @@ -387,7 +387,7 @@ Description Conversation format for supervised fine-tuning. - + Template-Free @@ -395,7 +395,7 @@ Description Construct prompts without a template. - + Custom Pre-Tokenized Dataset diff --git a/docs/rlhf.html b/docs/rlhf.html index 39be52d9d..7c2a41d23 100644 --- a/docs/rlhf.html +++ b/docs/rlhf.html @@ -367,20 +367,37 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - path: argilla/ultrafeedback-binarized-preferences-cleaned type: chat_template.argilla +
+

KTO

+
rl: kto
+rl_beta: 0.5
+kto_desirable_weight: 0.2
+
+remove_unused_columns: false
+
+datasets:
+  - path: argilla/ultrafeedback-binarized-preferences-cleaned-kto
+    type: llama3.ultra
+    split: train
+
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: true
+

Using local dataset files

-
datasets:
-  - ds_type: json
-    data_files:
-      - orca_rlhf.jsonl
-    split: train
-    type: chatml.intel
+
datasets:
+  - ds_type: json
+    data_files:
+      - orca_rlhf.jsonl
+    split: train
+    type: chatml.intel

Trl autounwrap for peft

Trl supports autounwrapping peft models, so that a ref model does not need to be additionally loaded, leading to less VRAM needed. This is on by default. To turn it off, pass the following config.

-
# load ref model when adapter training.
-rl_adapter_ref_model: true
+
# load ref model when adapter training.
+rl_adapter_ref_model: true
diff --git a/index.html b/index.html index a4f2cd788..e05b7606f 100644 --- a/index.html +++ b/index.html @@ -294,8 +294,9 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin