This commit is contained in:
Dan Saunders
2025-06-09 20:38:46 +00:00
parent 8f75136ad3
commit 4f39aeefb9
3 changed files with 9 additions and 3 deletions

View File

@@ -64,7 +64,9 @@ class TokenizedPromptDataset(Dataset):
desc="Strategy Filtering Rows", desc="Strategy Filtering Rows",
) )
import ipdb; ipdb.set_trace() import ipdb
ipdb.set_trace()
return dataset.map( return dataset.map(
self.prompt_tokenizer.tokenize_prompt, self.prompt_tokenizer.tokenize_prompt,

View File

@@ -67,7 +67,9 @@ class PromptTokenizingStrategy(abc.ABC):
LOG.warning("Empty text requested for tokenization.") LOG.warning("Empty text requested for tokenization.")
return empty return empty
import ipdb; ipdb.set_trace() import ipdb
ipdb.set_trace()
result = self.tokenizer( result = self.tokenizer(
prompt, prompt,

View File

@@ -486,7 +486,9 @@ def get_dataset_wrapper(
f"Loading dataset: {config_dataset['path']} with base_type: {d_base_type} and prompt_style: {d_prompt_style}" f"Loading dataset: {config_dataset['path']} with base_type: {d_base_type} and prompt_style: {d_prompt_style}"
) )
import ipdb; ipdb.set_trace() import ipdb
ipdb.set_trace()
if ( if (
isinstance(dataset, Dataset) isinstance(dataset, Dataset)