make it work with pythia in the cloud

This commit is contained in:
Wing Lian
2023-04-14 07:24:55 -04:00
parent ce24f5e246
commit 8d959a7e26
7 changed files with 352 additions and 70 deletions

View File

@@ -44,6 +44,7 @@ class JsonToJsonlConverter:
def convert(self, input_file_path, output_file_path):
    """Run the read -> parse -> serialize -> write pipeline for one input file.

    Args:
        input_file_path: Path handed to ``self.file_reader.read``.
        output_file_path: Accepted for interface compatibility; this method
            does not pass it to any collaborator. NOTE(review): presumably
            the writer is configured with its destination elsewhere --
            confirm against the caller.

    Returns:
        None. The serialized output is passed to ``self.file_writer.write``.
    """
    raw_text = self.file_reader.read(input_file_path)
    records = self.json_parser.parse(raw_text)
    # Optional filter, intentionally left disabled: the "vicuna cleaned"
    # data has rows with empty conversations.
    # records = [r for r in records if r["conversations"]]
    serialized = self.jsonl_serializer.serialize(records)
    self.file_writer.write(serialized)