Lint convert.py
This commit is contained in:
@@ -1,47 +1,76 @@
|
||||
"""Module containing File Reader, File Writer, Json Parser, and Jsonl Serializer classes"""
|
||||
|
||||
|
||||
import json
|
||||
import sys
|
||||
|
||||
|
||||
class FileReader:
    """
    Reads a file and returns its contents as a string
    """

    def read(self, file_path):
        """
        Return the entire text content of ``file_path``.

        The file is decoded as UTF-8 explicitly rather than with the
        platform's default encoding, so the result does not depend on the
        machine's locale settings.

        Args:
            file_path: Path of the file to read.

        Returns:
            The file's content as a single string.

        Raises:
            OSError: If the file cannot be opened (e.g. it does not exist).
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        # Only the explicit-encoding form of the open() call is kept; the
        # older `open(file_path, "r")` variant relied on the locale default.
        with open(file_path, encoding="utf-8") as file:
            return file.read()
|
||||
|
||||
|
||||
class FileWriter:
    """
    Writes a string to a file
    """

    def __init__(self, file_path):
        """
        Remember the destination path; the file is not opened until write().

        Args:
            file_path: Path of the file that write() will create or overwrite.
        """
        self.file_path = file_path

    def write(self, content):
        """
        Write ``content`` to the configured path, replacing existing content.

        The file is encoded as UTF-8 explicitly rather than with the
        platform's default encoding, so output is identical across machines.

        Args:
            content: The string to write.

        Raises:
            OSError: If the file cannot be opened for writing.
        """
        # Only the explicit-encoding form of the open() call is kept; the
        # older `open(self.file_path, "w")` variant relied on the locale default.
        with open(self.file_path, "w", encoding="utf-8") as file:
            file.write(content)
|
||||
|
||||
|
||||
class StdoutWriter:
    """
    Emits a string on standard output, followed by a newline
    """

    def write(self, content):
        """
        Send ``content`` and then a line terminator to ``sys.stdout``.

        Args:
            content: The string to emit.
        """
        # Two separate writes, payload first, then the trailing newline.
        for chunk in (content, "\n"):
            sys.stdout.write(chunk)
|
||||
|
||||
|
||||
class JsonParser:
    """
    Decodes JSON text into the corresponding Python value
    """

    def parse(self, content):
        """
        Decode ``content`` with ``json.loads`` and hand back the result.

        Args:
            content: JSON document as text.

        Returns:
            The decoded Python object.
        """
        decoded = json.loads(content)
        return decoded
|
||||
|
||||
|
||||
class JsonlSerializer:
    """
    Turns an iterable of JSON-serializable items into JSONL text
    """

    def serialize(self, data):
        """
        Encode each item of ``data`` with ``json.dumps``, one per line.

        Args:
            data: Iterable of JSON-serializable items.

        Returns:
            The encoded items joined by newlines, with no trailing newline.
        """
        return "\n".join(json.dumps(record) for record in data)
|
||||
|
||||
|
||||
class JsonToJsonlConverter:
|
||||
"""
|
||||
Converts a JSON file to JSONL
|
||||
"""
|
||||
|
||||
def __init__(self, file_reader, file_writer, json_parser, jsonl_serializer):
|
||||
self.file_reader = file_reader
|
||||
self.file_writer = file_writer
|
||||
self.json_parser = json_parser
|
||||
self.jsonl_serializer = jsonl_serializer
|
||||
|
||||
def convert(self, input_file_path, output_file_path):
|
||||
def convert(
|
||||
self, input_file_path, output_file_path
|
||||
): # pylint: disable=unused-argument
|
||||
content = self.file_reader.read(input_file_path)
|
||||
data = self.json_parser.parse(content)
|
||||
# data = [r for r in data if r["conversations"]] # vicuna cleaned has rows with empty conversations
|
||||
|
||||
Reference in New Issue
Block a user