RL datasets: warn and drop unsalvageable over-length prompts post-truncate; add post-truncate filter; support alias config key 'excess_token_handling'
This commit is contained in:
15
transscribe.py
Normal file
15
transscribe.py
Normal file
@@ -0,0 +1,15 @@
|
||||
"""Print one document from the TTS collection that has no transcription yet.

The MongoDB connection string is read from the ``MONGO_URI`` environment
variable so that credentials are never stored in the source tree.
"""
import os

import pymongo

# NOTE: never hard-code the connection string here. A URI containing the root
# password was previously committed in this file; that credential should be
# considered leaked and rotated.
MONGO_URI = os.environ.get("MONGO_URI")
if not MONGO_URI:
    raise SystemExit(
        "MONGO_URI environment variable is not set; "
        "export the MongoDB connection string before running this script."
    )

DATABASE_NAME = "tts_data"
COLLECTION_NAME = "tts_data"

# Using the client as a context manager closes the connection pool on exit,
# including when the query raises.
with pymongo.MongoClient(MONGO_URI) as client:
    db = client[DATABASE_NAME]
    collection = db[COLLECTION_NAME]

    # Fetch a single document that does not have a "transcription" field.
    # find_one avoids leaving a half-consumed cursor open on the server.
    document = collection.find_one({"transcription": {"$exists": False}})
    if document is not None:
        print(document)
|
||||
Reference in New Issue
Block a user