-
Notifications
You must be signed in to change notification settings - Fork 19
/
convert-chatgpt-conversation.py
61 lines (47 loc) · 1.97 KB
/
convert-chatgpt-conversation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import os
import json
class Message:
    """A single chat message as read from an exported conversation.

    Attributes:
        role: Sender of the message (the conversion script in this file
            distinguishes "user" and "assistant").
        content: Message payload; stored exactly as supplied.
        create_time: Creation timestamp; stored exactly as supplied.
        model: Optional model identifier, ``None`` when not given.
    """

    def __init__(self, role, content, create_time, model=None):
        """Store the given fields on the instance without any conversion."""
        self.role, self.content = role, content
        self.create_time, self.model = create_time, model
class Discussion:
    """A titled conversation made up of messages.

    Attributes:
        messages: The messages belonging to this discussion, stored as given.
        create_time: Creation timestamp; stored exactly as supplied.
        title: Title of the discussion.
    """

    def __init__(self, messages, create_time, title):
        """Store the given fields on the instance without any conversion."""
        (self.messages,
         self.create_time,
         self.title) = messages, create_time, title
def _extract_pairs(messages, max_question_len=300):
    """Pair each user question with the assistant reply that follows it.

    Args:
        messages: Iterable of dicts whose keys match the ``Message``
            constructor (``role``, ``content``, ``create_time`` and
            optionally ``model``).
        max_question_len: Questions longer than this many characters are
            dropped together with their reply.

    Returns:
        A list of ``{'question': ..., 'answer': ...}`` dicts in message order.
    """
    pairs = []
    pending_question = ""
    for message_data in messages:
        message = Message(**message_data)
        if not message.content:
            continue
        # Only the first content part is used.
        # NOTE(review): assumes content is a list of strings -- confirm
        # against the export format.
        text = message.content[0]
        if message.role == "user":
            # A new question replaces any unanswered one. An over-long
            # question leaves nothing pending, so the reply to it is
            # discarded instead of leaking onto the next question.
            pending_question = text if len(text) <= max_question_len else ""
        elif message.role == "assistant" and pending_question and text:
            # An answer is only accepted when a question is waiting for it;
            # replies without a preceding question are ignored.
            pairs.append({'question': pending_question, 'answer': text})
            pending_question = ""
    return pairs


json_dir = '.'
output_file_path = 'conversations.json'

# Get a list of all JSON files in the directory. The output file is excluded
# so that re-running the script does not try to parse its own previous result
# as a list of discussions. Sorting makes the output order reproducible,
# because os.listdir returns entries in arbitrary order.
_output_abspath = os.path.abspath(output_file_path)
json_files = sorted(
    f for f in os.listdir(json_dir)
    if f.endswith('.json')
    and os.path.abspath(os.path.join(json_dir, f)) != _output_abspath
)

question_answer_pairs = []

for file in json_files:
    # JSON text is UTF-8; do not rely on the platform default encoding.
    with open(os.path.join(json_dir, file), 'r', encoding='utf-8') as f:
        discussions = json.load(f)

    # Extract the user and assistant messages into question/answer pairs
    for discussion in discussions:
        question_answer_pairs.extend(_extract_pairs(discussion['messages']))

# Save the collected question/answer pairs to a JSON file
with open(output_file_path, 'w', encoding='utf-8') as f:
    json.dump(question_answer_pairs, f, indent=2)