# Source: RelationalIntentionGraph/abstract_generate_api.py at main · HKUST-KnowComp/RelationalIntentionGraph · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import json
from time import sleep
from tqdm import tqdm
import random
import argparse
import logging
from collections import Counter
from gpt4_prompting import get_message_from_api as gpt4_api
from gpt35_prompting import get_message_from_api as gpt35_api
# 50578

def generate(args):
    """Generate abstract-intention phrases for every intention in a JSONL file.

    Reads ``args.input_file`` (one JSON object per line, each carrying an
    ``'Intentions'`` iterable), optionally keeps a random sample of
    ``args.sample_num`` sessions, queries the selected chat API once per
    intention, and writes every session -- extended with an
    ``'abstract_generation_result'`` list -- to ``args.output_file`` as one
    JSON object per line.

    Args:
        args: Namespace providing ``input_file``, ``output_file``,
            ``sample_num`` and ``api_type`` ("gpt-35" or "gpt-4").

    Returns:
        None. Results are written to ``args.output_file``; failures are
        logged (also to ``generate_log.log``) and the intention is skipped.
    """
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    file_handler = logging.FileHandler('generate_log.log')
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
    logging.getLogger().addHandler(file_handler)

    # Stream the JSONL input; blank lines (e.g. a trailing newline) are
    # skipped instead of crashing json.loads.
    with open(args.input_file, "r", encoding="utf-8") as file:
        sessions = [json.loads(line) for line in file if line.strip()]

    if args.sample_num:
        # random.sample raises when asked for more items than exist, so cap
        # the request at the number of available sessions.
        sample_size = min(args.sample_num, len(sessions))
        if sample_size < args.sample_num:
            logging.warning(
                "Requested %d samples but only %d sessions are available.",
                args.sample_num, len(sessions),
            )
        sessions = random.sample(sessions, sample_size)

    with open(args.output_file, "w", encoding="utf-8") as file:
        for session in tqdm(sessions):
            tmp = []
            for intention in session['Intentions']:
                prompt = f'''I will give you an INTENTION. You need to give several phrases containing 1-3
                words for the ABSTRACT INTENTION of this INTENTION.
                You must return your answer in the following format: phrases1,phrases2,phrases3,....
                These abstract intention words should fulfill the following requirements.
                1. The ABSTRACT INTENTION phrases can well represent the INTENTION.
                2. The ABSTRACT INTENTION phrases don't have a lot of less relevant word meanings. For example, "spring" is not a good abstract intention word because it can represent both a coiled metal device and the season of the year.
                3. The ABSTRACT INTENTION phrases of the same INTENTION cannot be semantically similar with each other. For example, health and wellness are two close synonyms, so they can't be together.
                # You can refer to the following example.
                # Example1:
                INTENTION: PersonX enjoys exercising in the gym.
                Your answer: sports,health lifestyle,fitness facility,
                # Example2:
                INTENTION: PersonX enjoys hiking.
                Your answer: sports,outdoor activity,hiking
                # Here is the INTENTION:
                INTENTION: {intention}.
                Your answer:
                '''
                try:
                    if args.api_type == "gpt-35":
                        reply, prompt_tokens, completion_tokens = gpt35_api(prompt)
                    elif args.api_type == "gpt-4":
                        reply, prompt_tokens, completion_tokens = gpt4_api(prompt)
                    else:
                        raise ValueError("Invalid API type.")
                except Exception as e:
                    # Best effort: record the failure, back off briefly, and
                    # move on to the next intention.
                    logging.error(f"Error occurred on element intention: {intention}. Error message: {e}")
                    sleep(2)
                    continue

                # Strip surrounding whitespace from every phrase (the original
                # loop rebound a local and left the list untouched) and drop
                # the empty phrases produced by trailing/double commas.
                answers = [phrase.strip() for phrase in reply.split(",")]
                answers = [phrase for phrase in answers if phrase]
                tmp.append({
                    "INTENTION": intention,
                    "ABSTRACT INTENTION": answers,
                    "prompt_tokens": prompt_tokens,
                    "completion_tokens": completion_tokens,
                    "api_type": args.api_type
                })
            session['abstract_generation_result'] = tmp
            json.dump(session, file)
            file.write("\n")


def static(results):
    """Print overlap statistics for generated abstract intentions.

    Args:
        results: Sequence of dicts, each with an ``'ABSTRACT INTENTION'``
            list of hashable phrases and an ``'api_type'`` entry; items whose
            ``'api_type'`` is "gpt-4" or "gpt-35" must also carry
            ``'prompt_tokens'`` and ``'completion_tokens'``.

    Prints:
        1. The share of distinct phrases that occur two or more times.
        2. The share of result pairs that have at least one phrase in common.

    Both ratios are reported as 0.0 when there is nothing to divide by
    (no phrases, or fewer than two results) instead of raising
    ``ZeroDivisionError``.
    """
    from itertools import combinations

    pricing = {
        "gpt-35": {"prompt_tokens": 0.0015, "completion_tokens": 0.002},
        "gpt-4": {"prompt_tokens": 0.03, "completion_tokens": 0.06},
    }

    # NOTE(review): total_cost is accumulated but never printed or returned;
    # the rates also look like per-1K-token prices while being multiplied by
    # raw token counts -- confirm the intended unit before reporting it.
    total_cost = 0
    abstract_intentions = []
    # Collect every phrase to measure overlap between conceptualizations.
    for item in results:
        abstract_intentions.extend(item['ABSTRACT INTENTION'])
        rates = pricing.get(item['api_type'])
        if rates is not None:
            total_cost += item['prompt_tokens'] * rates['prompt_tokens']
            total_cost += item['completion_tokens'] * rates['completion_tokens']

    counter = Counter(abstract_intentions)
    repeated = sum(1 for occurrences in counter.values() if occurrences >= 2)
    ratio = repeated / len(counter) if counter else 0.0
    # Previously observed values on the project data:
    #   share of abstract intention words appearing two or more times: 0.29295426452410384
    #   share of overlapping conceptualization pairs: 0.018081677070159796
    print('出现两次及以上的abstract intention word占所有word的比例为：', ratio)

    # Build each phrase set once rather than once per compared pair.
    phrase_sets = [set(item['ABSTRACT INTENTION']) for item in results]
    overlapping = sum(
        1 for left, right in combinations(phrase_sets, 2) if left & right
    )
    total_pairs = len(results) * (len(results) - 1) / 2
    pair_ratio = overlapping / total_pairs if total_pairs else 0.0
    print('conceptualization之间的交叉比例：', pair_ratio)


def main():
    """Parse the command-line options and run the generation pipeline.

    The random module is seeded with a fixed value so that ``--sample_num``
    selects the same sessions on every run.
    """
    # (flag, type, default, help) for every supported option.
    option_table = (
        ('--input_file', str, "./week4/gpt-35-turbo_answer_0_intentions.json", "Path to the input file."),
        ('--output_file', str, "./week4/abstract_intention.json", "Path to the output file."),
        ('--sample_num', int, None, "Sample number of total pairs."),
        ('--api_type', str, "gpt-35", "API type to use."),
    )
    cli = argparse.ArgumentParser()
    for flag, value_type, fallback, description in option_table:
        cli.add_argument(flag, type=value_type, default=fallback, help=description)

    random.seed(8)
    generate(cli.parse_args())


# Run the pipeline only when this file is executed as a script, not on import.
if __name__=="__main__":
    main()