openai_generate_prompt.py
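"""Build few-shot prompts that classify whether a parameter flowing into a given
sink is expected ("normal") or unexpected ("unusual"), either by direct
prediction or by a word-similarity check."""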
import random
from textwrap import dedent
from typing import Literal, Optional

from flows import FlowSpec

PromptType = Literal['DirectPrediction', 'SimilarityCheck']
samples = {
    'CommandInjection': {
        'normal': ['execute,command'],
        'unusual': []
    },
    'CodeInjection': {
        'normal': ['eval', 'execute', 'compile', 'render', 'callback', 'function', 'fn'],
        'unusual': []
    },
    'ReflectedXss': {
        'normal': ['send,content'],
        'unusual': []
    },
    'TaintedPath': {
        'normal': ['file', 'directory', 'path', 'cwd', 'source', 'input'],
        'unusual': []
    },
    'logging': {
        'normal': ['authkey', 'password', 'passcode', 'passphrase'],
        'unusual': []
    }
}
sink_descriptions = {
    'CommandInjection': 'arbitrary command line execution',
    'CodeInjection': 'arbitrary code execution',
    'ReflectedXss': 'reflected cross-site scripting',
    'TaintedPath': 'uncontrolled data used in path expression',
    'logging': ''
}
def get_prompt_from_csv(ground_truth, sink, prompt_type: PromptType, query_spec_tuple: Optional[tuple[FlowSpec, bool]] = None, text_file=None):
    """Assemble a few-shot prompt for `sink` from labelled ground-truth specs.

    If `query_spec_tuple` is given, its spec is appended as a final, unlabelled
    query and the example shuffle is seeded from that spec; otherwise a fixed
    seed is used.
    """
    ground_truth_list = list(ground_truth.items())
    ground_truth_list.sort(key=lambda x: str(x[0].param_repr()))
    full_str = ''
    if query_spec_tuple:
        current_spec, current_label = query_spec_tuple
        seed = hash(current_spec)
    else:
        # No query spec: keep the ordering deterministic and skip the query suffix.
        current_spec, current_label = None, False
        seed = 42
    random.Random(seed).shuffle(ground_truth_list)
    examples_count = 10
    # Type 1 (as code: predict by parameter name, function name, and parameter docstring)
    if prompt_type == 'DirectPrediction':
        # f.write(f'Classify the following parameter, function, and optional parameter document to "expected" or "unexpected" for arbitrary path expression.\n')
        for spec, is_unusual in ground_truth_list:
            if examples_count <= 0:
                break
            elif spec.sink == sink and (not query_spec_tuple or spec != current_spec):
                full_str += prompt_text_one(spec, is_unusual, sink)
                examples_count -= 1
        if current_spec:
            full_str += prompt_text_one(current_spec, current_label, sink, show_expected=False)
        if text_file:
            with open(text_file, 'w') as f:
                f.write(full_str)
        return full_str
    # Type 2 (similarity check)
    elif prompt_type == 'SimilarityCheck':
        normal_samples = samples[sink]['normal']
        # surround normal_samples with double quotes
        normal_samples_str = ', '.join([f'"{s}"' for s in normal_samples])
        full_str += f'Answer "yes" or "no" to whether the following words are similar to {normal_samples_str}.\n\n'
        for spec, is_unusual in ground_truth_list:
            if examples_count <= 0:
                break
            elif spec.sink == sink and (not query_spec_tuple or spec != current_spec):
                full_str += prompt_text_two(spec, is_unusual, normal_samples_str)
                examples_count -= 1
        if current_spec:
            full_str += prompt_text_two(current_spec, current_label, normal_samples_str, show_expected=False)
        if text_file:
            with open(text_file, 'w') as f:
                f.write(full_str)
        return full_str
    else:
        raise ValueError(f'Unknown prompt type {prompt_type}')

    # Type 3 (as text: predict by parameter name, function name, and parameter docstring)
    # with open(text_file, 'w') as f:
    #     ground_truth = get_ground_truth_dict(ground_truth_path=csv_file)
    #     ground_truth_list = list(ground_truth.items())
    #     random.shuffle(ground_truth_list)
    #     count = 0
    #     f.write(f'Classify the following parameter, function, and optional parameter document to "expected" or "unexpected" for arbitrary path expression.\n')
    #     for spec, is_unusual in ground_truth_list:
    #         if spec.sink == sink:
    #             param_doc = spec.param.param_doc.replace("\n", " ")
    #             txt = f'Parameter {spec.param.parameter} of function {spec.param.function} with parameter document "{param_doc}" is:'
    #             if count > 0:
    #                 txt += ' unexpected' if is_unusual else ' expected'
    #                 count -= 1
    #             txt += '\n'
    #             f.write(txt)
def prompt_text_one(spec, is_unusual, sink, show_expected: bool = True):
    """Render one direct-prediction example as a commented code snippet."""
    description = sink_descriptions[sink]
    param_doc = spec.param.param_doc.replace("\n", " ")
    # Parameter {spec.param.parameter} of function {spec.param.function} with parameter document "{param_doc}" is:
    if sink == 'logging':
        txt = f'''
function f({spec.param.parameter}) {{
    console.log({spec.param.parameter});
}}
// In the above function f, the parameter "{spec.param.parameter}" is being logged, which likely exposes'''
        if show_expected:
            txt += ' sensitive data.' if is_unusual else ' insensitive data.'
        txt += '\n'
    else:
        txt = f'''
/**
 * @param {spec.param.parameter} - {param_doc}
 */
function {spec.param.function}({spec.param.parameter}) {{
}}
// In the above function "{spec.param.function}", the parameter "{spec.param.parameter}" flows into {sink} sink ({description}), which is'''
        if show_expected:
            txt += ' unexpected.' if is_unusual else ' expected.'
        txt += '\n'
    txt = dedent(txt)
    return txt
def prompt_text_two(current_spec, is_unusual, normal_samples_str, show_expected: bool = True):
    """Render one similarity-check example as a yes/no question."""
    # param_doc = current_spec.param.param_doc.replace("\n", " ")
    name = current_spec.param.function + ' ' + current_spec.param.parameter
    name = name.strip()
    txt = f'Q: is "{name}" similar to any of {normal_samples_str}?\n'
    txt += 'A:'
    if show_expected:
        txt += ' No.' if is_unusual else ' Yes.'
    txt += '\n'
    return txt
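
# Minimal usage sketch (an assumption, not part of the original module): it presumes
# `ground_truth` maps FlowSpec objects (with `.sink`, `.param.parameter`, `.param.function`,
# `.param.param_doc`, and `param_repr()`, as the functions above expect) to bool labels.
# The `load_ground_truth` helper and `query_spec` value are hypothetical.
#
# if __name__ == '__main__':
#     ground_truth = load_ground_truth('ground_truth.csv')   # hypothetical loader
#     query_spec = next(iter(ground_truth))                   # any FlowSpec to classify
#     prompt = get_prompt_from_csv(ground_truth, 'TaintedPath', 'DirectPrediction',
#                                  query_spec_tuple=(query_spec, False),
#                                  text_file='prompt.txt')
#     print(prompt)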