-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathsemgrep_rules.py
executable file
·81 lines (67 loc) · 3.79 KB
/
semgrep_rules.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import argparse
import requests
import semgrep
import yaml
from yaml import dump, load
from yaml.loader import SafeLoader
# Rule-pack URLs to download, keyed by the language name accepted on the command line.
RULES_LIST: dict[str, list[str]] = {
    'python': [
        'https://semgrep.dev/c/r/python',
    ],
    'javascript': [
        'https://semgrep.dev/c/r/javascript',
    ],
    'typescript': [
        'https://semgrep.dev/c/r/typescript',
    ],
}
# Rule ids that must be dropped from the downloaded rule sets, keyed by language.
EXCLUDE_LIST: dict[str, list[str]] = {
    'python': [
        'python.django.security.audit.django-ratelimit.missing-ratelimit.missing-ratelimit',
        'python.lang.maintainability.is-function-without-parentheses.is-function-without-parentheses',
        'python.django.security.audit.xss.direct-use-of-httpresponse.direct-use-of-httpresponse',
        'python.lang.security.audit.non-literal-import.non-literal-import',
        'python.jwt.security.audit.jwt-exposed-data.jwt-python-exposed-data',
        'python.requests.best-practice.use-raise-for-status.use-raise-for-status',
        # This rule currently misbehaves; it would be worth re-enabling once it works.
        'python.lang.correctness.common-mistakes.string-concat-in-list.string-concat-in-list',
        'python.lang.best-practice.logging-error-without-handling.logging-error-without-handling',
        'python.lang.security.use-defusedcsv.use-defusedcsv',
        # Superseded by an extended internal rule.
        'python.django.performance.access-foreign-keys.access-foreign-keys',
        'python.lang.best-practice.unspecified-open-encoding.unspecified-open-encoding',
        'python.django.security.audit.unvalidated-password.unvalidated-password',
    ],
    'javascript': [],
    'typescript': [
        'typescript.react.security.audit.react-no-refs.react-no-refs',
        # Temporary exclusion.
        'typescript.react.portability.i18next.i18next-key-format.i18next-key-format',
    ],
}
def selective_representer(dumper, data):
    r"""Represent a string as a YAML scalar, choosing the style from its content.

    Strings that contain a newline (``\n``) are passed to the dumper with the
    literal block style ``|``; all other strings are passed with ``style=None``.

    Args:
        dumper: Object exposing ``represent_scalar(tag, value, style=...)``.
        data: The string being serialized.

    Returns:
        Whatever ``dumper.represent_scalar`` returns for the string.
    """
    if '\n' in data:
        scalar_style = '|'
    else:
        scalar_style = None
    return dumper.represent_scalar('tag:yaml.org,2002:str', data, style=scalar_style)
# Register selective_representer as the YAML representer used for `str` values.
yaml.add_representer(str, selective_representer)
def _download_rules(url: str, excluded_ids: set[str], headers=None, timeout: float = 30.0) -> list[dict]:
    """Download one YAML rules file and return its rules minus the excluded ones.

    Args:
        url: Location of a YAML document with a top-level ``rules`` list.
        excluded_ids: Rule ids to drop from the downloaded list.
        headers: Optional HTTP headers forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The downloaded rule dicts whose ``id`` is not in ``excluded_ids``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the document has no ``rules`` key or a rule has no ``id``.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail here with a clear HTTP error instead of trying to parse an error page as YAML.
    response.raise_for_status()
    config_file = load(response.text, Loader=SafeLoader)
    return [rule for rule in config_file['rules'] if rule['id'] not in excluded_ids]


def get_rules(rules: list[str], rules_version: str):
    """Build ``./.semgrep_rules.yml`` from registry and repository rules.

    For every requested language the rule packs listed in ``RULES_LIST`` are
    downloaded; for python and typescript the rules file of the given version
    from the Seedstars/culture repository is downloaded as well. Rules whose id
    appears in ``EXCLUDE_LIST`` for that language are dropped, and the combined
    result is written to ``./.semgrep_rules.yml`` in the current directory.

    Args:
        rules: Language names; each must be a key of ``RULES_LIST`` and
            ``EXCLUDE_LIST``.
        rules_version: Path segment identifying the version directory of the
            repository rules.

    Raises:
        KeyError: If a language is not present in ``RULES_LIST``/``EXCLUDE_LIST``.
        requests.HTTPError: If any download answers with an error status.
    """
    final_rules: list[dict] = []
    for rule_choice in rules:
        # Build the exclusion set once per language for constant-time lookups.
        excluded_ids = set(EXCLUDE_LIST[rule_choice])

        for registry_url in RULES_LIST[rule_choice]:
            # get rules from semgrep registry
            print(f'Downloading {rule_choice} rules from semgrep registry...')
            final_rules += _download_rules(
                registry_url,
                excluded_ids,
                headers={'User-Agent': f'Semgrep/{semgrep.__VERSION__}'},
            )

        # get rules from our github (only python and typescript have custom rules)
        if rule_choice in ('python', 'typescript'):
            print(f'Downloading {rule_choice} rules from our repository...')
            url = f'https://raw.githubusercontent.com/Seedstars/culture/master/code/validation/{rules_version}/semgrep_rules_{rule_choice}.yml'
            final_rules += _download_rules(url, excluded_ids)

    with open('./.semgrep_rules.yml', 'w', encoding='utf-8') as temp_rule_file:
        dump({'rules': final_rules}, temp_rule_file)
# Script entry point. Compare with `==`: the previous `in` was a substring test
# against '__main__' and would also match module names such as 'main'.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # One or more languages whose rules should be collected.
    parser.add_argument('-r', '--rules', choices=['python', 'javascript', 'typescript'], nargs='+', required=True)
    # Version passed to get_rules; a value is mandatory so that a bare `-v`
    # can no longer produce None.
    parser.add_argument('-v', '--version', type=str, required=True)
    args = parser.parse_args()
    get_rules(args.rules, args.version)