-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathanalysis.py
2364 lines (2321 loc) · 131 KB
/
analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# (c) 2017-2020, Lionel PRAT <lionel.prat9@gmail.com>
# Version 1.02
# Analysis by clamav extraction and yara rules
# All rights reserved.
import logging
import importlib
import pydot
import hashlib
import shutil
import os
import json, pprint
import tempfile
import yara
import re
import errno
from datetime import datetime, timedelta
import subprocess
import sys, getopt
import collections
import zlib
import unidecode
import zipfile
import traceback
import cfscrape
import base64
from virus_total_apis import PublicApi as VirusTotalPublicApi
from OTXv2 import OTXv2
from OTXv2 import IndicatorTypes
import pymisp
#TODO: interesting projet: https://github.com/jacob-baines/elfparser
## file[path], direcory_extract[path], graph[bool]
#verify clamscan present, or verify ENV CLAMSCAN_PATH
###########VAR THUG########
# Default browser fingerprint handed to Thug for URL analysis;
# overridable on the command line via -U/--useragent and -R/--referer.
useragent='win7ie90'
referer='https://mail.google.com'
###########################
#Check THUG if present?
# NOTE(review): this relies on `importlib.util` being reachable after a plain
# `import importlib`; an explicit `import importlib.util` would be safer — confirm.
foundThug = importlib.util.find_spec("thug")
if foundThug:
    from thug.ThugAPI import ThugAPI
    foundThug = True
    class ThugurlAPI(ThugAPI):
        """Thin wrapper around ThugAPI: analyze one remote URL with
        file + JSON logging written into a caller-chosen directory."""
        def __init__(self):
            ThugAPI.__init__(self)
        def analyze(self, url, useragent, referer, logdir):
            # Impersonate the requested browser (default: IE 9.0 on Windows 7)
            self.set_useragent(useragent)
            # Referer sent with the first request (default: https://mail.google.com)
            self.set_referer(referer)
            # Enable file logging mode
            self.set_file_logging()
            # Enable JSON logging mode (requires file logging mode enabled)
            self.set_json_logging()
            # [IMPORTANT] The following three steps should be implemented (in the exact
            # order of this example) almost in every situation when you are going to
            # analyze a remote site.
            # Initialize logging
            self.log_init(url)
            # choose the directory that receives the logs
            self.set_log_dir(logdir)
            # Run analysis
            self.run_remote(url)
            # Log analysis results
            self.log_event()
else:
    foundThug = False
######GLOBAL VAR######
# IOCs aggregated across every scanned object, keyed by indicator type (lowercased).
ioc_global = {}
# OSINT service credentials — presumably populated at startup from the
# environment variables named in usage() (VT_KEY, MISP_KEY, OTX_KEY, ...); verify in option parsing.
api_vt=""
api_misp=""
host_misp=""
api_otx=""
api_intezer=""
api_xforce=""
pass_xforce=""
api_hybrid=""
# Minimum yara score a sample must reach before OSINT lookups are made.
osint_scoremin=3
# Master switch for all OSINT lookups (-O/--osint).
osint = False
rate_vt=4 #4/min
# Set to False once the VT API answers with a quota/key error, to stop querying.
stop_vt = True
# Java decompilation support (-J) using the procyon decompiler binary below.
javadecomp = False
path_procyon = '/usr/bin/procyon'
#IF YARA ERROR 30 change value:
#yara.set_config(max_strings_per_rule=100000, stack_size=65536)
######################
#########################################################################################################
##### USE MSO FILE EXTRACT because clamav don't uncompress activemime
########### FUNCTION ORIGIN: https://github.com/decalage2/oletools/blob/master/oletools/olevba.py
########### Author: Philippe Lagadec - http://www.decalage.info
########### License: BSD, see source code in https://github.com/decalage2/oletools/
MSO_ACTIVEMIME_HEADER = b'ActiveMime'
def is_mso_file(data):
    """
    Check if the provided data is the content of a MSO/ActiveMime file, such as
    the ones created by Outlook in some cases, or Word/Excel when saving a
    file with the MHTML format or the Word 2003 XML format.
    This function only checks the ActiveMime magic at the beginning of data.
    :param data: bytes string, MSO/ActiveMime file content
    :return: bool, True if the file is MSO, False otherwise
    """
    return data.startswith(MSO_ACTIVEMIME_HEADER)
# regex to find zlib block headers, starting with byte 0x78 = 'x'
# FIX: must be a *bytes* pattern — mso_file_extract scans raw bytes, and a str
# pattern passed to finditer(bytes) raises TypeError on Python 3.
re_zlib_header = re.compile(b'x')
def mso_file_extract(data):
    """
    Extract the data stored into a MSO/ActiveMime file, such as
    the ones created by Outlook in some cases, or Word/Excel when saving a
    file with the MHTML format or the Word 2003 XML format.
    :param data: bytes string, MSO/ActiveMime file content
    :return: bytes string, extracted data (zlib-uncompressed), or None when no
        decompressable zlib block was found
    :raise AssertionError: if data does not start with the ActiveMime magic
    """
    # FIX: struct is not imported at module level; without it the header-offset
    # probe below failed with a (silently swallowed) NameError.
    import struct
    # check the magic:
    assert is_mso_file(data)
    # In all the samples seen so far, Word always uses an offset of 0x32,
    # and Excel 0x22A. But we read the offset from the header to be more
    # generic.
    offsets = [0x32, 0x22A]
    # First, attempt to get the compressed data offset from the header
    # According to my tests, it should be an unsigned 16 bits integer,
    # at offset 0x1E (little endian) + add 46:
    try:
        offset = struct.unpack_from('<H', data, offset=0x1E)[0] + 46
        offsets.insert(0, offset)  # insert at beginning of offsets
    except struct.error:
        pass  # header too short/malformed: fall back to the fixed offsets
    # now try offsets
    for start in offsets:
        try:
            return zlib.decompress(data[start:])
        except zlib.error:
            pass
    # None of the guessed offsets worked, let's try brute-forcing by looking
    # for potential zlib-compressed blocks starting with 0x78:
    for match in re_zlib_header.finditer(data):
        try:
            return zlib.decompress(data[match.start():])
        except zlib.error:
            pass
    return None  # nothing found — explicit for readability
############ END OF FUNCTION ORIGIN: https://github.com/decalage2/oletools/blob/master/oletools/olevba.py
#########################################################################################################
def usage():
    """Print the command-line help text, option descriptions and two usage
    examples to stdout."""
    help_lines = (
        "Usage: analysis.py [-c /usr/local/bin/clamscan] [-d /tmp/extract_emmbedded] [-p pattern.db] [-s /tmp/graph.png] [-j /tmp/result.json] [-m coef_path] [-g] [-v] [-b password.pwdb] [-i /usr/bin/tesseract] [-l fra] [-O] [-J] -f/-u path_filename/URL -y yara_rules_path1/ -a yara_rules_path2/\n",
        "\t -h/--help : for help to use\n",
        "\t -f/--filename= : path of filename to analysis\n",
        "\t -u/--url= : url analysis use thug\n",
        "\t -U/--useragent= : useragent for thug (default: win7ie90)\n",
        "\t -L/--listthug= : list useragent for thug\n",
        "\t -R/--referer= : referer for thug (default: https://mail.google.com)\n",
        "\t -y/--yara_rules_path= : path of rules yara level 1\n",
        "\t -a/--yara_rules_path2= : path of rules yara level 2\n",
        "\t -p/--pattern= : path of pattern filename for data miner\n",
        "\t -b/--password= : path of password clamav (.pwdb see: https://blog.didierstevens.com/2017/02/15/quickpost-clamav-and-zip-file-decryption/)\n",
        "\t -c/--clamscan_path= : path of binary clamscan [>=0.99.3]\n",
        "\t -m/--coef_path= : path of coef config file\n",
        "\t -d/--directory_tmp= : path of directory to extract emmbedded file(s)\n",
        "\t -j/--json_save= : path filename where save json result (JSON)\n",
        "\t -i/--image= : path of \'tesseract\' for analysis on potential social engenering by image\n",
        "\t -J/--java_decomp : Java decompile class/jar with procyon (apt-get install procyon-decompiler)\n",
        "\t -l/--lang_image= : \'tesseract\' lang ocr extratc (eng, fra, ...) \n",
        "\t -g/--graph : generate graphe of analyz\n",
        "\t -s/--save_graph= : path filename where save graph (PNG)\n",
        "\t -r/--remove= : remove tempory files\n",
        "\t -O/--osint : active OSINT (hash, filename, domaine, url)\n\t\tOSINT hybridanalisys env key: HYBRID_KEY\n\t\tOTX env key: OTX_KEY\n\t\tXFORCE env key: XFORCE_KEY & env pass: XFORCE_PASS\n\t\tVirusTotal env key: VT_KEY\n\t\tMISP env key: MISP_KEY & MISP env host: MISP_HOST\n\t\tINTEZER env key: INTEZER_KEY\n",
        "\t -v/--verbose= : verbose mode\n",
        "\t example: analysis.py -c ./clamav-devel/clamscan/clamscan -f /home/analyz/strange/invoice.rtf -y yara_rules1/ -a yara_rules2/ -b password.pwdb -i /usr/bin/tesseract -l fra -g -O\n",
        "\t example: analysis.py -c ./clamav-devel/clamscan/clamscan -u www.exploitkit.top/id?000 -y yara_rules1/ -a yara_rules2/ -b password.pwdb -i /usr/bin/tesseract -l fra -g -O\n",
    )
    for line in help_lines:
        print(line)
#source: https://stackoverflow.com/questions/377017/test-if-executable-exists-in-python
def which(program):
    """Locate an executable like the shell `which` does.

    If *program* contains a directory part it is checked directly; otherwise
    every entry of $PATH is probed. Returns the usable path or None.
    """
    def runnable(candidate):
        # executable == an existing regular file with the X bit set
        return os.path.isfile(candidate) and os.access(candidate, os.X_OK)
    head, _ = os.path.split(program)
    if head:
        if runnable(program):
            return program
        return None
    for entry in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(entry.strip('"'), program)
        if runnable(candidate):
            return candidate
    return None
#source: https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file
def md5(fname):
    """Return the hexadecimal MD5 digest of the file at *fname*,
    read incrementally in 4 KiB chunks so large files stay cheap."""
    digest = hashlib.md5()
    with open(fname, "rb") as handle:
        chunk = handle.read(4096)
        while chunk:
            digest.update(chunk)
            chunk = handle.read(4096)
    return digest.hexdigest()
#source: https://stackoverflow.com/questions/6027558/flatten-nested-python-dictionaries-compressing-keys
def flatten(d, parent_key='', sep='_'):
    """Flatten a nested mapping into a single-level dict.

    Keys of nested mappings are joined with *sep* (e.g. {'a': {'b': 1}} ->
    {'a_b': 1}); non-mapping values are kept as-is.

    :param d: mapping to flatten
    :param parent_key: prefix for all generated keys (used by the recursion)
    :param sep: separator inserted between joined key parts
    :return: flat dict
    """
    # FIX: collections.MutableMapping was removed in Python 3.10 — the abstract
    # base classes live in collections.abc.
    from collections.abc import MutableMapping
    items = []
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            items.extend(flatten(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)
#extract pattern info: URI, IP, ...
def extract_info(pathfile, pat):
    """Run every pattern of *pat* ({name: regex}) against the file content and
    return a list of unique one-entry dicts {name: match-as-string}.

    The file is read as bytes and decoded as UTF-8 with errors ignored, so
    binary files can be mined too.
    """
    with open(pathfile, 'rb') as handle:
        text = handle.read().decode('utf-8', errors='ignore')
    found = []
    for name, regex in pat.items():
        for hit in re.findall(regex, text):
            # findall yields '' / () for empty matches — skip those
            if len(hit) > 0:
                entry = {name: str(hit)}
                if entry not in found:
                    found.append(entry)
    return found
#check key exist element in key dict
def checkdict(nested_dict, path):
    """Return True when *path* (a sequence of dict keys and int list indices)
    can be fully walked inside *nested_dict*, False when a key is missing.
    Integer steps are used blindly as list indices (an out-of-range index
    still raises, as in a plain lookup)."""
    node = nested_dict
    for step in path:
        if type(step) is not int and step not in node:
            return False
        node = node[step]
    return True
#read element in key dict
def readdict(nested_dict, path):
    """Walk *path* (dict keys / int list indices) inside *nested_dict* and
    return the value found there, or False when a key step is missing.
    NOTE: a stored value of False is indistinguishable from 'not found'."""
    node = nested_dict
    for step in path:
        if type(step) is not int and step not in node:
            return False
        node = node[step]
    return node
#parse vt result
def parse_vt(vt_dict):
    """Condense a VirusTotal file-report dict into flat 'vt_*' keys.

    Only a whitelist of report fields is kept; string values keep their name,
    int values get an '_int' suffix, and the per-engine 'scans' section is
    reduced to the stringified list of distinct detection names.
    """
    summary = {}
    wanted = ('scan_date', 'permalink', 'positives', 'total', 'scans')
    for key, val in vt_dict.items():
        if key not in wanted:
            continue
        if type(val) is str:
            summary["vt_" + key] = val
        elif type(val) is int:
            summary["vt_" + key + "_int"] = val
        if key == 'scans':
            detections = []
            summary['vt_detected'] = detections
            for _engine, verdict in val.items():
                if 'result' in verdict and verdict['result'] and verdict['result'] not in detections:
                    detections.append(verdict['result'])
    if 'vt_detected' in summary:
        summary['vt_detected'] = str(summary['vt_detected'])
    return summary
#extract dict level key/value by path
def dict_extract_path(nested_dict, path):
    """Collect key/value pairs of the node addressed by *path*.

    Ints get an '_int' suffix, bools '_bool', strings stay as-is, lists are
    stringified, nested dicts are flattened (keys joined with '_') and merged
    in afterwards; the 'ContainedObjects' key is always skipped. Returns an
    empty dict when the path cannot be walked.
    """
    collected = {}
    nested_leftover = {}
    node = nested_dict
    for step in path:
        if type(step) is int:
            node = node[step]
        elif step in node:
            node = node[step]
        else:
            return collected
    for key, val in node.items():
        if key == "ContainedObjects":
            continue
        if type(val) is int:
            collected[key + "_int"] = val
        elif type(val) is bool:
            collected[key + "_bool"] = val
        elif type(val) is str:
            collected[key] = val
        elif type(val) is dict:
            # flatten nested dicts; merged after the main loop below
            nested_leftover.update(flatten(val, key))
        elif type(val) is list:
            collected[key] = str(val)
    for key, val in nested_leftover.items():
        if type(val) is bool:
            collected[key + "_bool"] = val
        elif type(val) is int:
            collected[key + "_int"] = val
        else:
            # lists and any other leftover type are stringified; clashes with
            # an existing key are concatenated with the '||--||' marker
            rendered = str(val)
            if key in collected:
                collected[key] = collected[key] + "||--||" + rendered
            else:
                collected[key] = rendered
    return collected
#add element in key dict
def adddict(nested_dict, k, v, path, overwrite=False):
    """Insert or merge value *v* under key *k* in the node at *path*.

    Merge rules when the key already exists:
      - list target: append unknown element(s); a list *v* with overwrite=True
        replaces the target wholesale.
      - 'RiskScore': keep the maximum of old and new value.
      - anything else: differing values are concatenated ('+' with
        overwrite=True, otherwise joined with the '||||' marker).
    A brand-new 'ContainedObjects' key is always created as a one-element list.
    Returns the (mutated) *nested_dict*, or False when *path* cannot be walked.
    """
    node = nested_dict
    for step in path:
        if type(step) is int:
            node = node[step]
        elif step in node:
            node = node[step]
        else:
            return False
    if k not in node:
        node[k] = [v] if k == 'ContainedObjects' else v
        return nested_dict
    existing = node[k]
    if type(existing) is list:
        if type(v) is list:
            if overwrite:
                node[k] = v
            else:
                for item in v:
                    if item not in existing:
                        existing.append(item)
        elif v not in existing:
            existing.append(v)
    elif k == 'RiskScore':
        # RiskScore only ever increases
        if existing < v:
            node[k] = v
    elif not existing == v:
        if overwrite:
            node[k] = existing + v
        else:
            node[k] = existing + "||||" + v
    return nested_dict
#modify element in key dict
def moddict(nested_dict,v,path):
cour=nested_dict
for pk in path:
if type(pk) is int:
cour=cour[pk]
elif pk in cour:
cour=cour[pk]
else:
return False
cour=v
return nested_dict
#function to find md5 in result clamav
def getpath(nested_dict, value, prepath=()):
resultx = []
for k, v in nested_dict.items():
path = prepath + (k,)
#print str(k) + " == " + str(value) + " in " + str(path)
if type(v) is list:
count = 0
for elem in v:
if type(elem) is dict:
ret = getpath(elem, value, path + (count,)) # recursive call
resultx = ret + resultx
#if p is not None:
# return p
count = count + 1
elif type(v) is dict: # v is a dict
ret = getpath(v, value, path) # recursive call
resultx = ret + resultx
#if p is not None:
#return p
elif k == 'FileMD5' and v == value: # found value
resultx.append(path)
return resultx
def findLogPath(serr, directory_tmp, path_find):
    """Scan clamscan stderr output for the clamav temp-file line that appears
    (within 100 lines) before *path_find*, i.e. the extracted child's parent
    file inside *directory_tmp*. Returns that parent path, or '' when the
    pattern does not match."""
    # directory_tmp and path_find are spliced verbatim into the regex, exactly
    # as the original did
    pattern = r'(' + directory_tmp + "/clamav-[0-9a-f]+.tmp/.*)\s+.*\n(.*\n){1,100}.*" + path_find
    hits = re.findall(pattern, serr, re.MULTILINE)
    if not hits:
        return ""
    return hits[0][0]
def check_all_score(nested_dict):
    """Walk the whole analysis result tree and gather one flat mapping of
    {yara_rule_name: score}, descending into 'ContainedObjects' lists and any
    nested dict value."""
    collected = {}
    for key, val in nested_dict.items():
        if key == "Yara" and type(val) is list:
            for entry in val:
                if type(entry) is dict:
                    for rule, info in entry.items():
                        collected[rule] = info['score']
        if key == "ContainedObjects" and type(val) is list:
            for entry in val:
                if type(entry) is dict:
                    collected.update(check_all_score(entry))
        elif type(val) is dict:
            collected.update(check_all_score(val))
    return collected
def remove_double(nested_dict):
    """Deduplicate, in place, sibling 'ContainedObjects' entries that share
    the same FileMD5 (keeping the first occurrence), then recurse into the
    surviving children and into every nested dict value. Deduplication is
    per-sibling-list only, not global."""
    for key, val in nested_dict.items():
        if key == "ContainedObjects" and type(val) is list:
            seen = []
            duplicate_positions = []
            for pos, entry in enumerate(val):
                if type(entry) is dict and 'FileMD5' in entry:
                    if entry['FileMD5'] in seen:
                        duplicate_positions.append(pos)
                    else:
                        seen.append(entry['FileMD5'])
            # pop from the back so earlier indices stay valid
            for pos in sorted(duplicate_positions, reverse=True):
                val.pop(pos)
            for entry in val:
                if type(entry) is dict and 'ContainedObjects' in entry:
                    remove_double(entry)
        elif type(val) is dict:
            remove_double(val)
def scan_json(filename, cl_parent, cdbname, cl_type, patterndb, var_dynamic, extract_var_global, yara_RC, yara_RC2, score_max, md5_file, tesseract, lang, externals_var_extra={}, verbose=False):
global ioc_global
global stop_vt
global javadecomp
global path_procyon
if api_vt:
vt = VirusTotalPublicApi(api_vt)
#find size file
size_file = os.path.getsize(filename)
#extract info
ext_info = extract_info(filename,patterndb)
extract_var_local = {}
for elemx in ext_info:
for kx, vx in elemx.items():
if kx not in extract_var_local:
extract_var_local["extract_local_"+kx] = vx
elif vx not in extract_var_local[kx]:
extract_var_local["extract_local_"+kx] = extract_var_local[kx] + "||--||" + vx
if kx not in extract_var_global:
extract_var_global["extract_global_"+kx] = vx
elif vx not in extract_var_global[kx]:
extract_var_global["extract_global_"+kx] = extract_var_global[kx] + "||--||" + vx
#yara check
externals_var = {'FileParentType': cl_parent, 'FileType': "CL_TYPE_" + cl_type, 'FileSize': int(size_file), 'FileMD5': md5_file, 'PathFile': filename}
#check image content by ocr
if tesseract and os.path.isfile(tesseract) and cl_type in ['PNG', 'JPEG', 'GIF', 'TIFF', 'BMP']:
temp = tempfile.NamedTemporaryFile()
args_ocr = [tesseract, filename, temp.name, '-l', lang]
new_env = dict(os.environ)
(working_dir, filenamex) = os.path.split(filename)
proc_ocr = subprocess.Popen(args_ocr, env=new_env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=working_dir)
output_ocr, serr_ocr = proc_ocr.communicate()
with open(temp.name+".txt", 'r') as content_file:
#A VERIFIER PASSAGE PY3!!!! TODO
externals_var['image2text'] = unidecode.unidecode(content_file.read())
temp.close
if cdbname:
externals_var['CDBNAME']=cdbname
if externals_var_extra:
externals_var.update(externals_var_extra)
if verbose:
print("Debug info -- External var:"+str(externals_var))
externals_var.update(var_dynamic)
#add extinfo in var_dyn
externals_var.update(extract_var_local)
externals_var.update(extract_var_global)
detect_yara_rule = []
detect_yara_score = 0
detect_yara_strings = ext_info
java_compiled_found_jar = False
java_compiled_found_class = False
#Check YARA rules level 1
ret_yara = yara_RC.match(filename, externals=externals_var, timeout=120)
check_level2 = {}
for match in ret_yara:
if 'check_level2' in match.meta:
#split "val1,val2"
check2vals=str(match.meta['check_level2']).split(",")
for check2val in check2vals:
check_level2[str(check2val)] = True
if match.meta['weight'] > 0:
if verbose and match.strings:
print('YARA '+match.rule+' match DEBUG:'+str(match.strings))
if str(match.rule) == "java_class":
java_compiled_found_class = True
elif str(match.rule) == "java_jar":
java_compiled_found_jar = True
found_rule={match.rule: {'description': match.meta['description'], 'score': match.meta['weight']}}
if 'tag' in match.meta:
found_rule[match.rule]['tags']=match.meta['tag']
if 'ids' in match.meta and match.meta['ids'] and match.strings:
if not match.meta['ids'].lower() in ioc_global:
ioc_global[match.meta['ids'].lower()] = []
found_rule[match.rule]['ioc']=[]
for iocx in match.strings:
#iocxx=str(iocx[2]).replace("\x00", "")
iocxx=iocx[2].decode('utf-8', errors='ignore').replace("\x00", "")
if not iocxx in found_rule[match.rule]['ioc']:
found_rule[match.rule]['ioc'].append(iocxx)
if not iocxx in ioc_global[match.meta['ids'].lower()]:
ioc_global[match.meta['ids'].lower()].append(iocxx)
detect_yara_rule.append(found_rule)
if match.meta['weight'] > detect_yara_score:
detect_yara_score = match.meta['weight']
if detect_yara_score > score_max:
score_max = detect_yara_score
#detect_yara_strings += match.strings
#detect_yara_strings = list(set(detect_yara_strings))
if 'var_match' in match.meta:
var_dynamic[str(match.meta['var_match'])] = True
elif 'var_match' in match.meta:
var_dynamic[str(match.meta['var_match'])] = True
elif 'ids' in match.meta and match.meta['ids'] and match.strings:
if not match.meta['ids'].lower() in ioc_global:
ioc_global[match.meta['ids'].lower()] = []
for iocx in match.strings:
#iocxx=str(iocx[2]).replace("\x00", "")
iocxx=iocx[2].decode('utf-8', errors='ignore').replace("\x00", "")
if not iocxx in ioc_global[match.meta['ids'].lower()]:
ioc_global[match.meta['ids'].lower()].append(iocxx)
#Check YARA rules level 2
#decompil jar/class to java
if javadecomp and (java_compiled_found_jar or java_compiled_found_class or (cdbname and re.search("\.jar$|\.class$", cdbname))):
tempx = tempfile.NamedTemporaryFile()
if java_compiled_found_class or re.search("\.class$", cdbname):
temp = tempx.name + ".class"
else:
temp = tempx.name + ".jar"
tempx.close
shutil.copy2(filename, temp)
args_decomp = [path_procyon, temp]
new_env = dict(os.environ)
(working_dir, filenamex) = os.path.split(filename)
proc_decomp = subprocess.Popen(args_decomp, env=new_env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=working_dir)
output_decomp, serr_decomp = proc_decomp.communicate()
if output_decomp:
externals_var['decompiledjava'] = unidecode.unidecode(output_decomp)
externals_var.update(check_level2)
externals_var.update(var_dynamic)
ret_yara = yara_RC2.match(filename, externals=externals_var, timeout=120)
for match in ret_yara:
if match.meta['weight'] > 0:
if verbose and match.strings:
print('YARA '+match.rule+' match DEBUG:'+str(match.strings))
found_rule={match.rule: {'description': match.meta['description'], 'score': match.meta['weight']}}
if 'tag' in match.meta:
found_rule[match.rule]['tags']=match.meta['tag']
if 'ids' in match.meta and match.meta['ids'] and match.strings:
if not match.meta['ids'].lower() in ioc_global:
ioc_global[match.meta['ids'].lower()] = []
found_rule[match.rule]['ioc']=[]
for iocx in match.strings:
#iocxx=str(iocx[2]).replace("\x00", "")
iocxx=iocx[2].decode('utf-8', errors='ignore').replace("\x00", "")
if not iocxx in found_rule[match.rule]['ioc']:
found_rule[match.rule]['ioc'].append(iocxx)
if not iocxx in ioc_global[match.meta['ids'].lower()]:
ioc_global[match.meta['ids'].lower()].append(iocxx)
detect_yara_rule.append(found_rule)
if match.meta['weight'] > detect_yara_score:
detect_yara_score = match.meta['weight']
if detect_yara_score > score_max:
score_max = detect_yara_score
#detect_yara_strings += match.strings
#detect_yara_strings = list(set(detect_yara_strings))
if 'var_match' in match.meta:
var_dynamic[str(match.meta['var_match'])] = True
elif 'var_match' in match.meta:
var_dynamic[str(match.meta['var_match'])] = True
elif 'ids' in match.meta and match.meta['ids'] and match.strings:
if not match.meta['ids'].lower() in ioc_global:
ioc_global[match.meta['ids'].lower()] = []
for iocx in match.strings:
#iocxx=str(iocx[2]).replace("\x00", "")
iocxx=iocx[2].decode('utf-8', errors='ignore').replace("\x00", "")
if not iocxx in ioc_global[match.meta['ids'].lower()]:
ioc_global[match.meta['ids'].lower()].append(iocxx)
#if not isinstance(cl_type, unicode):
# cl_type=unicode(cl_type, "utf-8")
vt_result=None
xforce_result=None
hybrid_result=None
anyrun_result=None
otx_result=None
misp_result=None
intezer_result=None
if osint:
#CHECK VT
if api_vt and stop_vt and detect_yara_score > osint_scoremin:
try:
response = vt.get_file_report(md5_file)
if response and 'response_code' in response and response['response_code'] == 200:
if "results" in response and response["results"]:
vttmp=parse_vt(response["results"])
vt_result=vttmp
#add yara rules and add score
if 'vt_detected' in vt_result and vt_result['vt_detected'] and re.match(r"CVE[_\-]*[0-9]+", vt_result['vt_detected'], re.IGNORECASE):
#Virus Total detect CVE use
found_rule={'VT_cve': {'description': 'Virus Total detect CVE use', 'score': 8}}
detect_yara_rule.append(found_rule)
if 8 > detect_yara_score:
detect_yara_score = 8
if detect_yara_score > score_max:
score_max = detect_yara_score
if 'vt_positives_int' in vt_result and vt_result['vt_positives_int'] and vt_result['vt_positives_int']>10:
#Virus Total detect malware
found_rule={'VT_high': {'description': 'Virus Total detect malware', 'score': 8}}
detect_yara_rule.append(found_rule)
if 8 > detect_yara_score:
detect_yara_score = 8
if detect_yara_score > score_max:
score_max = detect_yara_score
elif 'vt_positives_int' in vt_result and vt_result['vt_positives_int'] and vt_result['vt_positives_int']>2 and vt_result['vt_positives_int']<10:
#Virus Total detect potential malware
found_rule={'VT_low': {'description': 'Virus Total detect potential malware', 'score': 4}}
detect_yara_rule.append(found_rule)
if 4 > detect_yara_score:
detect_yara_score = 4
if detect_yara_score > score_max:
score_max = detect_yara_score
else:
if verbose:
print("Debug info: VT no result for you md5hash")
else:
if response and 'response_code' in response and response['response_code'] == 204:
if verbose:
print("Debug info: VT response error exceeded rate limit:"+str(response))
stop_vt=False
else:
if verbose:
print("Debug info: VT response error (maybe key api not valid):"+str(response))
stop_vt=False
except Exception as e:
print("Error: Virus total error:"+str(e))
#OTX API
if api_otx and detect_yara_score > osint_scoremin:
try:
otx = OTXv2(api_otx)
response = otx.get_indicator_details_full(IndicatorTypes.FILE_HASH_MD5, md5_file)
if response and 'general' in response and response['general'] and 'pulse_info' in response['general'] and response['general']['pulse_info'] and 'count' in response['general']['pulse_info'] and response['general']['pulse_info']['count']:
otx_result="https://otx.alienvault.com/indicator/file/" + str(md5_file)
except Exception as e:
print("Error: OTX error:"+str(e))
#XFORCE API
if api_xforce and pass_xforce and detect_yara_score > osint_scoremin:
#from https://github.com/johestephan/XFE/blob/master/python/xfexchange.py
try:
myURL = "https://api.xforce.ibmcloud.com:443/malware/" + md5_file
token = base64.b64encode(str("{0}:{1}".format(api_xforce, pass_xforce)).encode())
headers = {"Authorization": "Basic %s" % token.decode(), "Accept": "application/json", 'User-Agent': 'Mozilla 5.0'}
response = requests.get(myURL, headers=headers, verify=False).json()
if response and 'malware' in response and response['malware'] and 'origins' in response['malware'] and response['malware']['origins']:
#url https://exchange.xforce.ibmcloud.com/malware/MD5
xforce_result={'link': 'https://exchange.xforce.ibmcloud.com/malware/'+md5_file.upper()}
#risk - score
if 'risk' in response['malware']['origins'] and response['malware']['origins']['risk']:
xforce_result['risk']=response['malware']['origins']['risk'].lower()
if 'high' in response['malware']['origins']['risk'].lower():
found_rule={'XForce_high': {'description': 'XFORCE detect risk high', 'score': 8}}
detect_yara_rule.append(found_rule)
if 8 > detect_yara_score:
detect_yara_score = 8
if detect_yara_score > score_max:
score_max = detect_yara_score
elif 'medium' in response['malware']['origins']['risk'].lower():
found_rule={'XForce_middle': {'description': 'XFORCE detect risk medium', 'score': 5}}
detect_yara_rule.append(found_rule)
if 5 > detect_yara_score:
detect_yara_score = 5
if detect_yara_score > score_max:
score_max = detect_yara_score
elif 'low' in response['malware']['origins']['risk'].lower():
found_rule={'XForce_low': {'description': 'XFORCE detect risk low', 'score': 3}}
detect_yara_rule.append(found_rule)
if 3 > detect_yara_score:
detect_yara_score = 3
if detect_yara_score > score_max:
score_max = detect_yara_score
#external - info
if 'external' in response['malware']['origins'] and response['malware']['origins']['external']:
#malwareType
            # --- tail of the IBM X-Force lookup: copy selected fields of the API
            # response into xforce_result (the matching `try` and the preceding
            # response checks are above this chunk) ---
            if 'malwareType' in response['malware']['origins']['external'] and response['malware']['origins']['external']['malwareType']:
                xforce_result['malwareType']=response['malware']['origins']['external']['malwareType']
            #detectionCoverage
            if 'detectionCoverage' in response['malware']['origins']['external'] and response['malware']['origins']['external']['detectionCoverage']:
                xforce_result['detectionCoverage']=response['malware']['origins']['external']['detectionCoverage']
            #family
            if 'family' in response['malware']['origins']['external'] and response['malware']['origins']['external']['family']:
                xforce_result['family']=response['malware']['origins']['external']['family']
        except Exception as e:
            # best-effort OSINT: any API/network failure is only logged, analysis continues
            print("Error: XFORCE error:"+str(e))
    #APP ANYRUN
    # Check whether a public ANY.RUN report already exists for this sample's
    # sha256 (only for files already scored above the OSINT threshold).
    if detect_yara_score > osint_scoremin:
        #from: https://github.com/Neo23x0/munin/blob/master/munin.py
        try:
            # cfscrape handles the Cloudflare challenge in front of any.run
            cfscraper = cfscrape.create_scraper()
            sha256=None
            with open(filename,"rb") as f:
                bytes = f.read() # read entire file as bytes -- NOTE(review): shadows the builtin `bytes`
                sha256 = str(hashlib.sha256(bytes).hexdigest()).lower();
            # an HTTP 200 on the report URL means a public report exists
            response = cfscraper.get("https://any.run/report/%s" % sha256)
            if response.status_code == 200:
                #exist
                anyrun_result="https://any.run/report/%s" % sha256
        except Exception as e:
            print("Error: APP ANYRUN error:"+str(e))
    #INTEZER API
    # Submit the MD5 to Intezer Analyze and keep a link to the resulting report.
    if api_intezer and detect_yara_score > osint_scoremin:
        #from: https://github.com/Neo23x0/munin/blob/master/munin.py
        try:
            base_url = 'https://analyze.intezer.com/api/v2-0'
            # exchange the API key for a short-lived bearer token
            response = requests.post(base_url + '/get-access-token', json={'api_key': api_intezer})
            response.raise_for_status()
            session = requests.session()
            # NOTE(review): duplicated assignment (`session.headers['Authorization'] =` twice);
            # harmless but should be a single assignment
            session.headers['Authorization'] = session.headers['Authorization'] = 'Bearer %s' % response.json()['result']
            data = {'hash': md5_file}
            response = session.post(base_url + '/analyze-by-hash', json=data).json()
            if response and 'result_url' in response and response['result_url']:
                intezer_result="https://analyze.intezer.com/#"+str(response['result_url'])
        except Exception as e:
            print("Error: INTEZER error:"+str(e))
    #HYBRID API
    # Query Hybrid Analysis for existing sandbox reports on the MD5 and raise the
    # local risk score from the reported AV detection ratio, threat score,
    # threat level and MITRE ATT&CK techniques.
    if api_hybrid and detect_yara_score > osint_scoremin:
        #from: https://github.com/Neo23x0/munin/blob/master/munin.py
        try:
            headers = {'User-Agent': 'VxStream', 'api-key': api_hybrid}
            data = {'hash': md5_file}
            response = requests.post('https://www.hybrid-analysis.com/api/v2/search/hash', headers=headers, data=data).json()
            if response and isinstance(response, list) and len(response)>0:
                hybrid_result = []
                for hybrid_r in response:
                    hyb_tmp={}
                    if 'sha256' in hybrid_r and hybrid_r['sha256']:
                        hyb_tmp['link']='https://www.hybrid-analysis.com/sample/'+hybrid_r['sha256']+'?environmentId=100'
                    if 'av_detect' in hybrid_r and hybrid_r['av_detect']: #int max 100
                        # percentage of AV engines flagging the sample
                        hyb_tmp['av_detect']=hybrid_r['av_detect']
                        if hybrid_r['av_detect'] >= 50:
                            found_rule={'Hybrid_av_detect': {'description': 'Hybrid Analysis AV detect > 50', 'score': 8}}
                            detect_yara_rule.append(found_rule)
                            if 8 > detect_yara_score:
                                detect_yara_score = 8
                            if detect_yara_score > score_max:
                                score_max = detect_yara_score
                        elif hybrid_r['av_detect'] > 10:
                            found_rule={'Hybrid_av_detect': {'description': 'Hybrid Analysis AV detect > 10 & < 50', 'score': 4}}
                            detect_yara_rule.append(found_rule)
                            if 4 > detect_yara_score:
                                detect_yara_score = 4
                            if detect_yara_score > score_max:
                                score_max = detect_yara_score
                    if 'vx_family' in hybrid_r and hybrid_r['vx_family']: #str
                        hyb_tmp['vx_family']=hybrid_r['vx_family']
                    if 'threat_score' in hybrid_r and hybrid_r['threat_score']: #int
                        hyb_tmp['threat_score']=hybrid_r['threat_score']
                        if hyb_tmp['threat_score'] >= 80:
                            found_rule={'Hybrid_threat_score': {'description': 'Hybrid Analysis threat score >= 80', 'score': 8}}
                            detect_yara_rule.append(found_rule)
                            if 8 > detect_yara_score:
                                detect_yara_score = 8
                            if detect_yara_score > score_max:
                                score_max = detect_yara_score
                        elif hyb_tmp['threat_score'] >= 50:
                            found_rule={'Hybrid_threat_score': {'description': 'Hybrid Analysis threat score >= 50', 'score': 6}}
                            detect_yara_rule.append(found_rule)
                            if 6 > detect_yara_score:
                                detect_yara_score = 6
                            if detect_yara_score > score_max:
                                score_max = detect_yara_score
                        elif hyb_tmp['threat_score'] >= 20:
                            found_rule={'Hybrid_threat_score': {'description': 'Hybrid Analysis threat score >= 20', 'score': 4}}
                            detect_yara_rule.append(found_rule)
                            if 4 > detect_yara_score:
                                detect_yara_score = 4
                            if detect_yara_score > score_max:
                                score_max = detect_yara_score
                    if 'threat_level' in hybrid_r and hybrid_r['threat_level']: #int #(0=No Threat, 1=Suspicious, 2=Malicious, 3=Unknown)
                        hyb_tmp['threat_level']=hybrid_r['threat_level']
                        if hyb_tmp['threat_level'] >= 2:
                            found_rule={'Hybrid_threat_level': {'description': 'Hybrid Analysis threat Malicious or Unknown', 'score': 8}}
                            detect_yara_rule.append(found_rule)
                            if 8 > detect_yara_score:
                                detect_yara_score = 8
                            if detect_yara_score > score_max:
                                score_max = detect_yara_score
                        elif hyb_tmp['threat_level'] == 1:
                            found_rule={'Hybrid_threat_level': {'description': 'Hybrid Analysis threat Suspicious', 'score': 5}}
                            detect_yara_rule.append(found_rule)
                            if 5 > detect_yara_score:
                                detect_yara_score = 5
                            if detect_yara_score > score_max:
                                score_max = detect_yara_score
                    if 'verdict' in hybrid_r and hybrid_r['verdict']: #(0=No Threat, 1=Suspicious, 2=Malicious, 3=Unknown)
                        hyb_tmp['verdict']=hybrid_r['verdict']
                    hyb_tmp['mitre_attcks']=[]
                    # 'mitre_attcks' (sic) is the field name as returned by the Hybrid Analysis API
                    if 'mitre_attcks' in hybrid_r and hybrid_r['mitre_attcks']:
                        found_rule={'Hybrid_mitre': {'description': 'Hybrid Analysis detect mitre attack techniques', 'score': 5}}
                        if 5 > detect_yara_score:
                            detect_yara_score = 5
                        if detect_yara_score > score_max:
                            score_max = detect_yara_score
                        # collect tags in sigma/ATT&CK style: attack.<tactic>, attack.t1234
                        for ma_hybrid in hybrid_r['mitre_attcks']:
                            if 'tactic' in ma_hybrid and ma_hybrid['tactic']: #"tactic": "Execution",
                                hyb_tmp['mitre_attcks'].append('attack.'+ma_hybrid['tactic'].lower().replace(' ','_'))
                            if 'attck_id' in ma_hybrid and ma_hybrid['attck_id']: #"attck_id": "T1168"
                                hyb_tmp['mitre_attcks'].append('attack.'+ma_hybrid['attck_id'].lower())
                        found_rule['Hybrid_mitre']['tags']=','.join(hyb_tmp['mitre_attcks'])
                        detect_yara_rule.append(found_rule)
                    # NOTE(review): BUG -- append() is called with no argument, which raises
                    # TypeError; the broad except below swallows it, so hybrid_result is
                    # never populated (and 'Hybrid_Results' is never attached). Almost
                    # certainly meant: hybrid_result.append(hyb_tmp)
                    hybrid_result.append()
        except Exception as e:
            print("Error: Hybrid Analysis error:"+str(e))
    #MISP API
    # Search MISP for md5 attributes with to_ids set and keep a link to the event.
    if api_misp and detect_yara_score > osint_scoremin:
        try:
            # positional args presumably (url, key, ssl, out_type) -- NOTE(review): confirm
            # against the PyMISP version pinned by this project
            misp = pymisp.PyMISP(host_misp, api_misp, True, 'json')
            response = misp.search(controller='attributes', type_attribute='md5', value=md5_file, to_ids='1')
            if response and 'Attribute' in response and response['Attribute']:
                found_in_misp=len(response['Attribute']) #number of found entry in misp
                for misp_resp in response['Attribute']:
                    # NOTE(review): overwritten on every iteration -- only the link of the
                    # last matching attribute is kept
                    misp_result='https://misppriv.circl.lu/events/view/%s' % misp_resp['event_id']
        except Exception as e:
            print("Error: MISP error:"+str(e))
    # Assemble the per-file result record; each OSINT result is attached only when present.
    result_file = { 'FileParentType': cl_parent, 'FileType': "CL_TYPE_" + cl_type, 'FileSize': int(size_file), 'FileMD5': md5_file, 'PathFile': [filename], 'RiskScore': detect_yara_score, 'Yara': detect_yara_rule, 'ExtractInfo': detect_yara_strings, 'ContainedObjects': []}
    if vt_result:
        print("VT RESULT ADD")
        result_file['VT_Results']=vt_result
    if hybrid_result:
        print("Hybrid RESULT ADD")
        result_file['Hybrid_Results']=hybrid_result
    if otx_result:
        print("OTX RESULT ADD")
        result_file['OTX_Results']=otx_result
    if xforce_result:
        print("XFORCE RESULT ADD")
        result_file['XFORCE_Results']=xforce_result
    if misp_result:
        print("MISP RESULT ADD")
        result_file['MISP_Results']=misp_result
    if anyrun_result:
        print("ANYRUN RESULT ADD")
        result_file['ANYRUN_Results']=anyrun_result
    if intezer_result:
        print("INTEZER RESULT ADD")
        result_file['INTEZER_Results']=intezer_result
    if cdbname:
        result_file['CDBNAME']=cdbname
    if 'zip_crypt_bool' in externals_var_extra:
        # presumably set upstream when the archive was password protected -- verify against caller
        result_file['zip_crypt']=True
    if 'EMBED_FILES' in externals_var_extra:
        result_file['EMBED_FILES']=externals_var_extra['EMBED_FILES']
    return score_max, var_dynamic, extract_var_global, result_file
def clamscan(clamav_path, directory_tmp, filename_path, yara_RC, yara_RC2, patterndb, coef, usepass, tesseract, lang, verbose):
    # Run clamscan with --gen-json/--debug/--leave-temps so ClamAV unpacks
    # embedded objects into directory_tmp, then parse its debug output / JSON
    # result to drive the analysis. Returns a result dict (error records carry
    # RiskScore 10). The definition continues past this chunk; only the visible
    # portion is documented here.
    #add time in external variable yara for special check
    global ioc_global
    global javadecomp
    global path_procyon
    if api_vt:
        vt = VirusTotalPublicApi(api_vt)
    now=datetime.now()
    # midnight today minus 7 days; exported below as yara external 'now_7_int'
    dd=datetime(int(now.strftime('%Y')),int(now.strftime('%m')),int(now.strftime('%d')))+timedelta(days=-7)
    # NOTE(review): '%s' (epoch seconds) is a platform-specific strftime extension,
    # not portable (e.g. fails on Windows)
    tnow7=dd.strftime("%s000")
    result_extract = {}
    coefx = 1
    print("Extract emmbedded file(s) with clamav...")
    #create file for no check sig on file but check password if file crypted
    #Ref: https://blog.didierstevens.com/2017/02/15/quickpost-clamav-and-zip-file-decryption/
    emptyrule_path = tempfile.gettempdir() + '/empty.yar'
    if usepass:
        # caller-supplied database path (password handling) replaces the empty rule file
        emptyrule_path=usepass
    else:
        if not os.path.isfile(emptyrule_path):
            # NOTE(review): BUG -- '.close' is referenced, not called (missing
            # parentheses), so the handle is only closed by GC. open(...,'a')
            # still creates the empty file, which is all that is needed here.
            f=open(emptyrule_path, 'a').close
    (working_dir, filename) = os.path.split(filename_path)
    new_env = dict(os.environ)
    #'--max-filesize=300M', '--max-scansize=300M',
    args = [clamav_path, '--gen-json', '--debug', '--leave-temps', '--normalize=no', '--tempdir=' + directory_tmp, '-d', emptyrule_path, filename]
    proc = subprocess.Popen(args, env=new_env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=working_dir)
    output, serr = proc.communicate()
    # clamav emits its debug log (including the json result path) on stderr
    serr=serr.decode('utf-8', errors='ignore')
    print("Analyz result...")
    #run command problem
    if verbose:
        print(serr)
    if proc.returncode:
        # clamscan failed outright: drop the temp dir and return an error record
        print("Error: clamscan could not process the file.\n")
        shutil.rmtree(directory_tmp)
        md5_file = md5(filename_path)
        size_file = os.path.getsize(filename_path)
        ret_error={ "ExtractInfo": [], "FileMD5": str(md5_file), "FileSize": size_file, "FileType": "CLAMAV ERROR could not process the file", "GlobalIOC": {}, "GlobalRiskScore": 10, "GlobalRiskScoreCoef": 1, "GlobalTags": "CLAMAV ERROR could not process the file", "RiskScore": 10, "RootFileType": "CLAMAV ERROR could not process the file", "TempDirExtract": str(directory_tmp), "Yara": []}
        return ret_error
    #run command OK
    #LibClamAV debug: cli_updatelimits: scansize exceeded (initial: 104857600, consumed: 0, needed: 873684452)
    #LibClamAV debug: cli_updatelimits: filesize exceeded (allowed: 26214400, needed: 873684452)
    #if re.search("cli_updatelimits: filesize exceeded", serr):
    # Detect the various "limit exceeded" messages clamav prints on stderr
    if 'filesize exceeded' in serr or 'scansize exceeded' in serr or 'CL_EMAXSIZE Exceeded' in serr or 'recursion limit exceeded' in serr or 'recursion limit,' in serr or 'it would exceed max scansize.' in serr or 'Stopping after cli_scanraw reached' in serr or 'Files limit reached' in serr:
        print("Error: clamscan could not process the file because file size is exceeded size allowed.\n")
        shutil.rmtree(directory_tmp)
        md5_file = md5(filename_path)
        size_file = os.path.getsize(filename_path)
        ret_error={ "ExtractInfo": [], "FileMD5": str(md5_file), "FileSize": size_file, "FileType": "CLAMAV ERROR Exceed Max limit", "GlobalIOC": {}, "GlobalRiskScore": 10, "GlobalRiskScoreCoef": 1, "GlobalTags": "CLAMAV ERROR Exceed Max limit", "RiskScore": 10, "RootFileType": "CLAMAV ERROR Exceed Max limit", "TempDirExtract": str(directory_tmp), "Yara": []}
        return ret_error
    else:
        #find json file -- > json written to: tmp5//clamav-07c46ccfca138bfce61564c552931476.tmp
        root_type = "UNKNOWN"
        score_max = 0
        global_tags = []
        var_dynamic = {}
        extract_var_global = {}
        m = re.search('json written to:\s+(.+)\n', serr)
        json_find = False
        json_file = ""
        if m:
            json_file = m.group(1)
            print("Find resultat in json file:" + json_file + "...")
            if os.path.isfile(json_file):
                with open(json_file) as data_file:
                    try:
                        result_extract = json.load(data_file)
                    except:
                        # NOTE(review): bare except hides the error type; a corrupt
                        # json just leaves result_extract empty
                        print("Error to parse json result...")
        var_dynamic['now_7_int'] = int(tnow7)
        md5_file = None
        size_file = None
        type_file = None
        if result_extract:
            json_find = True
            remove_double(result_extract)
        else:
            # No JSON result: fall back to scraping clamav's debug output
            #analyz debug information for find external variable for yara
            regexp_bool = re.compile(r'_bool$')
            regexp_int = re.compile(r'_int$')
            #Put serr (clamav debug) in external variable if json not detected
            var_dynamic['serr'] = serr
            # map known clamav PDF debug messages to yara external variables;
            # the variable-name suffix (_bool/_int) selects the value type below
            pdf_analyz = { 'cli_pdf: %%EOF not found': 'PDFStats_NoEOF_bool', 'cli_pdf: encrypted pdf found': 'PDFStats_Encrypted_bool', 'cli_pdf: did not find valid xref': 'PDFStats_NoXREF_bool', 'cli_pdf: startxref not found': 'PDFStats_NoXREF_bool', 'cli_pdf: bad pdf version:': 'PDFStats_BadVersion_bool', 'cli_pdf: no PDF- header found': 'PDFStats_BadHeaderPosition_bool', 'cli_pdf: bad format object': 'PDFStats_InvalidObjectCount_int'}
            for ka,va in pdf_analyz.items():
                if ka in serr:
                    if regexp_bool.search(va):
                        var_dynamic[va] = True
                    elif regexp_int.search(va):
                        var_dynamic[va] = 1
                    else:
                        var_dynamic[va] = "True"
            md5_file = md5(filename_path)
            size_file = os.path.getsize(filename_path)
            #LibClamAV debug: Recognized RTF file
            type_file = "UNKNOWN"
            #BUG: File type must be on some words, by example MS CHM
            #m = re.search('LibClamAV debug:\s+Recognized\s+(\S+)\s+', serr) #LibClamAV debug: Recognized RTF file
            m = re.search('LibClamAV debug:\s+Recognized\s+(.+)\s+file', serr) #LibClamAV debug: Recognized RTF file
            if m:
                type_file = m.group(1).replace(" ", "_")
                root_type = type_file
            else:
                # fall back to a single-word match when the "... file" form is absent
                m1 = re.search('LibClamAV debug:\s+Recognized\s+(\S+)\s+', serr) #LibClamAV debug: Recognized RTF file
                if m1:
                    type_file = m1.group(1)
                    root_type = type_file
            #extract info
            ext_info = extract_info(filename_path,patterndb)
            extract_var_local = {}
            for elemx in ext_info:
                for kx, vx in elemx.items():
                    if kx not in extract_var_local:
                        extract_var_local["extract_local_"+kx] = vx
                    # NOTE(review): the branch below indexes extract_var_local[kx], but
                    # values are stored under "extract_local_"+kx -- looks like a KeyError
                    # (and the membership test above never matches); confirm against the
                    # continuation of this loop, which is cut off at this chunk boundary.
                    elif vx not in extract_var_local[kx]: