Skip to content

Commit

Permalink
text decoding errors ultimate solution
Browse files Browse the repository at this point in the history
--> reading documentation is good <---
  • Loading branch information
piotrj committed Dec 30, 2023
1 parent bde8486 commit 22d634b
Showing 1 changed file with 86 additions and 80 deletions.
166 changes: 86 additions & 80 deletions src/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from time import sleep, perf_counter,time,strftime,localtime
from threading import Thread
from os import cpu_count,scandir,stat,sep,name as os_name,remove as os_remove

windows = bool(os_name=='nt')

if windows:
Expand All @@ -46,7 +47,7 @@
from platform import system as platform_system,release as platform_release,node as platform_node

from fnmatch import fnmatch
from re import search
from re import search as re_search
from sys import getsizeof
import sys
from collections import defaultdict
Expand Down Expand Up @@ -157,7 +158,7 @@ def byte_to_bools(byte, num_bools=10):
def test_regexp(expr):
teststring='abc'
try:
search(expr,teststring)
re_search(expr,teststring)
except Exception as e:
return e

Expand Down Expand Up @@ -192,13 +193,15 @@ def get_command(executable,parameters,full_file_path,shell):

return res,' '.join(res)

def popen_win(command,shell,text):
return Popen(command, stdout=PIPE, stderr=STDOUT,stdin=DEVNULL,shell=shell,text=text,creationflags=CREATE_NO_WINDOW,close_fds=False)
# Candidate values for Popen's ``errors`` argument:
# 'ignore', 'replace', 'backslashreplace'.
def popen_win(command,shell):
    """Start ``command`` as a child process on Windows without a console window.

    The child's stdout is captured through a pipe with stderr merged into it,
    and stdin is connected to DEVNULL.  The pipe is opened in text mode and
    bytes that cannot be decoded are dropped (``errors='ignore'``), so
    undecodable output does not raise UnicodeDecodeError while being read.

    Args:
        command: Passed unchanged to ``Popen`` as the command to run.
        shell: Forwarded as ``Popen``'s ``shell`` argument.

    Returns:
        The started ``Popen`` object.
    """
    # ``text=True`` alone enables text mode; ``universal_newlines`` is only a
    # backward-compatible alias of it, so it is not passed a second time.
    return Popen(
        command,
        stdout=PIPE,
        stderr=STDOUT,
        stdin=DEVNULL,
        shell=shell,
        text=True,
        creationflags=CREATE_NO_WINDOW,
        close_fds=False,
        errors='ignore',
    )

def popen_lin(command,shell,text):
return Popen(command, stdout=PIPE, stderr=STDOUT,stdin=DEVNULL,shell=shell,text=text,start_new_session=True)
def popen_lin(command,shell):
    """Start ``command`` as a child process in its own session.

    The child's stdout is captured through a pipe with stderr merged into it,
    and stdin is connected to DEVNULL.  The pipe is opened in text mode and
    bytes that cannot be decoded are dropped (``errors='ignore'``), so
    undecodable output does not raise UnicodeDecodeError while being read.

    Args:
        command: Passed unchanged to ``Popen`` as the command to run.
        shell: Forwarded as ``Popen``'s ``shell`` argument.

    Returns:
        The started ``Popen`` object.
    """
    # ``text=True`` alone enables text mode; ``universal_newlines`` is only a
    # backward-compatible alias of it, so it is not passed a second time.
    return Popen(
        command,
        stdout=PIPE,
        stderr=STDOUT,
        stdin=DEVNULL,
        shell=shell,
        text=True,
        start_new_session=True,
        errors='ignore',
    )

uni_popen = (lambda command,shell=False,text=True : popen_win(command,shell,text)) if windows else (lambda command,shell=False,text=True : popen_lin(command,shell,text))
# Platform-neutral process launcher: picks the Windows or the non-Windows
# Popen wrapper once, based on the module-level ``windows`` flag.
if windows:
    def uni_popen(command,shell=False):
        """Launch ``command`` through ``popen_win`` and return its result."""
        return popen_win(command,shell)
else:
    def uni_popen(command,shell=False):
        """Launch ``command`` through ``popen_lin`` and return its result."""
        return popen_lin(command,shell)

def kill_subprocess(subproc):
try:
Expand Down Expand Up @@ -605,96 +608,102 @@ def threaded_cde(timeout_semi_list):

time_start_all = perf_counter()

creationflags = CREATE_NO_WINDOW | HIGH_PRIORITY_CLASS if windows else 0
temp_file_name = 'temp_file.dat'
aborted_string = 'Custom data extraction was aborted.'
for (scan_like_list,subpath,rule_nr,size) in self.customdata_pool.values():
#decoding_error=False
self.killed=False
self.abort_single_file_cde=False

time_start = perf_counter()
if self.abort_action:
returncode=200
output = aborted_string
aborted = True
else:
aborted = False

with open(temp_file_name, 'ba+') as temp_file:
temp_file_seek = temp_file.seek
temp_file_truncate = temp_file.truncate
temp_file_read = temp_file.read
returncode=202
expressions,use_smin,smin_int,use_smax,smax_int,executable,parameters,shell,timeout,do_crc = cde_list[rule_nr]
full_file_path = normpath(abspath(sep.join([scan_path,subpath]))).replace('/',sep)
command,command_info = get_command(executable,parameters,full_file_path,shell)

self_header_files_cde_errors_quant = self_header.files_cde_errors_quant
self.info_line_current = f"{command_info} ({bytes_to_str(size)})"

for (scan_like_list,subpath,rule_nr,size) in self.customdata_pool.values():
decoding_error=False
self.killed=False
self.abort_single_file_cde=False
timeout_val=time()+timeout if timeout else None
#####################################

time_start = perf_counter()
if self.abort_action:
returncode=200
output = 'Custom data extraction was aborted.'
aborted = True
try:
subprocess = uni_popen(command,shell)
timeout_semi_list[0]=(timeout_val,subprocess)
except Exception as re:
print('threaded_cde error:',re)
subprocess = None
timeout_semi_list[0]=(timeout_val,subprocess)
returncode=201
output = str(re)
else:
aborted = False
subprocess_stdout_readline = subprocess.stdout.readline
subprocess_poll = subprocess.poll

returncode=202
expressions,use_smin,smin_int,use_smax,smax_int,executable,parameters,shell,timeout,do_crc = cde_list[rule_nr]
full_file_path = normpath(abspath(sep.join([scan_path,subpath]))).replace('/',sep)
command,command_info = get_command(executable,parameters,full_file_path,shell)
output_list = []
output_list_append = output_list.append

self.info_line_current = f"{command_info} ({bytes_to_str(size)})"
while True:
line = subprocess_stdout_readline().rstrip()

timeout_val=time()+timeout if timeout else None
#####################################
#try:
#except Exception as le:
#print(command,le)
# line = str(le)
#decoding_error = True

temp_file_seek(0)
temp_file_truncate()
output_list_append(line)

try:
subprocess = Popen(command, stdout=temp_file, stderr=temp_file,stdin=DEVNULL,shell=shell,start_new_session=True,creationflags=creationflags,close_fds=False)
timeout_semi_list[0]=(timeout_val,subprocess)
except Exception as re:
output = bytes(str(re),encoding='utf-8')
returncode=100
else:
returncode = subprocess.wait()
timeout_semi_list[0] = None
if not line and subprocess_poll() is not None:
returncode=subprocess.returncode
timeout_semi_list[0] = None
break

temp_file_seek(0)
try:
output = temp_file_read()
except Exception as de:
decoding_error=True
output = bytes(str(de),encoding='utf-8')
if self.killed:
output_list_append('Killed.')

output = '\n'.join(output_list).strip()

#####################################

time_end = perf_counter()
customdata_stats_time[rule_nr]+=time_end-time_start
time_end = perf_counter()
customdata_stats_time[rule_nr]+=time_end-time_start

if returncode or decoding_error or self.killed or aborted:
self_header_files_cde_errors_quant[rule_nr]+=1
self_header.files_cde_errors_quant_all+=1
if returncode or self.killed or aborted:
self_header.files_cde_errors_quant[rule_nr]+=1
self_header.files_cde_errors_quant_all+=1

if not aborted:
self_header.files_cde_quant += 1
self_header.files_cde_size += size
self_header.files_cde_size_extracted += getsizeof(output)
if not aborted:
self_header.files_cde_quant += 1
self_header.files_cde_size += size
self_header.files_cde_size_extracted += getsizeof(output)

new_elem={}
new_elem['cd_ok']= bool(returncode==0 and not decoding_error and not self.killed and not aborted)
new_elem={}
new_elem['cd_ok']= bool(returncode==0 and not self.killed and not aborted)

cd_field=(rule_nr,returncode,output)
if cd_field not in customdata_helper:
customdata_helper[cd_field]=cd_index
new_elem['cd_index']=cd_index
cd_index+=1
cd_field=(rule_nr,returncode,output)
if cd_field not in customdata_helper:
customdata_helper[cd_field]=cd_index
new_elem['cd_index']=cd_index
cd_index+=1

self_customdata_append(cd_field)
self_customdata_append(cd_field)

customdata_stats_size[rule_nr]+=asizeof(cd_field)
customdata_stats_uniq[rule_nr]+=1
customdata_stats_refs[rule_nr]+=1
else:
new_elem['cd_index']=customdata_helper[cd_field]
customdata_stats_refs[rule_nr]+=1

#if do_crc:
# new_elem['crc_val']=crc_val
scan_like_list.append(new_elem)
customdata_stats_size[rule_nr]+=asizeof(cd_field)
customdata_stats_uniq[rule_nr]+=1
customdata_stats_refs[rule_nr]+=1
else:
new_elem['cd_index']=customdata_helper[cd_field]
customdata_stats_refs[rule_nr]+=1

send2trash_delete(temp_file_name)
#if do_crc:
# new_elem['crc_val']=crc_val
scan_like_list.append(new_elem)

time_end_all = perf_counter()

Expand Down Expand Up @@ -1200,10 +1209,9 @@ def unload_customdata(self):
class LibrerCore:
records = set()

def __init__(self,db_dir,temp_dir,log):
def __init__(self,db_dir,log):
self.records = set()
self.db_dir = db_dir
self.temp_dir = temp_dir
self.log=log
self.info_line = 'init'
#self.info_line_current = ''
Expand Down Expand Up @@ -1720,8 +1728,6 @@ def threaded_run(record_nr,commands_list,results_list,progress_list,info_list,pr

def delete_record(self,record):
file_path = record.file_path
print('file_path',file_path)

self.records.remove(record)

self.log.info('removing file to trash:%s',file_path)
Expand Down

0 comments on commit 22d634b

Please sign in to comment.