diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..a59b23b --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.pythonPath": "C:\\Users\\WISSAM-PC\\.conda\\envs\\thesis\\python.exe" +} \ No newline at end of file diff --git a/rename_pdf_files_from_content_title_or_arxiv.py b/rename_pdf_files_from_content_title_or_arxiv.py index fded8a8..b6258e1 100644 --- a/rename_pdf_files_from_content_title_or_arxiv.py +++ b/rename_pdf_files_from_content_title_or_arxiv.py @@ -4,7 +4,7 @@ import re import glob import pdftitle -from tqdm import tqdm_notebook as tqdm +from tqdm import tqdm import arxiv import random #%% @@ -25,7 +25,7 @@ def get_valid_filename(s): return re.sub(r'(?u)[^-\w.]', '', s) #%% -all_files_names = glob.glob("C:\\Users\\WISSAM-PC\\Downloads\\Documents - copy\\*.pdf") +all_files_names = glob.glob("C:\\Users\\WISSAM-PC\\Downloads\\Documents\\*.pdf") #%% # research_files = [] # for f in tqdm(all_files_names): @@ -39,35 +39,36 @@ def get_valid_filename(s): # %% research_files = [] for f in tqdm(all_files_names): - basename = os.path.basename(f) - if "__OLDNAME__" in basename: + if "__OLDNAME__" in f or '__XX__' in f: continue + basename = os.path.basename(f) dirname = os.path.dirname(f) - try: - title = pdftitle.get_title_from_file(f) - if len(title) < 3: - continue - title = re.sub(r'[\\/:"*?<>|]+'," ",title) - title = re.sub(r'\s'," ",title) - title = get_valid_filename(title) - new_basename = title + '__OLDNAME__' + basename - new_path = os.path.join(dirname, new_basename) - except: - title = None + + if len(re.findall(r'\b\d{4}\.\d{5}',basename)) > 0 : + article_id = basename.strip('.pdf') + entry = arxiv.query(id_list=[article_id]) + if len(entry) > 0: + title = entry[0]['title'].replace(":","") + title = re.sub(r'\s'," ",title) + title = get_valid_filename(title) + new_basename = title + '__OLDNAME__' + basename + new_path = os.path.join(dirname, new_basename) - if title==None: - if len(re.findall(r'\b\d{4}\.\d{5}',basename)) > 0 : - article_id = basename.strip('.pdf') - entry = arxiv.query(id_list=[article_id]) - if len(entry) > 0: - title = entry[0]['title'].replace(":","") - title = re.sub(r'\s'," ",title) - title = get_valid_filename(title) - new_basename = title + '__OLDNAME__' + basename - new_path = os.path.join(dirname, new_basename) - else: + else: + try: + title = pdftitle.get_title_from_file(f) + if len(title) < 3: + continue + title = re.sub(r'[\\/:"*?<>|]+'," ",title) + title = re.sub(r'\s'," ",title) + title = get_valid_filename(title) + new_basename = title + '__OLDNAME__' + basename + new_path = os.path.join(dirname, new_basename) + except: + title = None + os.rename(f,f[:-4]+'__XX__'+'.pdf') continue - + try: print(new_path) os.rename(f, new_path) @@ -76,9 +77,8 @@ def get_valid_filename(s): dirname = os.path.dirname(new_path) os.rename(f,os.path.join(dirname, new_basename[:-4]+str(random.randint(0,999))+'.pdf')) except: + os.rename(f,f[:-4]+'__XX__'+'.pdf') print('skipping') - - - # %% +# %% diff --git a/run_pdf_renamer.bat b/run_pdf_renamer.bat new file mode 100644 index 0000000..30d5a31 --- /dev/null +++ b/run_pdf_renamer.bat @@ -0,0 +1 @@ +"C:\Users\WISSAM-PC\.conda\envs\thesis\python.exe" rename_pdf_files_from_content_title_or_arxiv.py \ No newline at end of file