-
Notifications
You must be signed in to change notification settings - Fork 2
Possible improvements to repository header processing script
Mark Fullmer edited this page Sep 6, 2021
·
13 revisions
https://gist.github.com/markfullmer/cb1cb58924a89650ea5abbb1baf78757
This reduces indentation & unnecessary debugging.
def process_directory(directory_name):
    """Run process_file() on every ``.txt`` file found under *directory_name*.

    The tree is walked recursively with os.walk(). Each matching file is
    passed to process_file() as a path built from the current working
    directory, the directory being walked, and the file name.
    """
    cwd = os.getcwd()
    for dirpath, _dirnames, files in os.walk(directory_name):
        for filename in files:
            # Proposed addition: skip anything that is not a .txt file with an
            # early `continue` rather than nesting the work under an `if`.
            _root, extension = os.path.splitext(filename)
            if extension != ".txt":
                continue
            process_file(os.path.join(cwd, dirpath, filename))
# Current version: the real work is nested inside the `if`.
for line in textfile:
    # Normalise the line ending to CRLF so a blank line is exactly '\r\n'.
    this_line = re.sub(r'\r?\n', r'\r\n', line)
    # This is the condition the proposal replaces with an early `continue`.
    if this_line != '\r\n':
        # Collapse every run of whitespace to one space, then trim the ends.
        new_line = re.sub(r'\s+', r' ', this_line)
        new_line = new_line.strip()
        print(new_line, file = output_file)
# Proposed version: a guard clause keeps the main path at one indent level.
for line in textfile:
    # Normalise the line ending to CRLF so a blank line is exactly '\r\n'.
    this_line = re.sub(r'\r?\n', r'\r\n', line)
    # Added: skip blank lines up front instead of wrapping the rest in an `if`.
    if this_line == '\r\n':
        continue
    # Collapse every run of whitespace to one space, then trim the ends.
    new_line = re.sub(r'\s+', r' ', this_line)
    new_line = new_line.strip()
    print(new_line, file = output_file)
Reference: https://www.geeksforgeeks.org/python-continue-statement/
# Ask once whether debugging output is wanted. Debugging stays on unless the
# reply, lower-cased and stripped, is exactly "n".
question = "Do you want to include debugging?"
# input() already returns a str in Python 3, so no str() wrapper is needed.
reply = input(question + ' (Y/n): ').lower().strip()
debug = reply != 'n'
Reference: https://gist.github.com/garrettdreyfus/8153571
filename_keys = ["Course Number", "Assignment", "Document Type", "Instructor", "Institution"]
# Drop the directory portion so only the file's own name is parsed.
relative_filename = os.path.split(filename)[1]
# Split filename on underscore character: 106_VA_LP_1342_UA => ['106', 'VA', 'LP', '1342', 'UA']
filename_parts = os.path.splitext(relative_filename)[0].split('_')
# Merge parts into dictionary. zip() stops at the shorter sequence, so a
# filename with fewer parts than keys simply yields fewer entries.
data = dict(zip(filename_keys, filename_parts))
if debug:
    print()
    print('FILENAME COMPONENTS:')
    print()
    for key, value in data.items():
        print(key, '-->', value)
    print()
    # Pause so the components can be read before processing continues.
    input('Press enter to continue')
FILENAME COMPONENTS:
Course Number --> 106
Assignment --> VA
Document Type --> LP
Instructor --> 1342
Institution --> UA
Components can then be accessed by key, rather than by numeric index (e.g., data['Assignment']):
print("<" + key + ": " + value + ">", file=output_file)
This avoids problems with operating system differences and unexpected subdirectories.
# Split filepath: /User/me/Desktop/ENGL106/Fall 2017/1011/Language Awareness/<filename>
# => ('User', 'me', 'Desktop', 'ENGL106', 'Fall 2017', '1011', 'Language Awareness', '<filename>')
# NOTE(review): splitall() is not shown here; presumably a helper that splits a
# path into all of its components -- confirm against its definition.
all_filepath_parts = splitall(filename)
# Get the last 4 items from the end of the directory structure, excluding the final filename.
end_filepath_parts = all_filepath_parts[-5:-1]
# => ('ENGL106', 'Fall 2017', '1011', 'Language Awareness')
Reference: https://www.learnpython.dev/03-intermediate-python/20-advanced-looping/50-slicing/