-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_letters.py
95 lines (79 loc) · 3.51 KB
/
generate_letters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import pandas as pd
from docx import Document
from datetime import datetime
from docx2pdf import convert
import os
# Constants
# All paths below are relative, so they resolve against the current working
# directory the script is launched from.

# Workbook that holds one row per recommendation letter to generate.
EXCEL_FILE = "Applications-details.xlsx"
# Name of the worksheet inside EXCEL_FILE that is read.
EXCEL_SHEET = "Details"
# Word template containing the <Date>, <Recommendation-Committee>,
# <Recommendation-Position> and <Recommendation-Department> placeholders.
WORD_TEMPLATE = "RecommendationLetter_Template.docx"
# Directory that receives the generated .docx and .pdf files; it is created
# on demand if it does not exist.
OUTPUT_DIR = "GeneratedLetters"
# Step 1: Load the Excel file.
# Any failure here (missing file, missing sheet, unreadable workbook) is fatal:
# report it and stop with a non-zero status so callers can detect the error.
# `raise SystemExit(...)` is used instead of `exit()` because `exit` is an
# interactive helper injected by the `site` module and, when called without an
# argument, reports success (status 0) even on this error path.
try:
    data = pd.read_excel(EXCEL_FILE, sheet_name=EXCEL_SHEET)
except Exception as e:
    print(f"Error loading Excel file: {e}")
    raise SystemExit(1) from e
else:
    print("Excel file loaded successfully.")

# Step 2: Filter rows with non-empty ID.
# Check for the column first: `dropna(subset=["ID"])` raises an uncaught
# KeyError (with a traceback) when the sheet has no "ID" column.
if "ID" not in data.columns:
    print("Failed to generate. Missing expected column in row: 'ID'")
    raise SystemExit(1)

data = data.dropna(subset=["ID"])
if data.empty:
    # Nothing to do is not treated as a failure; the exit status stays 0.
    print("No valid rows found in the Excel sheet.")
    raise SystemExit(0)

# Step 3: Ensure output directory exists
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Step 4: Process each row and generate recommendation letters

# Columns every row must provide; checked once up front instead of per row.
_REQUIRED_COLUMNS = (
    "ID",
    "Recommendation-Committee",
    "Recommendation-Position",
    "Recommendation-Department",
)


def _cell_text(value: object) -> str:
    """Return a spreadsheet cell as clean text.

    Missing cells (NaN/None/NaT) become an empty string. Integer-valued
    floats are rendered without the trailing ".0": pandas reads a numeric
    column that contains blank cells as float, so an ID of 12 would otherwise
    produce files named "12.0.docx" / "12.0.pdf". Everything else is
    converted with ``str`` and stripped of surrounding whitespace.
    """
    if pd.isna(value):
        return ""
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value).strip()


def _fill_placeholders(doc, replacements: dict) -> None:
    """Replace each placeholder in the body paragraphs of *doc* in place.

    *replacements* maps placeholder text to its replacement text and is
    applied in insertion order. Only ``doc.paragraphs`` is visited, so
    placeholders inside tables, headers or footers are not touched.
    """
    for paragraph in doc.paragraphs:
        for placeholder, value in replacements.items():
            if placeholder not in paragraph.text:
                continue
            # Replace inside individual runs first so the run's character
            # formatting (bold, font, size, ...) is preserved.
            for run in paragraph.runs:
                if placeholder in run.text:
                    run.text = run.text.replace(placeholder, value)
            # Fallback when Word split the placeholder across several runs:
            # assigning paragraph.text collapses the paragraph into a single
            # run, which loses run-level formatting but still substitutes.
            if placeholder in paragraph.text:
                paragraph.text = paragraph.text.replace(placeholder, value)


missing_columns = [name for name in _REQUIRED_COLUMNS if name not in data.columns]
if missing_columns:
    print(
        "Failed to generate. Missing expected column in row: "
        + ", ".join(repr(name) for name in missing_columns)
    )
    # Non-zero status: a bare exit() would report success on this error path.
    raise SystemExit(1)

# Format: Mon DD, YYYY. Computed once so every letter in a run has the same date.
letter_date = datetime.now().strftime("%b %d, %Y")

for _, row in data.iterrows():
    try:
        # Extract data as stripped text ("" for missing cells)
        recommendation_id = _cell_text(row["ID"])
        committee = _cell_text(row["Recommendation-Committee"])
        position = _cell_text(row["Recommendation-Position"])
        department = _cell_text(row["Recommendation-Department"])
        print(
            f"\nProcessing row ID: [{recommendation_id}], Committee: [{committee}], "
            f"Position: [{position}], Department: [{department}]"
        )

        # ID names the output files and Committee addresses the letter, so a
        # row missing either one is skipped; Position/Department may be blank.
        if not recommendation_id or not committee:
            print("Skipping row due to empty ID or Committee")
            continue

        # Open a fresh copy of the Word template for every row, because the
        # placeholders are replaced in place.
        try:
            doc = Document(WORD_TEMPLATE)
        except Exception as e:
            print(f"Error opening Word template: {e}")
            continue

        # Replace placeholders in the document
        _fill_placeholders(
            doc,
            {
                "<Date>": letter_date,
                "<Recommendation-Committee>": committee,
                "<Recommendation-Position>": position,
                "<Recommendation-Department>": department,
            },
        )

        # Save the personalized Word document
        word_file = os.path.join(OUTPUT_DIR, f"{recommendation_id}.docx")
        doc.save(word_file)

        # Convert to PDF; the .docx is intentionally kept next to the PDF.
        pdf_file = os.path.join(OUTPUT_DIR, f"{recommendation_id}.pdf")
        try:
            convert(word_file, pdf_file)
        except Exception as e:
            print(f"Error converting {word_file} to PDF: {e}")
            continue

        print(f"Generated PDF for ID: {recommendation_id}")
    except Exception as e:
        # Per-row boundary: report the failure and carry on with the next row.
        print(f"Error processing row ID {row['ID']}: {e}")

print("\nProcessing complete!")