-
Notifications
You must be signed in to change notification settings - Fork 0
/
debug_logger.py
63 lines (51 loc) · 2.62 KB
/
debug_logger.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import hashlib
import logging
import os
from datetime import datetime
class DebugLogger:
    """Helper class for debug logging of web scraping content.

    Debug mode is enabled when the ``GRAPHRAG_DEBUG`` environment variable
    equals ``true`` (case-insensitive) at construction time. When enabled,
    each scrape is dumped to its own text file and summary lines are
    appended to a per-day log file, both inside ``debug_dir``. When
    disabled, ``log_scraped_content`` does nothing and returns ``None``.
    """

    _LOGGER_NAME = 'web-scraping-debug'

    def __init__(self, debug_dir=None):
        """Read the debug flag and, if set, prepare the directory and logger.

        Args:
            debug_dir: Directory that receives the debug output. Defaults to
                ``./logs/debug`` relative to the current working directory.
        """
        self.debug_mode = os.getenv('GRAPHRAG_DEBUG', '').lower() == 'true'
        # Always define these so attribute access is safe with debug mode off.
        self.debug_dir = None
        self.logger = None
        if not self.debug_mode:
            return

        if debug_dir is None:
            debug_dir = os.path.join("./logs", "debug")
        self.debug_dir = debug_dir
        os.makedirs(self.debug_dir, exist_ok=True)

        self.logger = logging.getLogger(self._LOGGER_NAME)
        self.logger.setLevel(logging.DEBUG)
        self.logger.propagate = False  # Prevent propagation to root logger

        # FileHandler stores its target as an absolute path in baseFilename,
        # so normalise the same way before comparing.
        debug_log = os.path.abspath(os.path.join(
            self.debug_dir,
            f"scraping_debug_{datetime.now().strftime('%Y%m%d')}.log",
        ))

        # logging.getLogger() returns the same logger object for the same
        # name, so a second DebugLogger instance must not attach another
        # handler for the same file: that would duplicate every log line
        # and keep an extra file handle open.
        already_attached = any(
            isinstance(handler, logging.FileHandler)
            and handler.baseFilename == debug_log
            for handler in self.logger.handlers
        )
        if not already_attached:
            fh = logging.FileHandler(debug_log, mode='a', encoding='utf-8')
            fh.setLevel(logging.DEBUG)
            fh.setFormatter(
                logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
            )
            self.logger.addHandler(fh)

    def log_scraped_content(self, url, title, content):
        """Write one scrape to its own debug file if debug mode is on.

        Args:
            url: URL the content was scraped from.
            title: Title recorded in the file header.
            content: Scraped text; ``None`` is treated as empty text.

        Returns:
            The path of the file that was written, or ``None`` when debug
            mode is off or the write failed (the failure is logged).
        """
        if not self.debug_mode:
            return None

        now = datetime.now()
        # The built-in hash() of a str is salted per process, so it would
        # give the same URL a different filename on every run. A digest
        # prefix is stable across runs and also works for non-str values.
        url_hash = hashlib.sha256(
            str(url).encode('utf-8', errors='replace')
        ).hexdigest()[:16]
        debug_file = os.path.join(
            self.debug_dir,
            f"content_{now.strftime('%Y%m%d_%H%M%S')}_{url_hash}.txt",
        )

        text = "" if content is None else str(content)
        word_count = len(text.split())
        rule = "=" * 80

        try:
            with open(debug_file, 'w', encoding='utf-8') as f:
                f.write(
                    f"{rule}\n"
                    f"Timestamp: {now.strftime('%Y-%m-%d %H:%M:%S')}\n"
                    f"URL: {url}\n"
                    f"Title: {title}\n"
                    f"Word Count: {word_count}\n"
                    f"{rule}\n"
                    "\nCONTENT:\n"
                    f"{rule}\n\n"
                )
                f.write(text)
                f.write(f"\n\n{rule}\n")
        except Exception as e:
            # Debug dumping is best-effort: it must never break the scrape
            # itself, so record the failure and report "nothing written".
            self.logger.error(
                "Error saving debug content for %s: %s", url, e
            )
            return None

        self.logger.debug("Scraped content saved to: %s", debug_file)
        self.logger.debug(
            "Content summary - URL: %s, Title: %s, Words: %s",
            url, title, word_count,
        )
        return debug_file