diff --git a/.gitignore b/.gitignore index 03aaace..4200b72 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,58 @@ -/venv +# Python bytecode files +__pycache__/ +*.py[cod] + +# Distribution / packaging +build/ +dist/ +*.egg-info/ + +# Virtual environment +venv/ +env/ +ENV/ +venv.bak/ +env.bak/ + +# IDE and Editor files +.vscode/ +.idea/ + +# macOS system files +.DS_Store + +# Windows system files +Thumbs.db +ehthumbs.db + +# Logs and temp files +*.log +*.pot +*.pyc +*.swp +*.swo +*.bak + +# Jupyter Notebook checkpoints +.ipynb_checkpoints/ + +# pytest +.cache/ + +# MyPy +.mypy_cache/ + +# Coverage reports +.coverage +.coverage.* + +# Pycharm +.idea/ + +# Linux-based IDEs (like Sublime Text) +*.sublime-workspace +*.sublime-project + .env test_zone.py training.csv \ No newline at end of file diff --git a/bot.py b/bot.py index 0568822..1243cf6 100644 --- a/bot.py +++ b/bot.py @@ -1,4 +1,10 @@ -from interactions import Client, Intents, listen, ContextMenuContext, Message, message_context_menu +import logging +from logging.handlers import RotatingFileHandler +from interactions import ( + Client, + Intents, + listen, +) from interactions.api.events import MessageCreate from dotenv import load_dotenv import os @@ -7,21 +13,62 @@ import asyncio import bot_retrainer import bot_recorder +from pathlib import Path +from datetime import datetime + def load_pickle(file_name): - with open(file_name, 'rb') as file: + with open(file_name, "rb") as file: return pickle.load(file) + +# Setup for logging +log_folder = Path("logs") +log_folder.mkdir(exist_ok=True) + +# Get today's date for the log file +current_date = datetime.now().strftime("%Y-%m-%d") +log_file = log_folder / "latest.log" # Current log file as 'latest.log' +log_file_template = ( + log_folder / f"{current_date}-%d.log" +) # Rotated log files with date-based naming + +log_formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") + +# Rotating File Handler: Max 5MB per file, keeps the last 5 rotated files per date +log_handler = RotatingFileHandler( + log_file, + maxBytes=5 * 1024 * 1024, # 5MB + backupCount=5, # Keep last 5 rotated files for each day +) +log_handler.setFormatter(log_formatter) +log_handler.setLevel(logging.DEBUG) + +# Console Handler for printing logs to the console +console_handler = logging.StreamHandler() +console_handler.setFormatter(log_formatter) + +# Logger setup +logger = logging.getLogger("LisaBotLogger") +logger.setLevel(logging.DEBUG) +logger.addHandler(log_handler) +logger.addHandler(console_handler) + +logger.info("Centralized logging initialized with date-based filenames.") + # Loading the serialized components -vectorizer = load_pickle('vectorizer.pkl') -classifier = load_pickle('classifier.pkl') -encoded_to_string = load_pickle('encoded_to_string.pkl') +vectorizer = load_pickle("vectorizer.pkl") +classifier = load_pickle("classifier.pkl") +encoded_to_string = load_pickle("encoded_to_string.pkl") tfidf_transformer = "" + def get_emoji_back(encoded_number): return encoded_to_string.get(encoded_number, "Unknown") + def predict_emoji(text, classifier, vectorizer, tfidf_transformer, threshold=0.1): + logger.debug(f"Predicting emojis for text: {text}") # Preprocess and vectorize the input text text_vectorized = vectorizer.transform([text]) @@ -36,19 +83,36 @@ def predict_emoji(text, classifier, vectorizer, tfidf_transformer, threshold=0.1 # Decode the emojis emojis = [get_emoji_back(index) for index in high_prob_indices] - + + logger.debug(f"Predicted emojis: {emojis}") return emojis + def answer_question(message): - #first check if message is a question directed at lisabot - if message.strip().endswith('?'): + # first check if message is a question directed at lisabot + if message.strip().endswith("?"): if len(message) > 1: likert_scale = { - "strong_agree": ["YES", "YESSSS", "Absolutely!"], - "agree": ["yes", "yessir", "ya", "shut up yes", "https://tenor.com/view/kkekekekekkeke-kk-gif-27232250"], - "neutral": ["I actually dont know", "um duh", "period", "not rn", "lol", "I wish I could tell you but I don't want to", "AYO????", "https://cdn.discordapp.com/attachments/1113266262345273428/1213985020901851146/IMG_7578.png?ex=65f776a7&is=65e501a7&hm=20b5210081b1362c97ca5a5fd5dd02dd33f15b2da783cc44aab2fa1a670d6878&"], - "disagree": ["nope", "ewww no", "naurrr", "joever"], - "strong_disagree": ["WHAT NO", "NO", "Absolutely not!!!"] + "strong_agree": ["YES", "YESSSS", "Absolutely!"], + "agree": [ + "yes", + "yessir", + "ya", + "shut up yes", + "https://tenor.com/view/kkekekekekkeke-kk-gif-27232250", + ], + "neutral": [ + "I actually dont know", + "um duh", + "period", + "not rn", + "lol", + "I wish I could tell you but I don't want to", + "AYO????", + "https://cdn.discordapp.com/attachments/1113266262345273428/1213985020901851146/IMG_7578.png?ex=65f776a7&is=65e501a7&hm=20b5210081b1362c97ca5a5fd5dd02dd33f15b2da783cc44aab2fa1a670d6878&", + ], + "disagree": ["nope", "ewww no", "naurrr", "joever"], + "strong_disagree": ["WHAT NO", "NO", "Absolutely not!!!"], } random_opinion = random.choice(list(likert_scale.keys())) @@ -58,27 +122,30 @@ def answer_question(message): else: return "?" + # put canned responses def response_to(message): reply = "" + message = message.lower() + keywords = { - "erm actually": lambda: "https://tenor.com/view/nerd-dog-nerd-dog-gif-nerd-dog-alen-orbanic-gif-15562966513664309472", - "lisa burger": lambda: "https://media.discordapp.net/attachments/1113266262345273428/1187365137598922802/imageedit_9_9053779888.png?ex=65969ef4&is=658429f4&hm=8ea4ed39282ce942d2556cced752afcc23353607d02a60e7b5119a4ac9c8e43f&=&format=webp&quality=lossless&width=462&height=462", - "in my opinion": lambda: "https://cdn.discordapp.com/attachments/1168400523104358442/1193073645715734578/y147cc9pwqac1.png?ex=65ab636b&is=6598ee6b&hm=c081a2716fbb8415c6a8be0d8ad8fa24fbfa639a5d97d0d673b15c65e2b984ca&", + "erm actually": lambda: "https://tenor.com/view/nerd-dog-nerd-dog-gif-nerd-dog-alen-orbanic-gif-15562966513664309472", + "lisa burger": lambda: "https://media.discordapp.net/attachments/1113266262345273428/1187365137598922802/imageedit_9_9053779888.png?ex=65969ef4&is=658429f4&hm=8ea4ed39282ce942d2556cced752afcc23353607d02a60e7b5119a4ac9c8e43f&=&format=webp&quality=lossless&width=462&height=462", + "in my opinion": lambda: "https://cdn.discordapp.com/attachments/1168400523104358442/1193073645715734578/y147cc9pwqac1.png?ex=65ab636b&is=6598ee6b&hm=c081a2716fbb8415c6a8be0d8ad8fa24fbfa639a5d97d0d673b15c65e2b984ca&", "are you the real lisa?": lambda: "I am the real Lisa", - "fortnite blake": lambda: "https://tenor.com/view/fortnite-fish-guy-fortnite-wow-gif-27449064", - "ramesh": lambda: "https://tenor.com/view/sleeping-sleep-dog-dawg-eeper-gif-5083970977419902566", + "fortnite blake": lambda: "https://tenor.com/view/fortnite-fish-guy-fortnite-wow-gif-27449064", + "ramesh": lambda: "https://tenor.com/view/sleeping-sleep-dog-dawg-eeper-gif-5083970977419902566", "too powerful": lambda: "https://cdn.discordapp.com/emojis/853892024879808513.gif?size=128&quality=lossless", - # add here as required } for keyword, action in keywords.items(): - if keyword in message: + if keyword.lower() in message: reply = action() return reply + load_dotenv() TOKEN = os.getenv("TOKEN") @@ -90,32 +157,39 @@ def response_to(message): # count of how many replies the bot has before it stops. count = 10 + async def run_every_2_hours(): global count, vectorizer, tfidf_transformer, classifier, encoded_to_string while True: count += 3 + logger.info("Retraining bot...") + vectorizer, tfidf_transformer, classifier, encoded_to_string = ( + bot_retrainer.retrain_bot() + ) + logger.info("Bot retrained successfully.") + await asyncio.sleep(3 * 60 * 60) - vectorizer, tfidf_transformer, classifier, encoded_to_string = bot_retrainer.retrain_bot() - - await asyncio.sleep(3 * 60 * 60) # 3 hours in seconds @listen() async def on_ready(): - print(f"This bot is owned by {bot.owner}.") + logger.info("Bot is ready and connected.") asyncio.create_task(run_every_2_hours()) + @listen() async def on_message_create(event: MessageCreate): global count msg = event.message guild_emojis = await msg.guild.fetch_all_custom_emojis() + logger.debug(f"Message received: {msg.content} by {msg.author}") if msg.author.bot: # Check if the message is from a bot + logger.debug("Message is from a bot. Ignoring.") return - #lisa38 + # lisa38 if msg.author == "lisa38": - #print("lisa spoke") + # print("lisa spoke") bot_recorder.record_msg(msg, guild_emojis) # follow along if everyone is posting the same thing. @@ -124,6 +198,7 @@ async def on_message_create(event: MessageCreate): if isinstance(previous_msg, str): previous_msg = msg if msg.content == previous_msg.content and msg.author != previous_msg.author: + logger.info(f"Echoing message: {msg.content}") await msg.channel.send(msg.content) previous_msg.content = "lisabot" return @@ -131,60 +206,57 @@ async def on_message_create(event: MessageCreate): likert_answer = "" - #checks for trigger word and sends hardcoded reply + # checks for trigger word and sends hardcoded reply trigger = response_to(msg.content) if trigger != "": + logger.info(f"Triggered response for keyword. Replying with: {trigger}") await msg.reply(trigger) return - - # Check if the bot is mentioned + # checks if the bot is mentioned bot_mentioned = ( - f"@{bot.user.id}" in msg.content or - f"<@{bot.user.id}>" in msg.content + f"@{bot.user.id}" in msg.content or f"<@{bot.user.id}>" in msg.content ) if bot_mentioned: - #gets rid of bot's name before running through the classifier + # gets rid of bot's name before running through the classifier text = msg.content.replace(f"<@{bot.user.id}>", "").strip() - - #wakes the bot up for a bit - count = count+3 - - #now check if the message is a question for lisa + # wakes the bot for a bit + count += 3 likert_answer = answer_question(text) - - else: + else: text = msg.content text = str(msg.author)[1:] + " " + text # Determine whether to skip the count and random chance check - skip_check = bot_mentioned or (count > 0 and random.randint(1, 15) == 1) - + skip_check = bot_mentioned or (count > 0 and random.randint(1, 30) == 1) if not skip_check: - return #print(f"Check not passed. Exiting function. {bot_mentioned}, {count}") - - emoji_list = predict_emoji(text, classifier, vectorizer, tfidf_transformer,threshold=0.1) + logger.debug("Check not passed. Exiting function.") + return # print(f"Check not passed. Exiting function. {bot_mentioned}, {count}") - if emoji_list: # This is equivalent to checking if len(emoji_list) > 0 + emoji_list = predict_emoji( + text, classifier, vectorizer, tfidf_transformer, threshold=0.1 + ) + if emoji_list: emojis_to_send = "" - for emoji_name in emoji_list: - for guild_emoji in guild_emojis: - if guild_emoji.name == emoji_name.replace(":",""): + for guild_emoji in await msg.guild.fetch_all_custom_emojis(): + if guild_emoji.name == emoji_name.replace(":", ""): emojis_to_send += str(guild_emoji) if emojis_to_send or likert_answer: + response = likert_answer + " " + emojis_to_send if random.randint(1, 2) == 1: - await msg.reply(likert_answer + " " + emojis_to_send) + logger.info(f"Replying to message: {response}") + await msg.reply(response) else: - await msg.channel.send(likert_answer + " " + emojis_to_send) + logger.info(f"Sending message: {response}") + await msg.channel.send(response) else: - r = random.randint(1, 4) - if r == 1: - await msg.channel.send("HUH") - elif r == 2: - await msg.channel.send("??") + fallback_response = random.choice(["HUH", "??"]) + logger.info(f"Fallback response: {fallback_response}") + await msg.channel.send(fallback_response) + + count -= 1 - count = count-1 bot.start(TOKEN) diff --git a/bot_recorder.py b/bot_recorder.py index 546c802..2a15830 100644 --- a/bot_recorder.py +++ b/bot_recorder.py @@ -1,58 +1,75 @@ -#TODO filter message:check if emoji exists. is it an emoji found within the guild? if not discard. -#TODO filter message pt2. :figure out what the reply is, find author of reply. -#TODO record message in training.csv - import re import emoji +import csv +import os +import logging -def is_emoji_name(text): - return text in emoji.UNICODE_EMOJI_ENGLISH - -def record_msg(msg, guild_emojis): - print("record_msg()") +logger = logging.getLogger("LisaBotLogger") - if not emoji_check(msg, guild_emojis): - return - filtered_message = filter_message(msg) +def is_emoji_name(text): + # Check if the text is a valid standard emoji. + return text in emoji.UNICODE_EMOJI_ENGLISH - with open('training.csv', 'a') as file: - file.write(filtered_message + '\n') - return -# return true if emoji exists in message and if emoji is found within guild -# need to also check for if it's a normal emoji. def emoji_check(msg, guild_emojis): + """Check if emoji exists in the message and if it belongs to the guild or is a standard emoji.""" + logger.debug(f"Checking emoji existence in message: {msg.content}") discord_emoji_pattern = re.compile(r":[a-zA-Z0-9_]+:") - matches = discord_emoji_pattern.findall(msg.content) + if matches: match = matches[0][1:-1] # Remove the colons - - if match: - # Check against guild emojis - for guild_emoji in guild_emojis: - if guild_emoji.name == match: - return True - - # Check against standard emojis - if is_emoji_name(match): + for guild_emoji in guild_emojis: + if guild_emoji.name == match: + logger.debug(f"Found matching guild emoji: {match}") return True + if is_emoji_name(match): + logger.debug(f"Found matching standard emoji: {match}") + return True + + logger.debug("No matching emoji found.") return False -# return a line of csv's containing author, message, and lisa's reply. -# author,original_message,reply_emojis + def filter_message(msg): + # Filter the message to capture author, original message, and emoji reply. + logger.debug("Filtering message for recording.") filtered_message = "" - author = msg.get_referenced_message.author - original_message = msg.get_referenced_message.message + # Check if the message is a reply + if msg.get_referenced_message: + author = msg.get_referenced_message.author.display_name + original_message = msg.get_referenced_message.content + else: + # Fallback for non-replied messages + author = msg.author.display_name + original_message = msg.content + # Find emoji in the message content discord_emoji_pattern = re.compile(r":[a-zA-Z0-9_]+:") matches = discord_emoji_pattern.findall(msg.content) - reply_emoji = matches[0] + reply_emoji = matches[0] if matches else "None" filtered_message = ",".join([author, original_message, reply_emoji]) - + logger.debug(f"Filtered message: {filtered_message}") return filtered_message + + +def record_msg(msg, guild_emojis): + # Record the message if it contains valid emojis. + logger.info("Recording a message.") + if not emoji_check(msg, guild_emojis): + logger.debug("No valid emoji in message. Skipping.") + return + + filtered_message = filter_message(msg) + file_exists = os.path.isfile("training.csv") + with open("training.csv", "a", newline="") as file: + writer = csv.writer(file) + if not file_exists: + writer.writerow(["author", "original_message", "reply_emoji"]) + writer.writerow(filtered_message.split(",")) + + logger.info("Message recorded successfully.") diff --git a/bot_retrainer.py b/bot_retrainer.py index 15692c2..d4da6e2 100644 --- a/bot_retrainer.py +++ b/bot_retrainer.py @@ -2,68 +2,97 @@ from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer from sklearn.tree import DecisionTreeClassifier from imblearn.over_sampling import SMOTE +import logging + +logger = logging.getLogger("LisaBotLogger") + def retrain_classifier(df): - X = df['author_and_message'] - y = df['encoded'].tolist() + # Retrain the classifier based on the training data. + logger.info("Starting classifier retraining.") + + X = df["author_and_message"] + y = df["encoded"].tolist() - # Count Vectorise + logger.debug("Performing Count Vectorization.") vectorizer = CountVectorizer() X_counts = vectorizer.fit_transform(X) # TFIDF Convert + logger.debug("Applying TFIDF transformation.") tfidf_transformer = TfidfTransformer() X_tfidf = tfidf_transformer.fit_transform(X_counts) + logger.debug("Applying SMOTE for class imbalance.") smote = SMOTE(random_state=42, k_neighbors=4) X_train, y_train = smote.fit_resample(X_tfidf, y) + logger.debug("Training Decision Tree Classifier.") dt_classifier = DecisionTreeClassifier( - class_weight='balanced', - splitter='random', - max_features='sqrt' + class_weight="balanced", splitter="random", max_features="sqrt" ) dt_classifier.fit(X_train, y_train) + logger.info("Classifier retraining completed.") return vectorizer, tfidf_transformer, dt_classifier + def update_encoder(df): - # Encoding the emoji column to numeric values - df['encoded'] = pd.factorize(df['reply_emojis'])[0] + # Encode the emoji column to numeric values. + logger.info("Updating encoder for emoji column.") - emojis = pd.factorize(df['reply_emojis'])[1] + df["encoded"] = pd.factorize(df["reply_emojis"])[0] + emojis = pd.factorize(df["reply_emojis"])[1] - # creating a function to get the emoji back from the numbers. + # creating a function to get the emoji back from the numbers. encoded_to_string = {i: string for i, string in enumerate(emojis)} + + logger.debug(f"Encoded-to-string mapping created: {encoded_to_string}") return encoded_to_string -def filter_df(df): - """ - Filter out rows from the DataFrame where the emoji reaction is used less than 5 times. - Args: - df (pd.DataFrame): A DataFrame with columns 'author', 'original_message', 'reply_emojis', and 'encoded'. +def filter_df(df): + # Filter out rows where the emoji reaction appears fewer than 5 times. + logger.info("Filtering training data based on emoji frequency.") - Returns: - pd.DataFrame: A DataFrame filtered based on the emoji reaction count. - """ - df['author_and_message'] = df['author'] + ' ' + df['original_message'] + df["author_and_message"] = df["author"] + " " + df["original_message"] - # Count the occurrences of each emoji - emoji_counts = df['reply_emojis'].value_counts() + # Count occurrences of each emoji + emoji_counts = df["reply_emojis"].value_counts() # Filter to keep only rows where the emoji reaction count is 5 or more - filtered_df = df[df['reply_emojis'].map(emoji_counts) >= 5] + filtered_df = df[df["reply_emojis"].map(emoji_counts) >= 5] + logger.debug(f"Filtered data size: {len(filtered_df)} rows remaining.") return filtered_df + def retrain_bot(): - df = pd.read_csv('training.csv') + # Retrain the bot's components using the CSV file. + logger.info("Starting bot retraining process.") + + try: + df = pd.read_csv("training.csv") + logger.debug(f"Loaded training data with {len(df)} rows.") + except FileNotFoundError: + logger.error("Training CSV file not found. Skipping retraining.") + return None, None, None, None + + # Ensure there is enough data for retraining + if df.empty: + logger.warning("Training data is empty, skipping retraining.") + return None, None, None, None df = filter_df(df) + # Ensure filtered data is not empty + if df.empty: + logger.warning("Filtered training data is empty, skipping retraining.") + return None, None, None, None + encoded_to_string = update_encoder(df) vectorizer, tfidf_transformer, classifier = retrain_classifier(df) + logger.info("Bot retraining completed successfully.") return vectorizer, tfidf_transformer, classifier, encoded_to_string