diff --git a/ANSWERS_UTILS.md b/ANSWERS_UTILS.md new file mode 100644 index 000000000..837129414 --- /dev/null +++ b/ANSWERS_UTILS.md @@ -0,0 +1,54 @@ +# Answer Editor and Cleaner + +This project consists of two main Python scripts: `answer_editor.py` and `cleanse_answers.py`. These scripts work together to manage and clean a set of questions and answers stored in JSON format. + +## answer_editor.py + +This script is a Flask web application that provides a user interface for viewing and editing a set of questions and answers. + +### Key Features: +- Uses Flask and Flask-Bootstrap for the web interface +- Reads and writes data to a JSON file (`answers.json`) +- Allows viewing all questions and answers +- Supports editing answers +- Handles both radio button and text input answers +- Allows deletion of individual question-answer pairs + +### How it works: +1. The main route (`/`) displays all questions and answers when accessed via GET request +2. When a POST request is made (i.e., when the form is submitted), it updates the answers in the JSON file +3. It uses a template (`index.html`, not shown in the provided code) to render the web interface + +## cleanse_answers.py + +This script is designed to clean and sanitize the questions and answers stored in the JSON file. + +### Key Features: +- Removes duplicate words in questions +- Converts text to lowercase +- Removes common suffixes and unnecessary characters +- Eliminates non-ASCII characters +- Removes duplicate questions + +### How it works: +1. Reads the input JSON file (`answers.json`) +2. Sanitizes each question using the `sanitize_text` function +3. Removes duplicate questions +4. Writes the cleansed data to a new JSON file (`cleansed_answers.json`) + +## Usage + +1. Run `answer_editor.py` to start the web application for viewing and editing answers: + ``` + python answer_editor.py + ``` + Then open a web browser and navigate to `http://localhost:5000` + +2. 
After editing answers, run `cleanse_answers.py` to clean the data: + ``` + python cleanse_answers.py + ``` + +This will create a new file `cleansed_answers.json` with the sanitized data. + +Note: Make sure you have Flask and Flask-Bootstrap installed (`pip install flask flask-bootstrap`) before running `answer_editor.py`. (They are included in the `requirements.txt` file.) \ No newline at end of file diff --git a/README.md b/README.md index 986a9683d..d8f181534 100644 --- a/README.md +++ b/README.md @@ -153,8 +153,25 @@ Auto_Jobs_Applier_AIHawk steps in as a game-changing solution to these challenge pip install -r requirements.txt ``` +6. **Copy example files into data_folder for configuration:** + ```bash + cp data_folder_example/*.yaml data_folder/ + ``` + ## Configuration + +### 0. Data Folder + +The `data_folder` directory contains all the files necessary for the bot to operate. This folder should be structured as follows: + + ```bash + data_folder/ + ├── config.yaml + ├── plain_text_resume.yaml + └── secrets.yaml + ``` + Examples of each file are provided in the `data_folder_example` directory. ### 1. secrets.yaml This file contains sensitive information. Never share or commit this file to version control. @@ -624,6 +641,10 @@ yaml.scanner.ScannerError: while scanning a simple key For further assistance, please create an issue on the [GitHub repository](https://github.com/feder-cr/Auto_Jobs_Applier_AIHawk/issues) with detailed information about your problem, including error messages and your configuration (with sensitive information removed). +**Answer Editor and Cleaner** + +See ANSWERS_UTILS.md for more information on the Answer Editor and Cleaner. + ## Setup Documents ### Ollama & Gemini Setup @@ -677,3 +698,4 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file ## Disclaimer This tool, Auto_Jobs_Applier_AIHawk, is intended for educational purposes only. 
from flask import Flask, render_template, request, jsonify, redirect, url_for
import json
import os
from pathlib import Path
from flask_bootstrap import Bootstrap

app = Flask(__name__)
Bootstrap(app)

# The answers file lives next to this script.
JSON_FILE = Path(__file__).parent / 'answers.json'


def _load_answers():
    """Return the parsed contents of JSON_FILE, or [] when the file is missing.

    The parsed value is returned as-is; callers must check that it is a list
    before treating it as the list of question/answer entries.
    """
    if not JSON_FILE.exists():
        return []
    with open(JSON_FILE, 'r', encoding='utf-8') as f:
        return json.load(f)


@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the editor page on GET; delegate to update() on POST."""
    if request.method == 'POST':
        return update()

    data = _load_answers()
    # Anything other than a list cannot be iterated by the template as
    # entries, so fall back to an empty page instead of failing.
    return render_template('index.html', data=data if isinstance(data, list) else [])


def update():
    """Apply the submitted form to answers.json and redirect to the index page.

    For the entry at position ``i`` the form may contain:
      * ``delete_{i}``       -- drop the entry;
      * ``answer_{i}_radio`` -- new answer when the entry's type is 'radio';
      * ``answer_{i}``       -- new answer for every other type.
    An entry whose field is absent from the form keeps its current answer.
    """
    data = _load_answers()
    if not isinstance(data, list):
        # Unexpected file shape: leave the file untouched rather than crash
        # while iterating or overwrite it with an empty list.
        return redirect(url_for('index'))

    updated_data = []
    for i, item in enumerate(data):
        if f'delete_{i}' in request.form:
            continue
        if isinstance(item, dict):
            field = f'answer_{i}_radio' if item.get('type') == 'radio' else f'answer_{i}'
            # .get() keeps entries that lack an 'answer' key editable
            # instead of raising KeyError.
            item['answer'] = request.form.get(field, item.get('answer', ''))
        updated_data.append(item)

    with open(JSON_FILE, 'w', encoding='utf-8') as f:
        json.dump(updated_data, f, indent=2)

    return redirect(url_for('index'))


if __name__ == '__main__':
    # NOTE(review): debug=True enables the Werkzeug interactive debugger,
    # which allows arbitrary code execution from the browser. Keep this to
    # local use only and never expose this server on a public interface.
    app.run(debug=True)
import json
import re

# Python's ``re`` module has no POSIX bracket classes such as ``[:ascii:]``;
# the ASCII range has to be spelled out explicitly.
_NON_ASCII_RE = re.compile(r'[^\x00-\x7F]')


def sanitize_text(text: str) -> str:
    """Normalize a question string so equivalent questions compare equal.

    Steps, in order:
      1. strip trailing whitespace and collapse whitespace runs to one space;
      2. remove ``?``, ``"`` and backslash characters;
      3. lowercase and drop repeated words, keeping the first occurrence;
      4. remove every ``required`` marker (optionally parenthesized);
      5. remove one trailing ``yes/no``, ``yes`` or ``no`` marker
         (optionally parenthesized);
      6. remove all non-ASCII characters.

    Args:
        text: The raw question text.

    Returns:
        The sanitized, lowercase, ASCII-only question text.
    """
    text = re.sub(r'\s+', ' ', text.rstrip())
    text = text.replace('?', '').replace('"', '').replace('\\', '')

    # dict.fromkeys de-duplicates while preserving first-seen order, in O(n).
    text = ' '.join(dict.fromkeys(text.lower().split()))

    # Remove common suffixes
    text = re.sub(r'\s*\(?required\)?', '', text, flags=re.IGNORECASE)
    text = re.sub(r'(\s*\(?yes\/no\)?|\s*\(?yes\)?|\s*\(?no\)?|\?)$', '', text, flags=re.IGNORECASE)

    return _NON_ASCII_RE.sub('', text)


def cleanse_answers_json(input_file: str, output_file: str):
    """Write a sanitized, de-duplicated copy of an answers JSON file.

    The input must be a JSON list of objects with ``type``, ``question`` and
    ``answer`` keys. Each question is passed through ``sanitize_text``; when
    several entries sanitize to the same question only the first is kept.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write (overwritten if present).

    Raises:
        KeyError: If an entry lacks ``type``, ``question`` or ``answer``.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    cleansed_data = []
    seen_questions = set()

    for item in data:
        sanitized_question = sanitize_text(item['question'])
        if sanitized_question in seen_questions:
            continue
        seen_questions.add(sanitized_question)
        cleansed_data.append({
            'type': item['type'],
            'question': sanitized_question,
            'answer': item['answer'],
        })

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(cleansed_data, f, indent=4)


if __name__ == "__main__":
    input_file = "answers.json"
    output_file = "cleansed_answers.json"
    cleanse_answers_json(input_file, output_file)
    print(f"Cleansed answers have been saved to {output_file}")
100 - -company_blacklist: - - wayfair - - Crossover - -title_blacklist: - - word1 - - word2 - -job_applicants_threshold: - min_applicants: 0 - max_applicants: 30 - -llm_model_type: openai -llm_model: 'gpt-4o-mini' -# llm_api_url: https://api.pawan.krd/cosmosrp/v1' diff --git a/data_folder/plain_text_resume.yaml b/data_folder/plain_text_resume.yaml deleted file mode 100644 index 7bf216da2..000000000 --- a/data_folder/plain_text_resume.yaml +++ /dev/null @@ -1,129 +0,0 @@ -personal_information: - name: "[Your Name]" - surname: "[Your Surname]" - date_of_birth: "[Your Date of Birth]" - country: "[Your Country]" - city: "[Your City]" - address: "[Your Address]" - phone_prefix: "[Your Phone Prefix]" - phone: "[Your Phone Number]" - email: "[Your Email Address]" - github: "[Your GitHub Profile URL]" - linkedin: "[Your LinkedIn Profile URL]" - -education_details: - - education_level: "[Your Education Level]" - institution: "[Your Institution]" - field_of_study: "[Your Field of Study]" - final_evaluation_grade: "[Your Final Evaluation Grade]" - start_date: "[Start Date]" - year_of_completion: "[Year of Completion]" - exam: - exam_name_1: "[Grade]" - exam_name_2: "[Grade]" - exam_name_3: "[Grade]" - exam_name_4: "[Grade]" - exam_name_5: "[Grade]" - exam_name_6: "[Grade]" - -experience_details: - - position: "[Your Position]" - company: "[Company Name]" - employment_period: "[Employment Period]" - location: "[Location]" - industry: "[Industry]" - key_responsibilities: - - responsibility_1: "[Responsibility Description]" - - responsibility_2: "[Responsibility Description]" - - responsibility_3: "[Responsibility Description]" - skills_acquired: - - "[Skill]" - - "[Skill]" - - "[Skill]" - - - position: "[Your Position]" - company: "[Company Name]" - employment_period: "[Employment Period]" - location: "[Location]" - industry: "[Industry]" - key_responsibilities: - - responsibility_1: "[Responsibility Description]" - - responsibility_2: "[Responsibility Description]" - - 
responsibility_3: "[Responsibility Description]" - skills_acquired: - - "[Skill]" - - "[Skill]" - - "[Skill]" - -projects: - - name: "[Project Name]" - description: "[Project Description]" - link: "[Project Link]" - - - name: "[Project Name]" - description: "[Project Description]" - link: "[Project Link]" - -achievements: - - name: "[Achievement Name]" - description: "[Achievement Description]" - - name: "[Achievement Name]" - description: "[Achievement Description]" - -certifications: - - name: "[Certification Name]" - description: "[Certification Description]" - - name: "[Certification Name]" - description: "[Certification Description]" - -languages: - - language: "[Language]" - proficiency: "[Proficiency Level]" - - language: "[Language]" - proficiency: "[Proficiency Level]" - -interests: - - "[Interest]" - - "[Interest]" - - "[Interest]" - -availability: - notice_period: "[Notice Period]" - -salary_expectations: - salary_range_usd: "[Salary Range]" - -self_identification: - gender: "[Gender]" - pronouns: "[Pronouns]" - veteran: "[Yes/No]" - disability: "[Yes/No]" - ethnicity: "[Ethnicity]" - - -legal_authorization: - eu_work_authorization: "[Yes/No]" - us_work_authorization: "[Yes/No]" - requires_us_visa: "[Yes/No]" - requires_us_sponsorship: "[Yes/No]" - requires_eu_visa: "[Yes/No]" - legally_allowed_to_work_in_eu: "[Yes/No]" - legally_allowed_to_work_in_us: "[Yes/No]" - requires_eu_sponsorship: "[Yes/No]" - canada_work_authorization: "[Yes/No]" - requires_canada_visa: "[Yes/No]" - legally_allowed_to_work_in_canada: "[Yes/No]" - requires_canada_sponsorship: "[Yes/No]" - uk_work_authorization: "[Yes/No]" - requires_uk_visa: "[Yes/No]" - legally_allowed_to_work_in_uk: "[Yes/No]" - requires_uk_sponsorship: "[Yes/No]" - - -work_preferences: - remote_work: "[Yes/No]" - in_person_work: "[Yes/No]" - open_to_relocation: "[Yes/No]" - willing_to_complete_assessments: "[Yes/No]" - willing_to_undergo_drug_tests: "[Yes/No]" - willing_to_undergo_background_checks: 
"[Yes/No]" diff --git a/data_folder/secrets.yaml b/data_folder/secrets.yaml deleted file mode 100644 index 62b4a747c..000000000 --- a/data_folder/secrets.yaml +++ /dev/null @@ -1 +0,0 @@ -llm_api_key: 'sk-11KRr4uuTwpRGfeRTfj1T9BlbkFJjP8QTrswHU1yGruru2FR' diff --git a/data_folder_example/plain_text_resume.yaml b/data_folder_example/plain_text_resume.yaml index 4d7f87cef..7bf216da2 100644 --- a/data_folder_example/plain_text_resume.yaml +++ b/data_folder_example/plain_text_resume.yaml @@ -1,138 +1,129 @@ personal_information: - name: "solid" - surname: "snake" - date_of_birth: "12/01/1861" - country: "Ireland" - city: "Dublin" - address: "12 Fox road" - phone_prefix: "+1" - phone: "7819117091" - email: "hi@gmail.com" - github: "https://github.com/lol" - linkedin: "https://www.linkedin.com/in/thezucc/" - + name: "[Your Name]" + surname: "[Your Surname]" + date_of_birth: "[Your Date of Birth]" + country: "[Your Country]" + city: "[Your City]" + address: "[Your Address]" + phone_prefix: "[Your Phone Prefix]" + phone: "[Your Phone Number]" + email: "[Your Email Address]" + github: "[Your GitHub Profile URL]" + linkedin: "[Your LinkedIn Profile URL]" education_details: - - education_level: "Master's Degree" - institution: "Bob academy" - field_of_study: "Bobs Engineering" - final_evaluation_grade: "4.0" - year_of_completion: "2023" - start_date: "2022" - additional_info: - exam: - Algorithms: "A" - Linear Algebra: "A" - Database Systems: "A" - Operating Systems: "A-" - Web Development: "A" + - education_level: "[Your Education Level]" + institution: "[Your Institution]" + field_of_study: "[Your Field of Study]" + final_evaluation_grade: "[Your Final Evaluation Grade]" + start_date: "[Start Date]" + year_of_completion: "[Year of Completion]" + exam: + exam_name_1: "[Grade]" + exam_name_2: "[Grade]" + exam_name_3: "[Grade]" + exam_name_4: "[Grade]" + exam_name_5: "[Grade]" + exam_name_6: "[Grade]" experience_details: - - position: "X" - company: "Y." 
- employment_period: "06/2019 - Present" - location: "San Francisco, CA" - industry: "Technology" + - position: "[Your Position]" + company: "[Company Name]" + employment_period: "[Employment Period]" + location: "[Location]" + industry: "[Industry]" key_responsibilities: - - responsibility: "Developed web applications using React and Node.js" - - responsibility: "Collaborated with cross-functional teams to design and implement new features" - - responsibility: "Troubleshot and resolved complex software issues" + - responsibility_1: "[Responsibility Description]" + - responsibility_2: "[Responsibility Description]" + - responsibility_3: "[Responsibility Description]" skills_acquired: - - "React" - - "Node.js" - - "Software Troubleshooting" - - position: "Software Developer" - company: "Innovatech" - employment_period: "06/2015 - 12/2017" - location: "Milan, Italy" - industry: "Technology" - key_responsibilities: - - responsibility: "Developed and maintained web applications using modern technologies" - - responsibility: "Collaborated with UX/UI designers to enhance user experience" - - responsibility: "Implemented automated testing procedures to ensure code quality" - skills_acquired: - - "Web development" - - "User experience design" - - "Automated testing" - - position: "Junior Developer" - company: "StartUp Hub" - employment_period: "01/2014 - 05/2015" - location: "Florence, Italy" - industry: "Startups" + - "[Skill]" + - "[Skill]" + - "[Skill]" + + - position: "[Your Position]" + company: "[Company Name]" + employment_period: "[Employment Period]" + location: "[Location]" + industry: "[Industry]" key_responsibilities: - - responsibility: "Assisted in the development of mobile applications and web platforms" - - responsibility: "Participated in code reviews and contributed to software design discussions" - - responsibility: "Resolved bugs and implemented feature enhancements" + - responsibility_1: "[Responsibility Description]" + - responsibility_2: 
"[Responsibility Description]" + - responsibility_3: "[Responsibility Description]" skills_acquired: - - "Mobile app development" - - "Code reviews" - - "Bug fixing" -projects: - - name: "X" - description: "Y blah blah blah " - link: "https://github.com/haveagoodday" + - "[Skill]" + - "[Skill]" + - "[Skill]" +projects: + - name: "[Project Name]" + description: "[Project Description]" + link: "[Project Link]" + - name: "[Project Name]" + description: "[Project Description]" + link: "[Project Link]" achievements: - - name: "Employee of the Month" - description: "Recognized for exceptional performance and contributions to the team." - - name: "Hackathon Winner" - description: "Won first place in a national hackathon competition." + - name: "[Achievement Name]" + description: "[Achievement Description]" + - name: "[Achievement Name]" + description: "[Achievement Description]" certifications: - #- "Certified Scrum Master" - #- "AWS Certified Solutions Architect" + - name: "[Certification Name]" + description: "[Certification Description]" + - name: "[Certification Name]" + description: "[Certification Description]" languages: - - language: "English" - proficiency: "Fluent" - - language: "Spanish" - proficiency: "Intermediate" + - language: "[Language]" + proficiency: "[Proficiency Level]" + - language: "[Language]" + proficiency: "[Proficiency Level]" interests: - - "Machine Learning" - - "Cybersecurity" - - "Open Source Projects" - - "Digital Marketing" - - "Entrepreneurship" + - "[Interest]" + - "[Interest]" + - "[Interest]" availability: - notice_period: "2 weeks" + notice_period: "[Notice Period]" salary_expectations: - salary_range_usd: "90000 - 110000" + salary_range_usd: "[Salary Range]" self_identification: - gender: "Female" - pronouns: "She/Her" - veteran: "No" - disability: "No" - ethnicity: "Asian" + gender: "[Gender]" + pronouns: "[Pronouns]" + veteran: "[Yes/No]" + disability: "[Yes/No]" + ethnicity: "[Ethnicity]" + legal_authorization: - 
eu_work_authorization: "Yes" - us_work_authorization: "Yes" - requires_us_visa: "No" - requires_us_sponsorship: "Yes" - requires_eu_visa: "No" - legally_allowed_to_work_in_eu: "Yes" - legally_allowed_to_work_in_us: "Yes" - requires_eu_sponsorship: "No" - canada_work_authorization: "Yes" - requires_canada_visa: "No" - legally_allowed_to_work_in_canada: "Yes" - requires_canada_sponsorship: "No" - uk_work_authorization: "Yes" - requires_uk_visa: "No" - legally_allowed_to_work_in_uk: "Yes" - requires_uk_sponsorship: "No" + eu_work_authorization: "[Yes/No]" + us_work_authorization: "[Yes/No]" + requires_us_visa: "[Yes/No]" + requires_us_sponsorship: "[Yes/No]" + requires_eu_visa: "[Yes/No]" + legally_allowed_to_work_in_eu: "[Yes/No]" + legally_allowed_to_work_in_us: "[Yes/No]" + requires_eu_sponsorship: "[Yes/No]" + canada_work_authorization: "[Yes/No]" + requires_canada_visa: "[Yes/No]" + legally_allowed_to_work_in_canada: "[Yes/No]" + requires_canada_sponsorship: "[Yes/No]" + uk_work_authorization: "[Yes/No]" + requires_uk_visa: "[Yes/No]" + legally_allowed_to_work_in_uk: "[Yes/No]" + requires_uk_sponsorship: "[Yes/No]" work_preferences: - remote_work: "Yes" - in_person_work: "Yes" - open_to_relocation: "Yes" - willing_to_complete_assessments: "Yes" - willing_to_undergo_drug_tests: "Yes" - willing_to_undergo_background_checks: "Yes" + remote_work: "[Yes/No]" + in_person_work: "[Yes/No]" + open_to_relocation: "[Yes/No]" + willing_to_complete_assessments: "[Yes/No]" + willing_to_undergo_drug_tests: "[Yes/No]" + willing_to_undergo_background_checks: "[Yes/No]" diff --git a/data_folder_example/resume_liam_murphy.txt b/data_folder_example/resume_liam_murphy.txt deleted file mode 100644 index edcac2b3b..000000000 --- a/data_folder_example/resume_liam_murphy.txt +++ /dev/null @@ -1,55 +0,0 @@ -Liam Murphy -Galway, Ireland -Email: liam.murphy@gmail.com | AIHawk: liam-murphy -GitHub: liam-murphy | Phone: +353 871234567 - -Education -Bachelor's Degree in Computer Science 
-National University of Ireland, Galway (GPA: 4/4) -Graduation Year: 2020 - -Experience -Co-Founder & Software Engineer -CryptoWave Solutions (03/2021 - Present) -Location: Ireland | Industry: Blockchain Technology - -Co-founded and led a startup specializing in app and software development with a focus on blockchain technology -Provided blockchain consultations for 10+ companies, enhancing their software capabilities with secure, decentralized solutions -Developed blockchain applications, integrated cutting-edge technology to meet client needs and drive industry innovation -Research Intern -National University of Ireland, Galway (11/2022 - 03/2023) -Location: Galway, Ireland | Industry: IoT Security Research - -Conducted in-depth research on IoT security, focusing on binary instrumentation and runtime monitoring -Performed in-depth study of the MQTT protocol and Falco -Developed multiple software components including MQTT packet analysis library, Falco adapter, and RML monitor in Prolog -Authored thesis "Binary Instrumentation for Runtime Monitoring of Internet of Things Systems Using Falco" -Software Engineer -University Hospital Galway (05/2022 - 11/2022) -Location: Galway, Ireland | Industry: Healthcare IT - -Integrated and enforced robust security protocols -Developed and maintained a critical software tool for password validation used by over 1,600 employees -Played an integral role in the hospital's cybersecurity team -Projects -JobBot -AI-driven tool to automate and personalize job applications on AIHawk, gained over 3000 stars on GitHub, improving efficiency and reducing application time -Link: JobBot - -mqtt-packet-parser -Developed a Node.js module for parsing MQTT packets, improved parsing efficiency by 40% -Link: mqtt-packet-parser - -Achievements -Winner of an Irish public competition - Won first place in a public competition with a perfect score of 70/70, securing a Software Developer position at University Hospital Galway -Galway Merit Scholarship - 
Awarded annually from 2018 to 2020 in recognition of academic excellence and contribution -GitHub Recognition - Gained over 3000 stars on GitHub with JobBot project -Certifications -C1 - -Languages -English - Native -Spanish - Professional -Interests -Full-Stack Development, Software Architecture, IoT system design and development, Artificial Intelligence, Cloud Technologies - diff --git a/data_folder_example/secrets.yaml b/data_folder_example/secrets.yaml index 781bfb946..1d5a3f6b5 100644 --- a/data_folder_example/secrets.yaml +++ b/data_folder_example/secrets.yaml @@ -1 +1,2 @@ -llm_api_key: 'sk-11KRr4uuTwpRGfeRTfj1T9BlbkFJjP8QTrswHU1yGruru2FR' \ No newline at end of file +llm_api_key: '' +# see also config for llm configuration diff --git a/requirements.txt b/requirements.txt index acd912e05..9f94c9edc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,6 +25,7 @@ regex==2024.7.24 reportlab==4.2.2 selenium==4.9.1 webdriver-manager==4.0.2 -pytest pytest-mock pytest-cov +flask +flask-bootstrap diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 000000000..2b53a6526 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,56 @@ +{% extends "bootstrap/base.html" %} +{% block title %}Answers Editor{% endblock %} + +{% block styles %} +{{super()}} + +{% endblock %} + +{% block content %} +
+

Edit Answers JSON Below

+
+ {% for item in data %} +
+ +
+ +
+ {% if item.type == 'textbox' or item.type == 'dropdown' %} + + {% elif item.type == 'numeric' %} + + {% elif item.type == 'radio' %} +
+ +
+
+ +
+ {% else %} + + {% endif %} +
+ {% if not loop.last %} +
+ {% endif %} + {% endfor %} + +
+
+{% endblock %}