diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md new file mode 100644 index 000000000..203ff8f13 --- /dev/null +++ b/.github/CODE_OF_CONDUCT.md @@ -0,0 +1,73 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, gender identity and expression, level of experience, +education, socio-economic status, nationality, personal appearance, race, +religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +- Using welcoming and inclusive language +- Being respectful of differing viewpoints and experiences +- Gracefully accepting constructive criticism +- Focusing on what is best for the community +- Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +- The use of sexualized language or imagery and unwelcome sexual attention or + advances +- Trolling, insulting/derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or electronic + address, without explicit permission +- Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at . All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. 
+ +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 000000000..5edba8fb9 --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,79 @@ +# Issue Reporting Guidelines + +Welcome to the AI Hawk Contributing Guide and Issues Tracker! To keep things organized and ensure issues are resolved quickly, please follow the guidelines below when submitting a bug report, feature request, or any other issue. + +If you have a general question or are curious about how something in Python works, please remember that [Google](https://google.com) is your friend and can answer many questions. + +This is a work in progress and you may encounter bugs. + +The employers you are applying to are not looking for candidates who need someone to hold their hand and do everything for them; they are not your parents, they are your potential bosses. They will expect you to be able to solve simple problems on your own, and the AI Hawk mods and devs expect the same of you. + +Please do not beg in the issues tracker, discussions, or chat. We are not here to give you a job; we are here to provide you with a tool so you can go out and find a job on your own. We will try to have instructions for all steps of the process, but you must read the docs, learn on your own, and understand that this is an open-source project run by volunteers. It will require you to do some work of your own. + +If you see something that needs to be documented, or some documentation which could be improved, submit a documentation request or document it yourself and submit a PR to help others understand how that part of the software functions and how to use it. + +## Before You Submit an Issue + +### 1. Search Existing Issues + +Please search through the existing open issues and closed issues to ensure your issue hasn’t already been reported. This helps avoid duplicates and allows us to focus on unresolved problems. + +### 2. Check Documentation + +Review the README and any available documentation to see if your issue is covered. + +Watch this [Intro to AI Hawk video on YouTube](https://www.youtube.com/watch?v=gdW9wogHEUM) + +Join us on [Telegram](https://t.me/AIhawkCommunity) to check with the community about issues and ask for help. If a dev, mod, contributor or other community member is available, a live conversation will likely resolve your small issues and configuration problems faster than using this issues tracker would. + +### 3. Provide Detailed Information + +If you are reporting a bug, make sure you include enough details to reproduce the issue. The more information you provide, the faster we can diagnose and fix the problem. + +## Issue Types + +### 1. Bug Reports + +Please include the following information: + +- **Description:** A clear and concise description of the problem. +- **Steps to Reproduce:** Provide detailed steps to reproduce the bug. +- **Expected Behavior:** What should have happened. +- **Actual Behavior:** What actually happened. +- **Environment Details:** Include your OS, browser version (if applicable), which LLM you are using, and any other relevant environment details. +- **Logs/Screenshots:** If applicable, attach screenshots or log outputs. + +### 2. 
Feature Requests + +For new features or improvements: + +- Clearly describe the feature you would like to see. +- Explain the problem this feature would solve or the benefit it would bring. +- If possible, provide examples or references to similar features in other tools or platforms. + +### 3. Questions/Discussions + +- If you’re unsure whether something is a bug or if you’re seeking clarification on functionality, you can ask a question. The best place to ask a question is on [Telegram](https://t.me/AIhawkCommunity). If you are asking a question on GitHub, please make sure to label your issue as a question. + +## Issue Labeling and Response Time + +We use the following labels to categorize issues: + +- **bug:** An issue where something isn't functioning as expected. +- **documentation:** Improvements or additions to project documentation. +- **duplicate:** This issue or pull request already exists elsewhere. +- **enhancement:** A request for a new feature or improvement. +- **good first issue:** A simple issue suitable for newcomers. +- **help wanted:** The issue needs extra attention or assistance. +- **invalid:** The issue is not valid or doesn't seem correct. +- **question:** Additional information or clarification is needed. +- **wontfix:** The issue will not be fixed or addressed. +- We aim to respond to issues as early as possible. Please be patient, as maintainers may have limited availability. + +## Contributing Fixes + +If you’re able to contribute a fix for an issue: + +1. Fork the repository and create a new branch for your fix. +2. Reference the issue number in your branch and pull request. +3. Submit a pull request with a detailed description of the changes and how they resolve the issue. diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 000000000..42abba60d --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github: feder-cr diff --git a/.github/ISSUE_TEMPLATE/bug-issue.yml b/.github/ISSUE_TEMPLATE/bug-issue.yml new file mode 100644 index 000000000..0e5956da2 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-issue.yml @@ -0,0 +1,90 @@ +name: Bug report +description: Report a bug or an issue that isn't working as expected. +title: "[BUG]: " +labels: ["bug"] +assignees: [] + +body: + - type: markdown + attributes: + value: | + Please fill out the following information to help us resolve the issue. + + - type: input + id: description + attributes: + label: Describe the bug + description: A clear and concise description of what the bug is. + placeholder: "Describe the bug in detail..." + + - type: textarea + id: steps + attributes: + label: Steps to reproduce + description: | + Steps to reproduce the behavior: + 1. Use branch named '...' + 2. Go to file '...' + 3. Find property named '...' + 4. Change '...' + 5. Run program using command '...' + 6. See error + placeholder: "List the steps to reproduce the bug..." + + - type: input + id: expected + attributes: + label: Expected behavior + description: What you expected to happen. + placeholder: "What was the expected result?" + + - type: input + id: actual + attributes: + label: Actual behavior + description: What actually happened instead. + placeholder: "What happened instead?" + + - type: dropdown + id: branch + attributes: + label: Branch + description: Specify the branch you were using when the bug occurred. 
+ options: + - main + - other + + - type: input + id: otherBranch + attributes: + label: Branch name + description: If you selected the `other` branch in the previous question, what is the branch name? + placeholder: "what-is-the-name-of-the-branch-you-were-using" + + - type: input + id: pythonVersion + attributes: + label: Python version + description: Specify the version of Python you were using when the bug occurred. + placeholder: "e.g., 3.12.5(64b)" + + - type: input + id: llm + attributes: + label: LLM Used + description: Specify the LLM provider you were using when the bug occurred. + placeholder: "e.g., ChatGPT" + + - type: input + id: model + attributes: + label: Model used + description: Specify the LLM model you were using when the bug occurred. + placeholder: "e.g., GPT-4o-mini" + + - type: textarea + id: additional + attributes: + label: Additional context + description: Add any other context about the problem here. + placeholder: "Any additional information..." diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000..07b1ca6e1 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,9 @@ +blank_issues_enabled: true +contact_links: + - name: Questions + url: https://t.me/AIhawkCommunity + about: You can join the discussions on Telegram. + - name: New issue + url: >- + https://github.com/feder-cr/Auto_Jobs_Applier_AIHawk/blob/v3/.github/CONTRIBUTING.md + about: "Before opening a new issue, please make sure to read CONTRIBUTING.md" diff --git a/.github/ISSUE_TEMPLATE/documentation-issue.yml b/.github/ISSUE_TEMPLATE/documentation-issue.yml new file mode 100644 index 000000000..14f63a447 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/documentation-issue.yml @@ -0,0 +1,39 @@ +name: Documentation request +description: Suggest improvements or additions to the project's documentation. +title: "[DOCS]: " +labels: ["documentation"] +assignees: [] + +body: + - type: markdown + attributes: + value: | + Thanks for helping to improve the project's documentation! Please provide the following details to ensure your request is clear. + + - type: input + id: doc_section + attributes: + label: Affected documentation section + description: Specify which part of the documentation needs improvement or addition. + placeholder: "e.g., Installation Guide, API Reference..." + + - type: textarea + id: description + attributes: + label: Documentation improvement description + description: Describe the specific improvements or additions you suggest. + placeholder: "Explain what changes you propose and why..." + + - type: input + id: reason + attributes: + label: Why is this change necessary? + description: Explain why the documentation needs to be updated or expanded. + placeholder: "Describe the issue or gap in the documentation..." + + - type: input + id: additional + attributes: + label: Additional context + description: Add any other context, such as related documentation, external resources, or screenshots. + placeholder: "Add any other supporting information..." diff --git a/.github/ISSUE_TEMPLATE/enhancement-issue.yml b/.github/ISSUE_TEMPLATE/enhancement-issue.yml new file mode 100644 index 000000000..433ef841b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/enhancement-issue.yml @@ -0,0 +1,46 @@ +name: Feature request +description: Suggest a new feature or improvement for the project. +title: "[FEATURE]: " +labels: ["enhancement"] +assignees: [] + +body: + - type: markdown + attributes: + value: | + Thank you for suggesting a feature! 
Please fill out the form below to help us understand your idea. + + - type: input + id: summary + attributes: + label: Feature summary + description: Provide a short summary of the feature you're requesting. + placeholder: "Summarize the feature in a few words..." + + - type: textarea + id: description + attributes: + label: Feature description + description: A detailed description of the feature or improvement. + placeholder: "Describe the feature in detail..." + + - type: input + id: motivation + attributes: + label: Motivation + description: Explain why this feature would be beneficial and how it solves a problem. + placeholder: "Why do you need this feature?" + + - type: textarea + id: alternatives + attributes: + label: Alternatives considered + description: List any alternative solutions or features you've considered. + placeholder: "Are there any alternative features or solutions you’ve considered?" + + - type: input + id: additional + attributes: + label: Additional context + description: Add any other context or screenshots to support your feature request. + placeholder: "Any additional information..." diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000..00dc722e0 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,25 @@ +name: Python CI + +on: + push: + pull_request: + types: [opened, synchronize, reopened] + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.x' + + - name: Install dependencies + run: pip install -r requirements.txt + + - name: Run tests + run: pytest \ No newline at end of file diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml new file mode 100644 index 000000000..e2caac67d --- /dev/null +++ b/.github/workflows/stale.yml @@ -0,0 +1,42 @@ +name: Mark and Close Stale Issues + +on: + # Schedule the workflow to run periodically (e.g., daily at 1:30 AM UTC) + schedule: + - cron: "30 1 * * *" + workflow_dispatch: + +jobs: + stale: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + + steps: + - name: Run Stale Action + uses: actions/stale@v9 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + days-before-issue-stale: 10 # Days of inactivity before marking an issue as stale + days-before-issue-close: 5 # Days after being marked stale before closing the issue + stale-issue-label: "stale" # Label to apply to stale issues + exempt-issue-labels: "pinned,important" # Labels to exclude from being marked as stale + exempt-issue-assignees: true # Exempt issues with assignees from being marked as stale + stale-issue-message: "This issue has been marked as stale due to inactivity. Please comment or update if this is still relevant." + close-issue-message: "This issue was closed due to prolonged inactivity." + days-before-pr-stale: 10 # Days of inactivity before marking a PR as stale + days-before-pr-close: 2 # Days after being marked stale before closing the PR + stale-pr-label: "stale" # Label to apply to stale PRs + exempt-pr-labels: "pinned,important" # Labels to exclude from being marked as stale + stale-pr-message: > + "This pull request has been marked as stale due to inactivity. + To keep it open, you can: + - Show progress by updating the PR with new commits. + - Continue the conversation by adding comments or requesting clarification on any blockers. 
+ - Resolve pending feedback by replying to unresolved comments or implementing suggested changes. + - Indicate readiness for review by explicitly requesting a review from maintainers or reviewers. + If no action is taken within 7 days, this pull request will be closed." + close-pr-message: "This PR was closed due to prolonged inactivity." + remove-stale-when-updated: true # Remove the stale label if there is new activity + operations-per-run: 20 # Number of issues to process per run (default is 30) diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..6a4eb916a --- /dev/null +++ b/.gitignore @@ -0,0 +1,164 @@ +# application files and logs +/generated_cv +/log/* + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +chrome_profile/* +data_folder/output/* +answers.json +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec +.venv +.pytest_cache +virtual + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv’s dependency resolution may lead to different +# Pipfile.lock files generated on each colleague’s machine. +# Thus, uncomment the following line if the pipenv environment is expected to be identical +# across all environments. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# MacOS +.DS_Store + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# PyCharm and all JetBrains IDEs +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 +.idea/ +*.iml + +# Visual Studio Code +.vscode/ + +# Visual Studio 2015/2017/2019/2022 +.vs/ +*.opendb +*.VC.db + +# User-specific files +*.suo +*.user +*.userosscache +*.sln.docstates + +# Mono Auto Generated Files +mono_crash.* + +job_applications/ \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..a9f3ab1bc --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,201 @@ +# Contributing to Auto_Jobs_Applier_AIHawk + +## Table of Contents + +- [Issue Labels](#issue-labels) +- [Bug Reports](#bug-reports) +- [Feature Requests](#feature-requests) +- [Branch Rules](#branch-rules) +- [Version Control](#version-control) +- [Release Process](#release-process) +- [Roles](#roles) +- [Pull Request Process](#pull-request-process) +- [Code Style Guidelines](#code-style-guidelines) +- [Development Setup](#development-setup) +- [Testing](#testing) +- [Communication](#communication) +- [Development Diagrams](./docs/development_diagrams.md) + +Thank you for your interest in contributing to Auto_Jobs_Applier_AIHawk. This document provides guidelines for contributing to the project. + +## Issue Labels + +The project uses the following labels: + +- **bug**: Something isn't working correctly +- **enhancement**: New feature requests +- **good first issue**: Good for newcomers +- **help wanted**: Extra attention needed +- **documentation**: Documentation improvements + +## Bug Reports + +When submitting a bug report, please include: + +- A clear, descriptive title prefixed with [BUG] +- Steps to reproduce the issue +- Expected behavior +- Actual behavior +- Any error messages or screenshots +- Your environment details (OS, Python version, etc.) + +## Feature Requests + +For feature requests, please: + +- Prefix the title with [FEATURE] +- Include a feature summary +- Provide detailed feature description +- Explain your motivation for the feature +- List any alternatives you've considered + +## Branch Rules + +- `main` - Production-ready code, protected branch +- `develop` - Integration branch for features +- `feature/*` - New features +- `release/*` - Release preparation +- `bugfix/*` - Bug fixes for development +- `hotfix/*` - Emergency production fixes + +## Version Control + +- Semantic versioning: `vMAJOR.MINOR.PATCH` +- Release tags on `main` branch only +- Package versions match git tags + +## Release Process + +week one for `release/v4.1.0` + +- Planning meeting for `release/v4.1.0` with release scope and milestone objectives set by the maintainers. Release and maintainer meeting agendas and schedules are posted on the project repository [wiki](https://github.com/AIHawk/AIHawk/wiki) and shared in the `#releases` channel on Discord. 
+- `release/v4.0.0` release candidate ready for release +- `release/v4.0.0` merged into `develop`, `main` +- tag `main` as `release/v4.0.0` +- `release/v4.0.0` published to AIHawk/releases and PyPI as a package with release documentation +- delete `release/v4.0.0` branch + +release/v4.1.0 release weeks + +- Contributors work on issues and PRs, prioritizing the next milestone +- Maintainers review PRs from `feature/*`, `bugfix/*` branches and issues, merging into `develop` +- Maintainers review PRs from `hotfix/*` branches and issues; hotfixes are merged into `main` and `develop`, `main` is tagged and published as the `v4.0.1` package, the fix is merged into `release/v4.0.1` and `release/v4.1.0`, and documentation is updated + +last week, release candidate + +- `develop` is frozen, only bug fixes +- create release branch `release/v4.1.0` from `develop` +- only bug fixes are merged into `release/v4.1.0` +- additional testing and release candidate review + +week one is repeated for `release/v4.2.0` + +```mermaid +gantt + title Release Cycle Process + dateFormat YYYY-MM-DD + section Retro/Plan + Planning release/v4.1.0 : 2025-01-01, 2d + Publish release/v4.0.0 :milestone, m1, 2025-01-01, 1d + + section Dev Cycle + Feature Development :2025-01-03, 27d + PR Reviews :2025-01-03, 27d + + section Release + Freeze develop :milestone, m3, 2025-01-30, 1d + Create release/v4.1.0 :milestone, m4, 2025-01-30, 1d + Bug Fixes Only :2025-01-30, 2d + RC Testing :2025-01-30, 2d + + section Next Cycle + Skip Weekend :2025-02-01, 2d + Planning release/v4.2.0 :2025-02-03, 2d + Publish release/v4.1.0 :milestone, m5, 2025-02-03, 1d +``` + +## Roles + +### Organization Owner + +- Has full access to all repositories +- Controls organization-wide settings and permissions +- Can set base permissions for all members +- Manages repository settings and collaborator access + +### Release Manager + +- Creates and manages release branch from develop +- Coordinates release cycles and versioning +- Merges release into main + +### Maintainer + +- Reviews and approves develop, feature PRs +- Triages issues, bugs, PRs +- Manages merging feature and bugfix PRs into develop +- Leads feature development, bug prioritization +- Manages README, CONTRIBUTING, and other documentation + +### Moderator + +- Moderates Telegram, Discord channels +- Manages project wiki +- Contributes to README, CONTRIBUTING, and other documentation + +### Contributor + +- Creates feature branches from develop +- Implements new features, bug fixes, and other changes +- Creates PRs for features +- Collaborates with other developers on features + +## Pull Request Process + +1. Fork the repository +2. Create a new branch for your feature or bug fix +3. Write clear commit messages +4. Update documentation as needed +5. Add tests for new functionality +6. Ensure tests pass +7. Submit a pull request with a clear description + +## Merging Pull Requests + +- All PRs are reviewed by maintainers +- At least 2 Maintainers approve PRs for merge +- PRs are merged into `develop` +- PRs are tested and verified to work as expected + +## Code Style Guidelines + +- Follow PEP 8 standards for Python code +- Include docstrings for new functions and classes +- Add comments for complex logic +- Maintain consistent naming conventions +- Follow security best practices +- Note any performance considerations + +## Development Setup + +1. Clone the repository +2. Install dependencies from requirements.txt +3. 
Set up necessary API keys and configurations + +## Testing + +Before submitting a PR: + +- Test your changes thoroughly +- Ensure existing tests pass +- Add new tests for new functionality +- Verify functionality with different configurations + +## Communication + +- Be respectful and constructive in discussions +- Use clear and concise language +- Reference relevant issues in commits and PRs +- Ask for help when needed + +The project maintainers reserve the right to reject any contribution that doesn't meet these guidelines or align with the project's goals. diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..9aa17f0b6 --- /dev/null +++ b/LICENSE @@ -0,0 +1,661 @@ + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2024 AI Hawk FOSS
+ Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 1. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. 
This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. 
+ + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. 
+ + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. 
+ + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. 
If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. + + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 
+ + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 000000000..15bd0e6e1 --- /dev/null +++ b/README.md @@ -0,0 +1,769 @@ + +
+ + + + + + +# AIHawk, the first Jobs Applier AI Agent + + ![CI](https://github.com/feder-cr/Auto_Jobs_Applier_AIHawk/actions/workflows/ci.yml/badge.svg) + +**🤖🔍 Your AI-powered job search assistant. Automate applications, get personalized recommendations, and land your dream job faster.** + +Join our community: [Telegram](https://t.me/AIhawkCommunity) (for users) | [Discord](https://discord.gg/MYYwG8JyrQ) (for open-source contributors) + +[![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white)](https://t.me/AIhawkCommunity) +[![Discord](https://img.shields.io/badge/Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/MYYwG8JyrQ) + +
+ +**Creator** [feder-cr](https://github.com/feder-cr), Co-Founder of AIHawk
+As AIHawk focuses on its proprietary product, which solves hiring problems for companies, this project is currently led, managed, and maintained by a group of open-source contributors, with a focus on building tools that help job seekers land the jobs they deserve. + +**Project Maintainers / Leads**: [surapuramakhil](https://github.com/surapuramakhil), [sarob](https://github.com/sarob), [cjbbb](https://github.com/cjbbb) + +We are looking to expand our FOSS maintainers team! If you are from a non-technical background, you can be part of the project management team, triaging issues and shaping the project. If you are a technical person, you can join in doing code reviews, participating in releases, and building a better version of this product. + +Reach out to [surapuramakhil](https://github.com/surapuramakhil) on [Discord](https://discord.gg/MYYwG8JyrQ). See [Special thanks](#special-thanks). + +Auto_Jobs_Applier_AIHawk is continuously evolving, and your feedback, suggestions, and contributions are highly valued. Feel free to open issues, suggest enhancements, or submit pull requests to help improve the project. Let's work together to make Auto_Jobs_Applier_AIHawk a powerful tool for job seekers worldwide. + +## Table of Contents + +1. [Introduction](#introduction) +2. [Features](#features) +3. [Installation](#installation) +4. [Configuration](#configuration) +5. [Usage](#usage) +6. [Documentation](#documentation) +7. [Troubleshooting](#troubleshooting) +8. [Conclusion](#conclusion) +9. [Contributors](#contributors) +10. [License](#license) +11. [Disclaimer](#disclaimer) + +## Introduction + +Auto_Jobs_Applier_AIHawk is a cutting-edge, automated tool designed to revolutionize the job search and application process. In today's fiercely competitive job market, where opportunities can vanish in the blink of an eye, this program offers job seekers a significant advantage. By leveraging the power of automation and artificial intelligence, Auto_Jobs_Applier_AIHawk enables users to apply to a vast number of relevant positions efficiently and in a personalized manner, maximizing their chances of landing their dream job. + +### The Challenge of Modern Job Hunting + +In the digital age, the job search landscape has undergone a dramatic transformation. While online platforms have opened up a world of opportunities, they have also intensified competition. Job seekers often find themselves spending countless hours scrolling through listings, tailoring applications, and repetitively filling out forms. This process can be not only time-consuming but also emotionally draining, leading to job search fatigue and missed opportunities. + +### Enter Auto_Jobs_Applier_AIHawk: Your Personal Job Search Assistant + +Auto_Jobs_Applier_AIHawk steps in as a game-changing solution to these challenges. It's not just a tool; it's your tireless, 24/7 job search partner. By automating the most time-consuming aspects of the job search process, it allows you to focus on what truly matters: preparing for interviews and developing your professional skills. + +## Features + +1. **Intelligent Job Search Automation** + - Customizable search criteria + - Continuous scanning for new openings + - Smart filtering to exclude irrelevant listings + +2. **Rapid and Efficient Application Submission** + - One-click applications + - Form auto-fill using your profile information + - Automatic document attachment (resume, cover letter) + +3.
 **AI-Powered Personalization** + - Dynamic response generation for employer-specific questions + - Tone and style matching to fit company culture + - Keyword optimization for improved application relevance + +4. **Volume Management with Quality** + - Bulk application capability + - Quality control measures + - Detailed application tracking + +5. **Intelligent Filtering and Blacklisting** + - Company blacklist to avoid unwanted employers + - Title filtering to focus on relevant positions + +6. **Dynamic Resume Generation** + - Automatically creates tailored resumes for each application + - Customizes resume content based on job requirements + +7. **Secure Data Handling** + - Manages sensitive information securely using YAML files + +## Installation + +**Confirmed successful runs on the following:** + +- Operating Systems: + - Windows 10 + - Ubuntu 22 +- Python versions: + - 3.10 + - 3.11.9 (64-bit) + - 3.12.5 (64-bit) + +### Option 1: Using Python Virtual Environment + +1. **Download and Install Python:** + + Ensure you have the latest Python version installed. If not, download and install it from Python's official website. For detailed instructions, refer to the tutorials: + + - [How to Install Python on Windows](https://www.geeksforgeeks.org/how-to-install-python-on-windows/) + - [How to Install Python on Linux](https://www.geeksforgeeks.org/how-to-install-python-on-linux/) + - [How to Download and Install Python on macOS](https://www.geeksforgeeks.org/how-to-download-and-install-python-latest-version-on-macos-mac-os-x/) + +2. **Download and Install Google Chrome:** + - Download and install the latest version of Google Chrome in its default location from the [official website](https://www.google.com/chrome). + +3. **Clone the repository:** + + ```bash + git clone https://github.com/feder-cr/Auto_Jobs_Applier_AIHawk.git + + cd Auto_Jobs_Applier_AIHawk + ``` + +4. **Create and activate a virtual environment:** + + ```bash + python3 -m venv virtual + ``` + + ```bash + source virtual/bin/activate + ``` + + or, for Windows-based machines: + + ```bash + .\virtual\Scripts\activate + ``` + +5. **Install the required packages:** + + ```bash + pip install -r requirements.txt + ``` + +### Option 2: Using Conda + +1. **Install Conda:** + - Download and install Miniconda from the [official website](https://docs.conda.io/en/latest/miniconda.html) + - Or install Anaconda from the [Anaconda website](https://www.anaconda.com/download) + +2. **Create and activate conda environment:** + ```bash + # Create new environment + conda create -n aihawk python=3.11 + + # Activate environment + conda activate aihawk + ``` + +3. **Clone the repository:** + ```bash + git clone https://github.com/feder-cr/Auto_Jobs_Applier_AIHawk.git + cd Auto_Jobs_Applier_AIHawk + ``` + +4. **Install dependencies:** + ```bash + # Install from requirements.txt + pip install -r requirements.txt + ``` + + +## Configuration + +### 1. secrets.yaml + +This file contains sensitive information. Never share or commit this file to version control. + +- `llm_api_key: [Your OpenAI or Ollama API key or Gemini API key]` + - Replace with your OpenAI API key for GPT integration + - To obtain an API key, follow the tutorial at: + - Note: You need to add credit to your OpenAI account to use the API. You can add credit by visiting the [OpenAI billing dashboard](https://platform.openai.com/account/billing). 
+  - According to the [OpenAI community](https://community.openai.com/t/usage-tier-free-to-tier-1/919150) and our users' reports, right after setting up the OpenAI account and purchasing the required credits, users still have a `Free` account type. This restricts access to OpenAI models and allows only 200 requests per day. It might cause runtime errors such as: + `Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. ...}}` + `{'error': {'message': 'Rate limit reached for gpt-4o-mini in organization on requests per day (RPD): Limit 200, Used 200, Requested 1.}}` + OpenAI will upgrade your account automatically, but this can take anywhere from a couple of hours to a few days. + You can find more about your organization limits on the [official page](https://platform.openai.com/settings/organization/limits). + - To obtain a Gemini API key, visit [Google AI for Devs](https://ai.google.dev/gemini-api/docs/api-key) + +### 2. work_preferences.yaml + +This file defines your job search parameters and bot behavior. Each section contains options that you can customize: + +- `remote: [true/false]` + + - Set to `true` to include remote jobs, `false` to exclude them + +- `hybrid: [true/false]` + + - Set to `true` to include hybrid jobs, `false` to exclude them + +- `onsite: [true/false]` + + - Set to `true` to include onsite jobs, `false` to exclude them + +- `experience_level:` + + - Set desired experience levels to `true`, others to `false` + +- `job_types:` + - Set desired job types to `true`, others to `false` + +- `date:` + - Choose one time range for job postings by setting it to `true`, others to `false` + +- `positions:` + - List job titles you're interested in, one per line + - Example: + + ```yaml + positions: + - Software Developer + - Data Scientist + ``` + +- `locations:` + - List locations you want to search in, one per line + - Example: + + ```yaml + locations: + - Italy + - London + ``` + +- `apply_once_at_company: [true/false]` + - Set to `true` to apply only once per company, `false` to allow multiple applications per company + +- `distance: [number]` + - Set the radius for your job search in miles + - Example: `distance: 50` + +- `company_blacklist:` + - List companies you want to exclude from your search, one per line + - Example: + + ```yaml + company_blacklist: + - Company X + - Company Y + ``` + +- `title_blacklist:` + - List keywords in job titles you want to avoid, one per line + - Example: + + ```yaml + title_blacklist: + - Sales + - Marketing + ``` + +#### 2.1 config.py - Customize LLM model endpoint + +- `LLM_MODEL_TYPE`: + - Choose the model type, supported: openai / ollama / claude / gemini +- `LLM_MODEL`: + - Choose the LLM model, currently supported: + - openai: gpt-4o + - ollama: llama2, mistral:v0.3 + - claude: any model + - gemini: any model +- `LLM_API_URL`: + - URL of the API endpoint for the LLM model + - openai: + - ollama: + - claude: + - gemini: + - Note: To run local Ollama, follow the guidelines here: [Guide to Ollama deployment](https://github.com/ollama/ollama) + +### 3. plain_text_resume.yaml + +This file contains your resume information in a structured format. Fill it out with your personal details, education, work experience, and skills. This information is used to auto-fill application forms and generate customized resumes. 
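+
+Before filling these files out in full, it can help to confirm that they at least parse as valid YAML, since indentation and quoting mistakes are the most common cause of the `yaml.scanner.ScannerError` described in the Troubleshooting section. The snippet below is a minimal, hypothetical sketch, not part of the bot's code; it only needs PyYAML installed, and the file names simply follow the data folder layout described above:
+
+```python
+# check_config.py - hypothetical helper, not shipped with AIHawk.
+# Loads the data_folder YAML files with PyYAML and prints their top-level keys,
+# so indentation or quoting mistakes surface before the bot is started.
+from pathlib import Path
+
+import yaml
+
+DATA_FOLDER = Path("data_folder")
+FILES = ["secrets.yaml", "work_preferences.yaml", "plain_text_resume.yaml"]
+
+for name in FILES:
+    path = DATA_FOLDER / name
+    if not path.exists():
+        print(f"[missing] {path}")
+        continue
+    try:
+        data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
+    except yaml.YAMLError as exc:
+        print(f"[invalid] {path}: {exc}")
+        continue
+    keys = ", ".join(sorted(data)) if isinstance(data, dict) else type(data).__name__
+    print(f"[ok] {path}: {keys}")
+```
+
+Running it from the project root prints one line per file, which makes it easy to spot the file that needs fixing before launching `python main.py`.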
+ +Each section of `plain_text_resume.yaml` has specific fields to fill out: + +- `personal_information:` + - This section contains basic personal details to identify yourself and provide contact information. + - **name**: Your first name. + - **surname**: Your last name or family name. + - **date_of_birth**: Your birth date in the format DD/MM/YYYY. + - **country**: The country where you currently reside. + - **city**: The city where you currently live. + - **address**: Your full address, including street and number. + - **zip_code**: Your postal/ZIP code. + - **phone_prefix**: The international dialing code for your phone number (e.g., +1 for the USA, +44 for the UK). + - **phone**: Your phone number without the international prefix. + - **email**: Your primary email address. + - **github**: URL to your GitHub profile, if applicable. + - **linkedin**: URL to your LinkedIn profile, if applicable. + - Example: + + ```yaml + personal_information: + name: "Jane" + surname: "Doe" + date_of_birth: "01/01/1990" + country: "USA" + city: "New York" + address: "123 Main St" + zip_code: "520123" + phone_prefix: "+1" + phone: "5551234567" + email: "jane.doe@example.com" + github: "https://github.com/janedoe" + linkedin: "https://www.linkedin.com/in/janedoe/" + ``` + +- `education_details:` + - This section outlines your academic background, including degrees earned and relevant coursework. + - **education_level**: The type of degree obtained (e.g., Bachelor's Degree, Master's Degree). + - **institution**: The name of the university or institution where you studied. + - **final_evaluation_grade**: Your Grade Point Average or equivalent measure of academic performance. + - **start_date**: The start year of your studies. + - **year_of_completion**: The year you graduated or completed the degree. + - **field_of_study**: The major or focus area of your studies. + - **exam**: A list of courses or subjects taken along with their respective grades. + + - Example: + + ```yaml + education_details: + - education_level: "Bachelor's Degree" + institution: "University of Example" + field_of_study: "Software Engineering" + final_evaluation_grade: "4/4" + start_date: "2021" + year_of_completion: "2023" + exam: + Algorithms: "A" + Data Structures: "B+" + Database Systems: "A" + Operating Systems: "A-" + Web Development: "B" + ``` + +- `experience_details:` + - This section details your work experience, including job roles, companies, and key responsibilities. + - **position**: Your job title or role. + - **company**: The name of the company or organization where you worked. + - **employment_period**: The timeframe during which you were employed in the role, using the format MM/YYYY - MM/YYYY. + - **location**: The city and country where the company is located. + - **industry**: The industry or field in which the company operates. + - **key_responsibilities**: A list of major responsibilities or duties you had in the role, e.g. responsibility: "Developed web applications using React and Node.js". + - **skills_acquired**: Skills or expertise gained through this role, e.g. "React". + + - Example: + + ```yaml + experience_details: + - position: "Software Developer" + company: "Tech Innovations Inc." 
+ employment_period: "06/2021 - Present" + location: "San Francisco, CA" + industry: "Technology" + key_responsibilities: + - responsibility: "Developed web applications using React and Node.js" + - responsibility: "Collaborated with cross-functional teams to design and implement new features" + - responsibility: "Troubleshot and resolved complex software issues" + skills_acquired: + - "React" + - "Node.js" + - "Software Troubleshooting" + ``` + +- `projects:` + - Include notable projects you have worked on, including personal or professional projects. + - **name**: The name or title of the project. + - **description**: A brief summary of what the project involves or its purpose. + - **link**: URL to the project, if available (e.g., GitHub repository, website). + + - Example: + + ```yaml + projects: + - name: "Weather App" + description: "A web application that provides real-time weather information using a third-party API." + link: "https://github.com/janedoe/weather-app" + - name: "Task Manager" + description: "A task management tool with features for tracking and prioritizing tasks." + link: "https://github.com/janedoe/task-manager" + ``` + +- `achievements:` + - Highlight notable accomplishments or awards you have received. + - **name**: The title or name of the achievement. + - **description**: A brief explanation of the achievement and its significance. + + - Example: + + ```yaml + achievements: + - name: "Employee of the Month" + description: "Recognized for exceptional performance and contributions to the team." + - name: "Hackathon Winner" + description: "Won first place in a national hackathon competition." + ``` + +- `certifications:` + - Include any professional certifications you have earned. + - name: "PMP" + description: "Certification for project management professionals, issued by the Project Management Institute (PMI)" + + - Example: + + ```yaml + certifications: + - "Certified Scrum Master" + - "AWS Certified Solutions Architect" + ``` + +- `languages:` + - Detail the languages you speak and your proficiency level in each. + - **language**: The name of the language. + - **proficiency**: Your level of proficiency (e.g., Native, Fluent, Intermediate). + + - Example: + + ```yaml + languages: + - language: "English" + proficiency: "Fluent" + - language: "Spanish" + proficiency: "Intermediate" + ``` + +- `interests:` + + - Mention your professional or personal interests that may be relevant to your career. + - **interest**: A list of interests or hobbies. + + - Example: + + ```yaml + interests: + - "Machine Learning" + - "Cybersecurity" + - "Open Source Projects" + - "Digital Marketing" + - "Entrepreneurship" + ``` + +- `availability:` + - State your current availability or notice period. + - **notice_period**: The amount of time required before you can start a new role (e.g., "2 weeks", "1 month"). + + - Example: + + ```yaml + availability: + notice_period: "2 weeks" + ``` + +- `salary_expectations:` + - Provide your expected salary range. + - **salary_range_usd**: The salary range you are expecting, expressed in USD. + + - Example: + + ```yaml + salary_expectations: + salary_range_usd: "80000 - 100000" + ``` + +- `self_identification:` + - Provide information related to personal identity, including gender and pronouns. + - **gender**: Your gender identity. + - **pronouns**: The pronouns you use (e.g., He/Him, She/Her, They/Them). + - **veteran**: Your status as a veteran (e.g., Yes, No). + - **disability**: Whether you have a disability (e.g., Yes, No). 
+ - **ethnicity**: Your ethnicity. + + - Example: + + ```yaml + self_identification: + gender: "Female" + pronouns: "She/Her" + veteran: "No" + disability: "No" + ethnicity: "Asian" + ``` + +- `legal_authorization:` + - Indicate your legal ability to work in various locations. + - **eu_work_authorization**: Whether you are authorized to work in the European Union (Yes/No). + - **us_work_authorization**: Whether you are authorized to work in the United States (Yes/No). + - **requires_us_visa**: Whether you require a visa to work in the United States (Yes/No). + - **requires_us_sponsorship**: Whether you require sponsorship to work in the United States (Yes/No). + - **requires_eu_visa**: Whether you require a visa to work in the European Union (Yes/No). + - **legally_allowed_to_work_in_eu**: Whether you are legally allowed to work in the European Union (Yes/No). + - **legally_allowed_to_work_in_us**: Whether you are legally allowed to work in the United States (Yes/No). + - **requires_eu_sponsorship**: Whether you require sponsorship to work in the European Union (Yes/No). + - **canada_work_authorization**: Whether you are authorized to work in Canada (Yes/No). + - **requires_canada_visa**: Whether you require a visa to work in Canada (Yes/No). + - **legally_allowed_to_work_in_canada**: Whether you are legally allowed to work in Canada (Yes/No). + - **requires_canada_sponsorship**: Whether you require sponsorship to work in Canada (Yes/No). + - **uk_work_authorization**: Whether you are authorized to work in the United Kingdom (Yes/No). + - **requires_uk_visa**: Whether you require a visa to work in the United Kingdom (Yes/No). + - **legally_allowed_to_work_in_uk**: Whether you are legally allowed to work in the United Kingdom (Yes/No). + - **requires_uk_sponsorship**: Whether you require sponsorship to work in the United Kingdom (Yes/No). + + - Example: + + ```yaml + legal_authorization: + eu_work_authorization: "Yes" + us_work_authorization: "Yes" + requires_us_visa: "No" + requires_us_sponsorship: "Yes" + requires_eu_visa: "No" + legally_allowed_to_work_in_eu: "Yes" + legally_allowed_to_work_in_us: "Yes" + requires_eu_sponsorship: "No" + canada_work_authorization: "Yes" + requires_canada_visa: "No" + legally_allowed_to_work_in_canada: "Yes" + requires_canada_sponsorship: "No" + uk_work_authorization: "Yes" + requires_uk_visa: "No" + legally_allowed_to_work_in_uk: "Yes" + requires_uk_sponsorship: "No" + ``` + +- `work_preferences:` + - Specify your preferences for work arrangements and conditions. + - **remote_work**: Whether you are open to remote work (Yes/No). + - **in_person_work**: Whether you are open to in-person work (Yes/No). + - **open_to_relocation**: Whether you are willing to relocate for a job (Yes/No). + - **willing_to_complete_assessments**: Whether you are willing to complete job assessments (Yes/No). + - **willing_to_undergo_drug_tests**: Whether you are willing to undergo drug testing (Yes/No). + - **willing_to_undergo_background_checks**: Whether you are willing to undergo background checks (Yes/No). + + - Example: + + ```yaml + work_preferences: + remote_work: "Yes" + in_person_work: "No" + open_to_relocation: "Yes" + willing_to_complete_assessments: "Yes" + willing_to_undergo_drug_tests: "No" + willing_to_undergo_background_checks: "Yes" + ``` + +### PLUS. data_folder_example + +The `data_folder_example` folder contains a working example of how the files necessary for the bot's operation should be structured and filled out. 
This folder serves as a practical reference to help you correctly set up your work environment for the job search bot. + +#### Contents + +Inside this folder, you'll find example versions of the key files: + +- `secrets.yaml` +- `config.yaml` +- `plain_text_resume.yaml` + +These files are already populated with fictitious but realistic data. They show you the correct format and type of information to enter in each file. + +#### Using the data_folder_example + +Using this folder as a guide can be particularly helpful for: + +1. Understanding the correct structure of each configuration file +2. Seeing examples of valid data for each field +3. Having a reference point while filling out your personal files + +## Usage + +0. **Account language** + To ensure the bot works, your account language must be set to English. + +1. **Data Folder:** + Ensure that your data_folder contains the following files: + - `secrets.yaml` + - `config.yaml` + - `plain_text_resume.yaml` + +2. **Output Folder:** + Contains the output of the bot. + - `data.json` results of the --collect mode + - `failed.json` failed applications + - `open_ai_calls.json` all the calls made to the LLM model + - `skipped.json` applications that were skipped + - `success.json` successful applications + + **Note:** `answers.json` is not part of the output folder and can be found in the root of the project. It is used to store the answers of the questions asked to the user. Can be used to update the bot with corrected answers. Search for `Select an option`, `0`, `Authorized`, and `how many years of` to verify correct answers. + +3. **Run the Bot:** + + Auto_Jobs_Applier_AIHawk offers flexibility in how it handles your pdf resume: + +- **Dynamic Resume Generation:** + If you don't use the `--resume` option, the bot will automatically generate a unique resume for each application. This feature uses the information from your `plain_text_resume.yaml` file and tailors it to each specific job application, potentially increasing your chances of success by customizing your resume for each position. + + ```bash + python main.py + ``` + +- **Using a Specific Resume:** + If you want to use a specific PDF resume for all applications, place your resume PDF in the `data_folder` directory and run the bot with the `--resume` option: + + ```bash + python main.py --resume /path/to/your/resume.pdf + ``` + +- **Using the collect mode:** + If you want to collect job data only to perform any type of data analytics you can use the bot with the `--collect` option. This will store in output/data.json file all data found from linkedin jobs offers. + + ```bash + python main.py --collect + ``` + +### Troubleshooting + +#### 1. OpenAI API Rate Limit Errors + +**Error Message:** + +openai.RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: .', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}} + +**Solution:** + +- Check your OpenAI API billing settings at +- Ensure you have added a valid payment method to your OpenAI account +- Note that ChatGPT Plus subscription is different from API access +- If you've recently added funds or upgraded, wait 12-24 hours for changes to take effect +- Free tier has a 3 RPM limit; spend at least $5 on API usage to increase + +#### 2. 
Easy Apply Button Not Found + +**Error Message:** + +Exception: No clickable 'Easy Apply' button found + +**Solution:** + +- Ensure that you're logged properly +- Check if the job listings you're targeting actually have the "Easy Apply" option +- Verify that your search parameters in the `config.yaml` file are correct and returning jobs with the "Easy Apply" button +- Try increasing the wait time for page loading in the script to ensure all elements are loaded before searching for the button + +#### 3. Incorrect Information in Job Applications + +**Issue:** Bot provides inaccurate data for experience, CTC, and notice period + +**Solution:** + +- Update prompts for professional experience specificity +- Add fields in `config.yaml` for current CTC, expected CTC, and notice period +- Modify bot logic to use these new config fields + +#### 4. YAML Configuration Errors + +**Error Message:** + +yaml.scanner.ScannerError: while scanning a simple key + +**Solution:** + +- Copy example `config.yaml` and modify gradually +- Ensure proper YAML indentation and spacing +- Use a YAML validator tool +- Avoid unnecessary special characters or quotes + +#### 5. Bot Logs In But Doesn't Apply to Jobs + +**Issue:** Bot searches for jobs but continues scrolling without applying + +**Solution:** + +- Check for security checks or CAPTCHAs +- Verify `config.yaml` job search parameters +- Ensure your account profile meets job requirements +- Review console output for error messages + +### General Troubleshooting Tips + +- Use the latest version of the script +- Verify all dependencies are installed and updated +- Check internet connection stability +- Clear browser cache and cookies if issues persist + +For further assistance, please create an issue on the [GitHub repository](https://github.com/feder-cr/Auto_Jobs_Applier_AIHawk/issues) with detailed information about your problem, including error messages and your configuration (with sensitive information removed). + +## Documentation + +### For Users + +- Ollama & Gemini Setup + - To install and configure **Ollama** and **Gemini**, [Download Ollama and Gemini Setup Guide (PDF)](https://github.com/feder-cr/Auto_Jobs_Applier_AIHawk/blob/main/docs/guide_to_setup_ollama_and_gemini.pdf) + - Follow the instructions in these guides to ensure proper configuration of **AIHawk** with **Ollama** and **Gemini**. + - Written by Rushi, [Linkedin](https://www.linkedin.com/in/rushichaganti/), support him by following. + +- Editing YAML Files + - For detailed instructions on editing YAML configuration sections for **AIHawk**, refer to this document: + - [Download YAML Editing Guide (PDF)](https://github.com/feder-cr/Auto_Jobs_Applier_AIHawk/blob/main/docs/guide_yaml_sections.pdf) + - Written by Rushi, [Linkedin](https://www.linkedin.com/in/rushichaganti/), support him by following. + +- Auto-start AIHawk + - To make **AIHawk** automatically start when your system boots, follow the steps in this guide: + - [Download Auto-start AIHawk Guide (PDF)](https://github.com/feder-cr/Auto_Jobs_Applier_AIHawk/blob/main/docs/guide_to_autostart_aihawk.pdf) + - Written by Rushi, [Linkedin](https://www.linkedin.com/in/rushichaganti/), support him by following. + +- Video Tutorial + - [How to set up Auto_Jobs_Applier_AIHawk](https://youtu.be/gdW9wogHEUM) + - Written by Rushi, [Linkedin](https://www.linkedin.com/in/rushichaganti/), support him by following. 
+ +- [OpenAI API Documentation](https://platform.openai.com/docs/) + +### For Developers + +- [Contribution Guidelines](CONTRIBUTING.md) + +- [Lang Chain Developer Documentation](https://python.langchain.com/v0.2/docs/integrations/components/) + +- [Workflow diagrams](docs/workflow_diagrams.md) + +- If you encounter any issues, you can open an issue on [GitHub](https://github.com/feder-cr/Auto_Jobs_Applier_AIHawk/issues). + Please add useful details to the subject and description. If you are requesting a new feature, please say so clearly. + We'll be more than happy to assist you! + +- Note for Contributors: If you would like to submit a Pull Request (PR), please target the `release` branch instead of `main`. The `release` branch is used for testing new code changes and will be periodically merged into `main` after validation. This approach ensures that only tested features make it into the main branch. + +## Conclusion + +Auto_Jobs_Applier_AIHawk provides a significant advantage in the modern job market by automating and enhancing the job application process. With features like dynamic resume generation and AI-powered personalization, it offers unparalleled flexibility and efficiency. Whether you're a job seeker aiming to maximize your chances of landing a job, a recruiter looking to streamline application submissions, or a career advisor seeking to offer better services, Auto_Jobs_Applier_AIHawk is an invaluable resource. By leveraging cutting-edge automation and artificial intelligence, this tool not only saves time but also significantly increases the effectiveness and quality of job applications in today's competitive landscape. + +## Star History + +[![Star History Chart](https://api.star-history.com/svg?repos=feder-cr/Auto_Jobs_Applier_AIHawk&type=Date)](https://star-history.com/#feder-cr/Auto_Jobs_Applier_AIHawk&Date) + +If you like the project, please star ⭐ the repository! + +## Special Thanks +[![Contributors](https://img.shields.io/github/contributors/feder-cr/Auto_Jobs_Applier_AIHawk)](https://github.com/feder-cr/Auto_Jobs_Applier_AIHawk/graphs/contributors) + + + + + +Made with [contrib.rocks](https://contrib.rocks). + +## License + +This project is licensed under the AGPL License. Documentation is licensed under CC BY - see the [AGPL LICENSE](LICENSE) and [CC BY LICENSE](docs/LICENSE) files for details. + +The AGPL License requires that any derivative work must also be open source and distributed under the same license. + +The CC BY License permits others to distribute, remix, adapt, and build upon your work, even for commercial purposes, as long as they credit you for the original creation. + + +## Disclaimer + +This tool, Auto_Jobs_Applier_AIHawk, is intended for use at your own risk. The creators / maintainers / contributors assume no responsibility for any consequences arising from its use. Users are advised to comply with the terms of service of relevant platforms and adhere to all applicable laws, regulations, and ethical guidelines. The use of automated tools for job applications may carry risks, including potential impacts on user accounts. Proceed with caution and at your own discretion. 
+ +[Back to top 🚀](#top) diff --git a/assets/AIHawk.png b/assets/AIHawk.png new file mode 100644 index 000000000..c3c7e75f7 Binary files /dev/null and b/assets/AIHawk.png differ diff --git a/assets/resume_schema.yaml b/assets/resume_schema.yaml new file mode 100644 index 000000000..8b3bb88ad --- /dev/null +++ b/assets/resume_schema.yaml @@ -0,0 +1,133 @@ +# YAML Schema for plain_text_resume.yaml + +personal_information: + type: object + properties: + name: {type: string} + surname: {type: string} + date_of_birth: {type: string, format: date} + country: {type: string} + zip_code: {type: string, pattern: "^[0-9]{5,10}$"} + city: {type: string} + address: {type: string} + phone_prefix: {type: string, format: phone_prefix} + phone: {type: string, format: phone} + email: {type: string, format: email} + github: {type: string, format: uri} + linkedin: {type: string, format: uri} + required: [name, surname, date_of_birth, country, city, address, zip_code, phone_prefix, phone, email] + +education_details: + type: array + items: + type: object + properties: + degree: {type: string} + university: {type: string} + gpa: {type: string} + graduation_year: {type: string} + field_of_study: {type: string} + exam: + type: object + additionalProperties: {type: string} + required: [degree, university, gpa, graduation_year, field_of_study] + +experience_details: + type: array + items: + type: object + properties: + position: {type: string} + company: {type: string} + employment_period: {type: string} + location: {type: string} + industry: {type: string} + key_responsibilities: + type: object + additionalProperties: {type: string} + skills_acquired: + type: array + items: {type: string} + required: [position, company, employment_period, location, industry, key_responsibilities, skills_acquired] + +projects: + type: array + items: + type: object + properties: + name: {type: string} + description: {type: string} + link: {type: string, format: uri} + required: [name, description] + +achievements: + type: array + items: + type: object + properties: + name: {type: string} + description: {type: string} + required: [name, description] + +certifications: + type: array + items: {type: string} + +languages: + type: array + items: + type: object + properties: + language: {type: string} + proficiency: {type: string, enum: [Native, Fluent, Intermediate, Beginner]} + required: [language, proficiency] + +interests: + type: array + items: {type: string} + +availability: + type: object + properties: + notice_period: {type: string} + required: [notice_period] + +salary_expectations: + type: object + properties: + salary_range_usd: {type: string} + required: [salary_range_usd] + +self_identification: + type: object + properties: + gender: {type: string} + pronouns: {type: string} + veteran: {type: string, enum: [Yes, No]} + disability: {type: string, enum: [Yes, No]} + ethnicity: {type: string} + required: [gender, pronouns, veteran, disability, ethnicity] + +legal_authorization: + type: object + properties: + eu_work_authorization: {type: string, enum: [Yes, No]} + us_work_authorization: {type: string, enum: [Yes, No]} + requires_us_visa: {type: string, enum: [Yes, No]} + requires_us_sponsorship: {type: string, enum: [Yes, No]} + requires_eu_visa: {type: string, enum: [Yes, No]} + legally_allowed_to_work_in_eu: {type: string, enum: [Yes, No]} + legally_allowed_to_work_in_us: {type: string, enum: [Yes, No]} + requires_eu_sponsorship: {type: string, enum: [Yes, No]} + required: [eu_work_authorization, us_work_authorization, 
requires_us_visa, requires_us_sponsorship, requires_eu_visa, legally_allowed_to_work_in_eu, legally_allowed_to_work_in_us, requires_eu_sponsorship] + +work_preferences: + type: object + properties: + remote_work: {type: string, enum: [Yes, No]} + in_person_work: {type: string, enum: [Yes, No]} + open_to_relocation: {type: string, enum: [Yes, No]} + willing_to_complete_assessments: {type: string, enum: [Yes, No]} + willing_to_undergo_drug_tests: {type: string, enum: [Yes, No]} + willing_to_undergo_background_checks: {type: string, enum: [Yes, No]} + required: [remote_work, in_person_work, open_to_relocation, willing_to_complete_assessments, willing_to_undergo_drug_tests, willing_to_undergo_background_checks] diff --git a/config.py b/config.py new file mode 100644 index 000000000..40c004b31 --- /dev/null +++ b/config.py @@ -0,0 +1,22 @@ +# In this file, you can set the configurations of the app. + +from constants import DEBUG, LLM_MODEL, OPENAI + +#config related to logging must have prefix LOG_ +LOG_LEVEL = DEBUG +LOG_SELENIUM_LEVEL = DEBUG +LOG_TO_FILE = True +LOG_TO_CONSOLE = True + +MINIMUM_WAIT_TIME_IN_SECONDS = 60 + +JOB_APPLICATIONS_DIR = "job_applications" +JOB_SUITABILITY_SCORE = 7 + +JOB_MAX_APPLICATIONS = 5 +JOB_MIN_APPLICATIONS = 1 + +LLM_MODEL_TYPE = 'openai' +LLM_MODEL = 'gpt-4o-mini' +# Only required for OLLAMA models +LLM_API_URL = '' \ No newline at end of file diff --git a/constants.py b/constants.py new file mode 100644 index 000000000..86ae70c68 --- /dev/null +++ b/constants.py @@ -0,0 +1,78 @@ +DATE_ALL_TIME = "all_time" +DATE_MONTH = "month" +DATE_WEEK = "week" +DATE_24_HOURS = "24_hours" + +LINKEDIN = "linkedin" + +# constants used in application +SECRETS_YAML = "secrets.yaml" +WORK_PREFERENCES_YAML = "work_preferences.yaml" +PLAIN_TEXT_RESUME_YAML = "plain_text_resume.yaml" + + +# String constants used in the application +DEBUG = "DEBUG" +INFO = "INFO" +WARNING = "WARNING" +ERROR = "ERROR" +CRITICAL = "CRITICAL" + +MINIMUM_LOG_LEVEL = "MINIMUM_LOG_LEVEL" + +# Constants in llm_manager.py +USAGE_METADATA = "usage_metadata" +OUTPUT_TOKENS = "output_tokens" +INPUT_TOKENS = "input_tokens" +TOTAL_TOKENS = "total_tokens" +TOKEN_USAGE = "token_usage" + +MODEL = "model" +TIME = "time" +PROMPTS = "prompts" +REPLIES = "replies" +CONTENT = "content" +TOTAL_COST = "total_cost" + +RESPONSE_METADATA = "response_metadata" +MODEL_NAME = "model_name" +SYSTEM_FINGERPRINT = "system_fingerprint" +FINISH_REASON = "finish_reason" +LOGPROBS = "logprobs" +ID = "id" +TEXT = "text" +PHRASE = "phrase" +QUESTION = "question" +OPTIONS = "options" +RESUME = "resume" +RESUME_SECTION = "resume_section" +JOB_DESCRIPTION = "job_description" +COMPANY = "company" +JOB_APPLICATION_PROFILE = "job_application_profile" +RESUME_EDUCATIONS = "resume_educations" +RESUME_JOBS = "resume_jobs" +RESUME_PROJECTS = "resume_projects" + +PERSONAL_INFORMATION = "personal_information" +SELF_IDENTIFICATION = "self_identification" +LEGAL_AUTHORIZATION = "legal_authorization" +WORK_PREFERENCES = "work_preferences" +EDUCATION_DETAILS = "education_details" +EXPERIENCE_DETAILS = "experience_details" +PROJECTS = "projects" +AVAILABILITY = "availability" +SALARY_EXPECTATIONS = "salary_expectations" +CERTIFICATIONS = "certifications" +LANGUAGES = "languages" +INTERESTS = "interests" +COVER_LETTER = "cover_letter" + +LLM_MODEL_TYPE = "llm_model_type" +LLM_API_URL = "llm_api_url" +LLM_MODEL = "llm_model" +OPENAI = "openai" +CLAUDE = "claude" +OLLAMA = "ollama" +GEMINI = "gemini" +HUGGINGFACE = "huggingface" +PERPLEXITY = 
"perplexity" diff --git a/data_folder/plain_text_resume.yaml b/data_folder/plain_text_resume.yaml new file mode 100644 index 000000000..1f822ac83 --- /dev/null +++ b/data_folder/plain_text_resume.yaml @@ -0,0 +1,130 @@ +personal_information: + name: "[Your Name]" + surname: "[Your Surname]" + date_of_birth: "[Your Date of Birth]" + country: "[Your Country]" + city: "[Your City]" + address: "[Your Address]" + zip_code: "[Your zip code]" + phone_prefix: "[Your Phone Prefix]" + phone: "[Your Phone Number]" + email: "[Your Email Address]" + github: "[Your GitHub Profile URL]" + linkedin: "[Your LinkedIn Profile URL]" + +education_details: + - education_level: "[Your Education Level]" + institution: "[Your Institution]" + field_of_study: "[Your Field of Study]" + final_evaluation_grade: "[Your Final Evaluation Grade]" + start_date: "[Start Date]" + year_of_completion: "[Year of Completion]" + exam: + exam_name_1: "[Grade]" + exam_name_2: "[Grade]" + exam_name_3: "[Grade]" + exam_name_4: "[Grade]" + exam_name_5: "[Grade]" + exam_name_6: "[Grade]" + +experience_details: + - position: "[Your Position]" + company: "[Company Name]" + employment_period: "[Employment Period]" + location: "[Location]" + industry: "[Industry]" + key_responsibilities: + - responsibility_1: "[Responsibility Description]" + - responsibility_2: "[Responsibility Description]" + - responsibility_3: "[Responsibility Description]" + skills_acquired: + - "[Skill]" + - "[Skill]" + - "[Skill]" + + - position: "[Your Position]" + company: "[Company Name]" + employment_period: "[Employment Period]" + location: "[Location]" + industry: "[Industry]" + key_responsibilities: + - responsibility_1: "[Responsibility Description]" + - responsibility_2: "[Responsibility Description]" + - responsibility_3: "[Responsibility Description]" + skills_acquired: + - "[Skill]" + - "[Skill]" + - "[Skill]" + +projects: + - name: "[Project Name]" + description: "[Project Description]" + link: "[Project Link]" + + - name: "[Project Name]" + description: "[Project Description]" + link: "[Project Link]" + +achievements: + - name: "[Achievement Name]" + description: "[Achievement Description]" + - name: "[Achievement Name]" + description: "[Achievement Description]" + +certifications: + - name: "[Certification Name]" + description: "[Certification Description]" + - name: "[Certification Name]" + description: "[Certification Description]" + +languages: + - language: "[Language]" + proficiency: "[Proficiency Level]" + - language: "[Language]" + proficiency: "[Proficiency Level]" + +interests: + - "[Interest]" + - "[Interest]" + - "[Interest]" + +availability: + notice_period: "[Notice Period]" + +salary_expectations: + salary_range_usd: "[Salary Range]" + +self_identification: + gender: "[Gender]" + pronouns: "[Pronouns]" + veteran: "[Yes/No]" + disability: "[Yes/No]" + ethnicity: "[Ethnicity]" + + +legal_authorization: + eu_work_authorization: "[Yes/No]" + us_work_authorization: "[Yes/No]" + requires_us_visa: "[Yes/No]" + requires_us_sponsorship: "[Yes/No]" + requires_eu_visa: "[Yes/No]" + legally_allowed_to_work_in_eu: "[Yes/No]" + legally_allowed_to_work_in_us: "[Yes/No]" + requires_eu_sponsorship: "[Yes/No]" + canada_work_authorization: "[Yes/No]" + requires_canada_visa: "[Yes/No]" + legally_allowed_to_work_in_canada: "[Yes/No]" + requires_canada_sponsorship: "[Yes/No]" + uk_work_authorization: "[Yes/No]" + requires_uk_visa: "[Yes/No]" + legally_allowed_to_work_in_uk: "[Yes/No]" + requires_uk_sponsorship: "[Yes/No]" + + +work_preferences: + remote_work: 
"[Yes/No]" + in_person_work: "[Yes/No]" + open_to_relocation: "[Yes/No]" + willing_to_complete_assessments: "[Yes/No]" + willing_to_undergo_drug_tests: "[Yes/No]" + willing_to_undergo_background_checks: "[Yes/No]" diff --git a/data_folder/secrets.yaml b/data_folder/secrets.yaml new file mode 100644 index 000000000..62b4a747c --- /dev/null +++ b/data_folder/secrets.yaml @@ -0,0 +1 @@ +llm_api_key: 'sk-11KRr4uuTwpRGfeRTfj1T9BlbkFJjP8QTrswHU1yGruru2FR' diff --git a/data_folder/work_preferences.yaml b/data_folder/work_preferences.yaml new file mode 100644 index 000000000..7ed987ffc --- /dev/null +++ b/data_folder/work_preferences.yaml @@ -0,0 +1,47 @@ +remote: true +hybrid: true +onsite: true + +experience_level: + internship: false + entry: true + associate: true + mid_senior_level: true + director: false + executive: false + +job_types: + full_time: true + contract: false + part_time: false + temporary: true + internship: false + other: false + volunteer: true + +date: + all_time: false + month: false + week: false + 24_hours: true + +positions: + - Software engineer + +locations: + - Germany + +apply_once_at_company: true + +distance: 100 + +company_blacklist: + - wayfair + - Crossover + +title_blacklist: + - word1 + - word2 + +location_blacklist: + - Brazil \ No newline at end of file diff --git a/data_folder_example/plain_text_resume.yaml b/data_folder_example/plain_text_resume.yaml new file mode 100644 index 000000000..4b6f95fe1 --- /dev/null +++ b/data_folder_example/plain_text_resume.yaml @@ -0,0 +1,141 @@ +personal_information: + name: "solid" + surname: "snake" + date_of_birth: "12/01/1861" + country: "Ireland" + city: "Dublin" + zip_code: "520123" + address: "12 Fox road" + phone_prefix: "+1" + phone: "7819117091" + email: "hi@gmail.com" + github: "https://github.com/lol" + linkedin: "https://www.linkedin.com/in/thezucc/" + + +education_details: + - education_level: "Master's Degree" + institution: "Bob academy" + field_of_study: "Bobs Engineering" + final_evaluation_grade: "4.0" + year_of_completion: "2023" + start_date: "2022" + additional_info: + exam: + Algorithms: "A" + Linear Algebra: "A" + Database Systems: "A" + Operating Systems: "A-" + Web Development: "A" + +experience_details: + - position: "X" + company: "Y." 
+ employment_period: "06/2019 - Present" + location: "San Francisco, CA" + industry: "Technology" + key_responsibilities: + - responsibility: "Developed web applications using React and Node.js" + - responsibility: "Collaborated with cross-functional teams to design and implement new features" + - responsibility: "Troubleshot and resolved complex software issues" + skills_acquired: + - "React" + - "Node.js" + - "Software Troubleshooting" + - position: "Software Developer" + company: "Innovatech" + employment_period: "06/2015 - 12/2017" + location: "Milan, Italy" + industry: "Technology" + key_responsibilities: + - responsibility: "Developed and maintained web applications using modern technologies" + - responsibility: "Collaborated with UX/UI designers to enhance user experience" + - responsibility: "Implemented automated testing procedures to ensure code quality" + skills_acquired: + - "Web development" + - "User experience design" + - "Automated testing" + - position: "Junior Developer" + company: "StartUp Hub" + employment_period: "01/2014 - 05/2015" + location: "Florence, Italy" + industry: "Startups" + key_responsibilities: + - responsibility: "Assisted in the development of mobile applications and web platforms" + - responsibility: "Participated in code reviews and contributed to software design discussions" + - responsibility: "Resolved bugs and implemented feature enhancements" + skills_acquired: + - "Mobile app development" + - "Code reviews" + - "Bug fixing" +projects: + - name: "X" + description: "Y blah blah blah " + link: "https://github.com/haveagoodday" + + + +achievements: + - name: "Employee of the Month" + description: "Recognized for exceptional performance and contributions to the team." + - name: "Hackathon Winner" + description: "Won first place in a national hackathon competition." + +certifications: + - name: "Certified Scrum Master" + description: "Recognized certification for proficiency in Agile methodologies and Scrum framework." + - name: "AWS Certified Solutions Architect" + description: "Certification demonstrating expertise in designing, deploying, and managing applications on AWS." 
+ +languages: + - language: "English" + proficiency: "Fluent" + - language: "Spanish" + proficiency: "Intermediate" + +interests: + - "Machine Learning" + - "Cybersecurity" + - "Open Source Projects" + - "Digital Marketing" + - "Entrepreneurship" + +availability: + notice_period: "2 weeks" + +salary_expectations: + salary_range_usd: "90000 - 110000" + +self_identification: + gender: "Female" + pronouns: "She/Her" + veteran: "No" + disability: "No" + ethnicity: "Asian" + +legal_authorization: + eu_work_authorization: "Yes" + us_work_authorization: "Yes" + requires_us_visa: "No" + requires_us_sponsorship: "Yes" + requires_eu_visa: "No" + legally_allowed_to_work_in_eu: "Yes" + legally_allowed_to_work_in_us: "Yes" + requires_eu_sponsorship: "No" + canada_work_authorization: "Yes" + requires_canada_visa: "No" + legally_allowed_to_work_in_canada: "Yes" + requires_canada_sponsorship: "No" + uk_work_authorization: "Yes" + requires_uk_visa: "No" + legally_allowed_to_work_in_uk: "Yes" + requires_uk_sponsorship: "No" + + +work_preferences: + remote_work: "Yes" + in_person_work: "Yes" + open_to_relocation: "Yes" + willing_to_complete_assessments: "Yes" + willing_to_undergo_drug_tests: "Yes" + willing_to_undergo_background_checks: "Yes" diff --git a/data_folder_example/resume_liam_murphy.txt b/data_folder_example/resume_liam_murphy.txt new file mode 100644 index 000000000..edcac2b3b --- /dev/null +++ b/data_folder_example/resume_liam_murphy.txt @@ -0,0 +1,55 @@ +Liam Murphy +Galway, Ireland +Email: liam.murphy@gmail.com | AIHawk: liam-murphy +GitHub: liam-murphy | Phone: +353 871234567 + +Education +Bachelor's Degree in Computer Science +National University of Ireland, Galway (GPA: 4/4) +Graduation Year: 2020 + +Experience +Co-Founder & Software Engineer +CryptoWave Solutions (03/2021 - Present) +Location: Ireland | Industry: Blockchain Technology + +Co-founded and led a startup specializing in app and software development with a focus on blockchain technology +Provided blockchain consultations for 10+ companies, enhancing their software capabilities with secure, decentralized solutions +Developed blockchain applications, integrated cutting-edge technology to meet client needs and drive industry innovation +Research Intern +National University of Ireland, Galway (11/2022 - 03/2023) +Location: Galway, Ireland | Industry: IoT Security Research + +Conducted in-depth research on IoT security, focusing on binary instrumentation and runtime monitoring +Performed in-depth study of the MQTT protocol and Falco +Developed multiple software components including MQTT packet analysis library, Falco adapter, and RML monitor in Prolog +Authored thesis "Binary Instrumentation for Runtime Monitoring of Internet of Things Systems Using Falco" +Software Engineer +University Hospital Galway (05/2022 - 11/2022) +Location: Galway, Ireland | Industry: Healthcare IT + +Integrated and enforced robust security protocols +Developed and maintained a critical software tool for password validation used by over 1,600 employees +Played an integral role in the hospital's cybersecurity team +Projects +JobBot +AI-driven tool to automate and personalize job applications on AIHawk, gained over 3000 stars on GitHub, improving efficiency and reducing application time +Link: JobBot + +mqtt-packet-parser +Developed a Node.js module for parsing MQTT packets, improved parsing efficiency by 40% +Link: mqtt-packet-parser + +Achievements +Winner of an Irish public competition - Won first place in a public competition with a perfect score of 70/70, 
securing a Software Developer position at University Hospital Galway +Galway Merit Scholarship - Awarded annually from 2018 to 2020 in recognition of academic excellence and contribution +GitHub Recognition - Gained over 3000 stars on GitHub with JobBot project +Certifications +C1 + +Languages +English - Native +Spanish - Professional +Interests +Full-Stack Development, Software Architecture, IoT system design and development, Artificial Intelligence, Cloud Technologies + diff --git a/data_folder_example/secrets.yaml b/data_folder_example/secrets.yaml new file mode 100644 index 000000000..781bfb946 --- /dev/null +++ b/data_folder_example/secrets.yaml @@ -0,0 +1 @@ +llm_api_key: 'sk-11KRr4uuTwpRGfeRTfj1T9BlbkFJjP8QTrswHU1yGruru2FR' \ No newline at end of file diff --git a/data_folder_example/work_preferences.yaml b/data_folder_example/work_preferences.yaml new file mode 100644 index 000000000..27d92935c --- /dev/null +++ b/data_folder_example/work_preferences.yaml @@ -0,0 +1,48 @@ +remote: true +hybrid: true +onsite: true + +experience_level: + internship: false + entry: true + associate: true + mid_senior_level: true + director: false + executive: false + +job_types: + full_time: true + contract: false + part_time: false + temporary: true + internship: false + other: false + volunteer: true + +date: + all_time: false + month: false + week: false + 24_hours: true + +positions: + - Software engineer + +locations: + - Germany + +apply_once_at_company: true + +distance: 100 + +company_blacklist: + - wayfair + - Crossover + +title_blacklist: + - word1 + - word2 + +location_blacklist: + - Brazil + diff --git a/docs/LICENSE b/docs/LICENSE new file mode 100644 index 000000000..12429f4b9 --- /dev/null +++ b/docs/LICENSE @@ -0,0 +1,397 @@ +Copyright (C) 2024 AI Hawk FOSS + +Attribution 4.0 International + +======================================================================= + +Creative Commons Corporation ("Creative Commons") is not a law firm and +does not provide legal services or legal advice. Distribution of +Creative Commons public licenses does not create a lawyer-client or +other relationship. Creative Commons makes its licenses and related +information available on an "as-is" basis. Creative Commons gives no +warranties regarding its licenses, any material licensed under their +terms and conditions, or any related information. Creative Commons +disclaims all liability for damages resulting from their use to the +fullest extent possible. + +Using Creative Commons Public Licenses + +Creative Commons public licenses provide a standard set of terms and +conditions that creators and other rights holders may use to share +original works of authorship and other material subject to copyright +and certain other rights specified in the public license below. The +following considerations are for informational purposes only, are not +exhaustive, and do not form part of our licenses. + + Considerations for licensors: Our public licenses are + intended for use by those authorized to give the public + permission to use material in ways otherwise restricted by + copyright and certain other rights. Our licenses are + irrevocable. Licensors should read and understand the terms + and conditions of the license they choose before applying it. + Licensors should also secure all rights necessary before + applying our licenses so that the public can reuse the + material as expected. Licensors should clearly mark any + material not subject to the license. 
This includes other CC- + licensed material, or material used under an exception or + limitation to copyright. More considerations for licensors: + wiki.creativecommons.org/Considerations_for_licensors + + Considerations for the public: By using one of our public + licenses, a licensor grants the public permission to use the + licensed material under specified terms and conditions. If + the licensor's permission is not necessary for any reason--for + example, because of any applicable exception or limitation to + copyright--then that use is not regulated by the license. Our + licenses grant only permissions under copyright and certain + other rights that a licensor has authority to grant. Use of + the licensed material may still be restricted for other + reasons, including because others have copyright or other + rights in the material. A licensor may make special requests, + such as asking that all changes be marked or described. + Although not required by our licenses, you are encouraged to + respect those requests where reasonable. More_considerations + for the public: + wiki.creativecommons.org/Considerations_for_licensees + +======================================================================= + +Creative Commons Attribution 4.0 International Public License + +By exercising the Licensed Rights (defined below), You accept and agree +to be bound by the terms and conditions of this Creative Commons +Attribution 4.0 International Public License ("Public License"). To the +extent this Public License may be interpreted as a contract, You are +granted the Licensed Rights in consideration of Your acceptance of +these terms and conditions, and the Licensor grants You such rights in +consideration of benefits the Licensor receives from making the +Licensed Material available under these terms and conditions. + + +Section 1 -- Definitions. + + a. Adapted Material means material subject to Copyright and Similar + Rights that is derived from or based upon the Licensed Material + and in which the Licensed Material is translated, altered, + arranged, transformed, or otherwise modified in a manner requiring + permission under the Copyright and Similar Rights held by the + Licensor. For purposes of this Public License, where the Licensed + Material is a musical work, performance, or sound recording, + Adapted Material is always produced where the Licensed Material is + synched in timed relation with a moving image. + + b. Adapter's License means the license You apply to Your Copyright + and Similar Rights in Your contributions to Adapted Material in + accordance with the terms and conditions of this Public License. + + c. Copyright and Similar Rights means copyright and/or similar rights + closely related to copyright including, without limitation, + performance, broadcast, sound recording, and Sui Generis Database + Rights, without regard to how the rights are labeled or + categorized. For purposes of this Public License, the rights + specified in Section 2(b)(1)-(2) are not Copyright and Similar + Rights. + + d. Effective Technological Measures means those measures that, in the + absence of proper authority, may not be circumvented under laws + fulfilling obligations under Article 11 of the WIPO Copyright + Treaty adopted on December 20, 1996, and/or similar international + agreements. + + e. Exceptions and Limitations means fair use, fair dealing, and/or + any other exception or limitation to Copyright and Similar Rights + that applies to Your use of the Licensed Material. + + f. 
Licensed Material means the artistic or literary work, database, + or other material to which the Licensor applied this Public + License. + + g. Licensed Rights means the rights granted to You subject to the + terms and conditions of this Public License, which are limited to + all Copyright and Similar Rights that apply to Your use of the + Licensed Material and that the Licensor has authority to license. + + h. Licensor means the individual(s) or entity(ies) granting rights + under this Public License. + + i. Share means to provide material to the public by any means or + process that requires permission under the Licensed Rights, such + as reproduction, public display, public performance, distribution, + dissemination, communication, or importation, and to make material + available to the public including in ways that members of the + public may access the material from a place and at a time + individually chosen by them. + + j. Sui Generis Database Rights means rights other than copyright + resulting from Directive 96/9/EC of the European Parliament and of + the Council of 11 March 1996 on the legal protection of databases, + as amended and/or succeeded, as well as other essentially + equivalent rights anywhere in the world. + + k. You means the individual or entity exercising the Licensed Rights + under this Public License. Your has a corresponding meaning. + + +Section 2 -- Scope. + + a. License grant. + + 1. Subject to the terms and conditions of this Public License, + the Licensor hereby grants You a worldwide, royalty-free, + non-sublicensable, non-exclusive, irrevocable license to + exercise the Licensed Rights in the Licensed Material to: + + a. reproduce and Share the Licensed Material, in whole or + in part; and + + b. produce, reproduce, and Share Adapted Material. + + 2. Exceptions and Limitations. For the avoidance of doubt, where + Exceptions and Limitations apply to Your use, this Public + License does not apply, and You do not need to comply with + its terms and conditions. + + 3. Term. The term of this Public License is specified in Section + 6(a). + + 4. Media and formats; technical modifications allowed. The + Licensor authorizes You to exercise the Licensed Rights in + all media and formats whether now known or hereafter created, + and to make technical modifications necessary to do so. The + Licensor waives and/or agrees not to assert any right or + authority to forbid You from making technical modifications + necessary to exercise the Licensed Rights, including + technical modifications necessary to circumvent Effective + Technological Measures. For purposes of this Public License, + simply making modifications authorized by this Section 2(a) + (4) never produces Adapted Material. + + 5. Downstream recipients. + + a. Offer from the Licensor -- Licensed Material. Every + recipient of the Licensed Material automatically + receives an offer from the Licensor to exercise the + Licensed Rights under the terms and conditions of this + Public License. + + b. No downstream restrictions. You may not offer or impose + any additional or different terms or conditions on, or + apply any Effective Technological Measures to, the + Licensed Material if doing so restricts exercise of the + Licensed Rights by any recipient of the Licensed + Material. + + 6. No endorsement. 
Nothing in this Public License constitutes or + may be construed as permission to assert or imply that You + are, or that Your use of the Licensed Material is, connected + with, or sponsored, endorsed, or granted official status by, + the Licensor or others designated to receive attribution as + provided in Section 3(a)(1)(A)(i). + + b. Other rights. + + 1. Moral rights, such as the right of integrity, are not + licensed under this Public License, nor are publicity, + privacy, and/or other similar personality rights; however, to + the extent possible, the Licensor waives and/or agrees not to + assert any such rights held by the Licensor to the limited + extent necessary to allow You to exercise the Licensed + Rights, but not otherwise. + + 2. Patent and trademark rights are not licensed under this + Public License. + + 3. To the extent possible, the Licensor waives any right to + collect royalties from You for the exercise of the Licensed + Rights, whether directly or through a collecting society + under any voluntary or waivable statutory or compulsory + licensing scheme. In all other cases the Licensor expressly + reserves any right to collect such royalties. + + +Section 3 -- License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the +following conditions. + + a. Attribution. + + 1. If You Share the Licensed Material (including in modified + form), You must: + + a. retain the following if it is supplied by the Licensor + with the Licensed Material: + + i. identification of the creator(s) of the Licensed + Material and any others designated to receive + attribution, in any reasonable manner requested by + the Licensor (including by pseudonym if + designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of + warranties; + + v. a URI or hyperlink to the Licensed Material to the + extent reasonably practicable; + + b. indicate if You modified the Licensed Material and + retain an indication of any previous modifications; and + + c. indicate the Licensed Material is licensed under this + Public License, and include the text of, or the URI or + hyperlink to, this Public License. + + 2. You may satisfy the conditions in Section 3(a)(1) in any + reasonable manner based on the medium, means, and context in + which You Share the Licensed Material. For example, it may be + reasonable to satisfy the conditions by providing a URI or + hyperlink to a resource that includes the required + information. + + 3. If requested by the Licensor, You must remove any of the + information required by Section 3(a)(1)(A) to the extent + reasonably practicable. + + 4. If You Share Adapted Material You produce, the Adapter's + License You apply must not prevent recipients of the Adapted + Material from complying with this Public License. + + +Section 4 -- Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that +apply to Your use of the Licensed Material: + + a. for the avoidance of doubt, Section 2(a)(1) grants You the right + to extract, reuse, reproduce, and Share all or a substantial + portion of the contents of the database; + + b. if You include all or a substantial portion of the database + contents in a database in which You have Sui Generis Database + Rights, then the database in which You have Sui Generis Database + Rights (but not its individual contents) is Adapted Material; and + + c. 
You must comply with the conditions in Section 3(a) if You Share + all or a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not +replace Your obligations under this Public License where the Licensed +Rights include other Copyright and Similar Rights. + + +Section 5 -- Disclaimer of Warranties and Limitation of Liability. + + a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE + EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS + AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF + ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, + IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, + WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, + ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT + KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT + ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. + + b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE + TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, + NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, + INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, + COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR + USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN + ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR + DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR + IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. + + c. The disclaimer of warranties and limitation of liability provided + above shall be interpreted in a manner that, to the extent + possible, most closely approximates an absolute disclaimer and + waiver of all liability. + + +Section 6 -- Term and Termination. + + a. This Public License applies for the term of the Copyright and + Similar Rights licensed here. However, if You fail to comply with + this Public License, then Your rights under this Public License + terminate automatically. + + b. Where Your right to use the Licensed Material has terminated under + Section 6(a), it reinstates: + + 1. automatically as of the date the violation is cured, provided + it is cured within 30 days of Your discovery of the + violation; or + + 2. upon express reinstatement by the Licensor. + + For the avoidance of doubt, this Section 6(b) does not affect any + right the Licensor may have to seek remedies for Your violations + of this Public License. + + c. For the avoidance of doubt, the Licensor may also offer the + Licensed Material under separate terms or conditions or stop + distributing the Licensed Material at any time; however, doing so + will not terminate this Public License. + + d. Sections 1, 5, 6, 7, and 8 survive termination of this Public + License. + + +Section 7 -- Other Terms and Conditions. + + a. The Licensor shall not be bound by any additional or different + terms or conditions communicated by You unless expressly agreed. + + b. Any arrangements, understandings, or agreements regarding the + Licensed Material not stated herein are separate from and + independent of the terms and conditions of this Public License. + + +Section 8 -- Interpretation. + + a. 
For the avoidance of doubt, this Public License does not, and + shall not be interpreted to, reduce, limit, restrict, or impose + conditions on any use of the Licensed Material that could lawfully + be made without permission under this Public License. + + b. To the extent possible, if any provision of this Public License is + deemed unenforceable, it shall be automatically reformed to the + minimum extent necessary to make it enforceable. If the provision + cannot be reformed, it shall be severed from this Public License + without affecting the enforceability of the remaining terms and + conditions. + + c. No term or condition of this Public License will be waived and no + failure to comply consented to unless expressly agreed to by the + Licensor. + + d. Nothing in this Public License constitutes or may be interpreted + as a limitation upon, or waiver of, any privileges and immunities + that apply to the Licensor or You, including from the legal + processes of any jurisdiction or authority. + + +======================================================================= + +Creative Commons is not a party to its public +licenses. Notwithstanding, Creative Commons may elect to apply one of +its public licenses to material it publishes and in those instances +will be considered the “Licensor.” The text of the Creative Commons +public licenses is dedicated to the public domain under the CC0 Public +Domain Dedication. Except for the limited purpose of indicating that +material is shared under a Creative Commons public license or as +otherwise permitted by the Creative Commons policies published at +creativecommons.org/policies, Creative Commons does not authorize the +use of the trademark "Creative Commons" or any other trademark or logo +of Creative Commons without its prior written consent including, +without limitation, in connection with any unauthorized modifications +to any of its public licenses or any other arrangements, +understandings, or agreements concerning use of licensed material. For +the avoidance of doubt, this paragraph does not form part of the +public licenses. + +Creative Commons may be contacted at creativecommons.org. 
\ No newline at end of file diff --git a/docs/development_diagrams.md b/docs/development_diagrams.md new file mode 100644 index 000000000..12c6795db --- /dev/null +++ b/docs/development_diagrams.md @@ -0,0 +1,137 @@ +# AIHawk Development Diagrams + +## JobApplicationProfile class + +```mermaid +classDiagram + JobApplicationProfile *-- SelfIdentification + JobApplicationProfile *-- LegalAuthorization + JobApplicationProfile *-- WorkPreferences + JobApplicationProfile *-- Availability + JobApplicationProfile *-- SalaryExpectations + + class JobApplicationProfile { + +SelfIdentification self_identification + +LegalAuthorization legal_authorization + +WorkPreferences work_preferences + +Availability availability + +SalaryExpectations salary_expectations + +__init__(yaml_str) + +__str__() + } + + class SelfIdentification { + +str gender + +str pronouns + +str veteran + +str disability + +str ethnicity + } + + class LegalAuthorization { + +str eu_work_authorization + +str us_work_authorization + +str requires_us_visa + +str legally_allowed_to_work_in_us + +str requires_us_sponsorship + +str requires_eu_visa + +str legally_allowed_to_work_in_eu + +str requires_eu_sponsorship + +str canada_work_authorization + +str requires_canada_visa + +str legally_allowed_to_work_in_canada + +str requires_canada_sponsorship + +str uk_work_authorization + +str requires_uk_visa + +str legally_allowed_to_work_in_uk + +str requires_uk_sponsorship + } + + class WorkPreferences { + +str remote_work + +str in_person_work + +str open_to_relocation + +str willing_to_complete_assessments + +str willing_to_undergo_drug_tests + +str willing_to_undergo_background_checks + } + + class Availability { + +str notice_period + } + + class SalaryExpectations { + +str salary_range_usd + } +``` + +## Job application process + +```mermaid +sequenceDiagram + participant Main + participant AIHawkEasyApplier + participant JobManager + participant GPTAnswerer + participant Browser + participant FileSystem + + Main->>AIHawkEasyApplier: apply_to_job(job) + activate AIHawkEasyApplier + + AIHawkEasyApplier->>AIHawkEasyApplier: job_apply(job) + AIHawkEasyApplier->>Browser: Navigate to job.link + + AIHawkEasyApplier->>AIHawkEasyApplier: check_for_premium_redirect(job) + + AIHawkEasyApplier->>Browser: Find Easy Apply button + AIHawkEasyApplier->>Browser: Get job description + AIHawkEasyApplier->>Browser: Get recruiter link + + AIHawkEasyApplier->>GPTAnswerer: set_job(job) + AIHawkEasyApplier->>GPTAnswerer: is_job_suitable() + + alt Job Not Suitable + GPTAnswerer-->>AIHawkEasyApplier: False + AIHawkEasyApplier->>JobManager: write_to_file(job, "skipped") + AIHawkEasyApplier-->>Main: Return + end + + AIHawkEasyApplier->>Browser: Click Easy Apply button + + AIHawkEasyApplier->>AIHawkEasyApplier: _fill_application_form(job) + + loop Until Form Complete + AIHawkEasyApplier->>AIHawkEasyApplier: fill_up(job) + + alt Upload Fields Found + AIHawkEasyApplier->>AIHawkEasyApplier: _create_and_upload_resume() + AIHawkEasyApplier->>FileSystem: Save resume PDF + AIHawkEasyApplier->>Browser: Upload resume + + AIHawkEasyApplier->>AIHawkEasyApplier: _create_and_upload_cover_letter() + AIHawkEasyApplier->>GPTAnswerer: Generate cover letter + AIHawkEasyApplier->>Browser: Upload cover letter + end + + alt Additional Questions Found + AIHawkEasyApplier->>AIHawkEasyApplier: _fill_additional_questions() + AIHawkEasyApplier->>FileSystem: Load answers.json + AIHawkEasyApplier->>GPTAnswerer: Generate new answers + AIHawkEasyApplier->>FileSystem: Save to answers.json + 
AIHawkEasyApplier->>Browser: Fill in answers + end + + AIHawkEasyApplier->>AIHawkEasyApplier: _next_or_submit() + AIHawkEasyApplier->>AIHawkEasyApplier: _check_for_errors() + end + + alt Application Successful + AIHawkEasyApplier->>JobManager: write_to_file(job, "success") + else Application Failed + AIHawkEasyApplier->>AIHawkEasyApplier: _discard_application() + AIHawkEasyApplier->>JobManager: write_to_file(job, "failed") + end + + deactivate AIHawkEasyApplier +``` diff --git a/docs/guide_to_autostart_aihawk.pdf b/docs/guide_to_autostart_aihawk.pdf new file mode 100644 index 000000000..5f06ec63b Binary files /dev/null and b/docs/guide_to_autostart_aihawk.pdf differ diff --git a/docs/guide_to_setup_ollama_and_gemini.pdf b/docs/guide_to_setup_ollama_and_gemini.pdf new file mode 100644 index 000000000..adf728ebb Binary files /dev/null and b/docs/guide_to_setup_ollama_and_gemini.pdf differ diff --git a/docs/guide_yaml_sections.pdf b/docs/guide_yaml_sections.pdf new file mode 100644 index 000000000..f772d7ab8 Binary files /dev/null and b/docs/guide_yaml_sections.pdf differ diff --git a/docs/workflow_diagrams.md b/docs/workflow_diagrams.md new file mode 100644 index 000000000..37bca33f2 --- /dev/null +++ b/docs/workflow_diagrams.md @@ -0,0 +1,72 @@ +# Dev diagrams + +Note: All diagrams are created using [Mermaid](https://mermaid.js.org/). + +## 1. Application flow + +```mermaid +graph TD + A[Start] --> B[Parse Command Line Arguments] + B --> C[Validate Data Folder] + C --> D[Load Configuration] + D --> E[Initialize Components] + E --> F{Collect Mode?} + F -->|Yes| G[Collect Job Data] + F -->|No| H[Start Job Application Process] + G --> I[Save Data to JSON] + H --> J[Login to AIHawk] + J --> K[Search for Jobs] + K --> L[Apply to Jobs] + L --> M[Generate Reports] + I --> N[End] + M --> N +``` + +## 2. Job application process + +```mermaid +sequenceDiagram + participant User + participant AIHawkBot + participant AIHawk + participant GPTAnswerer + participant ResumeGenerator + + User->>AIHawkBot: Start application process + AIHawkBot->>AIHawk: Login + AIHawkBot->>AIHawk: Search for jobs + loop For each job + AIHawkBot->>AIHawk: Open job listing + AIHawkBot->>GPTAnswerer: Generate answers for application questions + AIHawkBot->>ResumeGenerator: Generate tailored resume + AIHawkBot->>AIHawk: Fill application form + AIHawkBot->>AIHawk: Upload resume and cover letter + AIHawkBot->>AIHawk: Submit application + AIHawkBot->>AIHawkBot: Log application result + end + AIHawkBot->>User: Display application summary +``` + +## 3. Resume generation process + +```mermaid +graph TD + A[Start Resume Generation] --> B[Extract Job Description] + B --> C[Analyze Job Requirements] + C --> D[Retrieve User Profile] + D --> E[Generate Tailored Content] + E --> F[Create PDF Resume] + F --> G[Return Base64 Encoded PDF] + G --> H[End Resume Generation] +``` + +## 4. 
GPTAnswerer workflow + +```mermaid +graph LR + A[Receive Question] --> B[Prepare Prompt] + B --> C[Send to LLM Model] + C --> D[Receive Response] + D --> E[Parse Response] + E --> F[Return Formatted Answer] +``` diff --git a/main.py b/main.py new file mode 100644 index 000000000..f9f05771f --- /dev/null +++ b/main.py @@ -0,0 +1,347 @@ +import os +import re +import sys +from pathlib import Path +import trace +import traceback +import yaml +import click +from selenium import webdriver +from selenium.webdriver.chrome.service import Service as ChromeService +from webdriver_manager.chrome import ChromeDriverManager +from selenium.common.exceptions import WebDriverException +from lib_resume_builder_AIHawk import ( + Resume, + FacadeManager, + ResumeGenerator, + StyleManager, +) +from typing import Optional +from constants import LINKEDIN, PLAIN_TEXT_RESUME_YAML, SECRETS_YAML, WORK_PREFERENCES_YAML +from src.job_portals.base_job_portal import get_job_portal +from src.utils.chrome_utils import chrome_browser_options +import undetected_chromedriver as uc + +from src.job_application_profile import JobApplicationProfile +from src.logging import logger + +# Suppress stderr only during specific operations +original_stderr = sys.stderr + +# Add the src directory to the Python path +sys.path.append(str(Path(__file__).resolve().parent / "src")) + + +from ai_hawk.bot_facade import AIHawkBotFacade +from ai_hawk.job_manager import AIHawkJobManager +from ai_hawk.llm.llm_manager import GPTAnswerer + + +class ConfigError(Exception): + pass + + +class ConfigValidator: + @staticmethod + def validate_email(email: str) -> bool: + return ( + re.match(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$", email) + is not None + ) + + @staticmethod + def validate_yaml_file(yaml_path: Path) -> dict: + try: + with open(yaml_path, "r") as stream: + return yaml.safe_load(stream) + except yaml.YAMLError as exc: + raise ConfigError(f"Error reading file {yaml_path}: {exc}") + except FileNotFoundError: + raise ConfigError(f"File not found: {yaml_path}") + + @staticmethod + def validate_config(config_yaml_path: Path) -> dict: + parameters = ConfigValidator.validate_yaml_file(config_yaml_path) + required_keys = { + "remote": bool, + "experience_level": dict, + "job_types": dict, + "date": dict, + "positions": list, + "locations": list, + "location_blacklist": list, + "distance": int, + "company_blacklist": list, + "title_blacklist": list, + } + + for key, expected_type in required_keys.items(): + if key not in parameters: + if key in [ + "company_blacklist", + "title_blacklist", + "location_blacklist", + ]: + parameters[key] = [] + else: + raise ConfigError( + f"Missing or invalid key '{key}' in config file {config_yaml_path}" + ) + elif not isinstance(parameters[key], expected_type): + if ( + key + in ["company_blacklist", "title_blacklist", "location_blacklist"] + and parameters[key] is None + ): + parameters[key] = [] + else: + raise ConfigError( + f"Invalid type for key '{key}' in config file {config_yaml_path}. Expected {expected_type}." 
+ ) + + # Validate experience levels, ensure they are boolean + experience_levels = [ + "internship", + "entry", + "associate", + "mid_senior_level", + "director", + "executive", + ] + for level in experience_levels: + if not isinstance(parameters["experience_level"].get(level), bool): + raise ConfigError( + f"Experience level '{level}' must be a boolean in config file {config_yaml_path}" + ) + + # Validate job types, ensure they are boolean + job_types = [ + "full_time", + "contract", + "part_time", + "temporary", + "internship", + "other", + "volunteer", + ] + for job_type in job_types: + if not isinstance(parameters["job_types"].get(job_type), bool): + raise ConfigError( + f"Job type '{job_type}' must be a boolean in config file {config_yaml_path}" + ) + + # Validate date filters + date_filters = ["all_time", "month", "week", "24_hours"] + for date_filter in date_filters: + if not isinstance(parameters["date"].get(date_filter), bool): + raise ConfigError( + f"Date filter '{date_filter}' must be a boolean in config file {config_yaml_path}" + ) + + # Validate positions and locations as lists of strings + if not all(isinstance(pos, str) for pos in parameters["positions"]): + raise ConfigError( + f"'positions' must be a list of strings in config file {config_yaml_path}" + ) + if not all(isinstance(loc, str) for loc in parameters["locations"]): + raise ConfigError( + f"'locations' must be a list of strings in config file {config_yaml_path}" + ) + + # Validate distance + approved_distances = {0, 5, 10, 25, 50, 100} + if parameters["distance"] not in approved_distances: + raise ConfigError( + f"Invalid distance value in config file {config_yaml_path}. Must be one of: {approved_distances}" + ) + + # Ensure blacklists are lists + for blacklist in ["company_blacklist", "title_blacklist", "location_blacklist"]: + if not isinstance(parameters.get(blacklist), list): + raise ConfigError( + f"'{blacklist}' must be a list in config file {config_yaml_path}" + ) + if parameters[blacklist] is None: + parameters[blacklist] = [] + + return parameters + + @staticmethod + def validate_secrets(secrets_yaml_path: Path) -> str: + secrets = ConfigValidator.validate_yaml_file(secrets_yaml_path) + mandatory_secrets = ["llm_api_key"] + + for secret in mandatory_secrets: + if secret not in secrets: + raise ConfigError( + f"Missing secret '{secret}' in file {secrets_yaml_path}" + ) + + if not secrets["llm_api_key"]: + raise ConfigError( + f"llm_api_key cannot be empty in secrets file {secrets_yaml_path}." 
+ ) + return secrets["llm_api_key"] + + +class FileManager: + @staticmethod + def validate_data_folder(app_data_folder: Path) -> tuple: + if not app_data_folder.exists() or not app_data_folder.is_dir(): + raise FileNotFoundError(f"Data folder not found: {app_data_folder}") + + required_files = [SECRETS_YAML, WORK_PREFERENCES_YAML, PLAIN_TEXT_RESUME_YAML] + missing_files = [ + file for file in required_files if not (app_data_folder / file).exists() + ] + + if missing_files: + raise FileNotFoundError( + f"Missing files in the data folder: {', '.join(missing_files)}" + ) + + output_folder = app_data_folder / "output" + output_folder.mkdir(exist_ok=True) + return ( + app_data_folder / SECRETS_YAML, + app_data_folder / WORK_PREFERENCES_YAML, + app_data_folder / PLAIN_TEXT_RESUME_YAML, + output_folder, + ) + + @staticmethod + def file_paths_to_dict( + resume_file: Path | None, plain_text_resume_file: Path + ) -> dict: + if not plain_text_resume_file.exists(): + raise FileNotFoundError( + f"Plain text resume file not found: {plain_text_resume_file}" + ) + + result = {"plainTextResume": plain_text_resume_file} + + if resume_file: + if not resume_file.exists(): + raise FileNotFoundError(f"Resume file not found: {resume_file}") + result["resume"] = resume_file + + return result + + +def init_browser() -> webdriver.Chrome: + try: + options = chrome_browser_options() + service = ChromeService(ChromeDriverManager().install()) + return webdriver.Chrome(service=service, options=options) + except Exception as e: + raise RuntimeError(f"Failed to initialize browser: {str(e)}") + + +def init_uc_browser() -> webdriver.Chrome: + try: + options = uc.ChromeOptions() + # Add any additional options you need + options.add_argument( + "--blink-settings=imagesEnabled=false" + ) # Optional: disable images + return uc.Chrome(options=options) + except Exception as e: + raise RuntimeError(f"Failed to initialize browser: {str(e)}") + + +def create_and_run_bot(parameters, llm_api_key): + try: + style_manager = StyleManager() + resume_generator = ResumeGenerator() + with open( + parameters["uploads"]["plainTextResume"], "r", encoding="utf-8" + ) as file: + plain_text_resume = file.read() + resume_object = Resume(plain_text_resume) + resume_generator_manager = FacadeManager( + llm_api_key, + style_manager, + resume_generator, + resume_object, + Path("data_folder/output"), + ) + + # Run the resume generator manager's functions if resume is not provided + if "resume" not in parameters["uploads"]: + resume_generator_manager.choose_style() + + job_application_profile_object = JobApplicationProfile(plain_text_resume) + + browser = init_uc_browser() + job_portal = get_job_portal( + driver=browser, portal_name=LINKEDIN, parameters=parameters + ) + login_component = job_portal.authenticator + apply_component = AIHawkJobManager(job_portal) + gpt_answerer_component = GPTAnswerer(parameters, llm_api_key) + bot = AIHawkBotFacade(login_component, apply_component) + bot.set_job_application_profile_and_resume( + job_application_profile_object, resume_object + ) + bot.set_gpt_answerer_and_resume_generator( + gpt_answerer_component, resume_generator_manager + ) + bot.set_parameters(parameters) + bot.start_login() + if parameters["collectMode"] == True: + logger.info("Collecting") + bot.start_collect_data() + else: + logger.info("Applying") + bot.start_apply() + except WebDriverException as e: + logger.error(f"WebDriver error occurred: {e}") + except Exception as e: + raise RuntimeError(f"Error running the bot: {str(e)}") + + 
+@click.command() +@click.option( + "--resume", + type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path), + help="Path to the resume PDF file", +) +@click.option( + "--collect", + is_flag=True, + help="Only collects data job information into data.json file", +) +def main(collect: bool = False, resume: Optional[Path] = None): + try: + data_folder = Path("data_folder") + secrets_file, config_file, plain_text_resume_file, output_folder = ( + FileManager.validate_data_folder(data_folder) + ) + + parameters = ConfigValidator.validate_config(config_file) + llm_api_key = ConfigValidator.validate_secrets(secrets_file) + + parameters["uploads"] = FileManager.file_paths_to_dict( + resume, plain_text_resume_file + ) + parameters["outputFileDirectory"] = output_folder + parameters["collectMode"] = collect + + create_and_run_bot(parameters, llm_api_key) + except ConfigError as ce: + logger.error(f"Configuration error: {str(ce)}") + logger.error( + f"Refer to the configuration guide for troubleshooting: https://github.com/feder-cr/Auto_Jobs_Applier_AIHawk?tab=readme-ov-file#configuration {str(ce)}" + ) + + except FileNotFoundError as fnf: + logger.error(f"File not found: {str(fnf)}") + logger.error("Ensure all required files are present in the data folder.") + except RuntimeError as re: + logger.error(f"Runtime error: {str(re)} {traceback.format_exc()}") + except Exception as e: + logger.error(f"An unexpected error occurred: {str(e)}") + + +if __name__ == "__main__": + main() diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..b3e4f235d --- /dev/null +++ b/pytest.ini @@ -0,0 +1,6 @@ +[pytest] +minversion = 6.0 +addopts = --strict-markers --tb=short --cov=src --cov-report=term-missing +testpaths = + tests +pythonpath = src \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..76214ed75 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,31 @@ +click +git+https://github.com/feder-cr/lib_resume_builder_AIHawk.git +httpx~=0.27.2 +inputimeout==1.0.4 +jsonschema==4.23.0 +jsonschema-specifications==2023.12.1 +langchain==0.2.11 +langchain-anthropic +langchain-huggingface +langchain-community==0.2.10 +langchain-core==0.2.36 +langchain-google-genai==1.0.10 +langchain-ollama==0.1.3 +langchain-openai==0.1.17 +langchain-text-splitters==0.2.2 +langsmith==0.1.93 +Levenshtein==0.25.1 +loguru==0.7.2 +openai==1.37.1 +pdfminer.six==20221105 +pytest>=8.3.3 +python-dotenv~=1.0.1 +PyYAML~=6.0.2 +regex==2024.7.24 +reportlab==4.2.2 +selenium==4.9.1 +webdriver-manager==4.0.2 +pytest +pytest-mock +pytest-cov +undetected-chromedriver==3.5.5 \ No newline at end of file diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/ai_hawk/__init__.py b/src/ai_hawk/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/ai_hawk/authenticator.py b/src/ai_hawk/authenticator.py new file mode 100644 index 000000000..a345c5d1b --- /dev/null +++ b/src/ai_hawk/authenticator.py @@ -0,0 +1,84 @@ +import random +import time + +from abc import ABC, abstractmethod +from selenium.common.exceptions import NoSuchElementException, TimeoutException, NoAlertPresentException, TimeoutException, UnexpectedAlertPresentException +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.wait import WebDriverWait + +from src.logging import logger + +class AIHawkAuthenticator(ABC): + + @property + def 
home_url(self): + pass + + @abstractmethod + def navigate_to_login(self): + pass + + @property + def is_logged_in(self): + pass + + def __init__(self, driver): + self.driver = driver + logger.debug(f"AIHawkAuthenticator initialized with driver: {driver}") + + def start(self): + logger.info("Starting Chrome browser to log in to AIHawk.") + self.driver.get(self.home_url) + if self.is_logged_in: + logger.info("User is already logged in. Skipping login process.") + return + else: + logger.info("User is not logged in. Proceeding with login.") + self.handle_login() + + def handle_login(self): + try: + logger.info("Navigating to the AIHawk login page...") + self.navigate_to_login() + self.prompt_for_credentials() + except NoSuchElementException as e: + logger.error(f"Could not log in to AIHawk. Element not found: {e}") + self.handle_security_checks() + + + def prompt_for_credentials(self): + try: + logger.debug("Enter credentials...") + check_interval = 45 # Interval to log the current URL + elapsed_time = 0 + + while True: + # Bring the browser window to the front + current_window = self.driver.current_window_handle + self.driver.switch_to.window(current_window) + + # Log current URL every 4 seconds and remind the user to log in + current_url = self.driver.current_url + logger.info(f"Please login on {current_url}") + + # Check if the user is already on the feed page + if self.is_logged_in: + logger.debug("Login successful, redirected to feed page.") + break + else: + # Optionally wait for the password field (or any other element you expect on the login page) + WebDriverWait(self.driver, 60).until( + EC.presence_of_element_located((By.ID, "password")) + ) + logger.debug("Password field detected, waiting for login completion.") + + time.sleep(check_interval) + elapsed_time += check_interval + + except TimeoutException: + logger.error("Login form not found. 
Aborting login.") + + @abstractmethod + def handle_security_checks(self): + pass diff --git a/src/ai_hawk/bot_facade.py b/src/ai_hawk/bot_facade.py new file mode 100644 index 000000000..f4b13d6b0 --- /dev/null +++ b/src/ai_hawk/bot_facade.py @@ -0,0 +1,100 @@ +from ai_hawk.job_manager import AIHawkJobManager +from src.logging import logger + + +class AIHawkBotState: + def __init__(self): + logger.debug("Initializing AIHawkBotState") + self.reset() + + def reset(self): + logger.debug("Resetting AIHawkBotState") + self.credentials_set = False + self.api_key_set = False + self.job_application_profile_set = False + self.gpt_answerer_set = False + self.parameters_set = False + self.logged_in = False + + def validate_state(self, required_keys): + logger.debug(f"Validating AIHawkBotState with required keys: {required_keys}") + for key in required_keys: + if not getattr(self, key): + logger.error(f"State validation failed: {key} is not set") + raise ValueError(f"{key.replace('_', ' ').capitalize()} must be set before proceeding.") + logger.debug("State validation passed") + + +class AIHawkBotFacade: + def __init__(self, login_component, apply_component): + logger.debug("Initializing AIHawkBotFacade") + self.login_component = login_component + self.apply_component : AIHawkJobManager = apply_component + self.state = AIHawkBotState() + self.job_application_profile = None + self.resume = None + self.email = None + self.password = None + self.parameters = None + + def set_job_application_profile_and_resume(self, job_application_profile, resume): + logger.debug("Setting job application profile and resume") + self._validate_non_empty(job_application_profile, "Job application profile") + self._validate_non_empty(resume, "Resume") + self.job_application_profile = job_application_profile + self.resume = resume + self.state.job_application_profile_set = True + logger.debug("Job application profile and resume set successfully") + + + def set_gpt_answerer_and_resume_generator(self, gpt_answerer_component, resume_generator_manager): + logger.debug("Setting GPT answerer and resume generator") + self._ensure_job_profile_and_resume_set() + gpt_answerer_component.set_job_application_profile(self.job_application_profile) + gpt_answerer_component.set_resume(self.resume) + self.apply_component.set_gpt_answerer(gpt_answerer_component) + self.apply_component.set_resume_generator_manager(resume_generator_manager) + self.state.gpt_answerer_set = True + logger.debug("GPT answerer and resume generator set successfully") + + def set_parameters(self, parameters): + logger.debug("Setting parameters") + self._validate_non_empty(parameters, "Parameters") + self.parameters = parameters + self.apply_component.set_parameters(parameters) + self.state.credentials_set = True + self.state.parameters_set = True + logger.debug("Parameters set successfully") + + def start_login(self): + logger.debug("Starting login process") + self.state.validate_state(['credentials_set']) + self.login_component.start() + self.state.logged_in = True + logger.debug("Login process completed successfully") + + def start_apply(self): + logger.debug("Starting apply process") + self.state.validate_state(['logged_in', 'job_application_profile_set', 'gpt_answerer_set', 'parameters_set']) + self.apply_component.start_applying() + logger.debug("Apply process started successfully") + + def start_collect_data(self): + logger.debug("Starting collecting data process") + self.state.validate_state(['logged_in', 'job_application_profile_set', 'gpt_answerer_set', 
'parameters_set']) + self.apply_component.start_collecting_data() + logger.debug("Collecting data process started successfully") + + def _validate_non_empty(self, value, name): + logger.debug(f"Validating that {name} is not empty") + if not value: + logger.error(f"Validation failed: {name} is empty") + raise ValueError(f"{name} cannot be empty.") + logger.debug(f"Validation passed for {name}") + + def _ensure_job_profile_and_resume_set(self): + logger.debug("Ensuring job profile and resume are set") + if not self.state.job_application_profile_set: + logger.error("Job application profile and resume are not set") + raise ValueError("Job application profile and resume must be set before proceeding.") + logger.debug("Job profile and resume are set") diff --git a/src/ai_hawk/job_applier.py b/src/ai_hawk/job_applier.py new file mode 100644 index 000000000..f2f30644e --- /dev/null +++ b/src/ai_hawk/job_applier.py @@ -0,0 +1,714 @@ +import base64 +from calendar import c +import json +from math import log +from operator import is_ +import os +import random +import re +import time +import traceback +from typing import List, Optional, Any, Text, Tuple + +from httpx import HTTPStatusError +from regex import W +from reportlab.lib.pagesizes import A4 +from reportlab.pdfgen import canvas +from reportlab.pdfbase.pdfmetrics import stringWidth + +from selenium.webdriver.remote.webelement import WebElement +from selenium.webdriver.support import expected_conditions as EC + +from jobContext import JobContext +from job_application import JobApplication +from job_application_saver import ApplicationSaver +from job_portals.application_form_elements import SelectQuestion, TextBoxQuestionType +from job_portals.base_job_portal import BaseJobPage, BaseJobPortal + +from src.logging import logger +from src.job import Job +from src.ai_hawk.llm.llm_manager import GPTAnswerer + + +def question_already_exists_in_data(question: str, data: List[dict]) -> bool: + """ + Check if a question already exists in the data list. + + Args: + question: The question text to search for + data: List of question dictionaries to search through + + Returns: + bool: True if question exists, False otherwise + """ + return any(item["question"] == question for item in data) + + +class AIHawkJobApplier: + def __init__( + self, + job_portal: BaseJobPortal, + resume_dir: Optional[str], + set_old_answers: List[Tuple[str, str, str]], + gpt_answerer: GPTAnswerer, + resume_generator_manager, + ): + logger.debug("Initializing AIHawkEasyApplier") + if resume_dir is None or not os.path.exists(resume_dir): + resume_dir = None + self.job_page = job_portal.job_page + self.job_application_page = job_portal.application_page + self.resume_path = resume_dir + self.set_old_answers = set_old_answers + self.gpt_answerer = gpt_answerer + self.resume_generator_manager = resume_generator_manager + self.all_data = self._load_questions_from_json() + self.current_job : Job | None = None + + logger.debug("AIHawkEasyApplier initialized successfully") + + def _load_questions_from_json(self) -> List[dict]: + output_file = "answers.json" + logger.debug(f"Loading questions from JSON file: {output_file}") + try: + with open(output_file, "r") as f: + try: + data = json.load(f) + if not isinstance(data, list): + raise ValueError( + "JSON file format is incorrect. Expected a list of questions." 
+ ) + except json.JSONDecodeError: + logger.error("JSON decoding failed") + data = [] + logger.debug("Questions loaded successfully from JSON") + return data + except FileNotFoundError: + logger.warning("JSON file not found, returning empty list") + return [] + except Exception: + tb_str = traceback.format_exc() + logger.error(f"Error loading questions data from JSON file: {tb_str}") + raise Exception( + f"Error loading questions data from JSON file: \nTraceback:\n{tb_str}" + ) + + def apply_to_job(self, job: Job) -> None: + """ + Starts the process of applying to a job. + :param job: A job object with the job details. + :return: None + """ + logger.debug(f"Applying to job: {job}") + try: + self.job_apply(job) + logger.info(f"Successfully applied to job: {job.title}") + except Exception as e: + logger.error(f"Failed to apply to job: {job.title}, error: {str(e)}") + raise e + + def job_apply(self, job: Job): + logger.debug(f"Starting job application for job: {job}") + job_context = JobContext() + job_context.job = job + job_context.job_application = JobApplication(job) + self.job_page.goto_job_page(job) + + try: + + job_description = self.job_page.get_job_description(job) + logger.debug(f"Job description set: {job_description[:100]}") + + job.set_job_description(job_description) + + recruiter_link = self.job_page.get_recruiter_link() + job.set_recruiter_link(recruiter_link) + + self.current_job = job + + logger.debug("Passing job information to GPT Answerer") + self.gpt_answerer.set_job(job) + + # Todo: add this job to skip list with it's reason + if not self.gpt_answerer.is_job_suitable(): + return + + self.job_page.click_apply_button(job_context) + + logger.debug("Filling out application form") + self._fill_application_form(job_context) + logger.debug( + f"Job application process completed successfully for job: {job}" + ) + + except Exception as e: + + tb_str = traceback.format_exc() + logger.error(f"Failed to apply to job: {job}, error: {tb_str}") + + logger.debug("Saving application process due to failure") + self.job_application_page.save() + + raise Exception( + f"Failed to apply to job! 
Original exception:\nTraceback:\n{tb_str}" + ) + + def _fill_application_form(self, job_context: JobContext): + job = job_context.job + job_application = job_context.job_application + logger.debug(f"Filling out application form for job: {job}") + + self.fill_up(job_context) + + while self.job_application_page.has_next_button(): + self.fill_up(job_context) + self.job_application_page.click_next_button() + self.job_application_page.handle_errors() + + if self.job_application_page.has_submit_button(): + self.job_application_page.click_submit_button() + ApplicationSaver.save(job_application) + logger.debug("Application form submitted") + return + + logger.warning(f"submit button not found, discarding application {job}") + + def fill_up(self, job_context: JobContext) -> None: + job = job_context.job + logger.debug(f"Filling up form sections for job: {job}") + + input_elements = self.job_application_page.get_input_elements() + + try: + for element in input_elements: + self._process_form_element(element, job_context) + + except Exception as e: + logger.error( + f"Failed to fill up form sections: {e} {traceback.format_exc()}" + ) + + def _process_form_element( + self, element: WebElement, job_context: JobContext + ) -> None: + logger.debug(f"Processing form element {element}") + if self.job_application_page.is_upload_field(element): + self._handle_upload_fields(element, job_context) + else: + self._fill_additional_questions(job_context) + + def _handle_upload_fields( + self, element: WebElement, job_context: JobContext + ) -> None: + logger.debug("Handling upload fields") + + file_upload_elements = self.job_application_page.get_file_upload_elements() + + for element in file_upload_elements: + + file_upload_element_heading = ( + self.job_application_page.get_upload_element_heading(element) + ) + + output = self.gpt_answerer.determine_resume_or_cover( + file_upload_element_heading + ) + + if "resume" in output: + logger.debug("Uploading resume") + if self.resume_path is not None and os.path.isfile(self.resume_path): + resume_file_path = os.path.abspath(self.resume_path) + self.job_application_page.upload_file(element, resume_file_path) + job_context.job.resume_path = resume_file_path + job_context.job_application.resume_path = str(resume_file_path) + logger.debug(f"Resume uploaded from path: {resume_file_path}") + else: + logger.debug( + "Resume path not found or invalid, generating new resume" + ) + self._create_and_upload_resume(element, job_context) + + elif "cover" in output: + logger.debug("Uploading cover letter") + self._create_and_upload_cover_letter(element, job_context) + + logger.debug("Finished handling upload fields") + + def _create_and_upload_resume(self, element, job_context: JobContext): + job = job_context.job + job_application = job_context.job_application + logger.debug("Starting the process of creating and uploading resume.") + folder_path = "generated_cv" + + try: + if not os.path.exists(folder_path): + logger.debug(f"Creating directory at path: {folder_path}") + os.makedirs(folder_path, exist_ok=True) + except Exception as e: + logger.error(f"Failed to create directory: {folder_path}. 
Error: {e}") + raise + + while True: + try: + timestamp = int(time.time()) + file_path_pdf = os.path.join(folder_path, f"CV_{timestamp}.pdf") + logger.debug(f"Generated file path for resume: {file_path_pdf}") + + logger.debug(f"Generating resume for job: {job.title} at {job.company}") + resume_pdf_base64 = self.resume_generator_manager.pdf_base64( + job_description_text=job.description + ) + with open(file_path_pdf, "xb") as f: + f.write(base64.b64decode(resume_pdf_base64)) + logger.debug( + f"Resume successfully generated and saved to: {file_path_pdf}" + ) + + break + except HTTPStatusError as e: + if e.response.status_code == 429: + + retry_after = e.response.headers.get("retry-after") + retry_after_ms = e.response.headers.get("retry-after-ms") + + if retry_after: + wait_time = int(retry_after) + logger.warning( + f"Rate limit exceeded, waiting {wait_time} seconds before retrying..." + ) + elif retry_after_ms: + wait_time = int(retry_after_ms) / 1000.0 + logger.warning( + f"Rate limit exceeded, waiting {wait_time} milliseconds before retrying..." + ) + else: + wait_time = 20 + logger.warning( + f"Rate limit exceeded, waiting {wait_time} seconds before retrying..." + ) + + time.sleep(wait_time) + else: + logger.error(f"HTTP error: {e}") + raise + + except Exception as e: + logger.error(f"Failed to generate resume: {e}") + tb_str = traceback.format_exc() + logger.error(f"Traceback: {tb_str}") + if "RateLimitError" in str(e): + logger.warning("Rate limit error encountered, retrying...") + time.sleep(20) + else: + raise + + file_size = os.path.getsize(file_path_pdf) + max_file_size = 2 * 1024 * 1024 # 2 MB + logger.debug(f"Resume file size: {file_size} bytes") + if file_size > max_file_size: + logger.error(f"Resume file size exceeds 2 MB: {file_size} bytes") + raise ValueError("Resume file size exceeds the maximum limit of 2 MB.") + + allowed_extensions = {".pdf", ".doc", ".docx"} + file_extension = os.path.splitext(file_path_pdf)[1].lower() + logger.debug(f"Resume file extension: {file_extension}") + if file_extension not in allowed_extensions: + logger.error(f"Invalid resume file format: {file_extension}") + raise ValueError( + "Resume file format is not allowed. Only PDF, DOC, and DOCX formats are supported." + ) + + try: + logger.debug(f"Uploading resume from path: {file_path_pdf}") + element.send_keys(os.path.abspath(file_path_pdf)) + job.resume_path = os.path.abspath(file_path_pdf) + job_application.resume_path = os.path.abspath(file_path_pdf) + time.sleep(2) + logger.debug(f"Resume created and uploaded successfully: {file_path_pdf}") + except Exception as e: + tb_str = traceback.format_exc() + logger.error(f"Resume upload failed: {tb_str}") + raise Exception(f"Upload failed: \nTraceback:\n{tb_str}") + + def _create_and_upload_cover_letter( + self, element: WebElement, job_context: JobContext + ) -> None: + job = job_context.job + logger.debug("Starting the process of creating and uploading cover letter.") + + cover_letter_text = self.gpt_answerer.answer_question_textual_wide_range( + "Write a cover letter" + ) + + folder_path = "generated_cv" + + try: + + if not os.path.exists(folder_path): + logger.debug(f"Creating directory at path: {folder_path}") + os.makedirs(folder_path, exist_ok=True) + except Exception as e: + logger.error(f"Failed to create directory: {folder_path}. 
Error: {e}") + raise + + while True: + try: + timestamp = int(time.time()) + file_path_pdf = os.path.join( + folder_path, f"Cover_Letter_{timestamp}.pdf" + ) + logger.debug(f"Generated file path for cover letter: {file_path_pdf}") + + c = canvas.Canvas(file_path_pdf, pagesize=A4) + page_width, page_height = A4 + text_object = c.beginText(50, page_height - 50) + text_object.setFont("Helvetica", 12) + + max_width = page_width - 100 + bottom_margin = 50 + available_height = page_height - bottom_margin - 50 + + def split_text_by_width(text, font, font_size, max_width): + wrapped_lines = [] + for line in text.splitlines(): + + if stringWidth(line, font, font_size) > max_width: + words = line.split() + new_line = "" + for word in words: + if ( + stringWidth(new_line + word + " ", font, font_size) + <= max_width + ): + new_line += word + " " + else: + wrapped_lines.append(new_line.strip()) + new_line = word + " " + wrapped_lines.append(new_line.strip()) + else: + wrapped_lines.append(line) + return wrapped_lines + + lines = split_text_by_width( + cover_letter_text, "Helvetica", 12, max_width + ) + + for line in lines: + text_height = text_object.getY() + if text_height > bottom_margin: + text_object.textLine(line) + else: + + c.drawText(text_object) + c.showPage() + text_object = c.beginText(50, page_height - 50) + text_object.setFont("Helvetica", 12) + text_object.textLine(line) + + c.drawText(text_object) + c.save() + logger.debug( + f"Cover letter successfully generated and saved to: {file_path_pdf}" + ) + + break + except Exception as e: + logger.error(f"Failed to generate cover letter: {e}") + tb_str = traceback.format_exc() + logger.error(f"Traceback: {tb_str}") + raise + + file_size = os.path.getsize(file_path_pdf) + max_file_size = 2 * 1024 * 1024 # 2 MB + logger.debug(f"Cover letter file size: {file_size} bytes") + if file_size > max_file_size: + logger.error(f"Cover letter file size exceeds 2 MB: {file_size} bytes") + raise ValueError( + "Cover letter file size exceeds the maximum limit of 2 MB." + ) + + allowed_extensions = {".pdf", ".doc", ".docx"} + file_extension = os.path.splitext(file_path_pdf)[1].lower() + logger.debug(f"Cover letter file extension: {file_extension}") + if file_extension not in allowed_extensions: + logger.error(f"Invalid cover letter file format: {file_extension}") + raise ValueError( + "Cover letter file format is not allowed. Only PDF, DOC, and DOCX formats are supported." 
+ ) + + try: + + logger.debug(f"Uploading cover letter from path: {file_path_pdf}") + element.send_keys(os.path.abspath(file_path_pdf)) + job.cover_letter_path = os.path.abspath(file_path_pdf) + job_context.job_application.cover_letter_path = os.path.abspath( + file_path_pdf + ) + time.sleep(2) + logger.debug( + f"Cover letter created and uploaded successfully: {file_path_pdf}" + ) + except Exception as e: + tb_str = traceback.format_exc() + logger.error(f"Cover letter upload failed: {tb_str}") + raise Exception(f"Upload failed: \nTraceback:\n{tb_str}") + + def _fill_additional_questions(self, job_context: JobContext) -> None: + logger.debug("Filling additional questions") + form_sections = self.job_application_page.get_form_sections() + for section in form_sections: + self._process_form_section(job_context, section) + + def _process_form_section( + self, job_context: JobContext, section: WebElement + ) -> None: + logger.debug("Processing form section") + if self.job_application_page.is_terms_of_service(section): + logger.debug("Handled terms of service") + self.job_application_page.accept_terms_of_service(section) + return + + if self.job_application_page.is_radio_question(section): + radio_question = self.job_application_page.web_element_to_radio_question( + section + ) + self._handle_radio_question(job_context, radio_question, section) + logger.debug("Handled radio button") + return + + if self.job_application_page.is_textbox_question(section): + self._handle_textbox_question(job_context, section) + logger.debug("Handled textbox question") + return + + if self.job_application_page.is_dropdown_question(section): + self._handle_dropdown_question(job_context, section) + logger.debug("Handled dropdown question") + return + + def _handle_radio_question( + self, + job_context: JobContext, + radio_question: SelectQuestion, + section: WebElement, + ) -> None: + job_application = job_context.job_application + + question_text = radio_question.question + options = radio_question.options + + existing_answer = None + current_question_sanitized = self._sanitize_text(question_text) + for item in self.all_data: + if ( + current_question_sanitized in item["question"] + and item["type"] == "radio" + ): + existing_answer = item + break + + if existing_answer: + self.job_application_page.select_radio_option( + section, existing_answer["answer"] + ) + job_application.save_application_data(existing_answer) + logger.debug("Selected existing radio answer") + return + + answer = self.gpt_answerer.answer_question_from_options(question_text, options) + self._save_questions_to_json( + {"type": "radio", "question": question_text, "answer": answer} + ) + self.all_data = self._load_questions_from_json() + job_application.save_application_data( + {"type": "radio", "question": question_text, "answer": answer} + ) + self.job_application_page.select_radio_option(section, answer) + logger.debug("Selected new radio answer") + return + + def _handle_textbox_question( + self, job_context: JobContext, section: WebElement + ) -> None: + + textbox_question = self.job_application_page.web_element_to_textbox_question( + section + ) + + question_text = textbox_question.question + question_type = textbox_question.type.value + is_cover_letter = "cover letter" in question_text.lower() + is_numeric = textbox_question.type is TextBoxQuestionType.NUMERIC + + # Look for existing answer if it's not a cover letter field + existing_answer = None + if not is_cover_letter: + current_question_sanitized = self._sanitize_text(question_text) + 
for item in self.all_data: + if ( + item["question"] == current_question_sanitized + and item.get("type") == question_type + ): + existing_answer = item["answer"] + logger.debug(f"Found existing answer: {existing_answer}") + break + + if existing_answer and not is_cover_letter: + answer = existing_answer + logger.debug(f"Using existing answer: {answer}") + else: + if is_numeric: + answer = self.gpt_answerer.answer_question_numeric(question_text) + logger.debug(f"Generated numeric answer: {answer}") + else: + answer = self.gpt_answerer.answer_question_textual_wide_range( + question_text + ) + logger.debug(f"Generated textual answer: {answer}") + + # Save non-cover letter answers + if not is_cover_letter and not existing_answer: + self._save_questions_to_json( + {"type": question_type, "question": question_text, "answer": answer} + ) + self.all_data = self._load_questions_from_json() + logger.debug("Saved non-cover letter answer to JSON.") + + self.job_application_page.fill_textbox_question(section, answer) + logger.debug("Entered answer into the textbox.") + + job_context.job_application.save_application_data( + {"type": question_type, "question": question_text, "answer": answer} + ) + + return + + def _handle_dropdown_question( + self, job_context: JobContext, section: WebElement + ) -> None: + job_application = job_context.job_application + + dropdown = self.job_application_page.web_element_to_dropdown_question(section) + + question_text = dropdown.question + existing_answer = None + current_question_sanitized = self._sanitize_text(question_text) + options = dropdown.options + + for item in self.all_data: + if ( + current_question_sanitized in item["question"] + and item["type"] == "dropdown" + ): + existing_answer = item["answer"] + break + + if existing_answer: + logger.debug( + f"Found existing answer for question '{question_text}': {existing_answer}" + ) + job_application.save_application_data( + { + "type": "dropdown", + "question": question_text, + "answer": existing_answer, + } + ) + + answer = existing_answer + + else: + logger.debug( + f"No existing answer found, querying model for: {question_text}" + ) + answer = self.gpt_answerer.answer_question_from_options( + question_text, options + ) + self._save_questions_to_json( + { + "type": "dropdown", + "question": question_text, + "answer": answer, + } + ) + self.all_data = self._load_questions_from_json() + job_application.save_application_data( + { + "type": "dropdown", + "question": question_text, + "answer": answer, + } + ) + + self.job_application_page.select_dropdown_option(section, answer) + logger.debug(f"Selected new dropdown answer: {answer}") + return + + def _save_questions_to_json(self, question_data: dict) -> None: + output_file = "answers.json" + question_data["question"] = self._sanitize_text(question_data["question"]) + + logger.debug(f"Checking if question data already exists: {question_data}") + try: + with open(output_file, "r+") as f: + try: + data = json.load(f) + if not isinstance(data, list): + raise ValueError( + "JSON file format is incorrect. Expected a list of questions." 
+ ) + except json.JSONDecodeError: + logger.error("JSON decoding failed") + data = [] + + should_be_saved: bool = not question_already_exists_in_data( + question_data["question"], data + ) and not self.answer_contians_company_name(question_data["answer"]) + + if should_be_saved: + logger.debug("New question found, appending to JSON") + data.append(question_data) + f.seek(0) + json.dump(data, f, indent=4) + f.truncate() + logger.debug("Question data saved successfully to JSON") + else: + logger.debug("Question already exists, skipping save") + except FileNotFoundError: + logger.warning("JSON file not found, creating new file") + with open(output_file, "w") as f: + json.dump([question_data], f, indent=4) + logger.debug("Question data saved successfully to new JSON file") + except Exception: + tb_str = traceback.format_exc() + logger.error(f"Error saving questions data to JSON file: {tb_str}") + raise Exception( + f"Error saving questions data to JSON file: \nTraceback:\n{tb_str}" + ) + + def _sanitize_text(self, text: str) -> str: + sanitized_text = text.lower().strip().replace('"', "").replace("\\", "") + sanitized_text = ( + re.sub(r"[\x00-\x1F\x7F]", "", sanitized_text) + .replace("\n", " ") + .replace("\r", "") + .rstrip(",") + ) + logger.debug(f"Sanitized text: {sanitized_text}") + return sanitized_text + + def _find_existing_answer(self, question_text): + for item in self.all_data: + if self._sanitize_text(item["question"]) == self._sanitize_text( + question_text + ): + return item + return None + + def answer_contians_company_name(self, answer: Any) -> bool: + return ( + isinstance(answer, str) + and self.current_job is not None + and self.current_job.company is not None + and self.current_job.company in answer + ) diff --git a/src/ai_hawk/job_manager.py b/src/ai_hawk/job_manager.py new file mode 100644 index 000000000..d705cc59e --- /dev/null +++ b/src/ai_hawk/job_manager.py @@ -0,0 +1,430 @@ +import json +import os +import random +import time +from itertools import product +from pathlib import Path +import traceback + +from inputimeout import inputimeout, TimeoutOccurred + +from ai_hawk.job_applier import AIHawkJobApplier +from config import JOB_MAX_APPLICATIONS, JOB_MIN_APPLICATIONS, MINIMUM_WAIT_TIME_IN_SECONDS + +from job_portals.base_job_portal import BaseJobPortal, get_job_portal +from src.job import Job +from src.logging import logger + +from src.regex_utils import look_ahead_patterns +import re + +import utils.browser_utils as browser_utils +import utils.time_utils + + +class EnvironmentKeys: + def __init__(self): + logger.debug("Initializing EnvironmentKeys") + self.skip_apply = self._read_env_key_bool("SKIP_APPLY") + self.disable_description_filter = self._read_env_key_bool("DISABLE_DESCRIPTION_FILTER") + logger.debug(f"EnvironmentKeys initialized: skip_apply={self.skip_apply}, disable_description_filter={self.disable_description_filter}") + + @staticmethod + def _read_env_key(key: str) -> str: + value = os.getenv(key, "") + logger.debug(f"Read environment key {key}: {value}") + return value + + @staticmethod + def _read_env_key_bool(key: str) -> bool: + value = os.getenv(key) == "True" + logger.debug(f"Read environment key {key} as bool: {value}") + return value + + +class AIHawkJobManager: + def __init__(self, job_portal : BaseJobPortal): + logger.debug("Initializing AIHawkJobManager") + self.job_portal = job_portal + self.set_old_answers = set() + self.easy_applier_component = None + logger.debug("AIHawkJobManager initialized successfully") + + def set_parameters(self, 
parameters): + logger.debug("Setting parameters for AIHawkJobManager") + self.company_blacklist = parameters.get('company_blacklist', []) or [] + self.title_blacklist = parameters.get('title_blacklist', []) or [] + self.location_blacklist = parameters.get('location_blacklist', []) or [] + self.positions = parameters.get('positions', []) + self.locations = parameters.get('locations', []) + self.apply_once_at_company = parameters.get('apply_once_at_company', False) + self.seen_jobs = [] + + self.min_applicants = JOB_MIN_APPLICATIONS + self.max_applicants = JOB_MAX_APPLICATIONS + + # Generate regex patterns from blacklist lists + self.title_blacklist_patterns = look_ahead_patterns(self.title_blacklist) + self.company_blacklist_patterns = look_ahead_patterns(self.company_blacklist) + self.location_blacklist_patterns = look_ahead_patterns(self.location_blacklist) + + resume_path = parameters.get('uploads', {}).get('resume', None) + self.resume_path = Path(resume_path) if resume_path and Path(resume_path).exists() else None + self.output_file_directory = Path(parameters['outputFileDirectory']) + self.env_config = EnvironmentKeys() + logger.debug("Parameters set successfully") + + def set_gpt_answerer(self, gpt_answerer): + logger.debug("Setting GPT answerer") + self.gpt_answerer = gpt_answerer + + def set_resume_generator_manager(self, resume_generator_manager): + logger.debug("Setting resume generator manager") + self.resume_generator_manager = resume_generator_manager + + def start_collecting_data(self): + searches = list(product(self.positions, self.locations)) + random.shuffle(searches) + page_sleep = 0 + minimum_time = 60 * 5 + minimum_page_time = time.time() + minimum_time + + for position, location in searches: + location_url = "&location=" + location + job_page_number = -1 + logger.info(f"Collecting data for {position} in {location}.",color="yellow") + try: + while True: + page_sleep += 1 + job_page_number += 1 + logger.info(f"Going to job page {job_page_number}", color="yellow") + self.job_portal.jobs_page.next_job_page(position, location_url, job_page_number) + utils.time_utils.medium_sleep() + logger.info("Starting the collecting process for this page", color="yellow") + self.read_jobs() + logger.info("Collecting data on this page has been completed!", color="yellow") + + time_left = minimum_page_time - time.time() + if time_left > 0: + logger.info(f"Sleeping for {time_left} seconds.",color="yellow") + time.sleep(time_left) + minimum_page_time = time.time() + minimum_time + if page_sleep % 5 == 0: + sleep_time = random.randint(1, 5) + logger.info(f"Sleeping for {sleep_time / 60} minutes.",color="yellow") + time.sleep(sleep_time) + page_sleep += 1 + except Exception: + pass + time_left = minimum_page_time - time.time() + if time_left > 0: + logger.info(f"Sleeping for {time_left} seconds.",color="yellow") + time.sleep(time_left) + minimum_page_time = time.time() + minimum_time + if page_sleep % 5 == 0: + sleep_time = random.randint(50, 90) + logger.info(f"Sleeping for {sleep_time / 60} minutes.",color="yellow") + time.sleep(sleep_time) + page_sleep += 1 + + def start_applying(self): + logger.debug("Starting job application process") + self.easy_applier_component = AIHawkJobApplier(self.job_portal, self.resume_path, self.set_old_answers, + self.gpt_answerer, self.resume_generator_manager) + searches = list(product(self.positions, self.locations)) + random.shuffle(searches) + page_sleep = 0 + minimum_time = MINIMUM_WAIT_TIME_IN_SECONDS + minimum_page_time = time.time() + minimum_time + + 
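+        # Iterate over each randomized (position, location) pair, paging through results until no jobs remain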
for position, location in searches: + location_url = "&location=" + location + job_page_number = -1 + logger.debug(f"Starting the search for {position} in {location}.") + + try: + while True: + page_sleep += 1 + job_page_number += 1 + logger.debug(f"Going to job page {job_page_number}") + self.job_portal.jobs_page.next_job_page(position, location_url, job_page_number) + utils.time_utils.medium_sleep() + logger.debug("Starting the application process for this page...") + + try: + jobs = self.job_portal.jobs_page.get_jobs_from_page(scroll=True) + if not jobs: + logger.debug("No more jobs found on this page. Exiting loop.") + break + except Exception as e: + logger.error(f"Failed to retrieve jobs: {e}") + break + + try: + self.apply_jobs() + except Exception as e: + logger.error(f"Error during job application: {e} {traceback.format_exc()}") + continue + + logger.debug("Applying to jobs on this page has been completed!") + + time_left = minimum_page_time - time.time() + + # Ask user if they want to skip waiting, with timeout + if time_left > 0: + try: + user_input = inputimeout( + prompt=f"Sleeping for {time_left} seconds. Press 'y' to skip waiting. Timeout 60 seconds : ", + timeout=60).strip().lower() + except TimeoutOccurred: + user_input = '' # No input after timeout + if user_input == 'y': + logger.debug("User chose to skip waiting.") + else: + logger.debug(f"Sleeping for {time_left} seconds as user chose not to skip.") + time.sleep(time_left) + + minimum_page_time = time.time() + minimum_time + + if page_sleep % 5 == 0: + sleep_time = random.randint(5, 34) + try: + user_input = inputimeout( + prompt=f"Sleeping for {sleep_time / 60} minutes. Press 'y' to skip waiting. Timeout 60 seconds : ", + timeout=60).strip().lower() + except TimeoutOccurred: + user_input = '' # No input after timeout + if user_input == 'y': + logger.debug("User chose to skip waiting.") + else: + logger.debug(f"Sleeping for {sleep_time} seconds.") + time.sleep(sleep_time) + page_sleep += 1 + except Exception as e: + logger.error(f"Unexpected error during job search: {e}") + continue + + time_left = minimum_page_time - time.time() + + if time_left > 0: + try: + user_input = inputimeout( + prompt=f"Sleeping for {time_left} seconds. Press 'y' to skip waiting. Timeout 60 seconds : ", + timeout=60).strip().lower() + except TimeoutOccurred: + user_input = '' # No input after timeout + if user_input == 'y': + logger.debug("User chose to skip waiting.") + else: + logger.debug(f"Sleeping for {time_left} seconds as user chose not to skip.") + time.sleep(time_left) + + minimum_page_time = time.time() + minimum_time + + if page_sleep % 5 == 0: + sleep_time = random.randint(50, 90) + try: + user_input = inputimeout( + prompt=f"Sleeping for {sleep_time / 60} minutes. 
Press 'y' to skip waiting: ", + timeout=60).strip().lower() + except TimeoutOccurred: + user_input = '' # No input after timeout + if user_input == 'y': + logger.debug("User chose to skip waiting.") + else: + logger.debug(f"Sleeping for {sleep_time} seconds.") + time.sleep(sleep_time) + page_sleep += 1 + + def read_jobs(self): + + job_element_list = self.job_portal.jobs_page.get_jobs_from_page() + job_list = [self.job_portal.jobs_page.job_tile_to_job(job_element) for job_element in job_element_list] + for job in job_list: + if self.is_blacklisted(job.title, job.company, job.link, job.location): + logger.info(f"Blacklisted {job.title} at {job.company} in {job.location}, skipping...") + self.write_to_file(job, "skipped") + continue + try: + self.write_to_file(job,'data') + except Exception as e: + self.write_to_file(job, "failed") + continue + + def apply_jobs(self): + job_element_list = self.job_portal.jobs_page.get_jobs_from_page() + + job_list = [self.job_portal.jobs_page.job_tile_to_job(job_element) for job_element in job_element_list] + + for job in job_list: + + logger.debug(f"Starting applicant for job: {job.title} at {job.company}") + #TODO fix apply threshold + """ + # Initialize applicants_count as None + applicants_count = None + + # Iterate over each job insight element to find the one containing the word "applicant" + for element in job_insight_elements: + logger.debug(f"Checking element text: {element.text}") + if "applicant" in element.text.lower(): + # Found an element containing "applicant" + applicants_text = element.text.strip() + logger.debug(f"Applicants text found: {applicants_text}") + + # Extract numeric digits from the text (e.g., "70 applicants" -> "70") + applicants_count = ''.join(filter(str.isdigit, applicants_text)) + logger.debug(f"Extracted applicants count: {applicants_count}") + + if applicants_count: + if "over" in applicants_text.lower(): + applicants_count = int(applicants_count) + 1 # Handle "over X applicants" + logger.debug(f"Applicants count adjusted for 'over': {applicants_count}") + else: + applicants_count = int(applicants_count) # Convert the extracted number to an integer + break + + # Check if applicants_count is valid (not None) before performing comparisons + if applicants_count is not None: + # Perform the threshold check for applicants count + if applicants_count < self.min_applicants or applicants_count > self.max_applicants: + logger.debug(f"Skipping {job.title} at {job.company}, applicants count: {applicants_count}") + self.write_to_file(job, "skipped_due_to_applicants") + continue # Skip this job if applicants count is outside the threshold + else: + logger.debug(f"Applicants count {applicants_count} is within the threshold") + else: + # If no applicants count was found, log a warning but continue the process + logger.warning( + f"Applicants count not found for {job.title} at {job.company}, continuing with application.") + except NoSuchElementException: + # Log a warning if the job insight elements are not found, but do not stop the job application process + logger.warning( + f"Applicants count elements not found for {job.title} at {job.company}, continuing with application.") + except ValueError as e: + # Handle errors when parsing the applicants count + logger.error(f"Error parsing applicants count for {job.title} at {job.company}: {e}") + except Exception as e: + # Catch any other exceptions to ensure the process continues + logger.error( + f"Unexpected error during applicants count processing for {job.title} at {job.company}: {e}") + 
+ # Continue with the job application process regardless of the applicants count check + """ + + + if self.is_previously_failed_to_apply(job.link): + logger.debug(f"Previously failed to apply for {job.title} at {job.company}, skipping...") + continue + if self.is_blacklisted(job.title, job.company, job.link, job.location): + logger.debug(f"Job blacklisted: {job.title} at {job.company} in {job.location}") + self.write_to_file(job, "skipped", "Job blacklisted") + continue + if self.is_already_applied_to_job(job.title, job.company, job.link): + self.write_to_file(job, "skipped", "Already applied to this job") + continue + if self.is_already_applied_to_company(job.company): + self.write_to_file(job, "skipped", "Already applied to this company") + continue + try: + if job.apply_method not in {"Continue", "Applied", "Apply"}: + self.easy_applier_component.job_apply(job) + self.write_to_file(job, "success") + logger.debug(f"Applied to job: {job.title} at {job.company}") + except Exception as e: + logger.error(f"Failed to apply for {job.title} at {job.company}: {e}",exc_info=True) + self.write_to_file(job, "failed", f"Application error: {str(e)}") + continue + + def write_to_file(self, job : Job, file_name, reason=None): + logger.debug(f"Writing job application result to file: {file_name}") + pdf_path = Path(job.resume_path).resolve() + pdf_path = pdf_path.as_uri() + data = { + "company": job.company, + "job_title": job.title, + "link": job.link, + "job_recruiter": job.recruiter_link, + "job_location": job.location, + "pdf_path": pdf_path + } + + if reason: + data["reason"] = reason + + file_path = self.output_file_directory / f"{file_name}.json" + if not file_path.exists(): + with open(file_path, 'w', encoding='utf-8') as f: + json.dump([data], f, indent=4) + logger.debug(f"Job data written to new file: {file_name}") + else: + with open(file_path, 'r+', encoding='utf-8') as f: + try: + existing_data = json.load(f) + except json.JSONDecodeError: + logger.error(f"JSON decode error in file: {file_path}") + existing_data = [] + existing_data.append(data) + f.seek(0) + json.dump(existing_data, f, indent=4) + f.truncate() + logger.debug(f"Job data appended to existing file: {file_name}") + + def is_blacklisted(self, job_title, company, link, job_location): + logger.debug(f"Checking if job is blacklisted: {job_title} at {company} in {job_location}") + title_blacklisted = any(re.search(pattern, job_title, re.IGNORECASE) for pattern in self.title_blacklist_patterns) + company_blacklisted = any(re.search(pattern, company, re.IGNORECASE) for pattern in self.company_blacklist_patterns) + location_blacklisted = any(re.search(pattern, job_location, re.IGNORECASE) for pattern in self.location_blacklist_patterns) + link_seen = link in self.seen_jobs + is_blacklisted = title_blacklisted or company_blacklisted or location_blacklisted or link_seen + logger.debug(f"Job blacklisted status: {is_blacklisted}") + + return is_blacklisted + + def is_already_applied_to_job(self, job_title, company, link): + link_seen = link in self.seen_jobs + if link_seen: + logger.debug(f"Already applied to job: {job_title} at {company}, skipping...") + return link_seen + + def is_already_applied_to_company(self, company): + if not self.apply_once_at_company: + return False + + output_files = ["success.json"] + for file_name in output_files: + file_path = self.output_file_directory / file_name + if file_path.exists(): + with open(file_path, 'r', encoding='utf-8') as f: + try: + existing_data = json.load(f) + for applied_job in 
existing_data: + if applied_job['company'].strip().lower() == company.strip().lower(): + logger.debug( + f"Already applied at {company} (once per company policy), skipping...") + return True + except json.JSONDecodeError: + continue + return False + + def is_previously_failed_to_apply(self, link): + file_name = "failed" + file_path = self.output_file_directory / f"{file_name}.json" + + if not file_path.exists(): + with open(file_path, "w", encoding="utf-8") as f: + json.dump([], f) + + with open(file_path, 'r', encoding='utf-8') as f: + try: + existing_data = json.load(f) + except json.JSONDecodeError: + logger.error(f"JSON decode error in file: {file_path}") + return False + + for data in existing_data: + data_link = data['link'] + if data_link == link: + return True + + return False diff --git a/src/ai_hawk/llm/llm_manager.py b/src/ai_hawk/llm/llm_manager.py new file mode 100644 index 000000000..ae627c364 --- /dev/null +++ b/src/ai_hawk/llm/llm_manager.py @@ -0,0 +1,709 @@ +import json +import os +import re +import textwrap +import time +from abc import ABC, abstractmethod +from datetime import datetime +from pathlib import Path +from typing import Dict, List, Union + +import httpx +from dotenv import load_dotenv +from langchain_core.messages import BaseMessage +from langchain_core.messages.ai import AIMessage +from langchain_core.output_parsers import StrOutputParser +from langchain_core.prompt_values import StringPromptValue +from langchain_core.prompts import ChatPromptTemplate +from Levenshtein import distance + +import ai_hawk.llm.prompts as prompts +from config import JOB_SUITABILITY_SCORE +from constants import ( + AVAILABILITY, + CERTIFICATIONS, + CLAUDE, + COMPANY, + CONTENT, + COVER_LETTER, + EDUCATION_DETAILS, + EXPERIENCE_DETAILS, + FINISH_REASON, + GEMINI, + HUGGINGFACE, + ID, + INPUT_TOKENS, + INTERESTS, + JOB_APPLICATION_PROFILE, + JOB_DESCRIPTION, + LANGUAGES, + LEGAL_AUTHORIZATION, + LLM_MODEL_TYPE, + LOGPROBS, + MODEL, + MODEL_NAME, + OLLAMA, + OPENAI, + PERPLEXITY, + OPTIONS, + OUTPUT_TOKENS, + PERSONAL_INFORMATION, + PHRASE, + PROJECTS, + PROMPTS, + QUESTION, + REPLIES, + RESPONSE_METADATA, + RESUME, + RESUME_EDUCATIONS, + RESUME_JOBS, + RESUME_PROJECTS, + RESUME_SECTION, + SALARY_EXPECTATIONS, + SELF_IDENTIFICATION, + SYSTEM_FINGERPRINT, + TEXT, + TIME, + TOKEN_USAGE, + TOTAL_COST, + TOTAL_TOKENS, + USAGE_METADATA, + WORK_PREFERENCES, +) +from src.job import Job +from src.logging import logger +import config as cfg + +load_dotenv() + + +class AIModel(ABC): + @abstractmethod + def invoke(self, prompt: str) -> str: + pass + + +class OpenAIModel(AIModel): + def __init__(self, api_key: str, llm_model: str): + from langchain_openai import ChatOpenAI + + self.model = ChatOpenAI( + model_name=llm_model, openai_api_key=api_key, temperature=0.4 + ) + + def invoke(self, prompt: str) -> BaseMessage: + logger.debug("Invoking OpenAI API") + response = self.model.invoke(prompt) + return response + + +class ClaudeModel(AIModel): + def __init__(self, api_key: str, llm_model: str): + from langchain_anthropic import ChatAnthropic + + self.model = ChatAnthropic(model=llm_model, api_key=api_key, temperature=0.4) + + def invoke(self, prompt: str) -> BaseMessage: + response = self.model.invoke(prompt) + logger.debug("Invoking Claude API") + return response + + +class OllamaModel(AIModel): + def __init__(self, llm_model: str, llm_api_url: str): + from langchain_ollama import ChatOllama + + if len(llm_api_url) > 0: + logger.debug(f"Using Ollama with API URL: {llm_api_url}") + self.model = 
ChatOllama(model=llm_model, base_url=llm_api_url)
+        else:
+            self.model = ChatOllama(model=llm_model)
+
+    def invoke(self, prompt: str) -> BaseMessage:
+        response = self.model.invoke(prompt)
+        return response
+
+class PerplexityModel(AIModel):
+    def __init__(self, api_key: str, llm_model: str):
+        from langchain_community.chat_models import ChatPerplexity
+        self.model = ChatPerplexity(model=llm_model, api_key=api_key, temperature=0.4)
+
+    def invoke(self, prompt: str) -> BaseMessage:
+        response = self.model.invoke(prompt)
+        return response
+
+# Gemini doesn't seem to work because the API doesn't return answers for questions whose expected answers are too short
+class GeminiModel(AIModel):
+    def __init__(self, api_key: str, llm_model: str):
+        from langchain_google_genai import (
+            ChatGoogleGenerativeAI,
+            HarmBlockThreshold,
+            HarmCategory,
+        )
+
+        self.model = ChatGoogleGenerativeAI(
+            model=llm_model,
+            google_api_key=api_key,
+            safety_settings={
+                HarmCategory.HARM_CATEGORY_UNSPECIFIED: HarmBlockThreshold.BLOCK_NONE,
+                HarmCategory.HARM_CATEGORY_DEROGATORY: HarmBlockThreshold.BLOCK_NONE,
+                HarmCategory.HARM_CATEGORY_TOXICITY: HarmBlockThreshold.BLOCK_NONE,
+                HarmCategory.HARM_CATEGORY_VIOLENCE: HarmBlockThreshold.BLOCK_NONE,
+                HarmCategory.HARM_CATEGORY_SEXUAL: HarmBlockThreshold.BLOCK_NONE,
+                HarmCategory.HARM_CATEGORY_MEDICAL: HarmBlockThreshold.BLOCK_NONE,
+                HarmCategory.HARM_CATEGORY_DANGEROUS: HarmBlockThreshold.BLOCK_NONE,
+                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
+                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
+                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
+                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
+            },
+        )
+
+    def invoke(self, prompt: str) -> BaseMessage:
+        response = self.model.invoke(prompt)
+        return response
+
+
+class HuggingFaceModel(AIModel):
+    def __init__(self, api_key: str, llm_model: str):
+        from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
+
+        self.model = HuggingFaceEndpoint(
+            repo_id=llm_model, huggingfacehub_api_token=api_key, temperature=0.4
+        )
+        self.chatmodel = ChatHuggingFace(llm=self.model)
+
+    def invoke(self, prompt: str) -> BaseMessage:
+        response = self.chatmodel.invoke(prompt)
+        logger.debug(
+            f"Invoking Model from Hugging Face API. 
Response: {response}, Type: {type(response)}" + ) + return response + + +class AIAdapter: + def __init__(self, config: dict, api_key: str): + self.model = self._create_model(config, api_key) + + def _create_model(self, config: dict, api_key: str) -> AIModel: + llm_model_type = cfg.LLM_MODEL_TYPE + llm_model = cfg.LLM_MODEL + + llm_api_url = cfg.LLM_API_URL + + logger.debug(f"Using {llm_model_type} with {llm_model}") + + if llm_model_type == OPENAI: + return OpenAIModel(api_key, llm_model) + elif llm_model_type == CLAUDE: + return ClaudeModel(api_key, llm_model) + elif llm_model_type == OLLAMA: + return OllamaModel(llm_model, llm_api_url) + elif llm_model_type == GEMINI: + return GeminiModel(api_key, llm_model) + elif llm_model_type == HUGGINGFACE: + return HuggingFaceModel(api_key, llm_model) + elif llm_model_type == PERPLEXITY: + return PerplexityModel(api_key, llm_model) + else: + raise ValueError(f"Unsupported model type: {llm_model_type}") + + def invoke(self, prompt: str) -> str: + return self.model.invoke(prompt) + + +class LLMLogger: + def __init__(self, llm: Union[OpenAIModel, OllamaModel, ClaudeModel, GeminiModel]): + self.llm = llm + logger.debug(f"LLMLogger successfully initialized with LLM: {llm}") + + @staticmethod + def log_request(prompts, parsed_reply: Dict[str, Dict]): + logger.debug("Starting log_request method") + logger.debug(f"Prompts received: {prompts}") + logger.debug(f"Parsed reply received: {parsed_reply}") + + try: + calls_log = os.path.join(Path("data_folder/output"), "open_ai_calls.json") + logger.debug(f"Logging path determined: {calls_log}") + except Exception as e: + logger.error(f"Error determining the log path: {str(e)}") + raise + + if isinstance(prompts, StringPromptValue): + logger.debug("Prompts are of type StringPromptValue") + prompts = prompts.text + logger.debug(f"Prompts converted to text: {prompts}") + elif isinstance(prompts, Dict): + logger.debug("Prompts are of type Dict") + try: + prompts = { + f"prompt_{i + 1}": prompt.content + for i, prompt in enumerate(prompts.messages) + } + logger.debug(f"Prompts converted to dictionary: {prompts}") + except Exception as e: + logger.error(f"Error converting prompts to dictionary: {str(e)}") + raise + else: + logger.debug("Prompts are of unknown type, attempting default conversion") + try: + prompts = { + f"prompt_{i + 1}": prompt.content + for i, prompt in enumerate(prompts.messages) + } + logger.debug( + f"Prompts converted to dictionary using default method: {prompts}" + ) + except Exception as e: + logger.error(f"Error converting prompts using default method: {str(e)}") + raise + + try: + current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + logger.debug(f"Current time obtained: {current_time}") + except Exception as e: + logger.error(f"Error obtaining current time: {str(e)}") + raise + + try: + token_usage = parsed_reply[USAGE_METADATA] + output_tokens = token_usage[OUTPUT_TOKENS] + input_tokens = token_usage[INPUT_TOKENS] + total_tokens = token_usage[TOTAL_TOKENS] + logger.debug( + f"Token usage - Input: {input_tokens}, Output: {output_tokens}, Total: {total_tokens}" + ) + except KeyError as e: + logger.error(f"KeyError in parsed_reply structure: {str(e)}") + raise + + try: + model_name = parsed_reply[RESPONSE_METADATA][MODEL_NAME] + logger.debug(f"Model name: {model_name}") + except KeyError as e: + logger.error(f"KeyError in response_metadata: {str(e)}") + raise + + try: + prompt_price_per_token = 0.00000015 + completion_price_per_token = 0.0000006 + total_cost = (input_tokens * 
prompt_price_per_token) + ( + output_tokens * completion_price_per_token + ) + logger.debug(f"Total cost calculated: {total_cost}") + except Exception as e: + logger.error(f"Error calculating total cost: {str(e)}") + raise + + try: + log_entry = { + MODEL: model_name, + TIME: current_time, + PROMPTS: prompts, + REPLIES: parsed_reply[CONTENT], + TOTAL_TOKENS: total_tokens, + INPUT_TOKENS: input_tokens, + OUTPUT_TOKENS: output_tokens, + TOTAL_COST: total_cost, + } + logger.debug(f"Log entry created: {log_entry}") + except KeyError as e: + logger.error( + f"Error creating log entry: missing key {str(e)} in parsed_reply" + ) + raise + + try: + with open(calls_log, "a", encoding="utf-8") as f: + json_string = json.dumps(log_entry, ensure_ascii=False, indent=4) + f.write(json_string + "\n") + logger.debug(f"Log entry written to file: {calls_log}") + except Exception as e: + logger.error(f"Error writing log entry to file: {str(e)}") + raise + + +class LoggerChatModel: + def __init__(self, llm: Union[OpenAIModel, OllamaModel, ClaudeModel, GeminiModel]): + self.llm = llm + logger.debug(f"LoggerChatModel successfully initialized with LLM: {llm}") + + def __call__(self, messages: List[Dict[str, str]]) -> str: + logger.debug(f"Entering __call__ method with messages: {messages}") + while True: + try: + logger.debug("Attempting to call the LLM with messages") + + reply = self.llm.invoke(messages) + logger.debug(f"LLM response received: {reply}") + + parsed_reply = self.parse_llmresult(reply) + logger.debug(f"Parsed LLM reply: {parsed_reply}") + + LLMLogger.log_request(prompts=messages, parsed_reply=parsed_reply) + logger.debug("Request successfully logged") + + return reply + + except httpx.HTTPStatusError as e: + logger.error(f"HTTPStatusError encountered: {str(e)}") + if e.response.status_code == 429: + retry_after = e.response.headers.get("retry-after") + retry_after_ms = e.response.headers.get("retry-after-ms") + + if retry_after: + wait_time = int(retry_after) + logger.warning( + f"Rate limit exceeded. Waiting for {wait_time} seconds before retrying (extracted from 'retry-after' header)..." + ) + time.sleep(wait_time) + elif retry_after_ms: + wait_time = int(retry_after_ms) / 1000.0 + logger.warning( + f"Rate limit exceeded. Waiting for {wait_time} seconds before retrying (extracted from 'retry-after-ms' header)..." + ) + time.sleep(wait_time) + else: + wait_time = 30 + logger.warning( + f"'retry-after' header not found. Waiting for {wait_time} seconds before retrying (default)..." + ) + time.sleep(wait_time) + else: + logger.error( + f"HTTP error occurred with status code: {e.response.status_code}, waiting 30 seconds before retrying" + ) + time.sleep(30) + + except Exception as e: + logger.error(f"Unexpected error occurred: {str(e)}") + logger.info( + "Waiting for 30 seconds before retrying due to an unexpected error." 
+ ) + time.sleep(30) + continue + + def parse_llmresult(self, llmresult: AIMessage) -> Dict[str, Dict]: + logger.debug(f"Parsing LLM result: {llmresult}") + + try: + if hasattr(llmresult, USAGE_METADATA): + content = llmresult.content + response_metadata = llmresult.response_metadata + id_ = llmresult.id + usage_metadata = llmresult.usage_metadata + + parsed_result = { + CONTENT: content, + RESPONSE_METADATA: { + MODEL_NAME: response_metadata.get( + MODEL_NAME, "" + ), + SYSTEM_FINGERPRINT: response_metadata.get( + SYSTEM_FINGERPRINT, "" + ), + FINISH_REASON: response_metadata.get( + FINISH_REASON, "" + ), + LOGPROBS: response_metadata.get( + LOGPROBS, None + ), + }, + ID: id_, + USAGE_METADATA: { + INPUT_TOKENS: usage_metadata.get( + INPUT_TOKENS, 0 + ), + OUTPUT_TOKENS: usage_metadata.get( + OUTPUT_TOKENS, 0 + ), + TOTAL_TOKENS: usage_metadata.get( + TOTAL_TOKENS, 0 + ), + }, + } + else: + content = llmresult.content + response_metadata = llmresult.response_metadata + id_ = llmresult.id + token_usage = response_metadata[TOKEN_USAGE] + + parsed_result = { + CONTENT: content, + RESPONSE_METADATA: { + MODEL_NAME: response_metadata.get( + MODEL, "" + ), + FINISH_REASON: response_metadata.get( + FINISH_REASON, "" + ), + }, + ID: id_, + USAGE_METADATA: { + INPUT_TOKENS: token_usage.prompt_tokens, + OUTPUT_TOKENS: token_usage.completion_tokens, + TOTAL_TOKENS: token_usage.total_tokens, + }, + } + logger.debug(f"Parsed LLM result successfully: {parsed_result}") + return parsed_result + + except KeyError as e: + logger.error(f"KeyError while parsing LLM result: missing key {str(e)}") + raise + + except Exception as e: + logger.error(f"Unexpected error while parsing LLM result: {str(e)}") + raise + + +class GPTAnswerer: + def __init__(self, config, llm_api_key): + self.ai_adapter = AIAdapter(config, llm_api_key) + self.llm_cheap = LoggerChatModel(self.ai_adapter) + + @property + def job_description(self): + return self.job.description + + @staticmethod + def find_best_match(text: str, options: list[str]) -> str: + logger.debug(f"Finding best match for text: '{text}' in options: {options}") + distances = [ + (option, distance(text.lower(), option.lower())) for option in options + ] + best_option = min(distances, key=lambda x: x[1])[0] + logger.debug(f"Best match found: {best_option}") + return best_option + + @staticmethod + def _remove_placeholders(text: str) -> str: + logger.debug(f"Removing placeholders from text: {text}") + text = text.replace("PLACEHOLDER", "") + return text.strip() + + @staticmethod + def _preprocess_template_string(template: str) -> str: + logger.debug("Preprocessing template string") + return textwrap.dedent(template) + + def set_resume(self, resume): + logger.debug(f"Setting resume: {resume}") + self.resume = resume + + def set_job(self, job: Job): + logger.debug(f"Setting job: {job}") + self.job = job + self.job.set_summarize_job_description( + self.summarize_job_description(self.job.description) + ) + + def set_job_application_profile(self, job_application_profile): + logger.debug(f"Setting job application profile: {job_application_profile}") + self.job_application_profile = job_application_profile + + def _clean_llm_output(self, output: str) -> str: + return output.replace("*", "").replace("#", "").strip() + + def summarize_job_description(self, text: str) -> str: + logger.debug(f"Summarizing job description: {text}") + prompts.summarize_prompt_template = self._preprocess_template_string( + prompts.summarize_prompt_template + ) + prompt = 
ChatPromptTemplate.from_template(prompts.summarize_prompt_template) + chain = prompt | self.llm_cheap | StrOutputParser() + raw_output = chain.invoke({TEXT: text}) + output = self._clean_llm_output(raw_output) + logger.debug(f"Summary generated: {output}") + return output + + def _create_chain(self, template: str): + logger.debug(f"Creating chain with template: {template}") + prompt = ChatPromptTemplate.from_template(template) + return prompt | self.llm_cheap | StrOutputParser() + + def answer_question_textual_wide_range(self, question: str) -> str: + logger.debug(f"Answering textual question: {question}") + chains = { + PERSONAL_INFORMATION: self._create_chain( + prompts.personal_information_template + ), + SELF_IDENTIFICATION: self._create_chain( + prompts.self_identification_template + ), + LEGAL_AUTHORIZATION: self._create_chain( + prompts.legal_authorization_template + ), + WORK_PREFERENCES: self._create_chain( + prompts.work_preferences_template + ), + EDUCATION_DETAILS: self._create_chain( + prompts.education_details_template + ), + EXPERIENCE_DETAILS: self._create_chain( + prompts.experience_details_template + ), + PROJECTS: self._create_chain(prompts.projects_template), + AVAILABILITY: self._create_chain(prompts.availability_template), + SALARY_EXPECTATIONS: self._create_chain( + prompts.salary_expectations_template + ), + CERTIFICATIONS: self._create_chain( + prompts.certifications_template + ), + LANGUAGES: self._create_chain(prompts.languages_template), + INTERESTS: self._create_chain(prompts.interests_template), + COVER_LETTER: self._create_chain(prompts.coverletter_template), + } + + prompt = ChatPromptTemplate.from_template(prompts.determine_section_template) + chain = prompt | self.llm_cheap | StrOutputParser() + raw_output = chain.invoke({QUESTION: question}) + output = self._clean_llm_output(raw_output) + + match = re.search( + r"(Personal information|Self Identification|Legal Authorization|Work Preferences|Education " + r"Details|Experience Details|Projects|Availability|Salary " + r"Expectations|Certifications|Languages|Interests|Cover letter)", + output, + re.IGNORECASE, + ) + if not match: + raise ValueError("Could not extract section name from the response.") + + section_name = match.group(1).lower().replace(" ", "_") + + if section_name == "cover_letter": + chain = chains.get(section_name) + raw_output = chain.invoke( + { + RESUME: self.resume, + JOB_DESCRIPTION: self.job_description, + COMPANY: self.job.company, + } + ) + output = self._clean_llm_output(raw_output) + logger.debug(f"Cover letter generated: {output}") + return output + resume_section = getattr(self.resume, section_name, None) or getattr( + self.job_application_profile, section_name, None + ) + if resume_section is None: + logger.error( + f"Section '{section_name}' not found in either resume or job_application_profile." + ) + raise ValueError( + f"Section '{section_name}' not found in either resume or job_application_profile." 
+ ) + chain = chains.get(section_name) + if chain is None: + logger.error(f"Chain not defined for section '{section_name}'") + raise ValueError(f"Chain not defined for section '{section_name}'") + raw_output = chain.invoke( + {RESUME_SECTION: resume_section, QUESTION: question} + ) + output = self._clean_llm_output(raw_output) + logger.debug(f"Question answered: {output}") + return output + + def answer_question_numeric( + self, question: str, default_experience: str = 3 + ) -> str: + logger.debug(f"Answering numeric question: {question}") + func_template = self._preprocess_template_string( + prompts.numeric_question_template + ) + prompt = ChatPromptTemplate.from_template(func_template) + chain = prompt | self.llm_cheap | StrOutputParser() + raw_output_str = chain.invoke( + { + RESUME_EDUCATIONS: self.resume.education_details, + RESUME_JOBS: self.resume.experience_details, + RESUME_PROJECTS: self.resume.projects, + QUESTION: question, + } + ) + output_str = self._clean_llm_output(raw_output_str) + logger.debug(f"Raw output for numeric question: {output_str}") + try: + output = self.extract_number_from_string(output_str) + logger.debug(f"Extracted number: {output}") + except ValueError: + logger.warning( + f"Failed to extract number, using default experience: {default_experience}" + ) + output = default_experience + return output + + def extract_number_from_string(self, output_str): + logger.debug(f"Extracting number from string: {output_str}") + numbers = re.findall(r"\d+", output_str) + if numbers: + logger.debug(f"Numbers found: {numbers}") + return str(numbers[0]) + else: + logger.error("No numbers found in the string") + raise ValueError("No numbers found in the string") + + def answer_question_from_options(self, question: str, options: list[str]) -> str: + logger.debug(f"Answering question from options: {question}") + func_template = self._preprocess_template_string(prompts.options_template) + prompt = ChatPromptTemplate.from_template(func_template) + chain = prompt | self.llm_cheap | StrOutputParser() + raw_output_str = chain.invoke( + { + RESUME: self.resume, + JOB_APPLICATION_PROFILE: self.job_application_profile, + QUESTION: question, + OPTIONS: options, + } + ) + output_str = self._clean_llm_output(raw_output_str) + logger.debug(f"Raw output for options question: {output_str}") + best_option = self.find_best_match(output_str, options) + logger.debug(f"Best option determined: {best_option}") + return best_option + + def determine_resume_or_cover(self, phrase: str) -> str: + logger.debug( + f"Determining if phrase refers to resume or cover letter: {phrase}" + ) + prompt = ChatPromptTemplate.from_template( + prompts.resume_or_cover_letter_template + ) + chain = prompt | self.llm_cheap | StrOutputParser() + raw_response = chain.invoke({PHRASE: phrase}) + response = self._clean_llm_output(raw_response) + logger.debug(f"Response for resume_or_cover: {response}") + if "resume" in response: + return "resume" + elif "cover" in response: + return "cover" + else: + return "resume" + + def is_job_suitable(self): + logger.info("Checking if job is suitable") + prompt = ChatPromptTemplate.from_template(prompts.is_relavant_position_template) + chain = prompt | self.llm_cheap | StrOutputParser() + raw_output = chain.invoke( + { + RESUME: self.resume, + JOB_DESCRIPTION: self.job_description, + } + ) + output = self._clean_llm_output(raw_output) + logger.debug(f"Job suitability output: {output}") + + try: + score = re.search(r"Score:\s*(\d+)", output, re.IGNORECASE).group(1) + reasoning = 
re.search(r"Reasoning:\s*(.+)", output, re.IGNORECASE | re.DOTALL).group(1) + except AttributeError: + logger.warning("Failed to extract score or reasoning from LLM. Proceeding with application, but job may or may not be suitable.") + return True + + logger.info(f"Job suitability score: {score}") + if int(score) < JOB_SUITABILITY_SCORE: + logger.debug(f"Job is not suitable: {reasoning}") + return int(score) >= JOB_SUITABILITY_SCORE diff --git a/src/ai_hawk/llm/prompts.py b/src/ai_hawk/llm/prompts.py new file mode 100644 index 000000000..cc7bc80a1 --- /dev/null +++ b/src/ai_hawk/llm/prompts.py @@ -0,0 +1,547 @@ +# Personal Information Template +personal_information_template = """ +Answer the following question based on the provided personal information. + +## Rules +- Answer questions directly. + +## Example +My resume: John Doe, born on 01/01/1990, living in Milan, Italy. +Question: What is your city? + Milan + +Personal Information: {resume_section} +Question: {question} +Do not output anything else in the response other than the answer. +""" + +# Self Identification Template +self_identification_template = """ +Answer the following question based on the provided self-identification details. + +## Rules +- Answer questions directly. + +## Example +My resume: Male, uses he/him pronouns, not a veteran, no disability. +Question: What are your gender? +Male + +Self-Identification: {resume_section} +Question: {question} +Do not output anything else in the response other than the answer. +""" + +# Legal Authorization Template +legal_authorization_template = """ +Answer the following question based on the provided legal authorization details. + +## Rules +- Answer questions directly. + +## Example +My resume: Authorized to work in the EU, no US visa required. +Question: Are you legally allowed to work in the EU? +Yes + +Legal Authorization: {resume_section} +Question: {question} +Do not output anything else in the response other than the answer. +""" + +# Work Preferences Template +work_preferences_template = """ +Answer the following question based on the provided work preferences. + +## Rules +- Answer questions directly. + +## Example +My resume: Open to remote work, willing to relocate. +Question: Are you open to remote work? +Yes + +Work Preferences: {resume_section} +Question: {question} +Do not output anything else in the response other than the answer. +""" + +# Education Details Template +education_details_template = """ +Answer the following question based on the provided education details. + +## Rules +- Answer questions directly. +- If it seems likely that you have the experience, even if not explicitly defined, answer as if you have the experience. +- If unsure, respond with "I have no experience with that, but I learn fast" or "Not yet, but willing to learn." +- Keep the answer under 140 characters. + +## Example +My resume: Bachelor's degree in Computer Science with experience in Python. +Question: Do you have experience with Python? +Yes, I have experience with Python. + +Education Details: {resume_section} +Question: {question} +Do not output anything else in the response other than the answer. +""" + +# Experience Details Template +experience_details_template = """ +Answer the following question based on the provided experience details. + +## Rules +- Answer questions directly. +- If it seems likely that you have the experience, even if not explicitly defined, answer as if you have the experience. 
+- If unsure, respond with "I have no experience with that, but I learn fast" or "Not yet, but willing to learn." +- Keep the answer under 140 characters. + +## Example +My resume: 3 years as a software developer with leadership experience. +Question: Do you have leadership experience? +Yes, I have 3 years of leadership experience. + +Experience Details: {resume_section} +Question: {question} +Do not output anything else in the response other than the answer. +""" + +# Projects Template +projects_template = """ +Answer the following question based on the provided project details. + +## Rules +- Answer questions directly. +- If it seems likely that you have the experience, even if not explicitly defined, answer as if you have the experience. +- Keep the answer under 140 characters. + +## Example +My resume: Led the development of a mobile app, repository available. +Question: Have you led any projects? +Yes, led the development of a mobile app + +Projects: {resume_section} +Question: {question} +Do not output anything else in the response other than the answer. +""" + +# Availability Template +availability_template = """ +Answer the following question based on the provided availability details. + +## Rules +- Answer questions directly. +- Keep the answer under 140 characters. +- Use periods only if the answer has multiple sentences. + +## Example +My resume: Available to start immediately. +Question: When can you start? +I can start immediately. + +Availability: {resume_section} +Question: {question} +Do not output anything else in the response other than the answer. +""" + +# Salary Expectations Template +salary_expectations_template = """ +Answer the following question based on the provided salary expectations. + +## Rules +- Answer questions directly. +- Keep the answer under 140 characters. +- Use periods only if the answer has multiple sentences. + +## Example +My resume: Looking for a salary in the range of 50k-60k USD. +Question: What are your salary expectations? +55000. + +Salary Expectations: {resume_section} +Question: {question} +Do not output anything else in the response other than the answer. +""" + +# Certifications Template +certifications_template = """ +Answer the following question based on the provided certifications. + +## Rules +- Answer questions directly. +- If it seems likely that you have the experience, even if not explicitly defined, answer as if you have the experience. +- If unsure, respond with "I have no experience with that, but I learn fast" or "Not yet, but willing to learn." +- Keep the answer under 140 characters. + +## Example +My resume: Certified in Project Management Professional (PMP). +Question: Do you have PMP certification? +Yes, I am PMP certified. + +Certifications: {resume_section} +Question: {question} +Do not output anything else in the response other than the answer. +""" + +# Languages Template +languages_template = """ +Answer the following question based on the provided language skills. + +## Rules +- Answer questions directly. +- If it seems likely that you have the experience, even if not explicitly defined, answer as if you have the experience. +- If unsure, respond with "I have no experience with that, but I learn fast" or "Not yet, but willing to learn." +- Keep the answer under 140 characters. Do not add any additional languages what is not in my experience + +## Example +My resume: Fluent in Italian and English. +Question: What languages do you speak? +Fluent in Italian and English. 
+ +Languages: {resume_section} +Question: {question} +Do not output anything else in the response other than the answer. +""" + +# Interests Template +interests_template = """ +Answer the following question based on the provided interests. + +## Rules +- Answer questions directly. +- Keep the answer under 140 characters. +- Use periods only if the answer has multiple sentences. + +## Example +My resume: Interested in AI and data science. +Question: What are your interests? +AI and data science. + +Interests: {resume_section} +Question: {question} +Do not output anything else in the response other than the answer. +""" + +summarize_prompt_template = """ +As a seasoned HR expert, your task is to identify and outline the key skills and requirements necessary for the position of this job. Use the provided job description as input to extract all relevant information. This will involve conducting a thorough analysis of the job's responsibilities and the industry standards. You should consider both the technical and soft skills needed to excel in this role. Additionally, specify any educational qualifications, certifications, or experiences that are essential. Your analysis should also reflect on the evolving nature of this role, considering future trends and how they might affect the required competencies. + +Rules: +Remove boilerplate text +Include only relevant information to match the job description against the resume + +# Analysis Requirements +Your analysis should include the following sections: +Technical Skills: List all the specific technical skills required for the role based on the responsibilities described in the job description. +Soft Skills: Identify the necessary soft skills, such as communication abilities, problem-solving, time management, etc. +Educational Qualifications and Certifications: Specify the essential educational qualifications and certifications for the role. +Professional Experience: Describe the relevant work experiences that are required or preferred. +Role Evolution: Analyze how the role might evolve in the future, considering industry trends and how these might influence the required skills. + +# Final Result: +Your analysis should be structured in a clear and organized document with distinct sections for each of the points listed above. Each section should contain: +This comprehensive overview will serve as a guideline for the recruitment process, ensuring the identification of the most qualified candidates. + +# Job Description: +``` +{text} +``` + +--- + +# Job Description Summary""" + +coverletter_template = """ +Compose a brief and impactful cover letter based on the provided job description and resume. The letter should be no longer than three paragraphs and should be written in a professional, yet conversational tone. Avoid using any placeholders, and ensure that the letter flows naturally and is tailored to the job. + +Analyze the job description to identify key qualifications and requirements. Introduce the candidate succinctly, aligning their career objectives with the role. Highlight relevant skills and experiences from the resume that directly match the job’s demands, using specific examples to illustrate these qualifications. Reference notable aspects of the company, such as its mission or values, that resonate with the candidate’s professional goals. Conclude with a strong statement of why the candidate is a good fit for the position, expressing a desire to discuss further. 
+
+Please write the cover letter in a way that directly addresses the job role and the company’s characteristics, ensuring it remains concise and engaging without unnecessary embellishments. The letter should be formatted into paragraphs and should not include a greeting or signature.
+
+## Rules:
+- Provide only the text of the cover letter.
+- Do not include any introductions, explanations, or additional information.
+- The letter should be formatted into paragraphs.
+
+## Company Name:
+{company}
+
+## Job Description:
+```
+{job_description}
+```
+## My resume:
+```
+{resume}
+```
+"""
+
+numeric_question_template = """
+Read the following resume carefully and answer the specific questions regarding the candidate's experience with a number of years. Follow these strategic guidelines when responding:
+
+1. **Related and Inferred Experience:**
+   - **Similar Technologies:** If experience with a specific technology is not explicitly stated, but the candidate has experience with similar or related technologies, provide a plausible number of years reflecting this related experience. For instance, if the candidate has experience with Python and projects involving technologies similar to Java, estimate a reasonable number of years for Java.
+   - **Projects and Studies:** Examine the candidate’s projects and studies to infer skills not explicitly mentioned. Complex and advanced projects often indicate deeper expertise.
+
+2. **Indirect Experience and Academic Background:**
+   - **Type of University and Studies:** Consider the type of university and course followed.
+   - **Exam Grades:** Consider exam grades achieved. High grades in relevant subjects can indicate stronger proficiency and understanding.
+   - **Relevant Thesis:** Consider the thesis the candidate has worked on. An advanced thesis suggests deeper skills.
+   - **Roles and Responsibilities:** Evaluate the roles and responsibilities held to estimate experience with specific technologies or skills.
+
+
+3. **Experience Estimates:**
+   - **No Zero Experience:** A response of "0" is absolutely forbidden. If direct experience cannot be confirmed, provide a minimum of "2" years based on inferred or related experience.
+   - **For Low Experience (up to 5 years):** Estimate experience based on inferred bachelor's studies, skills, and projects, always providing at least "2" years when relevant.
+   - **For High Experience:** For high levels of experience, provide a number based on clear evidence from the resume. Avoid making inferences for high experience levels unless the evidence is strong.
+
+4. **Rules:**
+   - Answer the question directly with a number, avoiding "0" entirely.
+
+## Example 1
+```
+## Curriculum
+
+I have a degree in computer science. I have worked for years with the MQTT protocol.
+
+## Question
+
+How many years of experience do you have with IoT?
+
+## Answer
+
+4
+```
+## Example 2
+```
+## Curriculum
+
+I have a degree in computer science.
+
+## Question
+
+How many years of experience do you have with Bash?
+
+## Answer
+
+2
+```
+
+## Example 3
+```
+## Curriculum
+
+I am a software engineer with 5 years of experience in Swift and Python. I have worked on an AI project.
+
+## Question
+
+How many years of experience do you have with AI?
+
+## Answer
+
+2
+```
+
+## Resume:
+```
+{resume_educations}
+{resume_jobs}
+{resume_projects}
+```
+
+## Question:
+{question}
+
+---
+
+When responding, consider all available information, including projects, work experience, and academic background, to provide an accurate and well-reasoned answer. 
Make every effort to infer relevant experience and avoid defaulting to 0 if any related experience can be estimated.
+Do not output anything else in the response other than the answer.
+"""
+
+options_template = """The following is a resume and an answered question about the resume; the answer is one of the options.
+
+## Rules
+- Never choose the default/placeholder option; examples are: 'Select an option', 'None', 'Choose from the options below', etc.
+- The answer must be one of the options.
+- The answer must exclusively contain one of the options.
+
+## Example
+My resume: I'm a software engineer with 10 years of experience on swift, python, C, C++.
+Question: How many years of experience do you have on python?
+Options: [1-2, 3-5, 6-10, 10+]
+10+
+
+-----
+
+## My resume:
+```
+{resume}
+{job_application_profile}
+```
+
+## Question:
+{question}
+
+## Options:
+{options}
+-----
+Do not output anything else in the response other than the answer.
+## """
+
+try_to_fix_template = """\
+The objective is to fix the text of a form input on a web page.
+
+## Rules
+- Use the error to fix the original text.
+- The error "Please enter a valid answer" usually means the text is too long; shorten the reply to less than a tweet.
+- For errors like "Enter a whole number between 3 and 30", respond with just a number.
+
+-----
+
+## Form Question
+{question}
+
+## Input
+{input}
+
+## Error
+{error}
+
+## Fixed Input
+"""
+
+func_summarize_prompt_template = """
+    Following are two texts, one with placeholders and one without; the second text uses information from the first text to fill the placeholders.
+
+    ## Rules
+    - A placeholder is a string like "[[placeholder]]". E.g. "[[company]]", "[[job_title]]", "[[years_of_experience]]"...
+    - The task is to remove the placeholders from the text.
+    - If there is no information to fill a placeholder, remove the placeholder, and adapt the text accordingly.
+    - No placeholders should remain in the text.
+
+    ## Example
+    Text with placeholders: "I'm a software engineer with 10 years of experience on [placeholder] and [placeholder]."
+    Text without placeholders: "I'm a software engineer with 10 years of experience."
+
+    -----
+
+    ## Text with placeholders:
+    {text_with_placeholders}
+
+    ## Text without placeholders:"""
+
+is_relavant_position_template = """
+    Evaluate whether the provided resume meets the requirements outlined in the job description. Determine if the candidate is suitable for the job based on the information provided.
+
+Job Description: {job_description}
+
+Resume: {resume}
+
+Instructions:
+1. Extract the key requirements from the job description, identifying hard requirements (must-haves) and soft requirements (nice-to-haves).
+2. Identify the relevant qualifications from the resume.
+3. Compare the qualifications against the requirements, ensuring all hard requirements are met. Allow for a 1-year experience gap if applicable, as experience is usually a hard requirement.
+4. Provide a suitability score from 1 to 10, where 1 indicates the candidate does not meet any requirements and 10 indicates the candidate meets all requirements.
+5. Provide a brief reasoning for the score, highlighting which requirements are met and which are not.
+
+Output Format (Strictly follow this format):
+Score: [numerical score]
+Reasoning: [brief explanation]
+Do not output anything else in the response other than the score and reasoning. 
+""" + +resume_or_cover_letter_template = """ +Given the following phrase, respond with only 'resume' if the phrase is about a resume, or 'cover' if it's about a cover letter. +If the phrase contains only one word 'upload', consider it as 'cover'. +If the phrase contains 'upload resume', consider it as 'resume'. +Do not provide any additional information or explanations. + +phrase: {phrase} +""" + +determine_section_template = """You are assisting a bot designed to automatically apply for jobs on AIHawk. The bot receives various questions about job applications and needs to determine the most relevant section of the resume to provide an accurate response. + +For the following question: '{question}', determine which section of the resume is most relevant. +Respond with exactly one of the following options: +- Personal information +- Self Identification +- Legal Authorization +- Work Preferences +- Education Details +- Experience Details +- Projects +- Availability +- Salary Expectations +- Certifications +- Languages +- Interests +- Cover letter + +Here are detailed guidelines to help you choose the correct section: + +1. **Personal Information**: +- **Purpose**: Contains your basic contact details and online profiles. +- **Use When**: The question is about how to contact you or requests links to your professional online presence. +- **Examples**: Email address, phone number, AIHawk profile, GitHub repository, personal website. + +2. **Self Identification**: +- **Purpose**: Covers personal identifiers and demographic information. +- **Use When**: The question pertains to your gender, pronouns, veteran status, disability status, or ethnicity. +- **Examples**: Gender, pronouns, veteran status, disability status, ethnicity. + +3. **Legal Authorization**: +- **Purpose**: Details your work authorization status and visa requirements. +- **Use When**: The question asks about your ability to work in specific countries or if you need sponsorship or visas. +- **Examples**: Work authorization in EU and US, visa requirements, legally allowed to work. + +4. **Work Preferences**: +- **Purpose**: Specifies your preferences regarding work conditions and job roles. +- **Use When**: The question is about your preferences for remote work, in-person work, relocation, and willingness to undergo assessments or background checks. +- **Examples**: Remote work, in-person work, open to relocation, willingness to complete assessments. + +5. **Education Details**: +- **Purpose**: Contains information about your academic qualifications. +- **Use When**: The question concerns your degrees, universities attended, GPA, and relevant coursework. +- **Examples**: Degree, university, GPA, field of study, exams. + +6. **Experience Details**: +- **Purpose**: Details your professional work history and key responsibilities. +- **Use When**: The question pertains to your job roles, responsibilities, and achievements in previous positions. +- **Examples**: Job positions, company names, key responsibilities, skills acquired. + +7. **Projects**: +- **Purpose**: Highlights specific projects you have worked on. +- **Use When**: The question asks about particular projects, their descriptions, or links to project repositories. +- **Examples**: Project names, descriptions, links to project repositories. + +8. **Availability**: +- **Purpose**: Provides information on your availability for new roles. +- **Use When**: The question is about how soon you can start a new job or your notice period. 
+- **Examples**: Notice period, availability to start. + +9. **Salary Expectations**: +- **Purpose**: Covers your expected salary range. +- **Use When**: The question pertains to your salary expectations or compensation requirements. +- **Examples**: Desired salary range. + +10. **Certifications**: + - **Purpose**: Lists your professional certifications or licenses. + - **Use When**: The question involves your certifications or qualifications from recognized organizations. + - **Examples**: Certification names, issuing bodies, dates of validity. + +11. **Languages**: + - **Purpose**: Describes the languages you can speak and your proficiency levels. + - **Use When**: The question asks about your language skills or proficiency in specific languages. + - **Examples**: Languages spoken, proficiency levels. + +12. **Interests**: + - **Purpose**: Details your personal or professional interests. + - **Use When**: The question is about your hobbies, interests, or activities outside of work. + - **Examples**: Personal hobbies, professional interests. + +13. **Cover Letter**: + - **Purpose**: Contains your personalized cover letter or statement. + - **Use When**: The question involves your cover letter or specific written content intended for the job application. + - **Examples**: Cover letter content, personalized statements. + +Provide only the exact name of the section from the list above with no additional text. +""" \ No newline at end of file diff --git a/src/job.py b/src/job.py new file mode 100644 index 000000000..13ac84ba9 --- /dev/null +++ b/src/job.py @@ -0,0 +1,49 @@ +from dataclasses import dataclass +from src.logging import logger + +@dataclass +class Job: + id: str = "" + title: str = "" + company: str = "" + location: str = "" + link: str = "" + apply_method: str = "" + description: str = "" + summarize_job_description: str = "" + recruiter_link: str = "" + # TODO: to move these properties to JobApplication + resume_path: str = "" + cover_letter_path: str = "" + + def set_summarize_job_description(self, summarize_job_description): + logger.debug(f"Setting summarized job description: {summarize_job_description}") + self.summarize_job_description = summarize_job_description + + def set_job_description(self, description): + logger.debug(f"Setting job description: {description}") + self.description = description + + def set_recruiter_link(self, recruiter_link): + logger.debug(f"Setting recruiter link: {recruiter_link}") + self.recruiter_link = recruiter_link + + def formatted_job_information(self): + """ + Formats the job information as a markdown string. 
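+ Returns a stripped string containing the position, company, location,
+ recruiter link and description, rendered with the markdown template below.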
+ """ + logger.debug(f"Formatting job information for job: {self.title} at {self.company}") + job_information = f""" + # Job Description + ## Job Information + - Position: {self.title} + - At: {self.company} + - Location: {self.location} + - Recruiter Profile: {self.recruiter_link or 'Not available'} + + ## Description + {self.description or 'No description provided.'} + """ + formatted_information = job_information.strip() + logger.debug(f"Formatted job information: {formatted_information}") + return formatted_information \ No newline at end of file diff --git a/src/jobContext.py b/src/jobContext.py new file mode 100644 index 000000000..645947925 --- /dev/null +++ b/src/jobContext.py @@ -0,0 +1,10 @@ +from src.job import Job +from src.job_application import JobApplication + + +from dataclasses import dataclass + +@dataclass +class JobContext: + job: Job = None + job_application: JobApplication = None \ No newline at end of file diff --git a/src/job_application.py b/src/job_application.py new file mode 100644 index 000000000..ad3fe0047 --- /dev/null +++ b/src/job_application.py @@ -0,0 +1,19 @@ +from src.job import Job + +class JobApplication: + + def __init__(self, job: Job): + self.job :Job = job + self.application = [] + self.resume_path = "" + self.cover_letter_path = "" + + def save_application_data(self, application_questions: dict): + self.application.append(application_questions) + + def set_resume_path(self, resume_path: str): + self.resume_path = resume_path + + def set_cover_letter_path(self, cv_path: str): + self.cover_letter_path = cv_path + diff --git a/src/job_application_profile.py b/src/job_application_profile.py new file mode 100644 index 000000000..8a74bdb7e --- /dev/null +++ b/src/job_application_profile.py @@ -0,0 +1,186 @@ +from dataclasses import dataclass + +import yaml + +from src.logging import logger + + +@dataclass +class SelfIdentification: + gender: str + pronouns: str + veteran: str + disability: str + ethnicity: str + + +@dataclass +class LegalAuthorization: + eu_work_authorization: str + us_work_authorization: str + requires_us_visa: str + legally_allowed_to_work_in_us: str + requires_us_sponsorship: str + requires_eu_visa: str + legally_allowed_to_work_in_eu: str + requires_eu_sponsorship: str + canada_work_authorization: str + requires_canada_visa: str + legally_allowed_to_work_in_canada: str + requires_canada_sponsorship: str + uk_work_authorization: str + requires_uk_visa: str + legally_allowed_to_work_in_uk: str + requires_uk_sponsorship: str + + + +@dataclass +class WorkPreferences: + remote_work: str + in_person_work: str + open_to_relocation: str + willing_to_complete_assessments: str + willing_to_undergo_drug_tests: str + willing_to_undergo_background_checks: str + + +@dataclass +class Availability: + notice_period: str + + +@dataclass +class SalaryExpectations: + salary_range_usd: str + + +@dataclass +class JobApplicationProfile: + self_identification: SelfIdentification + legal_authorization: LegalAuthorization + work_preferences: WorkPreferences + availability: Availability + salary_expectations: SalaryExpectations + + def __init__(self, yaml_str: str): + logger.debug("Initializing JobApplicationProfile with provided YAML string") + try: + data = yaml.safe_load(yaml_str) + logger.debug(f"YAML data successfully parsed: {data}") + except yaml.YAMLError as e: + logger.error(f"Error parsing YAML file: {e}") + raise ValueError("Error parsing YAML file.") from e + except Exception as e: + logger.error(f"Unexpected error occurred while parsing the YAML 
file: {e}") + raise RuntimeError("An unexpected error occurred while parsing the YAML file.") from e + + if not isinstance(data, dict): + logger.error(f"YAML data must be a dictionary, received: {type(data)}") + raise TypeError("YAML data must be a dictionary.") + + # Process self_identification + try: + logger.debug("Processing self_identification") + self.self_identification = SelfIdentification(**data['self_identification']) + logger.debug(f"self_identification processed: {self.self_identification}") + except KeyError as e: + logger.error(f"Required field {e} is missing in self_identification data.") + raise KeyError(f"Required field {e} is missing in self_identification data.") from e + except TypeError as e: + logger.error(f"Error in self_identification data: {e}") + raise TypeError(f"Error in self_identification data: {e}") from e + except AttributeError as e: + logger.error(f"Attribute error in self_identification processing: {e}") + raise AttributeError("Attribute error in self_identification processing.") from e + except Exception as e: + logger.error(f"An unexpected error occurred while processing self_identification: {e}") + raise RuntimeError("An unexpected error occurred while processing self_identification.") from e + + # Process legal_authorization + try: + logger.debug("Processing legal_authorization") + self.legal_authorization = LegalAuthorization(**data['legal_authorization']) + logger.debug(f"legal_authorization processed: {self.legal_authorization}") + except KeyError as e: + logger.error(f"Required field {e} is missing in legal_authorization data.") + raise KeyError(f"Required field {e} is missing in legal_authorization data.") from e + except TypeError as e: + logger.error(f"Error in legal_authorization data: {e}") + raise TypeError(f"Error in legal_authorization data: {e}") from e + except AttributeError as e: + logger.error(f"Attribute error in legal_authorization processing: {e}") + raise AttributeError("Attribute error in legal_authorization processing.") from e + except Exception as e: + logger.error(f"An unexpected error occurred while processing legal_authorization: {e}") + raise RuntimeError("An unexpected error occurred while processing legal_authorization.") from e + + # Process work_preferences + try: + logger.debug("Processing work_preferences") + self.work_preferences = WorkPreferences(**data['work_preferences']) + logger.debug(f"Work_preferences processed: {self.work_preferences}") + except KeyError as e: + logger.error(f"Required field {e} is missing in work_preferences data.") + raise KeyError(f"Required field {e} is missing in work_preferences data.") from e + except TypeError as e: + logger.error(f"Error in work_preferences data: {e}") + raise TypeError(f"Error in work_preferences data: {e}") from e + except AttributeError as e: + logger.error(f"Attribute error in work_preferences processing: {e}") + raise AttributeError("Attribute error in work_preferences processing.") from e + except Exception as e: + logger.error(f"An unexpected error occurred while processing work_preferences: {e}") + raise RuntimeError("An unexpected error occurred while processing work_preferences.") from e + + # Process availability + try: + logger.debug("Processing availability") + self.availability = Availability(**data['availability']) + logger.debug(f"Availability processed: {self.availability}") + except KeyError as e: + logger.error(f"Required field {e} is missing in availability data.") + raise KeyError(f"Required field {e} is missing in availability data.") from e + 
except TypeError as e: + logger.error(f"Error in availability data: {e}") + raise TypeError(f"Error in availability data: {e}") from e + except AttributeError as e: + logger.error(f"Attribute error in availability processing: {e}") + raise AttributeError("Attribute error in availability processing.") from e + except Exception as e: + logger.error(f"An unexpected error occurred while processing availability: {e}") + raise RuntimeError("An unexpected error occurred while processing availability.") from e + + # Process salary_expectations + try: + logger.debug("Processing salary_expectations") + self.salary_expectations = SalaryExpectations(**data['salary_expectations']) + logger.debug(f"salary_expectations processed: {self.salary_expectations}") + except KeyError as e: + logger.error(f"Required field {e} is missing in salary_expectations data.") + raise KeyError(f"Required field {e} is missing in salary_expectations data.") from e + except TypeError as e: + logger.error(f"Error in salary_expectations data: {e}") + raise TypeError(f"Error in salary_expectations data: {e}") from e + except AttributeError as e: + logger.error(f"Attribute error in salary_expectations processing: {e}") + raise AttributeError("Attribute error in salary_expectations processing.") from e + except Exception as e: + logger.error(f"An unexpected error occurred while processing salary_expectations: {e}") + raise RuntimeError("An unexpected error occurred while processing salary_expectations.") from e + + logger.debug("JobApplicationProfile initialization completed successfully.") + + def __str__(self): + logger.debug("Generating string representation of JobApplicationProfile") + + def format_dataclass(obj): + return "\n".join(f"{field.name}: {getattr(obj, field.name)}" for field in obj.__dataclass_fields__.values()) + + formatted_str = (f"Self Identification:\n{format_dataclass(self.self_identification)}\n\n" + f"Legal Authorization:\n{format_dataclass(self.legal_authorization)}\n\n" + f"Work Preferences:\n{format_dataclass(self.work_preferences)}\n\n" + f"Availability: {self.availability.notice_period}\n\n" + f"Salary Expectations: {self.salary_expectations.salary_range_usd}\n\n") + logger.debug(f"String representation generated: {formatted_str}") + return formatted_str diff --git a/src/job_application_saver.py b/src/job_application_saver.py new file mode 100644 index 000000000..a8554d2a0 --- /dev/null +++ b/src/job_application_saver.py @@ -0,0 +1,92 @@ +from src.logging import logger +import os +import json +import shutil + +from dataclasses import asdict + +from config import JOB_APPLICATIONS_DIR +from job import Job +from job_application import JobApplication + +# Base directory where all applications will be saved +BASE_DIR = JOB_APPLICATIONS_DIR + + +class ApplicationSaver: + + def __init__(self, job_application: JobApplication): + self.job_application = job_application + self.job_application_files_path = None + + # Function to create a directory for each job application + def create_application_directory(self): + job = self.job_application.job + + # Create a unique directory name using the application ID and company name + dir_name = f"{job.id} - {job.company} {job.title}" + dir_path = os.path.join(BASE_DIR, dir_name) + + # Create the directory if it doesn't exist + os.makedirs(dir_path, exist_ok=True) + self.job_application_files_path = dir_path + return dir_path + + # Function to save the job application details as a JSON file + def save_application_details(self): + + if self.job_application_files_path is None: 
+ raise ValueError( + "Job application file path is not set. Please create the application directory first." + ) + + json_file_path = os.path.join( + self.job_application_files_path, "job_application.json" + ) + with open(json_file_path, "w") as json_file: + json.dump(self.job_application.application, json_file, indent=4) + + # Function to save files like Resume and CV + def save_file(self, dir_path, file_path, new_filename): + if dir_path is None: + raise ValueError("dir path cannot be None") + + # Copy the file to the application directory with a new name + destination = os.path.join(dir_path, new_filename) + shutil.copy(file_path, destination) + + # Function to save job description as a text file + def save_job_description(self): + if self.job_application_files_path is None: + raise ValueError( + "Job application file path is not set. Please create the application directory first." + ) + + job: Job = self.job_application.job + + json_file_path = os.path.join( + self.job_application_files_path, "job_description.json" + ) + with open(json_file_path, "w") as json_file: + json.dump(asdict(job), json_file, indent=4) + + @staticmethod + def save(job_application: JobApplication): + saver = ApplicationSaver(job_application) + saver.create_application_directory() + saver.save_application_details() + saver.save_job_description() + # todo: tempory fix, to rely on resume and cv path from job object instead of job application object + if job_application.resume_path: + saver.save_file( + saver.job_application_files_path, + job_application.job.resume_path, + "resume.pdf", + ) + logger.debug(f"Saving cover letter to path: {job_application.cover_letter_path}") + if job_application.cover_letter_path: + saver.save_file( + saver.job_application_files_path, + job_application.job.cover_letter_path, + "cover_letter.pdf" + ) diff --git a/src/job_portals/__init__.py b/src/job_portals/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/job_portals/application_form_elements.py b/src/job_portals/application_form_elements.py new file mode 100644 index 000000000..c01cc2ee9 --- /dev/null +++ b/src/job_portals/application_form_elements.py @@ -0,0 +1,24 @@ +from enum import Enum + +from attr import dataclass + + +class TextBoxQuestionType(Enum): + NUMERIC = "numeric" + TEXTBOX = "textbox" + +class SelectQuestionType(Enum): + SINGLE_SELECT = "single_select" + MULTI_SELECT = "multi_select" + +@dataclass +class SelectQuestion: + question: str + options: list[str] + type: SelectQuestionType + + +@dataclass +class TextBoxQuestion: + question: str + type: TextBoxQuestionType diff --git a/src/job_portals/base_job_portal.py b/src/job_portals/base_job_portal.py new file mode 100644 index 000000000..502e3a867 --- /dev/null +++ b/src/job_portals/base_job_portal.py @@ -0,0 +1,222 @@ +from abc import ABC, abstractmethod +from re import A + +from constants import LINKEDIN +from src.job_portals.application_form_elements import SelectQuestion, TextBoxQuestion +from src.ai_hawk.authenticator import AIHawkAuthenticator +from src.job import Job +from src.jobContext import JobContext + +from selenium.webdriver.remote.webelement import WebElement +from typing import List + + +class WebPage(ABC): + + def __init__(self, driver): + self.driver = driver + + +class BaseJobsPage(WebPage): + + def __init__(self, driver, parameters): + super().__init__(driver) + self.parameters = parameters + + @abstractmethod + def next_job_page(self, position, location, page_number): + pass + + @abstractmethod + def job_tile_to_job(self, 
job_tile: WebElement) -> Job: + pass + + @abstractmethod + def get_jobs_from_page(self, scroll=False) -> List[WebElement]: + pass + + +class BaseJobPage(WebPage): + + def __init__(self, driver): + super().__init__(driver) + + @abstractmethod + def goto_job_page(self, job: Job): + pass + + @abstractmethod + def get_apply_button(self, job_context: JobContext) -> WebElement: + pass + + @abstractmethod + def get_job_description(self, job: Job) -> str: + pass + + @abstractmethod + def get_recruiter_link(self) -> str: + pass + + @abstractmethod + def click_apply_button(self, job_context: JobContext) -> None: + pass + + +class BaseApplicationPage(WebPage): + + def __init__(self, driver): + super().__init__(driver) + + @abstractmethod + def has_next_button(self) -> bool: + pass + + @abstractmethod + def click_next_button(self) -> None: + pass + + @abstractmethod + def has_submit_button(self) -> bool: + pass + + @abstractmethod + def click_submit_button(self) -> None: + pass + + @abstractmethod + def has_errors(self) -> None: + pass + + @abstractmethod + def handle_errors(self) -> None: + """this methos is also called as fix errors""" + pass + + @abstractmethod + def check_for_errors(self) -> None: + """As the current impl needs this, later when we add retry mechanism, we will be moving to has errors and handle errors""" + pass + + @abstractmethod + def get_input_elements(self) -> List[WebElement]: + """this method will update to Enum / other easy way (in future) instead of webList""" + pass + + @abstractmethod + def is_upload_field(self, element: WebElement) -> bool: + pass + + @abstractmethod + def get_file_upload_elements(self) -> List[WebElement]: + pass + + @abstractmethod + def get_upload_element_heading(self, element: WebElement) -> str: + pass + + @abstractmethod + def upload_file(self, element: WebElement, file_path: str) -> None: + pass + + @abstractmethod + def get_form_sections(self) -> List[WebElement]: + pass + + @abstractmethod + def is_terms_of_service(self, section: WebElement) -> bool: + pass + + @abstractmethod + def accept_terms_of_service(self, section: WebElement) -> None: + pass + + @abstractmethod + def is_radio_question(self, section: WebElement) -> bool: + pass + + @abstractmethod + def web_element_to_radio_question(self, section: WebElement) -> SelectQuestion: + pass + + @abstractmethod + def select_radio_option( + self, radio_question_web_element: WebElement, answer: str + ) -> None: + pass + + @abstractmethod + def is_textbox_question(self, section: WebElement) -> bool: + pass + + @abstractmethod + def web_element_to_textbox_question(self, section: WebElement) -> TextBoxQuestion: + pass + + @abstractmethod + def fill_textbox_question(self, section: WebElement, answer: str) -> None: + pass + + @abstractmethod + def is_dropdown_question(self, section: WebElement) -> bool: + pass + + @abstractmethod + def web_element_to_dropdown_question(self, section: WebElement) -> SelectQuestion: + pass + + @abstractmethod + def select_dropdown_option(self, section: WebElement, answer: str) -> None: + pass + + @abstractmethod + def discard(self) -> None: + pass + + @abstractmethod + def save(self) -> None: + """ this can be also be considered as save draft / save progress """ + pass + + +class BaseJobPortal(ABC): + + def __init__(self, driver): + self.driver = driver + + @property + @abstractmethod + def jobs_page(self) -> BaseJobsPage: + pass + + @property + @abstractmethod + def job_page(self) -> BaseJobPage: + pass + + @property + @abstractmethod + def authenticator(self) -> 
AIHawkAuthenticator: + pass + + @property + @abstractmethod + def application_page(self) -> BaseApplicationPage: + pass + + +def get_job_portal(portal_name, driver, parameters): + from src.job_portals.linkedIn.linkedin import LinkedIn + + if portal_name == LINKEDIN: + return LinkedIn(driver, parameters) + else: + raise ValueError(f"Unknown job portal: {portal_name}") + + +def get_authenticator(driver, platform): + from src.job_portals.linkedIn.authenticator import LinkedInAuthenticator + + if platform == LINKEDIN: + return LinkedInAuthenticator(driver) + else: + raise NotImplementedError(f"Platform {platform} not implemented yet.") diff --git a/src/logging.py b/src/logging.py new file mode 100644 index 000000000..20b1448c0 --- /dev/null +++ b/src/logging.py @@ -0,0 +1,80 @@ +import logging.handlers +import os +import sys +import logging +from loguru import logger +from selenium.webdriver.remote.remote_connection import LOGGER as selenium_logger + +from config import LOG_LEVEL, LOG_SELENIUM_LEVEL, LOG_TO_CONSOLE, LOG_TO_FILE + + +def remove_default_loggers(): + """Remove default loggers from root logger.""" + root_logger = logging.getLogger() + if root_logger.hasHandlers(): + root_logger.handlers.clear() + if os.path.exists("log/app.log"): + os.remove("log/app.log") + +def init_loguru_logger(): + """Initialize and configure loguru logger.""" + + def get_log_filename(): + return f"log/app.log" + + log_file = get_log_filename() + + os.makedirs(os.path.dirname(log_file), exist_ok=True) + + logger.remove() + + # Add file logger if LOG_TO_FILE is True + if LOG_TO_FILE: + logger.add( + log_file, + level=LOG_LEVEL, + rotation="10 MB", + retention="1 week", + compression="zip", + format="{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | {name}:{function}:{line} - {message}", + backtrace=True, + diagnose=True, + ) + + # Add console logger if LOG_TO_CONSOLE is True + if LOG_TO_CONSOLE: + logger.add( + sys.stderr, + level=LOG_LEVEL, + format="{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | {name}:{function}:{line} - {message}", + backtrace=True, + diagnose=True, + ) + + +def init_selenium_logger(): + """Initialize and configure selenium logger to write to selenium.log.""" + log_file = "log/selenium.log" + os.makedirs(os.path.dirname(log_file), exist_ok=True) + + selenium_logger.handlers.clear() + + selenium_logger.setLevel(LOG_SELENIUM_LEVEL) + + # Create file handler for selenium logger + file_handler = logging.handlers.TimedRotatingFileHandler( + log_file, when="D", interval=1, backupCount=5 + ) + file_handler.setLevel(LOG_SELENIUM_LEVEL) + + # Define a simplified format for selenium logger entries + formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") + file_handler.setFormatter(formatter) + + # Add the file handler to selenium_logger + selenium_logger.addHandler(file_handler) + + +remove_default_loggers() +init_loguru_logger() +init_selenium_logger() diff --git a/src/regex_utils.py b/src/regex_utils.py new file mode 100644 index 000000000..236e9b5f2 --- /dev/null +++ b/src/regex_utils.py @@ -0,0 +1,24 @@ +import re + +def look_ahead_patterns(keyword_list): + # Converts each blacklist entry to a regex pattern that ensures all words appear, in any order + # + # Example of pattern for job title: + # title_blacklist = ["Data Engineer", "Software Engineer"] + # patterns = ['(?=.*\\bData\\b)(?=.*\\bEngineer\\b)', '(?=.*\\bSoftware\\b)(?=.*\\bEngineer\\b)'] + # + # Description: + # '?=.*' => Regex expression that allows us to check if the following pattern appears + # somewhere in the 
string searched, even if there are any characters before the word + # '\b{WORD}\b' => Regex expression for a word boundary, so that WORD is treated as a whole word + # rather than as part of another word. + patterns = [] + for term in keyword_list: + # Split term into individual words + words = term.split() + # Create a lookahead for each word to ensure it appears independently + lookaheads = [fr"(?=.*\b{re.escape(word)}\b)" for word in words] + # Combine the lookaheads; the '.*' inside each one already allows flexible separators between the words + pattern = "".join(lookaheads) # Ensures all words are present + patterns.append(pattern) + return patterns \ No newline at end of file diff --git a/src/utils/__init__.py b/src/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/utils/browser_utils.py b/src/utils/browser_utils.py new file mode 100644 index 000000000..6b1504614 --- /dev/null +++ b/src/utils/browser_utils.py @@ -0,0 +1,86 @@ + +import random +import time + +from src.logging import logger + + +def is_scrollable(element): + scroll_height = element.get_attribute("scrollHeight") + client_height = element.get_attribute("clientHeight") + scrollable = int(scroll_height) > int(client_height) + logger.debug(f"Element scrollable check: scrollHeight={scroll_height}, clientHeight={client_height}, scrollable={scrollable}") + return scrollable + + +def scroll_slow(driver, scrollable_element, start=0, end=3600, step=300, reverse=False): + logger.debug(f"Starting slow scroll: start={start}, end={end}, step={step}, reverse={reverse}") + + if reverse: + start, end = end, start + step = -step + + if step == 0: + logger.error("Step value cannot be zero.") + raise ValueError("Step cannot be zero.") + + max_scroll_height = int(scrollable_element.get_attribute("scrollHeight")) + current_scroll_position = int(float(scrollable_element.get_attribute("scrollTop"))) + logger.debug(f"Max scroll height of the element: {max_scroll_height}") + logger.debug(f"Current scroll position: {current_scroll_position}") + + if reverse: + if current_scroll_position < start: + start = current_scroll_position + logger.debug(f"Adjusted start position for upward scroll: {start}") + else: + if end > max_scroll_height: + logger.warning(f"End value exceeds the scroll height.
Adjusting end to {max_scroll_height}") + end = max_scroll_height + + script_scroll_to = "arguments[0].scrollTop = arguments[1];" + + try: + if scrollable_element.is_displayed(): + if not is_scrollable(scrollable_element): + logger.warning("The element is not scrollable.") + return + + if (step > 0 and start >= end) or (step < 0 and start <= end): + logger.warning("No scrolling will occur due to incorrect start/end values.") + return + + position = start + previous_position = None # Tracking the previous position to avoid duplicate scrolls + while (step > 0 and position < end) or (step < 0 and position > end): + if position == previous_position: + # Avoid re-scrolling to the same position + logger.debug(f"Stopping scroll as position hasn't changed: {position}") + break + + try: + driver.execute_script(script_scroll_to, scrollable_element, position) + logger.debug(f"Scrolled to position: {position}") + except Exception as e: + logger.error(f"Error during scrolling: {e}") + + previous_position = position + position += step + + # Decrease the step but ensure it doesn't reverse direction + step = max(10, abs(step) - 10) * (-1 if reverse else 1) + + time.sleep(random.uniform(0.6, 1.5)) + + # Ensure the final scroll position is correct + driver.execute_script(script_scroll_to, scrollable_element, end) + logger.debug(f"Scrolled to final position: {end}") + time.sleep(0.5) + else: + logger.warning("The element is not visible.") + except Exception as e: + logger.error(f"Exception occurred during scrolling: {e}") + +def remove_focus_active_element(driver): + driver.execute_script("document.activeElement.blur();") + logger.debug("Removed focus from active element.") \ No newline at end of file diff --git a/src/utils/chrome_utils.py b/src/utils/chrome_utils.py new file mode 100644 index 000000000..3d3a84ac3 --- /dev/null +++ b/src/utils/chrome_utils.py @@ -0,0 +1,60 @@ +import os +from selenium import webdriver +from src.logging import logger + +chromeProfilePath = os.path.join(os.getcwd(), "chrome_profile", "linkedin_profile") + +def ensure_chrome_profile(): + logger.debug(f"Ensuring Chrome profile exists at path: {chromeProfilePath}") + profile_dir = os.path.dirname(chromeProfilePath) + if not os.path.exists(profile_dir): + os.makedirs(profile_dir) + logger.debug(f"Created directory for Chrome profile: {profile_dir}") + if not os.path.exists(chromeProfilePath): + os.makedirs(chromeProfilePath) + logger.debug(f"Created Chrome profile directory: {chromeProfilePath}") + return chromeProfilePath + +def chrome_browser_options(): + logger.debug("Setting Chrome browser options") + ensure_chrome_profile() + options = webdriver.ChromeOptions() + options.add_argument("--start-maximized") + options.add_argument("--no-sandbox") + options.add_argument("--disable-dev-shm-usage") + options.add_argument("--ignore-certificate-errors") + options.add_argument("--disable-extensions") + options.add_argument("--disable-gpu") + options.add_argument("window-size=1200x800") + options.add_argument("--disable-background-timer-throttling") + options.add_argument("--disable-backgrounding-occluded-windows") + options.add_argument("--disable-translate") + options.add_argument("--disable-popup-blocking") + options.add_argument("--no-first-run") + options.add_argument("--no-default-browser-check") + options.add_argument("--disable-logging") + options.add_argument("--disable-autofill") + options.add_argument("--disable-plugins") + options.add_argument("--disable-animations") + options.add_argument("--disable-cache") + 
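+ # Excluding the "enable-automation" and "enable-logging" switches below hides
+ # Chrome's "is being controlled by automated test software" infobar and cuts
+ # down console/log noise; the prefs that follow (value 2 = block) stop images
+ # and stylesheets from loading so pages render faster during applications.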
options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"]) + + prefs = { + "profile.default_content_setting_values.images": 2, + "profile.managed_default_content_settings.stylesheets": 2, + } + options.add_experimental_option("prefs", prefs) + + if len(chromeProfilePath) > 0: + initial_path = os.path.dirname(chromeProfilePath) + profile_dir = os.path.basename(chromeProfilePath) + options.add_argument('--user-data-dir=' + initial_path) + options.add_argument("--profile-directory=" + profile_dir) + logger.debug(f"Using Chrome profile directory: {chromeProfilePath}") + else: + options.add_argument("--incognito") + logger.debug("Using Chrome in incognito mode") + + return options + + diff --git a/src/utils/time_utils.py b/src/utils/time_utils.py new file mode 100644 index 000000000..fd8e065cc --- /dev/null +++ b/src/utils/time_utils.py @@ -0,0 +1,10 @@ +import random +import time + + +def short_sleep() -> None: + time.sleep(random.uniform(1.2, 3)) + + +def medium_sleep() -> None: + time.sleep(random.uniform(3, 5)) \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_aihawk_bot_facade.py b/tests/test_aihawk_bot_facade.py new file mode 100644 index 000000000..edccf6278 --- /dev/null +++ b/tests/test_aihawk_bot_facade.py @@ -0,0 +1,14 @@ +import pytest +# from src.aihawk_job_manager import JobManager + +@pytest.fixture +def job_manager(): + """Fixture for JobManager.""" + return None # Replace with valid instance or mock later + +def test_bot_functionality(job_manager): + """Test AIHawk bot facade.""" + # Example: test job manager interacts with the bot facade correctly + job = {"title": "Software Engineer"} + # job_manager.some_method_to_apply(job) + assert job is not None # Placeholder for actual test diff --git a/tests/test_aihawk_easy_applier.py b/tests/test_aihawk_easy_applier.py new file mode 100644 index 000000000..7329c835c --- /dev/null +++ b/tests/test_aihawk_easy_applier.py @@ -0,0 +1,99 @@ +# import pytest +# from unittest import mock + +# from ai_hawk.job_applier import AIHawkJobApplier + + + +# @pytest.fixture +# def mock_driver(): +# """Fixture to mock Selenium WebDriver.""" +# return mock.Mock() + + +# @pytest.fixture +# def mock_gpt_answerer(): +# """Fixture to mock GPT Answerer.""" +# return mock.Mock() + + +# @pytest.fixture +# def mock_resume_generator_manager(): +# """Fixture to mock Resume Generator Manager.""" +# return mock.Mock() + + +# @pytest.fixture +# def easy_applier(mock_driver, mock_gpt_answerer, mock_resume_generator_manager): +# """Fixture to initialize AIHawkEasyApplier with mocks.""" +# return AIHawkJobApplier( +# driver=mock_driver, +# resume_dir="/path/to/resume", +# set_old_answers=[('Question 1', 'Answer 1', 'Type 1')], +# gpt_answerer=mock_gpt_answerer, +# resume_generator_manager=mock_resume_generator_manager +# ) + + +# def test_initialization(mocker, easy_applier): +# """Test that AIHawkEasyApplier is initialized correctly.""" +# # Mock os.path.exists to return True +# mocker.patch('os.path.exists', return_value=True) + +# easy_applier = AIHawkJobApplier( +# driver=mocker.Mock(), +# resume_dir="/path/to/resume", +# set_old_answers=[('Question 1', 'Answer 1', 'Type 1')], +# gpt_answerer=mocker.Mock(), +# resume_generator_manager=mocker.Mock() +# ) + +# assert easy_applier.resume_path == "/path/to/resume" +# assert len(easy_applier.set_old_answers) == 1 +# assert easy_applier.gpt_answerer is not None +# assert 
easy_applier.resume_generator_manager is not None + + +# def test_apply_to_job_success(mocker, easy_applier): +# """Test successfully applying to a job.""" +# mock_job = mock.Mock() + +# # Mock job_apply so we don't actually try to apply +# mocker.patch.object(easy_applier, 'job_apply') + +# easy_applier.apply_to_job(mock_job) +# easy_applier.job_apply.assert_called_once_with(mock_job) + + +# def test_apply_to_job_failure(mocker, easy_applier): +# """Test failure while applying to a job.""" +# mock_job = mock.Mock() +# mocker.patch.object(easy_applier, 'job_apply', +# side_effect=Exception("Test error")) + +# with pytest.raises(Exception, match="Test error"): +# easy_applier.apply_to_job(mock_job) + +# easy_applier.job_apply.assert_called_once_with(mock_job) + + +# def test_check_for_premium_redirect_no_redirect(mocker, easy_applier): +# """Test that check_for_premium_redirect works when there's no redirect.""" +# mock_job = mock.Mock() +# easy_applier.driver.current_url = "https://www.linkedin.com/jobs/view/1234" + +# easy_applier.check_for_premium_redirect(mock_job) +# easy_applier.driver.get.assert_not_called() + + +# def test_check_for_premium_redirect_with_redirect(mocker, easy_applier): +# """Test that check_for_premium_redirect handles linkedin Premium redirects.""" +# mock_job = mock.Mock() +# easy_applier.driver.current_url = "https://www.linkedin.com/premium" +# mock_job.link = "https://www.linkedin.com/jobs/view/1234" + +# with pytest.raises(Exception, match="Redirected to linkedIn Premium page and failed to return after 3 attempts. Job application aborted."): +# easy_applier.check_for_premium_redirect(mock_job) + +# # Verify that it attempted to return to the job page 3 times +# assert easy_applier.driver.get.call_count == 3 diff --git a/tests/test_aihawk_job_manager.py b/tests/test_aihawk_job_manager.py new file mode 100644 index 000000000..de09a097d --- /dev/null +++ b/tests/test_aihawk_job_manager.py @@ -0,0 +1,185 @@ +# import json +# import re +# from src.job import Job +# from unittest import mock +# from pathlib import Path +# import os +# import pytest +# from ai_hawk.job_manager import AIHawkJobManager +# from selenium.common.exceptions import NoSuchElementException +# from src.logging import logger + + +# @pytest.fixture +# def job_manager(mocker): +# """Fixture to create a AIHawkJobManager instance with mocked driver.""" +# mock_driver = mocker.Mock() +# return AIHawkJobManager(mock_driver) + + +# def test_initialization(job_manager): +# """Test AIHawkJobManager initialization.""" +# assert job_manager.driver is not None +# assert job_manager.set_old_answers == set() +# assert job_manager.easy_applier_component is None + + +# def test_set_parameters(mocker, job_manager): +# """Test setting parameters for the AIHawkJobManager.""" +# # Mocking os.path.exists to return True for the resume path +# mocker.patch('pathlib.Path.exists', return_value=True) + +# params = { +# 'company_blacklist': ['Company A', 'Company B'], +# 'title_blacklist': ['Intern', 'Junior'], +# 'positions': ['Software Engineer', 'Data Scientist'], +# 'locations': ['New York', 'San Francisco'], +# 'apply_once_at_company': True, +# 'uploads': {'resume': '/path/to/resume'}, # Resume path provided here +# 'outputFileDirectory': '/path/to/output', +# 'job_applicants_threshold': { +# 'min_applicants': 5, +# 'max_applicants': 50 +# }, +# 'remote': False, +# 'distance': 50, +# 'date': {'all_time': True} +# } + +# job_manager.set_parameters(params) + +# # Normalize paths to handle platform differences (e.g., 
Windows vs Unix-like systems) +# assert str(job_manager.resume_path) == os.path.normpath('/path/to/resume') +# assert str(job_manager.output_file_directory) == os.path.normpath( +# '/path/to/output') + + +# def next_job_page(self, position, location, job_page): +# logger.debug(f"Navigating to next job page: {position} in {location}, page {job_page}") +# self.driver.get( +# f"https://www.linkedin.com/jobs/search/{self.base_search_url}&keywords={position}&location={location}&start={job_page * 25}") + + +# def test_get_jobs_from_page_no_jobs(mocker, job_manager): +# """Test get_jobs_from_page when no jobs are found.""" +# mocker.patch.object(job_manager.driver, 'find_element', +# side_effect=NoSuchElementException) + +# jobs = job_manager.get_jobs_from_page() +# assert jobs == [] + + +# def test_get_jobs_from_page_with_jobs(mocker, job_manager): +# """Test get_jobs_from_page when job elements are found.""" +# # Mock no_jobs_element to simulate the absence of "No matching jobs found" banner +# no_jobs_element_mock = mocker.Mock() +# no_jobs_element_mock.text = "" # Empty text means "No matching jobs found" is not present + +# # Mock the driver to simulate the page source +# mocker.patch.object(job_manager.driver, 'page_source', return_value="") + +# # Mock the outer find_element +# container_mock = mocker.Mock() + +# # Mock the inner find_elements to return job list items +# job_element_mock = mocker.Mock() +# # Simulating two job items +# job_elements_list = [job_element_mock, job_element_mock] + +# # Return the container mock, which itself returns the job elements list +# container_mock.find_elements.return_value = job_elements_list +# mocker.patch.object(job_manager.driver, 'find_element', side_effect=[ +# no_jobs_element_mock, +# container_mock +# ]) + +# job_manager.get_jobs_from_page() + +# assert job_manager.driver.find_element.call_count == 2 +# assert container_mock.find_elements.call_count == 1 + + + +# def test_apply_jobs_with_no_jobs(mocker, job_manager): +# """Test apply_jobs when no jobs are found.""" +# # Mocking find_element to return a mock element that simulates no jobs +# mock_element = mocker.Mock() +# mock_element.text = "No matching jobs found" + +# # Mock the driver to return the mock element when find_element is called +# mocker.patch.object(job_manager.driver, 'find_element', +# return_value=mock_element) + +# # Call apply_jobs and ensure no exceptions are raised +# job_manager.apply_jobs() + +# # Ensure it attempted to find the job results list +# assert job_manager.driver.find_element.call_count == 1 + + +# def test_apply_jobs_with_jobs(mocker, job_manager): +# """Test apply_jobs when jobs are present.""" + +# # Mock the page_source to simulate what the page looks like when jobs are present +# mocker.patch.object(job_manager.driver, 'page_source', +# return_value="some job content") + +# # Simulating two job elements +# job_element_mock = mocker.Mock() +# job_elements_list = [job_element_mock, job_element_mock] + +# mocker.patch.object(job_manager, 'get_jobs_from_page', return_value=job_elements_list) + +# job = Job( +# title="Title", +# company="Company", +# location="Location", +# apply_method="", +# link="Link" +# ) + +# # Mock the extract_job_information_from_tile method to return sample job info +# mocker.patch.object(job_manager, 'job_tile_to_job', return_value=job) + +# # Mock other methods like is_blacklisted, is_already_applied_to_job, and is_already_applied_to_company +# mocker.patch.object(job_manager, 'is_blacklisted', return_value=False) +# 
mocker.patch.object( +# job_manager, 'is_already_applied_to_job', return_value=False) +# mocker.patch.object( +# job_manager, 'is_already_applied_to_company', return_value=False) + +# # Mock the AIHawkEasyApplier component +# job_manager.easy_applier_component = mocker.Mock() + +# # Mock the output_file_directory as a valid Path object +# job_manager.output_file_directory = Path("/mocked/path/to/output") + +# # Mock Path.exists() to always return True (so no actual file system interaction is needed) +# mocker.patch.object(Path, 'exists', return_value=True) + +# # Mock the open function to prevent actual file writing +# failed_mock_data = [{ +# "company": "TestCompany", +# "job_title": "Test Data Engineer", +# "link": "https://www.example.com/jobs/view/1234567890/", +# "job_recruiter": "", +# "job_location": "Anywhere (Remote)", +# "pdf_path": "file:///mocked/path/to/pdf" +# }] + +# # Serialize the dictionary to a JSON string +# json_read_data = json.dumps(failed_mock_data) + +# mock_open = mocker.mock_open(read_data=json_read_data) +# mocker.patch('builtins.open', mock_open) + +# # Run the apply_jobs method +# job_manager.apply_jobs() + +# # Assertions +# assert job_manager.get_jobs_from_page.call_count == 1 +# # Called for each job element +# assert job_manager.job_tile_to_job.call_count == 2 +# # Called for each job element +# assert job_manager.easy_applier_component.job_apply.call_count == 2 +# mock_open.assert_called() # Ensure that the open function was called diff --git a/tests/test_job_application_profile.py b/tests/test_job_application_profile.py new file mode 100644 index 000000000..f59ac3a9d --- /dev/null +++ b/tests/test_job_application_profile.py @@ -0,0 +1,185 @@ +import pytest +from src.job_application_profile import JobApplicationProfile + +@pytest.fixture +def valid_yaml(): + """Valid YAML string for initializing JobApplicationProfile.""" + return """ + self_identification: + gender: Male + pronouns: He/Him + veteran: No + disability: No + ethnicity: Asian + legal_authorization: + eu_work_authorization: "Yes" + us_work_authorization: "Yes" + requires_us_visa: "No" + requires_us_sponsorship: "Yes" + requires_eu_visa: "No" + legally_allowed_to_work_in_eu: "Yes" + legally_allowed_to_work_in_us: "Yes" + requires_eu_sponsorship: "No" + canada_work_authorization: "Yes" + requires_canada_visa: "No" + legally_allowed_to_work_in_canada: "Yes" + requires_canada_sponsorship: "No" + uk_work_authorization: "Yes" + requires_uk_visa: "No" + legally_allowed_to_work_in_uk: "Yes" + requires_uk_sponsorship: "No" + work_preferences: + remote_work: "Yes" + in_person_work: "No" + open_to_relocation: "Yes" + willing_to_complete_assessments: "Yes" + willing_to_undergo_drug_tests: "Yes" + willing_to_undergo_background_checks: "Yes" + availability: + notice_period: "2 weeks" + salary_expectations: + salary_range_usd: "80000-120000" + """ + +@pytest.fixture +def missing_field_yaml(): + """YAML string missing a required field (self_identification).""" + return """ + legal_authorization: + eu_work_authorization: "Yes" + us_work_authorization: "Yes" + requires_us_visa: "No" + requires_us_sponsorship: "Yes" + requires_eu_visa: "No" + legally_allowed_to_work_in_eu: "Yes" + legally_allowed_to_work_in_us: "Yes" + requires_eu_sponsorship: "No" + canada_work_authorization: "Yes" + requires_canada_visa: "No" + legally_allowed_to_work_in_canada: "Yes" + requires_canada_sponsorship: "No" + uk_work_authorization: "Yes" + requires_uk_visa: "No" + legally_allowed_to_work_in_uk: "Yes" + requires_uk_sponsorship: 
"No" + work_preferences: + remote_work: "Yes" + in_person_work: "No" + open_to_relocation: "Yes" + willing_to_complete_assessments: "Yes" + willing_to_undergo_drug_tests: "Yes" + willing_to_undergo_background_checks: "Yes" + availability: + notice_period: "2 weeks" + salary_expectations: + salary_range_usd: "80000-120000" + """ + +@pytest.fixture +def invalid_type_yaml(): + """YAML string with an invalid type for a field.""" + return """ + self_identification: + gender: Male + pronouns: He/Him + veteran: No + disability: No + ethnicity: Asian + legal_authorization: + eu_work_authorization: "Yes" + us_work_authorization: "Yes" + requires_us_visa: "No" + requires_us_sponsorship: "Yes" + requires_eu_visa: "No" + legally_allowed_to_work_in_eu: "Yes" + legally_allowed_to_work_in_us: "Yes" + requires_eu_sponsorship: "No" + canada_work_authorization: "Yes" + requires_canada_visa: "No" + legally_allowed_to_work_in_canada: "Yes" + requires_canada_sponsorship: "No" + uk_work_authorization: "Yes" + requires_uk_visa: "No" + legally_allowed_to_work_in_uk: "Yes" + requires_uk_sponsorship: "No" + work_preferences: + remote_work: 12345 # Invalid type, expecting a string + in_person_work: "No" + open_to_relocation: "Yes" + willing_to_complete_assessments: "Yes" + willing_to_undergo_drug_tests: "Yes" + willing_to_undergo_background_checks: "Yes" + availability: + notice_period: "2 weeks" + salary_expectations: + salary_range_usd: "80000-120000" + """ + +def test_initialize_with_valid_yaml(valid_yaml): + """Test initializing JobApplicationProfile with valid YAML.""" + profile = JobApplicationProfile(valid_yaml) + + # Check that the profile fields are correctly initialized + assert profile.self_identification.gender == "Male" + assert profile.self_identification.pronouns == "He/Him" + assert profile.legal_authorization.eu_work_authorization == "Yes" + assert profile.work_preferences.remote_work == "Yes" + assert profile.availability.notice_period == "2 weeks" + assert profile.salary_expectations.salary_range_usd == "80000-120000" + +def test_initialize_with_missing_field(missing_field_yaml): + """Test initializing JobApplicationProfile with missing required fields.""" + with pytest.raises(KeyError) as excinfo: + JobApplicationProfile(missing_field_yaml) + assert "self_identification" in str(excinfo.value) + +def test_initialize_with_invalid_yaml(): + """Test initializing JobApplicationProfile with invalid YAML.""" + invalid_yaml_str = """ + self_identification: + gender: Male + pronouns: He/Him + veteran: No + disability: No + ethnicity: Asian + legal_authorization: + eu_work_authorization: "Yes" + us_work_authorization: "Yes" + requires_us_visa: "No" + requires_us_sponsorship: "Yes" + requires_eu_visa: "No" + legally_allowed_to_work_in_eu: "Yes" + legally_allowed_to_work_in_us: "Yes" + requires_eu_sponsorship: "No" + canada_work_authorization: "Yes" + requires_canada_visa: "No" + legally_allowed_to_work_in_canada: "Yes" + requires_canada_sponsorship: "No" + uk_work_authorization: "Yes" + requires_uk_visa: "No" + legally_allowed_to_work_in_uk: "Yes" + requires_uk_sponsorship: "No" + work_preferences: + remote_work: "Yes" + in_person_work: "No" + availability: + notice_period: "2 weeks" + salary_expectations: + salary_range_usd: "80000-120000" + """ # Missing fields in work_preferences + + with pytest.raises(TypeError): + JobApplicationProfile(invalid_yaml_str) + +def test_str_representation(valid_yaml): + """Test the string representation of JobApplicationProfile.""" + profile = 
JobApplicationProfile(valid_yaml) + profile_str = str(profile) + + assert "Self Identification:" in profile_str + assert "Legal Authorization:" in profile_str + assert "Work Preferences:" in profile_str + assert "Availability:" in profile_str + assert "Salary Expectations:" in profile_str + assert "Male" in profile_str + assert "80000-120000" in profile_str diff --git a/tests/test_linkedIn_authenticator.py b/tests/test_linkedIn_authenticator.py new file mode 100644 index 000000000..af2a5757b --- /dev/null +++ b/tests/test_linkedIn_authenticator.py @@ -0,0 +1,105 @@ +from httpx import get +from numpy import place +import pytest +from selenium.webdriver.common.by import By +from selenium.webdriver.support.wait import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC +from ai_hawk.authenticator import AIHawkAuthenticator +from selenium.common.exceptions import NoSuchElementException, TimeoutException + +from job_portals.base_job_portal import get_authenticator +from job_portals.linkedIn.authenticator import LinkedInAuthenticator + + + + +@pytest.fixture +def mock_driver(mocker): + """Fixture to mock the Selenium WebDriver.""" + return mocker.Mock() + + +@pytest.fixture +def authenticator(mock_driver): + """Fixture to initialize AIHawkAuthenticator with a mocked driver.""" + return get_authenticator(mock_driver, platform='linkedin') + + +def test_handle_login(mocker, authenticator): + """Test handling the AIHawk login process.""" + mocker.patch.object(authenticator.driver, 'get') + mocker.patch.object(authenticator, 'prompt_for_credentials') + mocker.patch.object(authenticator, 'handle_security_checks') + + # Mock current_url as a regular return value, not PropertyMock + mocker.patch.object(authenticator.driver, 'current_url', + return_value='https://www.linkedin.com/login') + + authenticator.handle_login() + + authenticator.driver.get.assert_called_with( + 'https://www.linkedin.com/login') + authenticator.prompt_for_credentials.assert_called_once() + authenticator.handle_security_checks.assert_called_once() + + +def test_enter_credentials_success(mocker, authenticator): + """Test entering credentials.""" + email_mock = mocker.Mock() + password_mock = mocker.Mock() + + mocker.patch.object(WebDriverWait, 'until', return_value=email_mock) + mocker.patch.object(authenticator.driver, 'find_element', + return_value=password_mock) + +def test_is_logged_in_true(mock_driver): + # Mock the current_url to simulate a logged-in state + mock_driver.current_url = "https://www.linkedin.com/feed/" + authenticator = LinkedInAuthenticator(mock_driver) + + assert authenticator.is_logged_in == True + +def test_is_logged_in_false(mock_driver): + # Mock the current_url to simulate a logged-out state + mock_driver.current_url = "https://www.linkedin.com/login" + authenticator = LinkedInAuthenticator(mock_driver) + + assert authenticator.is_logged_in == False + +def test_is_logged_in_partial_keyword(mock_driver): + # Mock the current_url to simulate a URL containing a keyword but not logged in + mock_driver.current_url = "https://www.linkedin.com/jobs/search/" + authenticator = LinkedInAuthenticator(mock_driver) + + assert authenticator.is_logged_in == True + +def test_is_logged_in_no_linkedin(mock_driver): + # Mock the current_url to simulate a URL not related to LinkedIn + mock_driver.current_url = "https://www.example.com/feed/" + authenticator = LinkedInAuthenticator(mock_driver) + + assert authenticator.is_logged_in == False + + +def test_handle_security_check_success(mocker, 
authenticator): + """Test handling security check successfully.""" + mocker.patch.object(WebDriverWait, 'until', side_effect=[ + mocker.Mock(), # Security checkpoint detection + mocker.Mock() # Security check completion + ]) + + authenticator.handle_security_checks() + + # Verify WebDriverWait is called with EC.url_contains for both the challenge and feed + WebDriverWait(authenticator.driver, 10).until.assert_any_call(mocker.ANY) + WebDriverWait(authenticator.driver, 300).until.assert_any_call(mocker.ANY) + + +def test_handle_security_check_timeout(mocker, authenticator): + """Test handling security check timeout.""" + mocker.patch.object(WebDriverWait, 'until', side_effect=TimeoutException) + + authenticator.handle_security_checks() + + # Verify WebDriverWait is called with EC.url_contains for the challenge + WebDriverWait(authenticator.driver, 10).until.assert_any_call(mocker.ANY) diff --git a/tests/test_regex_utils.py b/tests/test_regex_utils.py new file mode 100644 index 000000000..ae51f2fd5 --- /dev/null +++ b/tests/test_regex_utils.py @@ -0,0 +1,55 @@ +import pytest +from ai_hawk.job_manager import AIHawkJobManager +from src.regex_utils import look_ahead_patterns + +apply_component = AIHawkJobManager(None) # For this test we dont need the web driver + +# Test title, company and location blacklist definition +title_blacklist = ["Data Engineer", "Software Engineer"] +company_blacklist = ["ABC Corp", "XYZ Inc"] +location_blacklist = ["Brazil"] +seen_jobs = set() + +# Creating regex patterns +apply_component.title_blacklist_patterns = look_ahead_patterns(title_blacklist) +apply_component.company_blacklist_patterns = look_ahead_patterns(company_blacklist) +apply_component.location_blacklist_patterns = look_ahead_patterns(location_blacklist) +apply_component.seen_jobs = seen_jobs +apply_component.seen_jobs.add("link14") # added link for 'seen link' test + +test_cases = [ + # Blacklist matches for "Data Engineer" in various forms + ("Data Engineer", "Tech Corp", "link1", "USA", True), # Exact match (blacklist) + ("Data Engineer (Gen AI)", "Tech Corp", "link2", "USA", True), # Partial match with parentheses (blacklist) + ("Senior Data Engineer", "Tech Corp", "link3", "USA", True), # Partial match with prefix (blacklist) + ("Engineer, Data", "Tech Corp", "link4", "USA", True), # Words reordered (blacklist) + ("Data-Engineer", "Tech Corp", "link5", "USA", True), # Hyphenated (blacklist) + ("Data & Engineer", "Tech Corp", "link6", "USA", True), # Ampersand separator (blacklist) + + # Blacklist matches for "Brazil" in location in various forms + ("Project Manager", "Tech Corp", "link7", "Brazil", True), # Exact match (blacklist) + ("Project Manager", "Tech Corp", "link8", "Rio de Janeiro, Brazil", True), # Location with city and country (blacklist) + ("Project Manager", "Tech Corp", "link9", "São Paulo - Brazil", True), # Location with hyphen separator (blacklist) + ("Project Manager", "Tech Corp", "link10", "Brazil, South America", True), # Location with continent (blacklist) + + # Blacklist matches for "ABC Corp" in various forms + ("Marketing Specialist", "ABC Corp", "link11", "USA", True), # Exact match (blacklist) + ("Marketing Specialist", "ABC Corporation", "link12", "USA", False), # Variants on corporation, part of a different word + ("Marketing Specialist", "ABC CORP", "link13", "USA", True), # Uppercase variant (blacklist) + + # Seen job link test + ("Marketing Specialist", "DEF Corp", "link14", "USA", True), # Link has been seen (blacklist) + + # Cases that should NOT be blacklisted 
(expected to pass) + ("Software Developer", "Tech Corp", "link15", "USA", False), # Title not blacklisted + ("Product Engineer", "XYZ Ltd", "link16", "Canada", False), # Title and location not blacklisted + ("Data Science Specialist", "DEF Corp", "link17", "USA", False), # Title similar but not matching blacklist + ("Project Manager", "GHI Inc", "link18", "Argentina", False), # Location close to blacklist but distinct + ("Operations Manager", "ABC Technology", "link19", "USA", False) # Company name similar but not matching +] + +@pytest.mark.parametrize("job_title, company, link, job_location, expected_output", test_cases) +def test_is_blacklisted(job_title, company, link, job_location, expected_output): + actual_output = apply_component.is_blacklisted(job_title, company, link, job_location) + + assert actual_output == expected_output, f"Failed for case: {job_title} at {company} in {job_location} (link: {link})" diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 000000000..2ca828b44 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,86 @@ +# tests/test_utils.py +import pytest +import os +import time +from unittest import mock +from selenium.webdriver.remote.webelement import WebElement +from src.utils.browser_utils import is_scrollable, scroll_slow +from src.utils.chrome_utils import chrome_browser_options, ensure_chrome_profile + +# Mocking logging to avoid actual file writing +@pytest.fixture(autouse=True) +def mock_logger(mocker): + mocker.patch("src.logging.logger") + +# Test ensure_chrome_profile function +def test_ensure_chrome_profile(mocker): + mocker.patch("os.path.exists", return_value=False) # Pretend directory doesn't exist + mocker.patch("os.makedirs") # Mock making directories + + # Call the function + profile_path = ensure_chrome_profile() + + # Verify that os.makedirs was called twice to create the directory + assert profile_path.endswith("linkedin_profile") + assert os.path.exists.called + assert os.makedirs.called + +# Test is_scrollable function +def test_is_scrollable(mocker): + mock_element = mocker.Mock(spec=WebElement) + mock_element.get_attribute.side_effect = lambda attr: "1000" if attr == "scrollHeight" else "500" + + # Call the function + scrollable = is_scrollable(mock_element) + + # Check the expected outcome + assert scrollable is True + mock_element.get_attribute.assert_any_call("scrollHeight") + mock_element.get_attribute.assert_any_call("clientHeight") + +# Test scroll_slow function +def test_scroll_slow(mocker): + mock_driver = mocker.Mock() + mock_element = mocker.Mock(spec=WebElement) + + # Mock element's attributes for scrolling + mock_element.get_attribute.side_effect = lambda attr: "2000" if attr == "scrollHeight" else "0" + mock_element.is_displayed.return_value = True + mocker.patch("time.sleep") # Mock time.sleep to avoid waiting + + # Call the function + scroll_slow(mock_driver, mock_element, start=0, end=1000, step=100, reverse=False) + + # Ensure that scrolling happened multiple times + assert mock_driver.execute_script.called + mock_element.is_displayed.assert_called_once() + +def test_scroll_slow_element_not_scrollable(mocker): + mock_driver = mocker.Mock() + mock_element = mocker.Mock(spec=WebElement) + + # Mock the attributes so the element is not scrollable + mock_element.get_attribute.side_effect = lambda attr: "1000" if attr == "scrollHeight" else "1000" + mock_element.is_displayed.return_value = True + + scroll_slow(mock_driver, mock_element, start=0, end=1000, step=100) + + # Ensure it detected 
non-scrollable element + mock_driver.execute_script.assert_not_called() + +# Test chrome_browser_options function +def test_chrome_browser_options(mocker): + mocker.patch("src.utils.chrome_utils.ensure_chrome_profile") + mocker.patch("os.path.dirname", return_value="/mocked/path") + mocker.patch("os.path.basename", return_value="profile_directory") + + mock_options = mocker.Mock() + + mocker.patch("selenium.webdriver.ChromeOptions", return_value=mock_options) + + # Call the function + options = chrome_browser_options() + + # Ensure options were set + assert mock_options.add_argument.called + assert options == mock_options
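For reference, a minimal sketch of how the patterns produced by `look_ahead_patterns` (src/regex_utils.py) can be applied. This is only an illustration, not code from this changeset; the `re.IGNORECASE` flag is an assumption that mirrors the case-insensitive matches tests/test_regex_utils.py expects (e.g. "ABC CORP" against the "ABC Corp" blacklist entry).

```python
import re

from src.regex_utils import look_ahead_patterns

# A two-word blacklist entry; the generated pattern demands both words appear
# as whole words, in any order, anywhere in the string.
patterns = look_ahead_patterns(["Data Engineer"])
# patterns == [r'(?=.*\bData\b)(?=.*\bEngineer\b)']

titles = ["Senior Data Engineer", "Engineer, Data", "Data Science Specialist"]
for title in titles:
    # re.IGNORECASE is assumed here to mirror the case-insensitive behaviour
    # the blacklist tests exercise; drop it for case-sensitive matching.
    blacklisted = any(re.search(p, title, re.IGNORECASE) for p in patterns)
    print(f"{title!r} -> blacklisted: {blacklisted}")
# Prints True, True, False: "Data Science Specialist" never contains the
# whole word "Engineer", so the second lookahead fails.
```

Because the pattern consists only of lookaheads, `re.search` succeeds with a zero-width match as soon as every word is present, which is why reordered or hyphenated titles such as "Engineer, Data" and "Data-Engineer" still trip the blacklist in the tests above.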