Skip to content

Commit

Permalink
now we use undetected-chromedriver
Browse files Browse the repository at this point in the history
  • Loading branch information
feder-cr committed Nov 30, 2024
1 parent 363351b commit fd26653
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 31 deletions.
14 changes: 4 additions & 10 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from lib_resume_builder_AIHawk import Resume, FacadeManager, ResumeGenerator, StyleManager
from typing import Optional
from constants import PLAIN_TEXT_RESUME_YAML, SECRETS_YAML, WORK_PREFERENCES_YAML
from src.utils.chrome_utils import chrome_browser_options
from src.utils.chrome_utils import init_browser

from src.job_application_profile import JobApplicationProfile
from src.logging import logger
Expand All @@ -26,6 +26,7 @@
from ai_hawk.bot_facade import AIHawkBotFacade
from ai_hawk.job_manager import AIHawkJobManager
from ai_hawk.llm.llm_manager import GPTAnswerer
from ai_hawk.llm.llm_manager import GPTParser


class ConfigError(Exception):
Expand Down Expand Up @@ -155,14 +156,6 @@ def file_paths_to_dict(resume_file: Path | None, plain_text_resume_file: Path) -

return result

def init_browser() -> webdriver.Chrome:
    """Create a Chrome WebDriver configured with the project's browser options.

    Returns:
        The started ``webdriver.Chrome`` instance.

    Raises:
        RuntimeError: If building the options, installing the driver, or
            starting Chrome fails. The original exception is attached as
            ``__cause__`` so the underlying failure stays visible.
    """
    try:
        options = chrome_browser_options()
        service = ChromeService(ChromeDriverManager().install())
        return webdriver.Chrome(service=service, options=options)
    except Exception as e:
        # Chain explicitly so the traceback reads as "caused by" rather than
        # "during handling of the above exception, another exception occurred".
        raise RuntimeError(f"Failed to initialize browser: {str(e)}") from e

def create_and_run_bot(parameters, llm_api_key):
try:
style_manager = StyleManager()
Expand All @@ -182,9 +175,10 @@ def create_and_run_bot(parameters, llm_api_key):
login_component = get_authenticator(driver=browser, platform='linkedin')
apply_component = AIHawkJobManager(browser)
gpt_answerer_component = GPTAnswerer(parameters, llm_api_key)
gpt_parser_component = GPTParser(parameters, llm_api_key)
bot = AIHawkBotFacade(login_component, apply_component)
bot.set_job_application_profile_and_resume(job_application_profile_object, resume_object)
bot.set_gpt_answerer_and_resume_generator(gpt_answerer_component, resume_generator_manager)
bot.set_gpt_answerer_and_resume_generator(gpt_parser_component, gpt_answerer_component, resume_generator_manager)
bot.set_parameters(parameters)
bot.start_login()
if (parameters['collectMode'] == True):
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@ webdriver-manager==4.0.2
pytest
pytest-mock
pytest-cov
undetected_chromedriver
3 changes: 2 additions & 1 deletion src/ai_hawk/bot_facade.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,13 @@ def set_job_application_profile_and_resume(self, job_application_profile, resume
logger.debug("Job application profile and resume set successfully")


def set_gpt_answerer_and_resume_generator(self, gpt_answerer_component, resume_generator_manager):
def set_gpt_answerer_and_resume_generator(self, gpt_parser_component, gpt_answerer_component, resume_generator_manager):
logger.debug("Setting GPT answerer and resume generator")
self._ensure_job_profile_and_resume_set()
gpt_answerer_component.set_job_application_profile(self.job_application_profile)
gpt_answerer_component.set_resume(self.resume)
self.apply_component.set_gpt_answerer(gpt_answerer_component)
self.apply_component.set_gpt_parser(gpt_parser_component)
self.apply_component.set_resume_generator_manager(resume_generator_manager)
self.state.gpt_answerer_set = True
logger.debug("GPT answerer and resume generator set successfully")
Expand Down
17 changes: 9 additions & 8 deletions src/ai_hawk/job_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from inputimeout import inputimeout, TimeoutOccurred
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

from src.ai_hawk.llm.llm_manager import GPTAnswerer

from ai_hawk.linkedIn_easy_applier import AIHawkEasyApplier
from config import JOB_MAX_APPLICATIONS, JOB_MIN_APPLICATIONS, MINIMUM_WAIT_TIME_IN_SECONDS
Expand Down Expand Up @@ -82,6 +82,10 @@ def set_parameters(self, parameters):
def set_gpt_answerer(self, gpt_answerer):
    """Attach the GPT answerer component to this manager.

    Args:
        gpt_answerer: Object stored as ``self.gpt_answerer``.
    """
    logger.debug("Setting GPT answerer")
    self.gpt_answerer = gpt_answerer

def set_gpt_parser(self, gpt_parser):
    """Attach the GPT parser component to this manager.

    Args:
        gpt_parser: Object stored as ``self.gpt_parser``.
    """
    logger.debug("Setting GPT parser")
    self.gpt_parser = gpt_parser

def set_resume_generator_manager(self, resume_generator_manager):
logger.debug("Setting resume generator manager")
Expand Down Expand Up @@ -168,7 +172,7 @@ def start_applying(self):
try:
self.apply_jobs()
except Exception as e:
logger.error(f"Error during job application: {e} {traceback.format_exc()}")
logger.error(f"Error during job application: {e}")
continue

logger.debug("Applying to jobs on this page has been completed!")
Expand Down Expand Up @@ -481,18 +485,15 @@ def job_tile_to_job(self, job_tile) -> Job:
# Extract company name and location
try:
# contains both with a delimter '·'
company_location = job_tile.find_element(
By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span"
).text
company, location = company_location.split('·')
company, location = self.gpt_parser.extract_company_and_title(job_tile.get_attribute("outerHTML"))
job.company = company.strip()
logger.debug(f"Job company extracted: {job.company}")
job.location = location.strip()
logger.debug(f"Job location extracted: {job.location}")
except ValueError:
except ValueError as e:
logger.warning(f"Could not find the company and location. {e} {traceback.format.exc()}")

except NoSuchElementException:
except NoSuchElementException as e:
logger.warning(f"Job comapy and location are missing. {e} {traceback.format.exc()}")

# Extract job State
Expand Down
87 changes: 79 additions & 8 deletions src/ai_hawk/llm/llm_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ def parse_llmresult(self, llmresult: AIMessage) -> Dict[str, Dict]:
class GPTAnswerer:
def __init__(self, config, llm_api_key):
self.ai_adapter = AIAdapter(config, llm_api_key)
self.llm_cheap = LoggerChatModel(self.ai_adapter)
self.llm = LoggerChatModel(self.ai_adapter)

@property
def job_description(self):
Expand Down Expand Up @@ -512,7 +512,7 @@ def summarize_job_description(self, text: str) -> str:
prompts.summarize_prompt_template
)
prompt = ChatPromptTemplate.from_template(prompts.summarize_prompt_template)
chain = prompt | self.llm_cheap | StrOutputParser()
chain = prompt | self.llm | StrOutputParser()
raw_output = chain.invoke({TEXT: text})
output = self._clean_llm_output(raw_output)
logger.debug(f"Summary generated: {output}")
Expand All @@ -521,7 +521,7 @@ def summarize_job_description(self, text: str) -> str:
def _create_chain(self, template: str):
logger.debug(f"Creating chain with template: {template}")
prompt = ChatPromptTemplate.from_template(template)
return prompt | self.llm_cheap | StrOutputParser()
return prompt | self.llm | StrOutputParser()

def answer_question_textual_wide_range(self, question: str) -> str:
logger.debug(f"Answering textual question: {question}")
Expand Down Expand Up @@ -558,7 +558,7 @@ def answer_question_textual_wide_range(self, question: str) -> str:
}

prompt = ChatPromptTemplate.from_template(prompts.determine_section_template)
chain = prompt | self.llm_cheap | StrOutputParser()
chain = prompt | self.llm | StrOutputParser()
raw_output = chain.invoke({QUESTION: question})
output = self._clean_llm_output(raw_output)

Expand Down Expand Up @@ -615,7 +615,7 @@ def answer_question_numeric(
prompts.numeric_question_template
)
prompt = ChatPromptTemplate.from_template(func_template)
chain = prompt | self.llm_cheap | StrOutputParser()
chain = prompt | self.llm | StrOutputParser()
raw_output_str = chain.invoke(
{
RESUME_EDUCATIONS: self.resume.education_details,
Expand Down Expand Up @@ -650,7 +650,7 @@ def answer_question_from_options(self, question: str, options: list[str]) -> str
logger.debug(f"Answering question from options: {question}")
func_template = self._preprocess_template_string(prompts.options_template)
prompt = ChatPromptTemplate.from_template(func_template)
chain = prompt | self.llm_cheap | StrOutputParser()
chain = prompt | self.llm | StrOutputParser()
raw_output_str = chain.invoke(
{
RESUME: self.resume,
Expand All @@ -672,7 +672,7 @@ def resume_or_cover(self, phrase: str) -> str:
prompt = ChatPromptTemplate.from_template(
prompts.resume_or_cover_letter_template
)
chain = prompt | self.llm_cheap | StrOutputParser()
chain = prompt | self.llm | StrOutputParser()
raw_response = chain.invoke({PHRASE: phrase})
response = self._clean_llm_output(raw_response)
logger.debug(f"Response for resume_or_cover: {response}")
Expand All @@ -686,7 +686,7 @@ def resume_or_cover(self, phrase: str) -> str:
def is_job_suitable(self):
logger.info("Checking if job is suitable")
prompt = ChatPromptTemplate.from_template(prompts.is_relavant_position_template)
chain = prompt | self.llm_cheap | StrOutputParser()
chain = prompt | self.llm | StrOutputParser()
raw_output = chain.invoke(
{
RESUME: self.resume,
Expand All @@ -707,3 +707,74 @@ def is_job_suitable(self):
if int(score) < JOB_SUITABILITY_SCORE:
logger.debug(f"Job is not suitable: {reasoning}")
return int(score) >= JOB_SUITABILITY_SCORE

# NOTE(review): these imports sit in the middle of the module; PEP 8 places
# imports at the top of the file. `re` is not used by the code visible below
# this point — confirm whether it is needed.
import re
import json
import logging

# NOTE(review): this rebinds the module-level name `logger`. Functions defined
# earlier in this module also call `logger`, so if the file imports a different
# logger at the top, this assignment replaces it for all of them at call time —
# confirm this is intended.
logger = logging.getLogger(__name__)

class GPTParser:
    """Extract structured job data from raw HTML by prompting the configured LLM."""

    def __init__(self, config, llm_api_key):
        """Build the chat model used for parsing.

        Args:
            config: Passed through to ``AIAdapter``.
            llm_api_key: Passed through to ``AIAdapter``.
        """
        self.ai_adapter = AIAdapter(config, llm_api_key)
        self.llm = LoggerChatModel(self.ai_adapter)

    @staticmethod
    def _clean_llm_output(output: str) -> str:
        """Remove ``*`` and ``#`` characters and trim surrounding whitespace."""
        return output.replace("*", "").replace("#", "").strip()

    @staticmethod
    def _preprocess_template_string(template: str) -> str:
        """Strip the common leading indentation from a prompt template."""
        return textwrap.dedent(template)

    @staticmethod
    def _parse_company_and_title(output: str) -> tuple[str, str]:
        """Turn the model's reply into a ``(company, title)`` pair.

        Never raises on malformed replies: anything that is not a JSON object
        with non-empty string fields falls back to the placeholder values.

        Args:
            output: The cleaned text returned by the model.

        Returns:
            ``(company, title)``; a missing, null, empty, or non-string field
            is replaced by ``'Company not found'`` / ``'Title not found'``.
        """
        default_company = 'Company not found'
        default_title = 'Title not found'

        text = output.strip()
        # The prompt asks for bare JSON, but tolerate a reply that is still
        # wrapped in a Markdown code fence (``` or ```json).
        if text.startswith("```"):
            text = text.strip("`").strip()
            if text.lower().startswith("json"):
                text = text[4:].lstrip()

        try:
            parsed = json.loads(text)
        except json.JSONDecodeError as e:
            logger.error(f"JSON decoding failed: {e}")
            return default_company, default_title

        # json.loads can legitimately return a list, string, or number; only a
        # JSON object supports the key lookups below.
        if not isinstance(parsed, dict):
            logger.error(f"JSON decoding failed: expected an object, got {type(parsed).__name__}")
            return default_company, default_title

        def _field(key: str, fallback: str) -> str:
            # Callers call .strip() on the result, so never hand back None or a
            # non-string value.
            value = parsed.get(key)
            return value if isinstance(value, str) and value else fallback

        return _field('company', default_company), _field('title', default_title)

    def extract_company_and_title(self, html_content: str) -> tuple[str, str]:
        """
        Uses AI to extract the company name and job title from HTML code.

        Args:
            html_content (str): The HTML code to analyze.

        Returns:
            tuple[str, str]: ``(company, title)``. When the model's reply cannot
            be parsed, or a field is absent, the corresponding element is
            ``'Company not found'`` / ``'Title not found'``.
        """
        logger.debug("Extracting company and title from HTML content.")

        # AI prompt template
        extract_prompt_template = """
        You are an AI assistant extracting information from HTML code.
        Extract the company name and job title from the following HTML code:
        {html_content}
        Provide the response in JSON format with keys "company" and "title", Provide only the exact JSON without any explanations or additional text and also without ```json ```
        """

        # Preprocess the template
        extract_prompt_template = self._preprocess_template_string(extract_prompt_template)

        # Create the prompt and the chain
        prompt = ChatPromptTemplate.from_template(extract_prompt_template)
        chain = prompt | self.llm | StrOutputParser()

        # Invoke the chain with the HTML
        raw_output = chain.invoke({"html_content": html_content})

        # Clean the output
        output = self._clean_llm_output(raw_output)
        logger.debug(f"Raw output from AI: {output}")

        # Parse the JSON output
        company, title = self._parse_company_and_title(output)

        logger.debug(f"Extracted company: {company}, title: {title}")
        return company, title
12 changes: 8 additions & 4 deletions src/utils/chrome_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
from selenium import webdriver
import undetected_chromedriver as uc
from src.logging import logger

chromeProfilePath = os.path.join(os.getcwd(), "chrome_profile", "linkedin_profile")
Expand All @@ -18,7 +18,7 @@ def ensure_chrome_profile():
def chrome_browser_options():
logger.debug("Setting Chrome browser options")
ensure_chrome_profile()
options = webdriver.ChromeOptions()
options = uc.ChromeOptions()
options.add_argument("--start-maximized")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
Expand All @@ -37,7 +37,6 @@ def chrome_browser_options():
options.add_argument("--disable-plugins")
options.add_argument("--disable-animations")
options.add_argument("--disable-cache")
options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])

prefs = {
"profile.default_content_setting_values.images": 2,
Expand All @@ -57,4 +56,9 @@ def chrome_browser_options():

return options


def init_browser() -> uc.Chrome:
    """Start an undetected-chromedriver Chrome session with the project's options.

    Returns:
        The started ``uc.Chrome`` instance.

    Raises:
        RuntimeError: If building the options or starting Chrome fails. The
            original exception is attached as ``__cause__`` so the underlying
            failure stays visible.
    """
    try:
        options = chrome_browser_options()
        return uc.Chrome(options=options)
    except Exception as e:
        # Chain explicitly so the root cause is reported as the direct cause.
        raise RuntimeError(f"Failed to initialize browser: {str(e)}") from e

0 comments on commit fd26653

Please sign in to comment.