From f2964d52f2332e89380eb216ef9e299ecd1aecf1 Mon Sep 17 00:00:00 2001
From: andreireporter13
Date: Sat, 28 Sep 2024 12:55:34 +0300
Subject: [PATCH] Ignore hack_api_nemlig.py; remove old Tesla and Sustainalytics scrapers and test_file.py

---
 .gitignore                      |   1 +
 old_sites/tesla_scraper.py      | 103 --------------
 sites/sustainalytics_scraper.py | 240 --------------------------------
 test_file.py                    |  29 ----
 4 files changed, 1 insertion(+), 372 deletions(-)
 delete mode 100644 old_sites/tesla_scraper.py
 delete mode 100644 sites/sustainalytics_scraper.py
 delete mode 100644 test_file.py

diff --git a/.gitignore b/.gitignore
index 327df5c..2782ba8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 /Bash_scrapers
 **/__pycache__
 sites/000_delete_api_data.py
+/hack_api_nemlig.py
diff --git a/old_sites/tesla_scraper.py b/old_sites/tesla_scraper.py
deleted file mode 100644
index 6893deb..0000000
--- a/old_sites/tesla_scraper.py
+++ /dev/null
@@ -1,103 +0,0 @@
-#
-#
-#
-# This is the scraper for Tesla!
-# Link to Tesla ---> https://www.tesla.com/ro_ro/careers
-#
-# https://www.tesla.com/ro_RO/careers/search/job
-# (the job link is this URL plus the title joined with '-', plus the job id)
-#
-# Example - Title - Open Tech Day Bucharest - 27th April 2023
-# ---------> link - https://www.tesla.com/ro_RO/careers/search/job/open-tech-day-bucharest-27th-april-2023-179440
-# ---------> link - https://www.tesla.com/cua-api/apps/careers/state
-#
-from A_OO_get_post_soup_update_dec import DEFAULT_HEADERS, update_peviitor_api
-from L_00_logo import update_logo
-import requests
-#
-import uuid
-
-
-def return_response_from_api(url: str, headers: dict):
-    """
-    Make a GET request to the Tesla API and return the JSON response.
-    """
-
-    response = requests.get(url=url, headers=headers).json()
-
-    return response
-
-
-def return_all_dict_data_jobs():
-    """
-    Return all job listings for Romania from the Tesla careers API.
-    """
-
-    data = return_response_from_api('https://www.tesla.com/cua-api/apps/careers/state', DEFAULT_HEADERS)
-
-    data_lst = []
-    ###
-    try:
-        all_l_nums = data['geo'][1]['sites'][22]['cities']
-        for dl in all_l_nums:
-            data_lst.extend(all_l_nums[dl])
-    except (KeyError, IndexError):  # Romania's index in 'sites' sometimes shifts by one
-        all_l_nums = data['geo'][1]['sites'][23]['cities']
-        for dl in all_l_nums:
-            data_lst.extend(all_l_nums[dl])
-    ###
-
-    lst_with_dict_data = []
-    for listing in data['listings']:
-        cod = listing['l']
-
-        if cod in data_lst:
-            lst_with_dict_data.append(listing)
-
-    return lst_with_dict_data
-
-
-def return_links_with_jobs():
-    """
-    Build the list of job dicts (title + link) for every Romanian listing.
-    """
-    list_dict = return_all_dict_data_jobs()
-
-    list_for_pe_viitor = []
-    for ide in list_dict:
-        link_1 = ide['t'].lower().replace('- ', '').split()
-        link_2 = str(ide['id'])
-
-        title = ide['t']
-        link_final = 'https://www.tesla.com/ro_RO/careers/search/job/' + '-'.join(link_1) + '-' + link_2
-
-        list_for_pe_viitor.append({
-            "id": str(uuid.uuid4()),
-            "job_title": title,
-            "job_link": link_final,
-            "company": "Tesla",
-            "country": "Romania",
-            "city": "Romania"
-        })
-
-    return list_for_pe_viitor
-
-
-# update data on peviitor!
-@update_peviitor_api
-def scrape_and_update_peviitor(company_name, data_list):
-    """
-    Update data on the peviitor API.
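-    The 'update_peviitor_api' decorator performs the actual upload; this function only supplies 'data_list'.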
- """ - - return data_list - - -company_name = 'Tesla' -data_list = return_links_with_jobs() -scrape_and_update_peviitor(company_name, data_list) - -print(update_logo('Tesla', - 'https://autostickere.ro/image/cache/catalog/Stickere/Stickere%20Auto/Stickere%20Marci/Tesla/sticker-tesla-text-1000x1000.jpg' - )) diff --git a/sites/sustainalytics_scraper.py b/sites/sustainalytics_scraper.py deleted file mode 100644 index 1241178..0000000 --- a/sites/sustainalytics_scraper.py +++ /dev/null @@ -1,237 +0,0 @@ -# -# -# Config for Dynamic Post Method -> For Json format! -# -# Company ---> Sustainalytics -# Link ------> https://careers.morningstar.com/sustainalytics/us/en/home -# -# ------ IMPORTANT! ------ -# if you need return soup object: -# you cand import from __utils -> GetHtmlSoup -# if you need return regex object: -# you cand import from __utils -> -# ---> get_data_with_regex(expression: str, object: str) -# -# -from __utils import ( - PostRequestJson, - # - GetRequestJson, # for another locations need to make GetRequestJson - # - get_county, - get_job_type, - Item, - UpdateAPI, - # - GetHeadersDict, - # - counties, -) -# -import re -from typing import Union - - -def make_dict_jobs_dict(title: str, - job_link: str, - county: Union[str, list], - city: Union[str, list]): - ''' - >>> >>> >>> this function make dict for SustainAnalytics API ---> - - params: title: str, job_link: str, county: str, city: str - return: dict[str, str] - ''' - - # get jobs items from response - return Item( - job_title=title, - job_link=job_link, - company='Sustainalytics', - country='Romania', - county=county, - city=city, - remote=['on-site', 'remote'], - ).to_dict() - - -def get_special_keys(): - ''' - >>> >>> >>> Get special keys from MyWorkDayJobs ---> Sustainanalytics <--- - - params: None - return: play_session: str, - wday_vps_cookie: str, - __cflb: str, - wd_browser_id: str, - calypso_csrf_token: str, - ''' - - str_headers = str(GetHeadersDict('https://morningstar.wd5.myworkdayjobs.com/en-US/Sustainalytics/jobs?locations=0e19b52288b5019ff735e44eda00fe40&locations=0e19b52288b501a1c52ad54eda00f640')) - - new_dict = dict(wday_vps_cookie=r'wday_vps_cookie=([^;]+)', - PLAY_SESSION='PLAY_SESSION=([^;]+)', - wd_broser_id='wd-browser-id=([^;]+)', - CALYPSO_CSRF_TOKEN='CALYPSO_CSRF_TOKEN=([^;]+)') - - # catch data from requests and store it in new dict - regex_dict = dict() - for key, value in new_dict.items(): - match = re.search(value, str_headers) - if match: - regex_form = match.group(1) - regex_dict[key] = regex_form - - return regex_dict - - -# all of this keys need to store in one Session # # # SESION KEYS FOR COOKIES # # # -keys_dict_ids = get_special_keys() - - -def get_dynamic_headers(all_jobs: str=None, one_job_info: str=None): - ''' - >>> >>> >>> get dynamic headers to scrape MyWorkDayJobs API ---> - >>> >>> >>> But, params need to be one True and one False, not both True - - params: all_jobs: str=None, one_job_info: str=None - return: url: str, headers: dict, payload: json - ''' - - # check if params not both True - url = None - if all_jobs is not None: - url = f'https://morningstar.wd5.myworkdayjobs.com/wday/cxs/morningstar/Sustainalytics{all_jobs}' - elif one_job_info is not None: - url = f'https://morningstar.wd5.myworkdayjobs.com/wday/cxs/morningstar/Sustainalytics{one_job_info}' - else: - raise ValueError("'all_jobs' and 'one_job_info' can not be True both.") - - headers = { - 'authority': 'morningstar.wd5.myworkdayjobs.com', - 'accept': 'application/json', - 'accept-language': 'en-US', - 
-        'content-type': 'application/json',
-        'cookie': f"PLAY_SESSION={keys_dict_ids.get('PLAY_SESSION')}; wday_vps_cookie={keys_dict_ids.get('wday_vps_cookie')}; timezoneOffset=-120; wd-browser-id={keys_dict_ids.get('wd_browser_id')}; CALYPSO_CSRF_TOKEN={keys_dict_ids.get('CALYPSO_CSRF_TOKEN')};",
-        'origin': 'https://morningstar.wd5.myworkdayjobs.com',
-        'referer': f'https://morningstar.wd5.myworkdayjobs.com/en-US/Sustainalytics{"/jobs?locations=0e19b52288b5019ff735e44eda00fe40&locations=0e19b52288b501a1c52ad54eda00f640" if all_jobs is not None else one_job_info}',
-        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
-        'x-calypso-csrf-token': f"{keys_dict_ids.get('CALYPSO_CSRF_TOKEN')}",
-    }
-
-    json_data = {
-        'appliedFacets': {
-            'locations': [
-                '0e19b52288b5019ff735e44eda00fe40',
-                '0e19b52288b501a1c52ad54eda00f640',
-            ],
-        },
-        'limit': 20,
-        'offset': 0,
-        'searchText': '',
-    }
-
-    #
-    if all_jobs is not None:
-        return url, headers, json_data
-    else:
-        return url, headers
-
-
-def scraper():
-    '''
-    Scrape the job postings from the Sustainalytics Workday API.
-    '''
-
-    url, headers, json_data = get_dynamic_headers(all_jobs='/jobs')  # scrape the full job list
-
-    post_data = PostRequestJson(url=url, custom_headers=headers, data_json=json_data)
-
-    job_list = []
-    for job in post_data.get('jobPostings'):
-
-        location: Union[str, list] = None
-        if (location := job.get('locationsText').lower()) == 'bucharest':
-            location = 'bucuresti'
-        elif '2 locations' == location:
-
-            # # # # # make another request to the job page to scrape the accurate location # # # # # #
-            # headers for the single-job request
-            url_get, headers_get = get_dynamic_headers(one_job_info=job.get('externalPath'))
-            json_get_data = GetRequestJson(url=url_get, custom_headers=headers_get)
-
-            #
-            location = list()
-            loc = None
-            if (loc := json_get_data.get('jobPostingInfo').get('location').lower()) == 'bucharest' or loc == 'timisoara':
-                if loc == 'bucharest':
-                    loc = 'bucuresti'
-
-                # already lowercase; kept as a safety net
-                loc = loc.lower()
-                #
-                location.append(loc)
-
-            # additional locations
-            for job_sec in json_get_data.get('jobPostingInfo').get('additionalLocations'):
-                new_loc = None
-                for search_city in counties:
-                    for v in search_city.values():
-                        for ccity in v:
-                            if re.search(r'\b{}\b'.format(re.escape(ccity.split()[-1].lower())), job_sec.lower()):
-                                new_loc = ccity.lower()
-                                break
-
-                if new_loc:
-                    location.append(new_loc)
-
-        #
-        if isinstance(location, str):
-            #
-            location_finish = get_county(location=location)
-            #
-            job_list.append(make_dict_jobs_dict(title=job.get('title'),
-                                                job_link=f"https://morningstar.wd5.myworkdayjobs.com/en-US/Sustainalytics{job.get('externalPath')}",
-                                                county=location_finish[0] if True in location_finish else None,
-                                                city=location.title()))
-        #
-        elif isinstance(location, list) and len(location) == 1:
-            #
-            location_finish = get_county(location=location[0])
-            #
-            job_list.append(make_dict_jobs_dict(title=job.get('title'),
-                                                job_link=f"https://morningstar.wd5.myworkdayjobs.com/en-US/Sustainalytics{job.get('externalPath')}",
-                                                county=location_finish[0] if True in location_finish else None,
-                                                city=location[0].title()))
-        #
-        elif isinstance(location, list) and len(location) > 1:
-            #
-            location_finish = [get_county(location=city_jud) for city_jud in location]
-            get_locations_with_none = [lf[0] if True in lf else None for lf in location_finish]
-            #
-            job_list.append(make_dict_jobs_dict(title=job.get('title'),
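-                                                # county is the whole list, or None if any city could not be mapped to a county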
job_link=f"https://morningstar.wd5.myworkdayjobs.com/en-US/Sustainalytics{job.get('externalPath')}", - county=None if None in get_locations_with_none else get_locations_with_none, - city=[xx.title() for xx in location])) - # - return job_list - - -def main(): - ''' - ... Main: - ---> call scraper() - ---> update_jobs() and update_logo() - ''' - - company_name = "Sustainalytics" - logo_link = "https://www.sgs.com/-/media/sgscorp/images/infographics-and-charts/sustainalytics-logo.cdn.en-US.1.png" - - jobs = scraper() - - # uncomment if your scraper done - UpdateAPI().update_jobs(company_name, jobs) - UpdateAPI().update_logo(company_name, logo_link) - -if __name__ == '__main__': - main() diff --git a/test_file.py b/test_file.py deleted file mode 100644 index e22db4f..0000000 --- a/test_file.py +++ /dev/null @@ -1,29 +0,0 @@ -# -# -# -#import requests - -# ---> Daily update -# cookies = { -# 'PHPSESSID': 'sxi2PjU4lVYHwwU83juWfnNwS4VMrrHJ', -# '_cfuvid': '6nf6PvOXmaHQJN4so.LtllqhAS37tR9qtdujabj7RxY-1726315836551-0.0.1.1-604800000', -# } - -# headers = { -# 'accept': 'application/json, text/plain, */*', -# 'accept-language': 'en-US,en;q=0.9', -# # 'cookie': 'PHPSESSID=sxi2PjU4lVYHwwU83juWfnNwS4VMrrHJ; _cfuvid=6nf6PvOXmaHQJN4so.LtllqhAS37tR9qtdujabj7RxY-1726315836551-0.0.1.1-604800000', -# 'priority': 'u=1, i', -# 'referer': 'https://skywindgroup.bamboohr.com/careers/', -# 'sec-ch-ua': '"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"', -# 'sec-ch-ua-mobile': '?0', -# 'sec-ch-ua-platform': '"Linux"', -# 'sec-fetch-dest': 'empty', -# 'sec-fetch-mode': 'cors', -# 'sec-fetch-site': 'same-origin', -# 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36', -# } - -# response = requests.get('https://skywindgroup.bamboohr.com/careers/list', headers=headers) - -# print(response.json())