
Avoid SSL certificate error #2

Open
wants to merge 1 commit into base: master
Conversation

ciscogeek
aiohttp.client_exceptions.ClientConnectorCertificateError: Cannot connect to host spa5.scrape.center:443 ssl:True [SSLCertVerificationError: (1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:997)')]

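The failure comes from aiohttp's default TLS certificate verification: spa5.scrape.center serves an expired certificate, so every HTTPS request is rejected before it reaches the API. A minimal sketch of the workaround, assuming the aiohttp client API and using an illustrative fetch_json helper that is not part of this repository, looks like this:

    import asyncio
    import aiohttp

    async def fetch_json(url):
        # ssl=False turns off certificate verification for every request made
        # through this connector; acceptable here only because the target is a
        # known scraping practice site with an expired certificate.
        connector = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=connector) as session:
            async with session.get(url) as response:
                return await response.json()

    data = asyncio.run(fetch_json('https://spa5.scrape.center/api/book/?limit=18&offset=0'))
    print(len(data.get('results', [])))

The full fix in the comment below applies the same connector to the scraper's shared session.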
@bestKeyal
I fixed it, check my code.

Coder: KSM_YBKX

import asyncio
import aiohttp
import logging
import json
from motor.motor_asyncio import AsyncIOMotorClient

BASE_URL = 'https://spa5.scrape.center/api/book/?limit=18&offset={offset}'
DETAIL_URL = 'https://spa5.scrape.center/api/book/{id}'

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s: %(message)s")

PAGE_SIZE = 18
PAGE_NUMBER = 100
CONCURRENCY = 5

semaphore = asyncio.Semaphore(CONCURRENCY)
session = None

MONGO_CONNECTION_STRING = 'mongodb://localhost:27017'
MONGO_DB_NAME = 'books'
MONGO_COLLECTION_NAME = 'books'

client = AsyncIOMotorClient(MONGO_CONNECTION_STRING)
db = client[MONGO_DB_NAME]
collection = db[MONGO_COLLECTION_NAME]


async def scrape_api(url):
    # The semaphore caps the number of requests in flight at CONCURRENCY.
    async with semaphore:
        try:
            logging.info("scraping %s", url)
            async with session.get(url) as response:
                return await response.json()
        except aiohttp.ClientError:
            logging.error("error occurred while scraping %s", url, exc_info=True)


async def save_data(data):
    logging.info("saving data %s", data)
    if data:
        # Upsert keyed on the book id so reruns update rather than duplicate documents.
        return await collection.update_one(
            {'id': data.get('id')},
            {'$set': data},
            upsert=True)


async def scrape_detail(id):
    url = DETAIL_URL.format(id=id)
    data = await scrape_api(url)
    await save_data(data)


async def scrape_index(page):
    url = BASE_URL.format(offset=PAGE_SIZE * (page - 1))
    return await scrape_api(url)


async def main():
    global session
    # ssl=False disables TLS certificate verification, which avoids the
    # "certificate has expired" error raised for spa5.scrape.center.
    conn = aiohttp.TCPConnector(ssl=False)
    session = aiohttp.ClientSession(connector=conn)
    scrape_index_tasks = [asyncio.ensure_future(scrape_index(page)) for page in range(1, PAGE_NUMBER + 1)]
    result = await asyncio.gather(*scrape_index_tasks)
    logging.info("result %s", json.dumps(result, ensure_ascii=False, indent=2))
    ids = []
    for index_data in result:
        if not index_data:
            continue
        for item in index_data.get('results'):
            ids.append(item.get('id'))
    scrape_id_tasks = [asyncio.ensure_future(scrape_detail(id)) for id in ids]
    await asyncio.wait(scrape_id_tasks)
    await session.close()


asyncio.get_event_loop().run_until_complete(main())
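
A note on the design choice: ssl=False on the TCPConnector disables certificate verification for every request made through the shared session. If you prefer to keep the setting explicit, aiohttp also accepts the same argument per request (for example session.get(url, ssl=False)) or a custom ssl.SSLContext; a sketch of the context-based variant, which is not part of this PR, would be:

    import ssl
    import aiohttp

    # Build a context that skips hostname and certificate checks, then hand it
    # to the connector instead of the plain ssl=False flag.
    context = ssl.create_default_context()
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE
    connector = aiohttp.TCPConnector(ssl=context)

Either way verification is skipped, so this should only be used against hosts you already trust.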
