
Avoid SSL certificate error #2

Open
wants to merge 1 commit into base: master
Conversation

ciscogeek
aiohttp.client_exceptions.ClientConnectorCertificateError: Cannot connect to host spa5.scrape.center:443 ssl:True [SSLCertVerificationError: (1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:997)')]

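The failure comes from aiohttp's default TLS certificate verification: spa5.scrape.center serves an expired certificate, so every HTTPS request is rejected before it reaches the API. A minimal sketch of the workaround, assuming the aiohttp client API and using an illustrative fetch_json helper that is not part of this repository, looks like this:

    import asyncio
    import aiohttp

    async def fetch_json(url):
        # ssl=False turns off certificate verification for every request made
        # through this connector; acceptable here only because the target is a
        # known scraping practice site with an expired certificate.
        connector = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=connector) as session:
            async with session.get(url) as response:
                return await response.json()

    data = asyncio.run(fetch_json('https://spa5.scrape.center/api/book/?limit=18&offset=0'))
    print(len(data.get('results', [])))

The full fix in the comment below applies the same connector to the scraper's shared session.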
@bestKeyal
I fixed it, check my code.

Coder: KSM_YBKX

import asyncio
import aiohttp
import logging
import json
from motor.motor_asyncio import AsyncIOMotorClient

BASE_URL = 'https://spa5.scrape.center/api/book/?limit=18&offset={offset}'
DETAIL_URL = 'https://spa5.scrape.center/api/book/{id}'

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s: %(message)s")

PAGE_SIZE = 18
PAGE_NUMBER = 100
CONCURRENCY = 5

semaphore = asyncio.Semaphore(CONCURRENCY)
session = None

MONGO_CONNECTION_STRING = 'mongodb://localhost:27017'
MONGO_DB_NAME = 'books'
MONGO_COLLECTION_NAME = 'books'

client = AsyncIOMotorClient(MONGO_CONNECTION_STRING)
db = client[MONGO_DB_NAME]
collection = db[MONGO_COLLECTION_NAME]


async def scrape_api(url):
    # The semaphore caps the number of requests in flight at CONCURRENCY.
    async with semaphore:
        try:
            logging.info("scraping %s", url)
            async with session.get(url) as response:
                return await response.json()
        except aiohttp.ClientError:
            logging.error("error occurred while scraping %s", url, exc_info=True)


async def save_data(data):
    logging.info("saving data %s", data)
    if data:
        # Upsert keyed on the book id so reruns update rather than duplicate documents.
        return await collection.update_one(
            {'id': data.get('id')},
            {'$set': data},
            upsert=True)


async def scrape_detail(id):
    url = DETAIL_URL.format(id=id)
    data = await scrape_api(url)
    await save_data(data)


async def scrape_index(page):
    url = BASE_URL.format(offset=PAGE_SIZE * (page - 1))
    return await scrape_api(url)


async def main():
    global session
    # ssl=False disables TLS certificate verification, which avoids the
    # "certificate has expired" error raised for spa5.scrape.center.
    conn = aiohttp.TCPConnector(ssl=False)
    session = aiohttp.ClientSession(connector=conn)
    scrape_index_tasks = [asyncio.ensure_future(scrape_index(page)) for page in range(1, PAGE_NUMBER + 1)]
    result = await asyncio.gather(*scrape_index_tasks)
    logging.info("result %s", json.dumps(result, ensure_ascii=False, indent=2))
    ids = []
    for index_data in result:
        if not index_data:
            continue
        for item in index_data.get('results'):
            ids.append(item.get('id'))
    scrape_id_tasks = [asyncio.ensure_future(scrape_detail(id)) for id in ids]
    await asyncio.wait(scrape_id_tasks)
    await session.close()


asyncio.get_event_loop().run_until_complete(main())
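
A note on the design choice: ssl=False on the TCPConnector disables certificate verification for every request made through the shared session. If you prefer to keep the setting explicit, aiohttp also accepts the same argument per request (for example session.get(url, ssl=False)) or a custom ssl.SSLContext; a sketch of the context-based variant, which is not part of this PR, would be:

    import ssl
    import aiohttp

    # Build a context that skips hostname and certificate checks, then hand it
    # to the connector instead of the plain ssl=False flag.
    context = ssl.create_default_context()
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE
    connector = aiohttp.TCPConnector(ssl=context)

Either way verification is skipped, so this should only be used against hosts you already trust.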
