Skip to content

Commit

Permalink
handle 404
Browse files: browse the repository at this point in the history
  • Loading branch information
yzqzss committed Jun 14, 2024
1 parent 975061b commit 5fc14dd
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
4 changes: 2 additions & 2 deletions ChinaXivXiv/mongo_ops.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -53,8 +53,8 @@ async def claim_task(queue: motor.motor_asyncio.AsyncIOMotorCollection,
)
return Task(**TASK) if TASK else None

async def update_task(queue: motor.motor_asyncio.AsyncIOMotorCollection, TASK: Task, status: str):
assert status in Status.__dict__.values()
async def update_task(queue: motor.motor_asyncio.AsyncIOMotorCollection, TASK: Task, status: str|int):
# assert status in Status.__dict__.values()
update = {"$set": {
"status": status,
}}
Expand Down
4 changes: 4 additions & 0 deletions ChinaXivXiv/workers/IA_uploader.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -68,6 +68,10 @@ async def IA_upload_worker(client: httpx.AsyncClient, collection: motor.motor_as
'Connection': 'close', # 对面服务器有点奇葩,HEAD 不会关闭连接……
}
r_html = await client.get(chinaxiv_permanent_with_version_url, headers=headers, follow_redirects=False)
if r_html.status_code == 404:
print(f"404, skipping {chinaxiv_permanent_with_version_url}")
await update_task(collection, TASK, status=404)
continue
assert r_html.status_code == 200

html_metadata = get_chinaxivhtmlmetadata_from_html(html=r_html.content, url=chinaxiv_permanent_with_version_url)
Expand Down

0 comments on commit 5fc14dd

Please sign in to comment.