diff --git a/ChinaXivXiv/mongo_ops.py b/ChinaXivXiv/mongo_ops.py index d02c033..6dd1c44 100644 --- a/ChinaXivXiv/mongo_ops.py +++ b/ChinaXivXiv/mongo_ops.py @@ -53,8 +53,8 @@ async def claim_task(queue: motor.motor_asyncio.AsyncIOMotorCollection, ) return Task(**TASK) if TASK else None -async def update_task(queue: motor.motor_asyncio.AsyncIOMotorCollection, TASK: Task, status: str): - assert status in Status.__dict__.values() +async def update_task(queue: motor.motor_asyncio.AsyncIOMotorCollection, TASK: Task, status: str|int): + # assert status in Status.__dict__.values() update = {"$set": { "status": status, }} diff --git a/ChinaXivXiv/workers/IA_uploader.py b/ChinaXivXiv/workers/IA_uploader.py index 3349011..4cc2d5c 100644 --- a/ChinaXivXiv/workers/IA_uploader.py +++ b/ChinaXivXiv/workers/IA_uploader.py @@ -68,6 +68,10 @@ async def IA_upload_worker(client: httpx.AsyncClient, collection: motor.motor_as 'Connection': 'close', # 对面服务器有点奇葩,HEAD 不会关闭连接…… } r_html = await client.get(chinaxiv_permanent_with_version_url, headers=headers, follow_redirects=False) + if r_html.status_code == 404: + print(f"404, skipping {chinaxiv_permanent_with_version_url}") + await update_task(collection, TASK, status=404) + continue assert r_html.status_code == 200 html_metadata = get_chinaxivhtmlmetadata_from_html(html=r_html.content, url=chinaxiv_permanent_with_version_url)