-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
38 lines (31 loc) · 1.22 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import requests
from concurrent.futures import ThreadPoolExecutor
from bs4 import BeautifulSoup
# Define a function to check website status and homepage content
def check_website(url, proxies=None):
    """Fetch *url* and print what was learned about it.

    Prints a line when the response status is 304, a line when the page
    contains an ``<h2>`` whose text is exactly '主页', and a line when the
    request fails. Nothing is returned.

    Args:
        url: Address to request.
        proxies: Optional ``requests``-style proxy mapping (scheme -> proxy
            URL). ``None`` keeps the default behaviour of ``requests``.
    """
    try:
        response = requests.get(url, timeout=5, proxies=proxies)
        # Raises HTTPError (a RequestException) for 4xx/5xx responses only,
        # so 3xx responses fall through to the checks below.
        response.raise_for_status()
        # NOTE(review): 304 is "Not Modified", not a redirect, and requests
        # follows real redirects automatically (they end up in
        # response.history) -- confirm what this check is meant to report.
        if response.status_code == 304:
            print(f"{url} has a 304 redirect.")
        # Parse the raw bytes rather than response.text: when the server
        # sends no charset, requests decodes text as ISO-8859-1, which
        # garbles Chinese pages and makes the '主页' match fail. A charset
        # declared in the header is still honoured via from_encoding;
        # otherwise Beautiful Soup detects it from the document itself.
        content_type = response.headers.get('Content-Type', '')
        declared_encoding = (
            response.encoding if 'charset' in content_type.lower() else None
        )
        soup = BeautifulSoup(
            response.content, 'html.parser', from_encoding=declared_encoding
        )
        # `string=` is the current name of the deprecated `text=` argument.
        if soup.find('h2', string='主页'):
            print(f"{url} is a homepage.")
    except requests.exceptions.RequestException as e:
        print(f"{url} is not reachable: {e}")
# Load URLs from a text file
def load_urls_from_file(file_path):
    """Return the URLs listed in a text file, one per line.

    Leading and trailing whitespace is removed from every line, and lines
    that are empty after stripping are skipped, so spacer lines or a
    trailing newline never yield an empty URL.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list of non-empty URL strings in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # 'utf-8-sig' reads plain UTF-8 and also drops a leading BOM, which
    # would otherwise be glued to the first URL.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        lines = f.read().splitlines()
    stripped = (line.strip() for line in lines)
    return [url for url in stripped if url]
if __name__ == "__main__":
    # Replace 'urls.txt' with the path to your input text file containing URLs
    urls = load_urls_from_file('urls.txt')
    # Set proxies if needed.
    # NOTE: this mapping is a template with placeholder values; nothing
    # below hands it to the workers, so requests are sent without it.
    proxies = {
        'http': 'http://user:password@proxy_host:proxy_port',
        'https': 'https://user:password@proxy_host:proxy_port',
    }
    # Use ThreadPoolExecutor for high concurrency (the work is I/O-bound).
    with ThreadPoolExecutor(max_workers=20) as executor:
        # Submit every URL up front and keep the futures: an exception
        # raised inside a worker is stored on its future and would go
        # unnoticed if the result were never requested.
        pending = [(url, executor.submit(check_website, url)) for url in urls]
        for url, future in pending:
            try:
                future.result()
            except Exception as e:
                # Top-level boundary: report the failure for this URL and
                # keep going so one bad page does not hide the others.
                print(f"{url} check failed unexpectedly: {e}")