# collect_html.py
# Author (per repository blame view): Xin-Hao Zhu
import requests
import os
import time
from utils import IP2NAME, CLIENT_PORT, CACHE_DIR, CLIENT_PATH, SNAPSHOT_INTERVAL, TOKEN

def _fetch_html(session, ip):
    """Fetch the client page for one host, returning "" on any failure.

    Args:
        session: The ``requests.Session`` used to issue the GET request.
        ip: Host address; interpolated into the URL together with
            ``CLIENT_PORT``, ``CLIENT_PATH`` and ``TOKEN``.

    Returns:
        The response body as text when the server answers with HTTP 200,
        otherwise an empty string (the failure is printed, not raised).
    """
    url = f"http://{ip}:{CLIENT_PORT}{CLIENT_PATH}?token={TOKEN}"
    try:
        resp = session.get(url, timeout=10)
        # Explicit check instead of `assert`: asserts are stripped when
        # Python runs with -O, which would let error pages be cached as if
        # they were valid snapshots. The message also makes the log useful.
        if resp.status_code != 200:
            raise RuntimeError(f"unexpected HTTP status {resp.status_code}")
        return resp.text
    except Exception as e:
        # Deliberately broad: collection is best-effort and one unreachable
        # or misbehaving host must not stop the polling loop.
        print(f"Failed to collect HTML from {ip}: {e}")
        return ""


def collect_html():
    """Poll every host in ``IP2NAME`` forever and cache its HTML on disk.

    Each pass requests the client page of every host and writes the body to
    ``<CACHE_DIR>/<ip>.html``. When a host cannot be fetched, an empty file
    is written for it, replacing the previous snapshot. After each full pass
    the function sleeps for ``SNAPSHOT_INTERVAL`` seconds.

    This function never returns.
    """
    session = requests.Session()
    # Ignore environment-provided settings (e.g. HTTP(S)_PROXY variables) so
    # requests go straight to the listed hosts.
    session.trust_env = False

    while True:
        for ip in IP2NAME:
            text = _fetch_html(session, ip)

            with open(os.path.join(CACHE_DIR, f"{ip}.html"), "w", encoding="utf-8") as f:
                f.write(text)

        time.sleep(SNAPSHOT_INTERVAL)