import time

import requests
from bs4 import BeautifulSoup


def getListUrl(province):
    # province_list (province -> URL mapping) and getRandomUA() (random User-Agent
    # header) are assumed to be defined earlier in the script
    r = requests.get(province_list[province], headers=getRandomUA())
    dom = BeautifulSoup(r.text, "html.parser")
    table = dom.find("table")
    rows = table.find("tbody").find_all("tr")
    # The first row is the table header
    header = [th.text for th in rows[0].find_all("td")]
    # The remaining rows hold the table data
    data = [[td.text for td in row.find_all("td")] for row in rows[1:]]

    # Save as a CSV file
    with open(f"{province}.csv", "w", encoding="utf-8") as f:
        f.write(",".join(header) + "\n")
        for row in data:
            f.write(",".join(row) + "\n")
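# Note: joining cells with "," breaks if a cell itself contains a comma or newline.
# A minimal alternative sketch using the standard-library csv module, which quotes
# such fields automatically. save_csv is an illustrative helper (not part of the
# original script) that takes the same header/data lists built above.
import csv


def save_csv(province, header, data):
    # newline="" is required so csv.writer controls line endings itself
    with open(f"{province}.csv", "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(data)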
def main():
    # Crawl each province in turn, pausing briefly between requests
    for province in province_list:
        getListUrl(province)
        time.sleep(1)


if __name__ == "__main__":
    main()