批量查询域名备案情况的Python脚本

对于米商(域名投资者)来说,已备案域名的利润相当可观;个人有时也能捡漏,拿到一些已备案的过期域名。因此,如何批量查询域名的备案情况就很关键了。今天分享一个批量查询域名备案信息的 Python 脚本!

该脚本利用的是 Chinaz(站长之家)的 ICP 备案批量查询页面(icp.chinaz.com):每次提交 20 个域名,再从返回的 HTML 结果表格中解析出备案信息并写入 CSV 文件。多的就不说了,直接上脚本:

import csv
import random
import re
import time

import requests
import xlrd
from lxml import etree
 domain_list = []
 workbook = xlrd.open_workbook('未查询域名.xls')
 sheet0 = workbook.sheet_by_index(0)
 cols = sheet0.col_slice(0,1)
 for col in cols:
 domain_list.append(col.value)
 with open('未查询域名.txt',encoding='utf-8',newline='')as fk:
     for i in fk:
         domain = re.sub(r'\r\n$','',i)
         domain_list.append(domain)
 user_Agent_list = [
     "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36",
     "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
     "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
     "Opera/9.80 (Windows NT 10.0; U; zh-cn) Presto/2.9.168 Version/11.50",
     "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36"
 ]
 a = 0
 url = 'http://icp.chinaz.com/searchs'
 with open('test1.csv','a',newline='')as fk:
     writer = csv.writer(fk)
     csv_header = ['域名','主办单位名称','单位性质','网站备案/许可证号','网站名称','审核时间']
     writer.writerow(csv_header)
     for num in range(len(domain_list)//20-a//20+1):
         local_time = int(time.time())
         Cookie = 'UM_distinctid=176dabed56b80a-080978cc77e73-376b4502-1fa400-176dabed56c952; __guid=31546918.3636803744203470000.1610026817897.5715; __gads=ID=2c6886bd82b7280d-2280524a92c500e4:T=1610026898:RT=1610026898:S=ALNI_MbyH5W7HulKDaCwCpZdoBvNfCv5fg; toolbox_urls=www.ccbechina.cn|www.apple.iducs.cn; qHistory=aHR0cDovL3dob2lzLmNoaW5hei5jb20vK1dob2lz5p+l6K+ifGh0dHA6Ly90b29sLmNoaW5hei5jb21f56uZ6ZW/5bel5YW3fGh0dHA6Ly9yYW5rLmNoaW5hei5jb20vcmFua2FsbC9f5p2D6YeN57u85ZCI5p+l6K+ifGh0dHA6Ly9zZW8uY2hpbmF6LmNvbV9TRU/nu7zlkIjmn6Xor6J8aHR0cDovL3Rvb2wuY2hpbmF6LmNvbS9kbnMvX0Ruc+afpeivog==; Hm_lvt_aecc9715b0f5d5f7f34fba48a3c511d6=1614822213,1614822577,1614822679,1614823108; CNZZDATA5082706=cnzz_eid%3D817694234-1610023182-%26ntime%3D1615443601; Hm_lvt_ca96c3507ee04e182fb6d097cb2a1a4c=1614158602,1614757255,1615271947,1615445490; .AspNetCore.Antiforgery.-Z5WMyCX4K0=CfDJ8GYV1qq4FPhNvMPl1WmHHp4TJ3UkqvXhcEpr97APD9DsO6WWhvHhS1Ur7lynrac2voNvP_6CKjUNJ7GfxR0Y8Hvzh5CzvnqGj1zDfZqI-uOal1z6njEfqbHpCtZTRIzvCbnZaH2ylkWKvQ8HYwdptNY; bbsmax_user=a3edcb67-4488-4058-ac01-ec29ef25c2d3; avatarId=14f364b2-af95-4174-a58f-b95415e9ad1c-; .AspNetCore.Session=CfDJ8GYV1qq4FPhNvMPl1WmHHp7xuHiscEHg0wvC5lfyLapGtl8WJLhWLk%2BrvYNFAM9CCKxQ6RnvIiEgR86UrCH%2FavX838WgaU%2BU%2FbXLz305a97b8qWufBUMzpsSVhMKMMBzfgT%2F9LSOKw1wTwgn9ND7ySHWDWbXg%2BXyPKaPVWG7dFdm; monitor_count=4; Hm_lpvt_ca96c3507ee04e182fb6d097cb2a1a4c={0}'.format(local_time)
         headers = {
             'User-Agent': random.choice(user_Agent_list),
             'Cookie':Cookie,
             'Host':'icp.chinaz.com',
             'Referer':'http://icp.chinaz.com/web'
         }
         form_data = {
             'hosts':','.join(domain_list[a:a+20])
         }
     res = requests.post(url,headers=headers,data=form_data)     # print(res.content.decode('utf-8'))     html = etree.HTML(res.content.decode('utf-8'))     trs = html.xpath('//tbody[@id="result_table"]/tr')     for tr in trs:         domain = tr.xpath('./td[1]//text()')[0]         organizer = tr.xpath('./td[2]//text()')[0]         unit_nature = tr.xpath('./td[3]//text()')[0]         passcord = tr.xpath('./td[4]//text()')[0]         website_name = tr.xpath('./td[5]//text()')[0]         audit_time = tr.xpath('./td[8]//text()')[0]         csv_content = [domain,organizer,unit_nature,passcord,website_name,audit_time]         print(csv_content)         writer.writerow(csv_content)     a += 20     time.sleep(1)     print('现在是第%s个'% a)     # print(form_data)