Python网络爬虫:基于Selenium爬取斗鱼直播信息

  from selenium import webdriver

  from time import sleep

  import json

  #创建一个类

  class Douyu():
      """Scrape the room list of Douyu's "all" directory page with Selenium.

      Typical use is ``Douyu().run()``: it asks for a page count, opens
      Chrome, and for each page extracts one dict per listed room and writes
      the dicts to ``./douyu.txt``.
      """

      def __init__(self, wait_seconds=2):
          """Store the crawl settings; the browser itself is opened by run().

          Args:
              wait_seconds: Fixed delay (seconds) parse() waits before reading
                  the page, so dynamically loaded rooms have time to appear.
          """
          self.url = 'https://www.douyu.com/directory/all'
          self.wait_seconds = wait_seconds
          # WebDriver handle; assigned in run() (or by the caller) before
          # parse() is used.
          self.bro = None

      # Data-parsing function
      def parse(self):
          """Return one dict per room on the page currently shown in self.bro.

          Returns:
              A list of dicts with the keys 'title', 'name', 'art_type' and
              'hot', each holding the text of the matching element.
          """
          # Forced wait so the page data can finish loading.
          sleep(self.wait_seconds)

          li_list = self.bro.find_elements_by_xpath('//*[@id="listAll"]/section[2]/div[2]/ul/li')

          data_list = []
          for li in li_list:
              # The XPaths start with './' so they are resolved relative to
              # the current <li>, not the whole document.
              dic_data = {
                  'title': li.find_element_by_xpath('./div/a/div[2]/div[1]/h3').text,
                  'name': li.find_element_by_xpath('./div/a/div[2]/div[2]/h2/div').text,
                  'art_type': li.find_element_by_xpath('./div/a/div[2]/div[1]/span').text,
                  'hot': li.find_element_by_xpath('./div/a/div[2]/div[2]/span').text,
              }
              data_list.append(dic_data)
          return data_list

      # Data-saving function
      def save_data(self, data_list, i):
          """Write one page of results to ./douyu.txt, one record per line.

          Args:
              data_list: Records to write; each is stored as ``str(record)``.
              i: 1-based page number. Page 1 starts a fresh file, later pages
                  are appended so earlier pages are not overwritten.
          """
          mode = 'w' if i == 1 else 'a'
          # Saved as a txt file in the current directory.
          with open('./douyu.txt', mode, encoding='utf-8') as fp:
              for data in data_list:
                  fp.write(str(data) + '\n')
          print("第%d页保存完成!" % i)
          # A JSON dump is an alternative storage format: write
          # json.dumps(data_list, ensure_ascii=False) to './douyu.json'
          # (ensure_ascii=False keeps the Chinese text readable).

      # Main function
      def run(self):
          """Prompt for a page count, then crawl and save that many pages.

          A negative count is converted to its absolute value. Input that is
          not an integer prints an error message and returns without opening
          a browser. The crawl stops early if the "next page" button cannot
          be found or clicked. The browser is always closed on exit.
          """
          try:
              page_num = abs(int(input("请输入你要爬取的页数:")))
          except ValueError:
              # int() rejects non-numeric text; report it instead of crashing.
              print("输入格式错误!")
              return

          # executable_path may be omitted if chromedriver.exe is on PATH.
          self.bro = webdriver.Chrome(executable_path='/可执行文件/chromedriver.exe')
          try:
              self.bro.get(self.url)
              for i in range(1, page_num + 1):
                  data_list = self.parse()
                  self.save_data(data_list, i)
                  if i == page_num:
                      # Last requested page: no need to navigate further.
                      break
                  try:
                      # Locate the button containing the "next page" text and click it.
                      button = self.bro.find_element_by_xpath('//span[contains(text(),"下一页")]')
                      button.click()
                  except Exception:
                      # Best effort: no usable "next" button means no more pages.
                      break
          finally:
              self.bro.quit()

  if __name__ == '__main__':
      # Script entry point: build the scraper and start the interactive crawl.
      douyu = Douyu()
      douyu.run()