有趣的python爬虫项目

内容摘要
这篇文章详细介绍了一个有趣的Python爬虫项目,具有一定的参考价值。

对这个有趣的Python爬虫项目感兴趣的小伙伴,下面一起跟随php教程的小编罗X来看看吧。
文章正文

这篇文章详细介绍了一个有趣的Python爬虫项目,具有一定的参考价值。

对这个有趣的Python爬虫项目感兴趣的小伙伴,下面一起跟随php教程的小编罗X来看看吧。
from tkinter import *
import time
import requests
from bs4 import BeautifulSoup
import bs4
import random
import re 
def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded response text, or an empty string when the request
        fails or the server answers with an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        # Unlike getHTMLText2, the encoding reported by requests is kept as-is.
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures are treated as "no content"; anything
        # else (e.g. KeyboardInterrupt) is allowed to propagate.
        print("faile")
        return ""
 
def fillUnivList(ulist, html):
    """Collect the first three link texts of every ``<article>`` in ``html``.

    Args:
        ulist: List that receives one ``[text0, text1, text2]`` entry per
            article; it is extended in place.
        html: HTML source of a listing page.

    Articles containing fewer than three ``<a>`` tags are skipped instead of
    raising ``IndexError``.
    """
    soup = BeautifulSoup(html, "html.parser")
    for article in soup.find_all('article'):
        if not isinstance(article, bs4.element.Tag):
            continue
        links = article('a')
        if len(links) < 3:
            # Not enough links to build a full entry; ignore this article.
            continue
        ulist.append([link.string for link in links[:3]])
def printUnivList(ulist,k):
    """Return the first field of the ``k``-th entry of ``ulist``."""
    entry = ulist[k]
    first_field = entry[0]
    return first_field
    
def getduanzi():
    """Return the first link text of a random article on a random listing page.

    A page number between 1 and 49 is chosen at random, the page is fetched
    and parsed, and one of the scraped entries is picked.

    Returns:
        The first field of the chosen entry, or a fallback message when the
        page could not be fetched or contained no usable entries.
    """
    base_url = 'http://duanziwang.com/category/%E4%B8%80%E5%8F%A5%E8%AF%9D%E6%AE%B5%E5%AD%90/'
    page = random.randint(1, 49)
    entries = []
    fillUnivList(entries, getHTMLText(base_url + str(page) + '/'))
    if not entries:
        # Fetch failed or the page layout changed: report it instead of
        # indexing into an empty list.
        return "抱歉,没有找到你想要的。"
    # Pick among the entries actually scraped rather than assuming ten.
    return printUnivList(entries, random.randrange(len(entries)))
def fill2(ulist,html):
    """Append the plain text of every matching post in ``html`` to ``ulist``.

    Args:
        ulist: List that receives one cleaned-up string per post; it is
            extended in place.
        html: HTML source of a listing page.

    For each ``div`` with class ``article block untagged mb15 typs_hot`` the
    ``<span>`` elements inside its ``div.content`` are stringified, then HTML
    tags, square brackets and newlines are stripped. Posts without a
    ``div.content`` child are skipped.
    """
    soup = BeautifulSoup(html, "html.parser")
    # Compiled once: the patterns do not depend on the post being processed.
    tag_pattern = re.compile('<[^>]*>')
    noise_pattern = re.compile('\\[|\\]|\\n')
    for post in soup.find_all('div','article block untagged mb15 typs_hot'):
        if not isinstance(post, bs4.element.Tag):
            continue
        content = post.find('div','content')
        if content is None:
            # No content container in this post; nothing to extract.
            continue
        spans = content('span')
        # str() of the result list yields "[<span>..</span>, ...]"; the two
        # substitutions remove the markup and the list brackets/newlines.
        text = tag_pattern.sub('', str(spans))
        ulist.append(noise_pattern.sub('', text))
def getjoke():
    """Return the text of a random post from a random listing page.

    A page number between 1 and 13 is chosen at random, the page is fetched
    and parsed with ``fill2``, and one of the scraped posts is picked.

    Returns:
        The chosen post text, or a fallback message when the page could not
        be fetched or contained no matching posts.
    """
    base_url = 'https://www.qiushibaike.com/text/page/'
    page = random.randint(1, 13)
    jokes = []
    fill2(jokes, getHTMLText(base_url + str(page) + '/'))
    if not jokes:
        # Avoid indexing into an empty list when scraping produced nothing.
        return "抱歉,没有找到你想要的。"
    # Choose among the posts actually found instead of assuming seventeen.
    return str(random.choice(jokes))
def getHTMLText2(url):
    """Download ``url`` and return its text, decoded with the detected encoding.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response text decoded using ``apparent_encoding``, or an empty
        string when the request fails or returns an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        # Re-detect the encoding from the body instead of trusting headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request failures map to ""; other exceptions propagate.
        return ""
def geturl(url,text):
    """Return the ``href`` of the first link on ``url`` whose label occurs in ``text``.

    Args:
        url: Page whose ``<a>`` tags are scanned.
        text: User input that is searched for each link label.

    Returns:
        The matching link's ``href`` value, or ``''`` when no link matches.
    """
    html=getHTMLText2(url)
    soup = BeautifulSoup(html, "html.parser")
    for link in soup.find_all('a'):
        label = link.string
        href = link.attrs.get('href')
        # Skip anchors without a single text label (``.string`` is None) or
        # without a target; they can neither match meaningfully nor be followed.
        if not label or href is None:
            continue
        if str(label) in text:
            return href
    return ''
def getmoreurl(url):
    """Return the link of a randomly chosen article title listed on ``url``.

    Args:
        url: Page containing ``<li class="articleTitle fl">`` items.

    Returns:
        The ``href`` of the first ``<a>`` inside a randomly selected item, or
        ``''`` when the page has no such items or the chosen item has no
        usable link.
    """
    html=getHTMLText2(url)
    soup = BeautifulSoup(html, "html.parser")
    items = soup.find_all('li',attrs={'class':'articleTitle fl'})
    if not items:
        return ''
    try:
        return random.choice(items)('a')[0].attrs['href']
    except (IndexError, KeyError):
        # The chosen item has no <a> tag, or the tag has no href attribute.
        return ''
def getsen(url):
    """Build a sentence from the Chinese text of the first ``<p>`` on ``url``.

    The first paragraph is stringified, one of its lines starting at ``<p>``
    is picked at random, and all runs of Chinese characters on that line are
    joined with commas and terminated with a full stop.

    Args:
        url: Page to fetch and parse.

    Returns:
        The assembled sentence, or an apology message when the page has no
        paragraph or the chosen line contains no Chinese text.
    """
    html=getHTMLText2(url)
    soup = BeautifulSoup(html, "html.parser")
    first_paragraph = soup.find('p')
    if first_paragraph is None:
        return "抱歉,没找到你想要的"
    candidates = re.findall('<p>.*', str(first_paragraph))
    if not candidates:
        return "抱歉,没找到你想要的"
    fragments = re.findall('[\u4e00-\u9fa5]+', random.choice(candidates))
    if not fragments:
        return "不好意思,出了点小问题,请重试!"
    # Join every fragment; the previous slice-based loop skipped the
    # second-to-last one.
    return ','.join(fragments) + '。'
def getsentance(text):
    """Look up a sentence on siandian.com for a tag mentioned in ``text``.

    The tag index page is searched for a link whose label occurs in ``text``;
    a random article under that tag is then fetched and turned into a
    sentence. A fixed apology string is returned when either lookup yields
    no link.
    """
    not_found = "抱歉,没有找到你想要的。"
    site_root = 'http://www.siandian.com'

    tag_path = geturl('http://www.siandian.com/tags.html', text)
    if tag_path == '':
        return not_found

    article_path = getmoreurl(site_root + tag_path)
    if article_path == '':
        return not_found

    return getsen(site_root + article_path)
def xiaotang(s):
    """Pick a reply for the user message ``s`` based on the keywords it contains.

    Keywords are tested in a fixed order; the first match decides the reply:
    a short joke, a longer joke, a looked-up sentence, a rebuke for rude
    words, a repeated retort, or a default "not understood" answer.
    """
    if '段子' in s:
        return getduanzi()
    if '笑话' in s:
        return getjoke()
    if '句' in s or '话' in s:
        return getsentance(s)
    if any(word in s for word in ('傻子', '草', '日')):
        return '这是脏话不可以说哦'
    if any(word in s for word in ('二', '垃圾', '傻逼')):
        # Ten identical lines of the same retort.
        line = '你是魔鬼吗?' + ' !' + '\n'
        return line * 10
    return "我好像不明白\n"
def main():
    """Build the chat window and run the Tk event loop.

    The window shows two side-by-side text panes (the assistant's replies
    and the user's messages), an input box, send/cancel buttons, a shared
    scrollbar and a side image. Each sent message is answered with the
    result of ``xiaotang``.
    """
    def stamp(speaker):
        # Header placed above a message: speaker label, timestamp, newline.
        return speaker + time.strftime("%Y-%m-%d %H:%M:%S",
                                       time.localtime()) + '\n '

    def line_count(widget):
        # Text indices have the form "line.column"; take the line of END.
        return int(widget.index(END).split(".")[0])

    def pad(target, reference, scroll=False):
        # Append blank lines to ``target`` based on how far it is behind
        # ``reference`` so the two panes stay vertically aligned.
        missing = line_count(reference) - line_count(target) + 1
        for _ in range(missing):
            target.insert(END, '\n')
            if scroll:
                target.see(END)

    def start():
        # Greeting shown in the assistant pane when the window opens.
        txtget.insert(END, stamp('小糖:'), 'redcolor')
        txtget.insert(END, '你好,请问有什么可以帮忙的?')

    def sendMsg():
        # Move the typed text into the user pane, then append the reply.
        t = txtMsg.get('0.0', END)
        txtMsg.delete('0.0', END)

        pad(txtMsgList, txtget)
        txtMsgList.insert(END, stamp('我:'), 'greencolor')
        txtMsgList.insert(END, t)
        txtMsgList.see(END)
        pad(txtget, txtMsgList, scroll=True)

        pad(txtget, txtMsgList)
        txtget.insert(END, stamp('小糖:'), 'redcolor')
        txtget.insert(END, xiaotang(t))
        txtget.see(END)
        pad(txtMsgList, txtget, scroll=True)

    def cancelMsg():
        # Discard whatever is currently typed in the input box.
        txtMsg.delete('0.0', END)

    def sendMsgEvent(event):
        sendMsg()
        # Stop Tk's default <Return> handling, which would otherwise insert
        # a newline into the input box that was just cleared.
        return "break"

    def scroll_both(*args):
        # One scrollbar drives both panes so they move together.
        txtget.yview(*args)
        txtMsgList.yview(*args)

    # Create the window.
    root = Tk()
    root.title('小糖助手')

    # Create the frame containers.
    frmLT = Frame(width=500, height=320, bg='#F19C8B')
    frmLC = Frame(width=500, height=150, bg='#F19C8B')
    frmLB = Frame(width=500, height=30, bg='white')
    frmRT = Frame(width=200, height=500, bg='#F19C8B')

    # Create the widgets.
    txtMsgList = Text(frmLT, width=40, bd=0)
    txtMsgList.tag_config('greencolor', foreground='#008C00')
    txtMsg = Text(frmLC)
    txtget = Text(frmLT, width=40, bd=0)
    txtget.tag_config('redcolor', foreground='#DC143C')
    start()
    txtMsg.bind('<Return>', sendMsgEvent)
    btnSend = Button(frmLB, text='发 送', width=8, command=sendMsg,
                     bg='#E88384', bd=0)
    btnCancel = Button(frmLB, text='取消', width=8, command=cancelMsg,
                       bg='#F3ADA0', bd=0)
    scollor = Scrollbar(bg='white')
    scollor.config(command=scroll_both)
    txtget.config(yscrollcommand=scollor.set)
    txtMsgList.config(yscrollcommand=scollor.set)

    lblImage = Label(frmRT)
    try:
        imgInfo = PhotoImage(file="aa.png")
    except TclError:
        # The decorative image is optional; run without it if it is missing
        # or unreadable.
        imgInfo = None
    else:
        lblImage.config(image=imgInfo)
        # Keep a reference so the image is not garbage-collected.
        lblImage.image = imgInfo

    # Lay out the window.
    frmLT.grid(row=0, column=0, columnspan=2, padx=0, pady=0)
    frmLC.grid(row=1, column=0, columnspan=2, padx=0, pady=0)
    frmLB.grid(row=2, column=0, columnspan=2, padx=0)
    scollor.grid(row=0, column=2, sticky=N+S)
    frmRT.grid(row=0, column=3, rowspan=3, padx=0, pady=0)
    # Keep the frames at their configured sizes.
    frmLT.grid_propagate(0)
    frmLC.grid_propagate(0)
    frmLB.grid_propagate(0)
    frmRT.grid_propagate(0)

    btnSend.grid(row=2, column=0)
    btnCancel.grid(row=2, column=1)
    lblImage.grid()
    txtget.grid(row=0, column=0)
    txtMsgList.grid(row=0, column=1)
    txtMsg.grid()

    # Main event loop.
    root.mainloop()
 
# Launch the chat window only when this file is run as a script.
if __name__ == '__main__':
    main()

注:关于有趣的Python爬虫项目的内容就先介绍到这里,更多相关文章可以继续留意。

代码注释

作者:喵哥笔记

IDC笔记

学的不仅是技术,更是梦想!