将tkinter中的输入保存到一个变量中,以便后续调用。

13

我目前正在开发一个类似于爬虫的程序:它会访问一个维基百科页面,按照目前的实现,会从该页面中抓取参考文献(引用)里的链接。

我想要一个GUI界面,让用户输入维基百科页面的地址。我希望把用户的输入赋值给selectWikiPage变量,但迄今为止没有成功。

以下是我的当前代码。

import requests
from bs4 import BeautifulSoup
import re
from tkinter import *

#begin tkinter gui
def show_entry_fields():
    """Print the URL currently typed into ``e1``, then clear the field."""
    current_url = e1.get()
    print("Wikipedia URL: %s" % current_url)
    # Remove everything from the first character through END.
    e1.delete(0, END)
# Build the window: a caption and a pre-filled URL field on the first row.
master = Tk()
url_caption = Label(master, text="Wikipedia URL")
url_caption.grid(row=0)

e1 = Entry(master)
e1.grid(row=0, column=1)
e1.insert(10, "http://en.wikipedia.org/wiki/randomness")

# The button's command is master.quit, so a click only stops the event loop.
scrape_button = Button(master, text='Scrape', command=master.quit)
scrape_button.grid(row=3, column=0, sticky=W, pady=4)

master.mainloop()

# Ask for a Wikipedia URL on the console, fetch that page and print every
# link found inside its <ol class="references"> list.
session = requests.Session()
# input() shows its argument as the prompt. Passing print(...) handed it
# print's return value, None, so the message was followed by a stray "None".
selectWikiPage = input("Please enter the Wikipedia page you wish to scrape from: ").strip()
if "wikipedia" in selectWikiPage:
    # Reading a page is a GET, not a POST. The with-block closes the session's
    # connections, and the timeout keeps a dead server from hanging the script.
    with session:
        html = session.get(selectWikiPage, timeout=10)
    bsObj = BeautifulSoup(html.text, "html.parser")

    # Isolate the references section of the page.
    findReferences = bsObj.find('ol', {'class': 'references'})
    # find() returns None when the page has no references list; search the
    # tag directly instead of re-parsing its string form.
    if findReferences is None:
        links = []
    else:
        links = [a["href"] for a in findReferences.find_all("a", href=True)]

    for link in links:
        print("Link: " + link)

else:
    print("Error: Please enter a valid Wikipedia URL")

非常感谢您的帮助。


1
你能否更具体地说明你想要什么,以及会接受什么样的答案? - Reblochon Masque
The code works well, right? - Steffi Keran Rani J
2个回答

8

以下是基于您的代码的一个小例子;它允许使用输入字段来捕获要访问的维基页面的值,并在控制台上打印出来。
然后,您可以使用此URL继续进行抓取。

from tkinter import *

def m_quit():
    """Store the entered page name in ``wiki_url`` and close the window.

    Reads the text of ``e1``, appends it to the module-level ``wiki_url``
    base, prints a short notice and destroys ``master``.
    """
    global wiki_url
    # No trailing '/' is added: it would become part of the page title, so
    # ".../wiki/Randomness/" would point at a different (missing) article.
    # Whitespace around the typed text is dropped for the same reason.
    wiki_url += e1.get().strip()
    print('quitting')
    master.destroy()

# Base address; m_quit() extends it with the text the user typed.
wiki_url = 'http://en.wikipedia.org/wiki/'

# Root window with a caption in the first row.
master = Tk()
caption = Label(master, text="Wikipedia URL")
caption.grid(row=0)

# Input field placed next to the caption.
e1 = Entry(master)
e1.grid(row=0, column=1)

# Clicking the button runs m_quit().
scrape_button = Button(master, text='Scrape', command=m_quit)
scrape_button.grid(row=3, column=0, sticky=W, pady=4)

# Run the event loop; the URL is printed once the loop has ended.
master.mainloop()

print(wiki_url)

2
代码几乎正确。只需要进行一些修改即可。希望这可以帮到你。如果需要进一步的澄清,请留言评论。
    import requests
    from bs4 import BeautifulSoup
    import re
    from tkinter import *


    # you can declare selectWikiPage and master to be global and then avoid passing them around

    # begin tkinter gui
    def show_entry_fields():
        """Echo the text held by ``e1`` to stdout, then empty the widget."""
        typed_text = e1.get()
        print("Wikipedia URL: %s" % typed_text)
        # Delete from index 0 up to END, i.e. the whole content.
        e1.delete(0, END)

    #utility which selects all the text from start to end in widget
    def select_all(event=None):
        """Select the whole text of the widget that received *event*.

        Args:
            event: Event object whose ``widget`` attribute is the target
                widget. May be omitted, in which case nothing is selected.

        Returns:
            The string ``'break'``, which Tkinter interprets as "stop
            processing further bindings for this event".
        """
        # The signature allows a call without an event; there is then no
        # widget to act on, so skip the selection instead of failing on None.
        if event is not None:
            event.widget.select_range(0, 'end')
        return 'break'


    #the code is same, just obtained the URL from the widget using get()

    def _is_wikipedia_url(url):
        """Return True when *url* is an http(s) URL on a wikipedia.org host."""
        # Function-scope import so the file's import block needs no change.
        from urllib.parse import urlsplit

        try:
            parts = urlsplit(url)
            host = (parts.hostname or "").lower()
        except ValueError:
            # urlsplit rejects some malformed inputs (e.g. a broken IPv6 host).
            return False
        if parts.scheme not in ("http", "https"):
            return False
        return host == "wikipedia.org" or host.endswith(".wikipedia.org")


    def custom_scrape(e1, master):
        """Scrape the Wikipedia page named in ``e1`` and print its reference links.

        Args:
            e1: Entry-like widget; its ``get()`` text is used as the page URL.
            master: Root window; ``quit()`` is called on it once scraping ends.

        Prints one ``Link: ...`` line per ``href`` found inside the page's
        ``<ol class="references">`` list, or an error line when the text is
        not a Wikipedia URL or the request fails. ``master.quit()`` is called
        in every case.
        """
        selectWikiPage = e1.get().strip()
        # A bare substring test also accepts the placeholder text
        # "Enter a wikipedia URL", which then fails inside requests; require
        # a real http(s) URL on a Wikipedia host instead.
        if _is_wikipedia_url(selectWikiPage):
            try:
                # Fetching a page is a read, so GET rather than POST. The
                # with-block closes the session's connections afterwards and
                # the timeout keeps the GUI from hanging on a dead server.
                with requests.Session() as session:
                    html = session.get(selectWikiPage, timeout=10)
                    html.raise_for_status()
            except requests.RequestException as exc:
                print("Error: could not fetch %s (%s)" % (selectWikiPage, exc))
            else:
                bsObj = BeautifulSoup(html.text, "html.parser")
                # Isolate the references section of the page.
                findReferences = bsObj.find('ol', {'class': 'references'})
                # find() returns None when the page has no references list;
                # search the tag directly instead of re-parsing its string form.
                if findReferences is not None:
                    for anchor in findReferences.find_all("a", href=True):
                        print("Link: " + anchor["href"])
        else:
            print("Error: Please enter a valid Wikipedia URL")

        master.quit()


    # Root window holding a caption, the URL field and the scrape button.
    master = Tk()
    url_caption = Label(master, text="Wikipedia URL")
    url_caption.grid(row=0)

    # URL field, pre-filled with a hint; Ctrl+A is bound to select_all so the
    # whole content of the Entry widget gets selected.
    e1 = Entry(master)
    e1.insert(10, "Enter a wikipedia URL")
    e1.bind('<Control-a>', select_all)
    e1.grid(row=0, column=1)


    def _run_scrape():
        """Zero-argument button callback that forwards the widgets to custom_scrape."""
        custom_scrape(e1, master)


    # A Button command is invoked without arguments, hence the wrapper above,
    # which in turn calls the function that accepts arguments.
    # Refer to: https://stackoverflow.com/questions/22723039/python-tkinter-passing-values-with-a-button for details
    scrape_button = Button(master, text='Scrape', command=_run_scrape)
    scrape_button.grid(row=3, column=0, sticky=W, pady=4)
    master.mainloop()

网页内容由 Stack Overflow 提供,点击上面的“原文链接”可以查看英文原文。