我该如何爬取这个网站?如何发送带有请求体(payload)的 POST 请求并获取返回的数据?使用下面的代码,我可以爬取第一页,但该如何爬取第二页?我必须使用 Selenium,还是只用 Scrapy 就足够了?
import scrapy
from scrapy import log
from scrapy.http import *
import urllib2
class myntra_spider(scrapy.Spider):
    """Spider that prints product links from Myntra's men's footwear listing.

    ``parse`` handles the start URL, prints the links found on it and then
    issues a POST request to the site's filtered-search endpoint.
    ``nextpages`` handles that follow-up response and prints up to
    ``max_links_shown`` links from it.
    """

    name = "myntra"
    # Scrapy reads ``allowed_domains`` (plural); left empty, as before, so no
    # domain restriction is configured.
    allowed_domains = []
    start_urls = ["http://www.myntra.com/men-footwear"]

    # Endpoint queried for the pages after the first one.
    search_url = 'http://www.myntra.com/search-service/searchservice/search/filteredSearch'
    # NOTE(review): the payload this endpoint expects is not visible here.
    # Fill it in (and add a matching Content-Type header) once it is known.
    search_payload = ""
    # XPath selecting the product links inside the results list.
    link_xpath = "//ul[@class='results small']/li/a/@href"
    # Upper bound on how many links ``nextpages`` prints.
    max_links_shown = 10

    # Send ERROR-level log output to a file. The handle is deliberately kept
    # open: the observer is started with it and presumably keeps writing to it
    # while the crawl runs.
    logfile = open('testlog.log', 'w')
    log_observer = log.ScrapyFileLogObserver(logfile, level=log.ERROR)
    log_observer.start()

    def parse(self, response):
        """Print the links on the first page and request the next results.

        Yields a single POST ``Request`` to ``search_url`` whose response is
        handled by ``nextpages``.
        """
        print("response url  %s" % response.url)
        link = response.xpath(self.link_xpath).extract()
        # The original printed the undefined name ``links`` (NameError).
        print(link)
        # ``method`` must be given explicitly: Request defaults to GET even
        # when a body is supplied.
        yield Request(
            self.search_url,
            method="POST",
            body=self.search_payload,
            callback=self.nextpages,
        )

    def nextpages(self, response):
        """Print at most ``max_links_shown`` links found in ``response``.

        NOTE(review): this applies an HTML XPath to the search-service
        response; confirm the endpoint returns HTML rather than e.g. JSON.
        """
        link = response.xpath(self.link_xpath).extract()
        # Slicing avoids the IndexError a fixed range(10) caused whenever
        # fewer than ten links were found.
        for href in link[:self.max_links_shown]:
            print("link  %s" % href)