class SuperSpider(CrawlSpider):
    """Crawl quotes.toscrape.com, following pagination and extracting quotes.

    Follows links matching 'page/' (pagination) while excluding 'tag/'
    listing pages; each followed page is handled by `parse_filter_book`.
    """

    name = 'spider'
    allowed_domains = ['quotes.toscrape.com']
    start_urls = ['http://quotes.toscrape.com/']
    base_url = 'http://quotes.toscrape.com'
    # Only pagination links are followed; tag pages are explicitly denied.
    rules = [Rule(LinkExtractor(allow='page/', deny='tag/'),
                  callback='parse_filter_book', follow=True)]

    def parse_filter_book(self, response):
        """Yield one dict per quote found on the response page.

        NOTE(review): 'Author' captures the href of the author's detail
        link (`.//span/a/@href`), not the author's display name — confirm
        this is intended; the display name lives under `small.author`.
        """
        for quote in response.css('div.quote'):
            yield {
                'Author': quote.xpath('.//span/a/@href').get(),
                'Quote': quote.xpath('.//span[@class= "text"]/text()').get(),
            }  # fixed: original dict literal was never closed (syntax error)
# -*- coding: utf-8 -*-
import scrapy

4
class AliexpressTabletsSpider(scrapy.Spider):
    """Scrape tablet listings from two AliExpress category pages.

    Emits one item per listing with the product name, price range,
    total-order count, and seller (store) name.
    """

    name = 'aliexpress_tablets'
    allowed_domains = ['aliexpress.com']
    start_urls = ['https://www.aliexpress.com/category/200216607/tablets.html',
                  'https://www.aliexpress.com/category/200216607/tablets/2.html?site=glo&g=y&tag=']

    def parse(self, response):
        """Extract listing fields from the page and yield one dict per row.

        NOTE: zip() truncates to the shortest of the four extracted lists,
        so any listing missing one of the fields is silently dropped.
        """
        print("processing:" + response.url)  # fixed typo: was "procesing"

        # Extract parallel lists of field values using CSS selectors...
        product_names = response.css('.product::text').extract()
        price_ranges = response.css('.value::text').extract()
        # ...and XPath selectors.
        orders = response.xpath("//em[@title='Total Orders']/text()").extract()
        company_names = response.xpath("//a[@class='store $p4pLog']/text()").extract()

        # Pair the lists row-wise and yield one scraped item per listing;
        # tuple unpacking replaces the original opaque item[0]..item[3] indexing.
        for product_name, price_range, order_count, company_name in zip(
                product_names, price_ranges, orders, company_names):
            yield {
                'page': response.url,
                'product_name': product_name,
                'price_range': price_range,
                'orders': order_count,
                'company_name': company_name,
            }
38