from scrapy.spiders import CrawlSpider


class SuperSpider(CrawlSpider):
    name = 'extractor'
    allowed_domains = ['en.wikipedia.org']
    start_urls = ['https://en.wikipedia.org/wiki/Python_(programming_language)']
    base_url = 'https://en.wikipedia.org'

    def parse_start_url(self, response):
        # CrawlSpider reserves parse() for its own rule-following logic, so the
        # responses for start_urls are handled here in parse_start_url() instead.
        # Collect every link that appears inside paragraph text on the page.
        for link in response.xpath('//div/p/a'):
            href = link.xpath('@href').get()
            if href:  # skip <a> elements that carry no href attribute
                # Wikipedia article links are site-relative (e.g. /wiki/...),
                # so prepending base_url turns them into absolute URLs.
                yield {"link": self.base_url + href}
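To try the spider outside a full Scrapy project, it can be driven from a plain script with Scrapy's CrawlerProcess. The snippet below is a minimal sketch rather than part of the original example: the links.json output path is an arbitrary choice, and the FEEDS export setting assumes Scrapy 2.1 or newer.

from scrapy.crawler import CrawlerProcess

process = CrawlerProcess(settings={
    # Illustrative export target: write the yielded items to a JSON file.
    'FEEDS': {'links.json': {'format': 'json'}},
})
process.crawl(SuperSpider)  # SuperSpider as defined above, in the same file
process.start()             # blocks until the crawl finishes

Alternatively, saving the spider to a standalone file and running it with scrapy runspider <file> -o links.json produces the same output without the wrapper script.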