import requests
from bs4 import BeautifulSoup

# Fetch the Monster job search results page
URL = 'https://www.monster.com/jobs/search/?q=Software-Developer&where=Australia'
page = requests.get(URL)

# Parse the raw HTML into a BeautifulSoup object
soup = BeautifulSoup(page.content, 'html.parser')
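
# What usually follows: pulling individual results out of the parsed page.
# A sketch only -- the 'ResultsContainer' id and 'card-content' class are
# assumptions about Monster's markup and may not match the live site.
results = soup.find(id='ResultsContainer')
if results is not None:
    for card in results.find_all('section', class_='card-content'):
        title = card.find('h2', class_='title')
        company = card.find('div', class_='company')
        if title and company:
            print(title.get_text(strip=True), '-', company.get_text(strip=True))
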
# pip install beautifulsoup4 lxml

import os
import requests
from bs4 import BeautifulSoup

url = "https://www.google.com/"
response = requests.get(url)

if response.ok:
    soup = BeautifulSoup(response.text, "lxml")
    title = str(soup.find("title"))

    # Strip the surrounding <title> tags, keeping only the text
    title = title.replace("<title>", "")
    title = title.replace("</title>", "")
    print("The title is : " + title)

os.system("pause")  # Windows-only: keeps the console window open

# python (code name).py
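
# Simpler alternative for the same lookup: BeautifulSoup can return the tag
# text directly, so the string replacements above are not needed.
# (Sketch; reuses the response fetched above.)
if response.ok:
    soup = BeautifulSoup(response.text, "lxml")
    if soup.title is not None:
        print("The title is : " + soup.title.get_text())
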
# basic web scraping with python
# Import libraries
import requests
import urllib.request
import time
from bs4 import BeautifulSoup

# Set the URL you want to scrape
url = 'http://web.mta.info/developers/turnstile.html'

# Connect to the URL
response = requests.get(url)

# Parse HTML and save to BeautifulSoup object
soup = BeautifulSoup(response.text, "html.parser")

# To download the whole data set, loop through all the <a> tags
line_count = 1  # variable to track what line you are on
for one_a_tag in soup.find_all('a'):  # 'a' tags are for links
    if line_count >= 36:  # links to the text files start at line 36
        link = one_a_tag['href']
        download_url = 'http://web.mta.info/developers/' + link
        urllib.request.urlretrieve(download_url, './' + link[link.find('/turnstile_') + 1:])
        time.sleep(1)  # pause the code for a second
    # add 1 for the next line
    line_count += 1
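
# The line_count >= 36 check assumes the page layout never changes. A more
# robust variant (a sketch -- it assumes the data files keep the
# 'turnstile_*.txt' naming) filters on the href itself instead:
for a_tag in soup.find_all('a', href=True):
    href = a_tag['href']
    if 'turnstile_' in href and href.endswith('.txt'):
        download_url = 'http://web.mta.info/developers/' + href
        filename = href.rsplit('/', 1)[-1]  # e.g. turnstile_180922.txt
        urllib.request.urlretrieve(download_url, './' + filename)
        time.sleep(1)  # be polite to the server
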
import scrapy
from ..items import SampletestItem  # items class

class QuoteTestSpider(scrapy.Spider):
    name = 'quote_test'
    start_urls = ['https://quotes.toscrape.com/']

    def parse(self, response):
        quotes = response.css("div.quote")
        for quote in quotes:
            items = SampletestItem()  # create a fresh item for every quote
            items['title'] = quote.css("span.text::text").get()
            items['author'] = quote.css(".author::text").get()
            items['tags'] = quote.css(".tags .tag::text").getall()
            yield items

        # Follow the pagination link and parse the next page the same way
        next_page = response.css(".next a::attr(href)").get()
        if next_page is not None:
            next_url = response.urljoin(next_page)
            yield scrapy.Request(next_url, callback=self.parse)
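
# items.py (sketch): the spider above imports SampletestItem from the
# project's items.py; declare one scrapy.Field per key the spider assigns.
# Field names come from the spider; the rest is standard Scrapy boilerplate.
import scrapy

class SampletestItem(scrapy.Item):
    title = scrapy.Field()
    author = scrapy.Field()
    tags = scrapy.Field()
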
# example of web scraping links with asyncio, pushing the blocking
# requests.get calls onto a thread pool (threads, not separate cores)
import asyncio
import requests
from concurrent.futures import ThreadPoolExecutor
from bs4 import BeautifulSoup as BS

executor = ThreadPoolExecutor(max_workers=8)

async def make_requests():
    # lister.php ids run through 0-9 and then a-z
    ids = '0123456789abcdefghijklmnopqrstuvwxyz'
    urls = ['http://www.filedropper.com/lister.php?id=' + i for i in ids]

    loop = asyncio.get_running_loop()
    futures = [loop.run_in_executor(executor, requests.get, url) for url in urls]
    await asyncio.wait(futures)

    for future in futures:
        soup = BS(future.result().content, 'html.parser')
        for all_links in soup.find_all('a', href=True):
            print("URL:", all_links['href'])
            with open('filedropper_com.txt', 'a') as f:
                f.write(all_links['href'] + '\n')

asyncio.run(make_requests())
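
# Alternative sketch: do the fetching natively on the event loop with aiohttp
# (pip install aiohttp), so no thread pool is needed. Same output, assuming
# the same urls list as above.
import aiohttp
from bs4 import BeautifulSoup

async def fetch(session, url):
    async with session.get(url) as resp:
        return await resp.text()

async def main(urls):
    async with aiohttp.ClientSession() as session:
        pages = await asyncio.gather(*(fetch(session, u) for u in urls))
    with open('filedropper_com.txt', 'a') as f:
        for page in pages:
            soup = BeautifulSoup(page, 'html.parser')
            for a in soup.find_all('a', href=True):
                print("URL:", a['href'])
                f.write(a['href'] + '\n')

# asyncio.run(main(urls))
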
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
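
# The snippet above stops at the imports. A sketch of the kind of guarded GET
# helper these imports typically set up -- the function name, checks, and
# messages here are assumptions, not part of the original.
def simple_get(url):
    """Return the content at url if the response looks like HTML, else None."""
    try:
        with closing(get(url, stream=True)) as resp:
            content_type = resp.headers.get('Content-Type', '').lower()
            if resp.status_code == 200 and 'html' in content_type:
                return resp.content
            return None
    except RequestException as e:
        print('Request to {} failed: {}'.format(url, e))
        return None

# Typical use:
# html = simple_get('https://example.com')
# if html is not None:
#     soup = BeautifulSoup(html, 'html.parser')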