How to check whether a page is indexed by Google using Python

Solutions on MaxInterview for how to know google index of a page using python by the best coders in the world

showing results for - "how to know google index of a page using python"
Celia
09 Jul 2020
import time
from urllib.parse import urlencode

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Report, for every URL in the ``links`` column of url_links.xlsx, whether a
# Google ``site:`` search returns a result link for it. One status line is
# printed per spreadsheet row.

seconds = 3  # pause after each Google query, in seconds
request_timeout = 10  # seconds to wait for a response before giving up

# Local proxy endpoints. They are NOT applied by default (the queries go out
# directly); pass this mapping to fetch_results(..., proxies=proxies) to route
# the requests through the proxy.
proxies = {
    'https': 'https://localhost:8123',
    'http': 'http://localhost:8123',
}

user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36'
headers = {'User-Agent': user_agent}

# Chain of nested tags walked below the ``rso`` element before looking for
# the result anchor.
# NOTE(review): this depth mirrors Google's markup at the time of writing and
# may need adjusting if the page layout changes.
result_link_path = ("div", "div", "div", "div", "div")


def clean_link(value):
    """Return a spreadsheet cell as a stripped string, or None if it is blank.

    Empty Excel cells are read by pandas as NaN, which is truthy, so a plain
    truthiness test is not enough to reject them.
    """
    if value is None or pd.isna(value):
        return None
    text = str(value).strip()
    return text or None


def build_search_url(link):
    """Return the Google search URL for the query ``site:<link>``."""
    return "https://www.google.com/search?" + urlencode({'q': 'site:' + link})


def fetch_results(link, proxies=None):
    """Download the Google results page for ``site:<link>``.

    Args:
        link: URL (or domain) to look up.
        proxies: optional requests-style proxy mapping; None means no proxy.

    Returns:
        The raw response body as bytes.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-success HTTP status (for example when Google rate-limits).
    """
    response = requests.get(
        build_search_url(link),
        headers=headers,
        proxies=proxies,
        timeout=request_timeout,
    )
    response.raise_for_status()
    return response.content


def is_indexed(html):
    """Return True when the results page contains a result link.

    Starting at the element with id ``rso``, follows the first matching
    descendant for each tag in ``result_link_path`` and then looks for an
    anchor that carries an ``href`` attribute.

    Args:
        html: the results page markup (bytes or str).
    """
    # Parse the markup itself; str(bytes) would yield the "b'...'" repr.
    node = BeautifulSoup(html, "html.parser").find(id="rso")
    for tag in result_link_path:
        if node is None:
            return False
        node = node.find(tag)
    return node is not None and node.find("a", href=True) is not None


def main():
    """Check every row of url_links.xlsx and print its index status."""
    df = pd.read_excel('url_links.xlsx')
    for cell in df['links']:
        link = clean_link(cell)
        if link is None:
            print("Invalid Url")
            continue
        try:
            html = fetch_results(link)
        except requests.RequestException as exc:
            # A failed or blocked request says nothing about index status,
            # so report it separately instead of printing "URL Not Index".
            print("Request failed for {}: {}".format(link, exc))
        else:
            print("URL is Index " if is_indexed(html) else "URL Not Index")
        # Throttle after every query attempt to stay polite towards Google.
        time.sleep(float(seconds))


if __name__ == "__main__":
    main()