showing results for - "web scraping with node js using puppeteer"
Jaelynn
19 Jun 2019
/*
    This code comes from Vincent Lab.
    A video walkthrough is available here: https://www.youtube.com/watch?v=GAvpZuVzeA8
*/
5
// AQI	Air Pollution Level	Health Implications	Cautionary Statement (for PM2.5)
// 0-50	Good	Air quality is considered satisfactory, and air pollution poses little or no risk	None
// 51-100	Moderate	Air quality is acceptable; however, for some pollutants there may be a moderate health concern for a very small number of people who are unusually sensitive to air pollution.	Active children and adults, and people with respiratory disease, such as asthma, should limit prolonged outdoor exertion.
// 101-150	Unhealthy for Sensitive Groups	Members of sensitive groups may experience health effects. The general public is not likely to be affected.	Active children and adults, and people with respiratory disease, such as asthma, should limit prolonged outdoor exertion.
// 151-200	Unhealthy	Everyone may begin to experience health effects; members of sensitive groups may experience more serious health effects	Active children and adults, and people with respiratory disease, such as asthma, should avoid prolonged outdoor exertion; everyone else, especially children, should limit prolonged outdoor exertion
// 201-300	Very Unhealthy	Health warnings of emergency conditions. The entire population is more likely to be affected.	Active children and adults, and people with respiratory disease, such as asthma, should avoid all outdoor exertion; everyone else, especially children, should limit outdoor exertion.
// 300+	Hazardous	Health alert: everyone may experience more serious health effects	Everyone should avoid all outdoor exertion
13
14// Import dependencies
15const puppeteer = require("puppeteer");
16const moment = require("moment");
17const fs = require("fs");
18
19(async () => {
20
21    // The location / URL
22    const url = "https://aqicn.org/city/denmark/copenhagen/h.c.andersens-boulevard/";
23
24    // Create the browser
25    const browser = await puppeteer.launch({
26        headless: true
27    });
28
29    // Navigate to the website
30    const page = await browser.newPage();
31    await page.goto(url, { waitUntil: "load" });
32
33    // Get the Air quality index
34    const aqi = await page.$(`#aqiwgtvalue`);
35
36    // Extract the index
37    const index = await (await aqi.getProperty("textContent")).jsonValue();
38
39    // Extract the title
40    const title = await (await aqi.getProperty("title")).jsonValue();
41
42    // Extract the location
43    const location = await (await (await page.$(`#aqiwgttitle1`)).getProperty("textContent")).jsonValue();
44
45    // Write the data to a CSV file
46    if (fs.existsSync("air-pollution.csv")) {
47        fs.appendFileSync("air-pollution.csv", `\n${moment.utc()}, ${index}, ${title}, ${location.replace(/,/g, '')}`)
48    } else {
49        fs.writeFileSync("air-pollution.csv", `datetime, index, title, location\n${moment.utc()}, ${index}, ${title}, ${location.replace(/,/g, '')}`)
50    }
51
52    // Close the browser
53    await browser.close();
54