Skip to content

Commit

Permalink
minor fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
y-dashev committed Mar 2, 2021
1 parent a71eff8 commit d2d23a5
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 3 deletions.
File renamed without changes.
File renamed without changes.
Empty file added electron-forge
Empty file.
6 changes: 3 additions & 3 deletions scripts.js
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
}
}

let ArcticDataSet = require('./data2.json');
let ArcticDataSet = require('./dataSets/arcticBases.json');



Expand All @@ -92,12 +92,12 @@

let lat = parseFloat(s[0]);
let log = parseFloat(s[1])
console.log( ArcticDataSet[i].link)

let obj = {
'type': 'Feature',
'properties': {
'description':
'<strong>'+ArcticDataSet[i].name+' Station</strong><p>Координати:64°19′S 62°57′W</p><p> <a href="https://en.wikipedia.org/'+ArcticDataSet[i].link+'" target="_blank" title="Opens in a new window"> Additional infor about this station </a> ',
'<strong>'+ArcticDataSet[i].name+' Station</strong><p>Координати:64°19′S 62°57′W</p><p>The <a href="https://en.wikipedia.org/'+ArcticDataSet[i].link+'" target="_blank" title="Opens in a new window"> Additional infor about this station </a> ',
'icon': 'harbor'
},
'geometry': {
Expand Down
46 changes: 46 additions & 0 deletions webScraper/arcticBasesWebScraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from bs4 import BeautifulSoup
import urllib
from urllib.request import urlopen
from collections import defaultdict
import pandas as pd
from lxml import html
import requests
import re as regular_expression
import json

# Scrape the table of Antarctic research stations from Wikipedia and write a
# JSON array of {name, coordinates, link} records for the map front end.
url = "https://en.wikipedia.org/wiki/Research_stations_in_Antarctica"

# Fetch the page ONCE and parse with an explicit parser.  The previous version
# downloaded the page twice (requests.get + urlopen) and let BeautifulSoup
# guess the parser, which triggers a warning and parser-dependent output.
website_url = requests.get(url).text
soup = BeautifulSoup(website_url, "html.parser")

table = soup.find("table", class_="wikitable")

# <span class="geo"> elements hold the "lat; lon" coordinate text per station.
coords = [span.text for span in table.tbody.find_all("span", class_="geo")]

# The first <td> of each data row carries the station name; its first <a>
# (when present) links to the station's Wikipedia article.  One pass over the
# rows collects both, instead of iterating the table twice.
bases = []
links = []
for row in table.tbody.find_all("tr")[1:]:
    cells = row.find_all("td")
    if not cells:
        continue  # header-only or malformed row
    first_cell = cells[0]
    bases.append(first_cell.text)
    anchor = first_cell.find("a")
    # Guard against name cells without a link (original crashed on None here).
    links.append(anchor.get("href") if anchor is not None else None)

# Build one record per station.  Distinct loop variable names avoid the
# original's shadowing of the source lists; zip() truncates to the shortest
# list, so rows lacking a geo coordinate are dropped.
obj = [
    {"name": name, "coordinates": coordinate, "link": link}
    for name, coordinate, link in zip(bases, coords, links)
]

# NOTE(review): the front end now reads ./dataSets/arcticBases.json — confirm
# whether this output path should follow that rename.
with open("data2.json", "w") as outfile:
    json.dump(obj, outfile)

0 comments on commit d2d23a5

Please sign in to comment.