# Source code for bradata.agencias.infraero
import bradata
from bradata.connection import Connection
import requests
from bs4 import BeautifulSoup
import time
import os
def _get_links(xml):
    """
    Get all anchor tags from a html page

    :param xml: string of a html page
    :return: links: a list-like collection with every ``<a>`` tag found in
        the page (anchors that have no ``href`` attribute are included)
    """
    # Name the parser explicitly. Without it BeautifulSoup guesses based on
    # which parsers are installed, emits a GuessedAtParserWarning, and may
    # build a different tree on different machines. "html.parser" ships with
    # the standard library, so no extra dependency is needed.
    soup = BeautifulSoup(xml, "html.parser")
    # Calling the soup object is bs4 shorthand for soup.find_all("a").
    return soup("a")
def get(year="2015"):
    """
    Get all statistics xls files from Infraero website for a given year

    The statistics index page is fetched, every anchor whose ``href``
    contains both ``'Estatistica'`` and the requested year is collected, and
    each matching file is saved in ``bradata.__download_dir__`` under the
    name ``"<year>-<last path segment of the link>"``.

    :param year: year to look for, from 2017 to 2012; a string is expected,
        other values (e.g. an int) are converted with ``str``
    :return: links: a set with the absolute URLs of the files that were
        downloaded and written to disk
    """
    # Function-scope import so this block does not depend on the module's
    # import section being changed.
    from urllib.parse import urljoin

    base_url = "http://www.infraero.gov.br"
    # The year is matched as a substring of the href, so it must be text.
    year = str(year)

    conn = Connection()
    statistics_page = conn.perform_request(
        base_url + "/index.php/br/estatisticas/estatisticas.html")

    database_links = set()
    for link in _get_links(statistics_page["content"]):
        # Read the attribute directly: anchors without an href would raise
        # KeyError on link['href'], and slicing str(link) on '"' returns
        # whichever attribute happens to be serialized first, with HTML
        # entities still escaped.
        href = link.get('href')
        if not href:
            continue
        if 'Estatistica' in href and year in href:
            # urljoin yields the same URL as plain concatenation for
            # root-relative hrefs and also copes with absolute ones.
            database_links.add(urljoin(base_url, href))

    downloaded = set()
    # Sorted only to make the download order reproducible between runs.
    for link in sorted(database_links):
        name = link.rsplit("/", 1)[-1]
        print("Downloading: {}".format(link))
        resp = requests.get(link)
        if not resp.ok:
            # Do not store an HTTP error page as if it were the spreadsheet.
            print("Failed to download {} (HTTP {})".format(link, resp.status_code))
            continue
        target = os.path.join(bradata.__download_dir__, "{}-{}".format(year, name))
        with open(target, mode='wb') as f:
            f.write(resp.content)
        downloaded.add(link)
        # Short pause between requests to avoid hammering the server.
        time.sleep(0.05)
    return downloaded