# Source code for bradata.agencias.infraero

import bradata
from bradata.connection import Connection
import requests
from bs4 import BeautifulSoup
import time
import os


def _get_links(xml):
    """
    Collect every anchor (``<a>``) element found in an HTML page.

    :param xml: string containing the HTML markup of a page
    :return: links: the BeautifulSoup result list holding one tag object per
        ``<a>`` element (tag objects, not URL strings; read ``tag['href']``
        to obtain the target)
    """
    # Name the parser explicitly: when it is omitted BeautifulSoup picks
    # whichever parser happens to be installed and warns about the guess, so
    # the same page could be parsed differently from one machine to another.
    soup = BeautifulSoup(xml, "html.parser")

    return soup.find_all("a")

def get(year="2015"):
    """
    Get all statistics xls files from the Infraero website for a given year.

    Every anchor of the statistics page whose ``href`` contains both
    ``Estatistica`` and the requested year is downloaded into
    ``bradata.__download_dir__`` and saved as ``<year>-<remote file name>``.

    :param year: year to look for, as a string; other values (e.g. an int)
        are converted with ``str()``. The original documentation mentions
        the years 2012 to 2017.
    :return: database_links: a set with the absolute URL of every matching
        link found on the page, including any whose download failed (those
        are reported on stdout and no file is written for them)
    """
    # Function-scope import so this block does not depend on a change to the
    # module's import section.
    from urllib.parse import urljoin

    statistics_url = "http://www.infraero.gov.br/index.php/br/estatisticas/estatisticas.html"
    # The membership tests below need a string; accept ints such as 2015 too.
    year = str(year)

    conn = Connection()
    statistics_page = conn.perform_request(statistics_url)

    database_links = set()
    for link in _get_links(statistics_page["content"]):
        # Anchors without an href (e.g. named anchors) cannot be downloads.
        href = link.get("href")
        if not href:
            continue
        if "Estatistica" in href and year in href:
            # Read the href attribute itself instead of slicing the tag's
            # string form, which yields the first quoted attribute whatever
            # it is. urljoin resolves root-relative, page-relative and
            # already-absolute hrefs correctly.
            database_links.add(urljoin(statistics_url, href))

    for link in database_links:
        name = link.rsplit("/", 1)[-1]
        print("Downloading: {}".format(link))
        # A timeout keeps one stalled connection from hanging the whole run.
        resp = requests.get(link, timeout=30)
        if resp.ok:
            target = os.path.join(bradata.__download_dir__, "{}-{}".format(year, name))
            with open(target, mode='wb') as f:
                f.write(resp.content)
        else:
            # Do not store an HTTP error page under a spreadsheet file name.
            print("Download failed (HTTP {}): {}".format(resp.status_code, link))
        # Short pause between requests to avoid hammering the server.
        time.sleep(0.05)

    return database_links