Skip to content

Commit

Permalink
added parameters to cases_by_age_and_sex
Browse files (browse the repository at this point in the history)
  • Loading branch information
fccoelho committed May 25, 2021
1 parent 13c2dec commit ac7c15b
Showing 1 changed file with 15 additions and 17 deletions.
32 changes: 15 additions & 17 deletions pysus/preprocessing/ESUS.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
import pandas as pd


def cases_by_age_and_sex(UF):
def cases_by_age_and_sex(UF, start='2020-03-01', end='2020-08-31'):
"""
Fetches ESUS covid line list and aggregates by age and sex returning these counts.
Fetches ESUS covid line list and aggregates by age and sex returning these counts between start and end dates.
:param UF: State code
:param start: Start date
:param end: end date
:return: dataframe
"""
df = download(uf=UF)
Expand All @@ -18,15 +20,14 @@ def cases_by_age_and_sex(UF):

# Eliminando os valores nulos nas colunas com datas importantes
old_size = len(df)
df.dropna(subset = ['dataNotificacao', 'dataInicioSintomas', 'dataTeste'], inplace = True)
print(f"Removed {old_size-len(df)} rows with missing dates of symptoms, notification or testing")

df.dropna(subset=['dataNotificacao', 'dataInicioSintomas', 'dataTeste'], inplace=True)
print(f"Removed {old_size - len(df)} rows with missing dates of symptoms, notification or testing")

# Desconsiderando os resultados negativos ou inconclusivos
df = df.loc[~df.resultadoTeste.isin(['Negativo','Inconclusivo ou Indeterminado'])]
df = df.loc[~df.resultadoTeste.isin(['Negativo', 'Inconclusivo ou Indeterminado'])]

# Removendo sexo indeterminado
df = df.loc[df.sexo.isin(['Masculino','Feminino'])]
df = df.loc[df.sexo.isin(['Masculino', 'Feminino'])]

# determinando a data dos primeiros sintomas como a data do index

Expand All @@ -36,20 +37,17 @@ def cases_by_age_and_sex(UF):

# vamos limitar a data inicial e a data final considerando apenas a primeira onda

d1 = '2020-03-01'
d2 = '2020-08-31'

df = df.loc[d1:d2]
df = df.loc[start:end]

ini = np.arange(0,81,5)
fin = np.arange(5,86, 5)
fin[-1]=120
faixa_etaria = {f'[{i},{f})':(i,f) for i,f in zip(ini,fin)}
ini = np.arange(0, 81, 5)
fin = np.arange(5, 86, 5)
fin[-1] = 120
faixa_etaria = {f'[{i},{f})': (i, f) for i, f in zip(ini, fin)}

labels = list(faixa_etaria.keys())
df['faixa_etaria'] = [labels[i-1] for i in np.digitize(df.idade,bins=ini)]
df['faixa_etaria'] = [labels[i - 1] for i in np.digitize(df.idade, bins=ini)]

agreg = df[['sexo', 'faixa_etaria']].groupby(['faixa_etaria', 'sexo']).size()
agreg = agreg.reset_index()
agreg.columns=['faixa_etaria', 'sexo','n']
agreg.columns = ['faixa_etaria', 'sexo', 'n']
return agreg

0 comments on commit ac7c15b

Please sign in to comment.