# Preprocesar Datos #93

# Workflow: runs the preprocessing routine and commits the generated files
# under ./data/info and ./data/training back to the `main` branch.
name: Preprocesar Datos

on:
  schedule:
    # Runs at 12:00 UTC on days 1 and 22 of every month. `*/21` in the
    # day-of-month field is a step over 1-31, not a rolling 21-day interval.
    - cron: '0 12 */21 * *'
  # Allows the workflow to be started manually.
  workflow_dispatch:
  # Triggered by the dispatch event from microdatos-EPH-INDEC.
  repository_dispatch:
    types: [microdata-updated]

# The last step pushes a commit, so the job token needs write access to the
# repository contents (the default token may be read-only).
permissions:
  contents: write

jobs:
  preprocesar-datos:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Configurar Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is read as a string, not a float.
          # NOTE(review): 3.8 is end-of-life; confirm before raising it.
          python-version: '3.8'

      - name: Instalar dependencias
        run: |
          # python -m pip install --upgrade pip
          pip install pandas
          pip install requests

      # Debug aid: show the working directory and its contents.
      - name: Listar directorio actual y archivos
        run: |
          pwd
          ls -lah

      - name: Crear y verificar directorio temporal
        run: |
          mkdir -p ./temp_data
          ls -lah ./temp_data

      - name: Preprocesar Datos para Entrenamiento
        run: python ./codigo/routines/preprocesar_datos.py -y 2022 2026 -ow

      # Debug aid: show what the preprocessing step left in the workspace.
      - name: Verificar directorio después del procesamiento
        run: ls -lah ./

      - name: Configurar Git
        run: |
          git config --global user.name 'matuteiglesias'
          git config --global user.email 'matuteiglesias@gmail.com'

      - name: Check file sizes and commit if less than 50 MB
        # Arrays and process substitution below require bash.
        shell: bash
        run: |
          max_size=52428800  # 50 MB in bytes
          add_files=()
          for dir in ./data/info ./data/training; do
            # Skip output directories that do not exist in the workspace.
            [ -d "$dir" ] || continue
            # NUL-delimited so paths with spaces or glob characters survive.
            while IFS= read -r -d '' file; do
              size=$(stat -c%s "$file")
              if [ "$size" -le "$max_size" ]; then
                add_files+=("$file")
              else
                echo "Skipping $file as it is larger than 50 MB"
              fi
            done < <(find "$dir" -type f -print0)
          done
          if [ "${#add_files[@]}" -gt 0 ]; then
            # -f stages the files even if they match an ignore rule.
            git add -f -- "${add_files[@]}"
            # Check if there are changes to commit
            if git diff --staged --quiet; then
              echo "No changes to commit"
            else
              git commit -m "Add processed training data and rankings"
              git push origin main
            fi
          else
            echo "No files to commit"
          fi
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}