From 57dcebd05faf9da66fb192f5dd020a9f5e76243f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacy=20=C5=9Awiderski?= Date: Fri, 15 Sep 2023 19:27:53 +0200 Subject: [PATCH] Implement easy import of schools from the MoE --- oioioi/oi/files/rspo.csv | 8 + oioioi/oi/fixtures/test_schools_import.json | 23 + oioioi/oi/forms.py | 2 +- .../oi/management/commands/export_schools.py | 2 +- .../management/commands/export_schools_id.py | 2 +- .../oi/management/commands/import_schools.py | 591 +++++++++++++++--- .../commands/import_schools_legacy.py | 122 ++++ .../0007_schooltype_school_rspo_and_more.py | 45 ++ oioioi/oi/models.py | 9 + oioioi/oi/tests.py | 18 +- 10 files changed, 724 insertions(+), 98 deletions(-) create mode 100644 oioioi/oi/files/rspo.csv create mode 100644 oioioi/oi/fixtures/test_schools_import.json create mode 100644 oioioi/oi/management/commands/import_schools_legacy.py create mode 100644 oioioi/oi/migrations/0007_schooltype_school_rspo_and_more.py diff --git a/oioioi/oi/files/rspo.csv b/oioioi/oi/files/rspo.csv new file mode 100644 index 000000000..f7c135bee --- /dev/null +++ b/oioioi/oi/files/rspo.csv @@ -0,0 +1,8 @@ +Numer RSPO;REGON;NIP;Typ;Nazwa;Kod terytorialny województwo;Kod terytorialny powiat;Kod terytorialny gmina;Kod terytorialny miejscowość;Kod terytorialny ulica;Województwo;Powiat;Gmina;Miejscowość;Rodzaj miejscowości;Ulica;Numer budynku;Numer lokalu;Kod pocztowy;Poczta;Telefon;Faks;E-mail;Strona www;Publiczność status;Kategoria uczniów;Specyfika placówki;Imię i nazwisko dyrektora;Data założenia;Data rozpoczęcia działalności;Data likwidacji;Typ organu prowadzącego;Nazwa organu prowadzącego;REGON organu prowadzącego;NIP organu prowadzącego;Województwo organu prowadzącego;Powiat organu prowadzącego;Gmina organu prowadzącego;Miejsce w strukturze;RSPO podmiotu nadrzędnego;Typ podmiotu nadrzędnego;Nazwa podmiotu nadrzędnego;Liczba uczniów;Tereny sportowe;Języki nauczane;Czy zatrudnia logopedę;Czy zatrudnia psychologa;Czy zatrudnia pedagoga;Oddziały 
podstawowe wg specyfiki;Oddziały dodatkowe +76;532397913;7531676389;Liceum ogólnokształcące;EUROPEJSKA SZKOŁA PODSTAWOWA W KIELCACH;02;0262;0262011;0954047;19998;DOLNOŚLĄSKIE;Legnica;Legnica;Legnica;miasto;ul. Fryderyka Skarbka;4;;59-220;Legnica;768523705;768523705;zsemleg1@wp.pl;www.zsem.legnica.pl;publiczna;Dzieci lub młodzież;brak specyfiki; ;31.08.2002;31.08.2002;;Miasto na prawach powiatu;GMINA LEGNICA;390647251;;DOLNOŚLĄSKIE;Legnica;Legnica;szkoła/placówka wchodząca w skład jednostki złożonej;48856;Zespół szkół i placówek oświatowych;ZESPÓŁ SZKÓŁ ELEKTRYCZNO-MECHANICZNYCH W LEGNICY;620;;angielski,niemiecki;Nie;Nie;Tak;ogólnodostępny; +79;532397563;7471067208;Liceum ogólnokształcące;I LICEUM OGÓLNOKSZTAŁCĄCE IM. BOLESŁAWA CHROBREGO W BRZEGU;16;1601;1601011;0965252;00432;OPOLSKIE;brzeski;Brzeg;Brzeg;miasto;ul. Armii Krajowej;7;;49-300;Brzeg;774163625;774242143;lo1brzeg@wodip.opole.pl;lo1brzeg.szkolnastrona.pl;publiczna;Dzieci lub młodzież;brak specyfiki;Katarzyna Grochowska;01.09.1945;01.09.1945;;Powiat ziemski;POWIAT BRZESKI;531412444;7471567388;OPOLSKIE;brzeski;Brzeg;samodzielna;;;;348;boiska do siatkówki,boiska do koszykówki,boiska do piłki ręcznej,rzutnie;angielski,niemiecki,włoski;Nie;Tak;Tak;ogólnodostępny; +51;000735055;5431021630;Technikum;TECHNIKUM NR 5 W LEGNICY;20;2003;2003011;0922685;11205;PODLASKIE;bielski;Bielsk Podlaski;Bielsk Podlaski;miasto;ul. 11 Listopada;6;;17-100;Bielsk Podlaski;858332673;;1lo_bielsk_podlaski@wp.pl;www.1lobielskpodlaski.edupage.org;publiczna;Dzieci lub młodzież;brak specyfiki;Marzena Pogorzelska-Ciołek;07.11.1918;10.02.1919;;Powiat ziemski;POWIAT BIELSKI;050658574;5432012248;PODLASKIE;bielski;Bielsk Podlaski;samodzielna;;;;367;;angielski,francuski,niemiecki,rosyjski;Nie;Tak;Tak;ogólnodostępny; +98;532400897;7471042148;Liceum ogólnokształcące;LICEUM OGÓLNOKSZTAŁCĄCE W KROŚNIEWICACH;16;1601;1601011;0965252;11926;OPOLSKIE;brzeski;Brzeg;Brzeg;miasto;ul. 
1 Maja;7;;49-305;Brzeg;774111408;774111408;lo2brzeg@wodip.opole.pl;lo2brzeg.wodip.opole.pl;publiczna;Dzieci lub młodzież;brak specyfiki;Leszek Lipiński;01.09.1990;01.09.1990;;Powiat ziemski;POWIAT BRZESKI;531412444;7471567388;OPOLSKIE;brzeski;Brzeg;samodzielna;;;;397;boiska do piłki nożnej;angielski,francuski,hiszpański,niemiecki;Nie;Nie;Tak;ogólnodostępny,dwujęzyczny w szkole podstawowej, liceum i technikum; +101;146242052;8212636505;Szkoła podstawowa;SZKOŁA PODSTAWOWA NR 3 IM. DOKTORA JANUSZA PETERA W TOMASZOWIE LUBELSKIM;14;1426;1426092;0687570;21970;MAZOWIECKIE;siedlecki;Skórzec;Gołąbek;wieś;ul. Szkolna;26;;08-114;Gołąbek;256316682;;dyrektor@nspgolabek.pl;nspgolabek.pl;niepubliczna;Dzieci lub młodzież;brak specyfiki;EWA PIEKART;05.08.2012;31.08.2012;01.09.1945;Stowarzyszenia;STOWARZYSZENIE KULTURALNO-OŚWIATOWE TĘCZA;145856190;8212636505;MAZOWIECKIE;siedlecki;Skórzec;samodzielna;;;;77;boiska uniwersalne/wielozadaniowe;angielski,niemiecki;Tak;Nie;Tak;ogólnodostępny; +115;000273608;9211378556;Szkoła podstawowa;SZKOŁA PODSTAWOWA W BĘDZINIE Z SIEDZIBĄ W ŁEKNIE;06;0618;0618011;0988075;26608;LUBELSKIE;tomaszowski;Tomaszów Lubelski;Tomaszów Lubelski;miasto;ul. Żwirki i Wigury;6;;22-600;Tomaszów Lubelski;846642443;;spnr3tom@post.pl;www.spnr3tom.superszkolna.pl;publiczna;Dzieci lub młodzież;brak specyfiki;PIOTR SZUMILAK;31.08.1964;31.08.1964;;Gmina;MIASTO TOMASZÓW LUBELSKI;950369110;;LUBELSKIE;tomaszowski;Tomaszów Lubelski;samodzielna;;;;539;boiska do piłki nożnej,boiska uniwersalne/wielozadaniowe;angielski,niemiecki;Tak;Tak;Tak;ogólnodostępny,integracyjny,sportowy; +116;260627938;6572607539;Liceum ogólnokształcące;LICEUM OGÓLNOKSZTAŁCĄCE NR 3 WE WROCŁAWIU;26;2661;2661011;0945930;20291;ŚWIĘTOKRZYSKIE;Kielce;Kielce;Kielce;miasto;ul. 
Juliusza Słowackiego;5;;25-365;Kielce;413435199;;sekretariat@nazaret.kielce.pl;www.nazaret.kielce.pl;publiczna;Dzieci lub młodzież;brak specyfiki;MAŁGORZATA BIAŁEK;29.09.2003;01.09.2004;;Organizacje Wyznaniowe;ZGROMADZENIE SIÓSTR NAJŚWIĘTSZEJ RODZINY Z NAZARETU, PROWINCJA KRAKOWSKA;006228572;6792527340;MAŁOPOLSKIE;Kraków;Kraków-Podgórze;samodzielna;;;;317;;angielski,francuski,łacina,niemiecki,włoski;Nie;Tak;Tak;ogólnodostępny;Grupa nauczania języka mniejszości diff --git a/oioioi/oi/fixtures/test_schools_import.json b/oioioi/oi/fixtures/test_schools_import.json new file mode 100644 index 000000000..5807ef5d3 --- /dev/null +++ b/oioioi/oi/fixtures/test_schools_import.json @@ -0,0 +1,23 @@ +[ + { + "pk": 1, + "model": "oi.schooltype", + "fields": { + "name": "Szkoła podstawowa" + } + }, + { + "pk": 2, + "model": "oi.schooltype", + "fields": { + "name": "Liceum ogólnokształcące" + } + }, + { + "pk": 3, + "model": "oi.schooltype", + "fields": { + "name": "Technikum" + } + } +] diff --git a/oioioi/oi/forms.py b/oioioi/oi/forms.py index 03f78dc61..4d59a21ea 100644 --- a/oioioi/oi/forms.py +++ b/oioioi/oi/forms.py @@ -13,7 +13,7 @@ class AddSchoolForm(forms.ModelForm): class Meta(object): model = School - exclude = ['is_active', 'is_approved'] + exclude = ['is_active', 'is_approved', 'rspo', 'type'] def city_options(province): diff --git a/oioioi/oi/management/commands/export_schools.py b/oioioi/oi/management/commands/export_schools.py index e9d95bb61..7d6a2efd0 100644 --- a/oioioi/oi/management/commands/export_schools.py +++ b/oioioi/oi/management/commands/export_schools.py @@ -3,7 +3,7 @@ from django.core.management.base import BaseCommand from django.utils.translation import gettext as _ -from oioioi.oi.management.commands.import_schools import COLUMNS +from oioioi.oi.management.commands.import_schools_legacy import COLUMNS from oioioi.oi.models import School diff --git a/oioioi/oi/management/commands/export_schools_id.py 
b/oioioi/oi/management/commands/export_schools_id.py index 48ca801c4..c4cce450a 100644 --- a/oioioi/oi/management/commands/export_schools_id.py +++ b/oioioi/oi/management/commands/export_schools_id.py @@ -4,7 +4,7 @@ from django.core.management.base import BaseCommand from django.utils.translation import gettext as _ -from oioioi.oi.management.commands.import_schools import COLUMNS +from oioioi.oi.management.commands.import_schools_legacy import COLUMNS from oioioi.oi.models import School COLUMNS = ['id'] + COLUMNS diff --git a/oioioi/oi/management/commands/import_schools.py b/oioioi/oi/management/commands/import_schools.py index 8513f8c0d..820745099 100644 --- a/oioioi/oi/management/commands/import_schools.py +++ b/oioioi/oi/management/commands/import_schools.py @@ -1,122 +1,533 @@ # ~*~ coding: utf-8 ~*~ import os -import string +import csv +import datetime +from typing import Type, Optional, Union -import urllib.request -import unicodecsv -from django.core.exceptions import ValidationError from django.core.management.base import BaseCommand, CommandError -from django.db import transaction +from django.db import utils, transaction from django.utils.translation import gettext as _ -from oioioi.oi.models import School +from oioioi.oi.models import School, SchoolType -COLUMNS = ['name', 'address', 'postal_code', 'city', 'province', 'phone', 'email'] + +""" +This script is used to import schools from the RSPO database, +which is provided by the Ministry of Education and contains all +educational institutions in Poland. Due to the poor quality of this data, +it is likely that SIO administrators will modify school data. +The task of this script is to detect which corrections from newer +and more recent versions of the RSPO database should be accepted and applied, +and which should not. 
+ +There are three main data types in this script called: +* db_school: school as an object, +* dict_school: a dictionary in which the keys are the attribute names of the School class, +* rspo_school: a dictionary in which the keys are the column headers from the RSPO database. +""" + + +CURRENT_TIME = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") +DRY_RUN = False +VERBOSITY = 1 + +BASE_DIR = fr'{os.getcwd()}/schools' +LOG_FILENAME = fr'{BASE_DIR}/school_import_log_{CURRENT_TIME}.log' +LATEST_LOG_FILENAME = fr'{BASE_DIR}/latest.log' +BACKUP_FILENAME = fr'{BASE_DIR}/rspo_{CURRENT_TIME}.back' + +CHILDREN_OR_YOUTH = 'Dzieci lub młodzież' +RSPO_CSV_COLUMNS = [ + 'Numer RSPO', + 'REGON', + 'NIP', + 'Typ', + 'Nazwa', + 'Kod terytorialny województwo', + 'Kod terytorialny powiat', + 'Kod terytorialny gmina', + 'Kod terytorialny miejscowość', + 'Kod terytorialny ulica', + 'Województwo', + 'Powiat', + 'Gmina', + 'Miejscowość', + 'Rodzaj miejscowości', + 'Ulica', + 'Numer budynku', + 'Numer lokalu', + 'Kod pocztowy', + 'Poczta', + 'Telefon', + 'Faks', + 'E-mail', + 'Strona www', + 'Publiczność status', + 'Kategoria uczniów', + 'Specyfika placówki', + 'Imię i nazwisko dyrektora', + 'Data założenia', + 'Data rozpoczęcia działalności', + 'Data likwidacji', + 'Typ organu prowadzącego', + 'Nazwa organu prowadzącego', + 'REGON organu prowadzącego', + 'NIP organu prowadzącego', + 'Województwo organu prowadzącego', + 'Powiat organu prowadzącego', + 'Gmina organu prowadzącego', + 'Miejsce w strukturze', + 'RSPO podmiotu nadrzędnego', + 'Typ podmiotu nadrzędnego', + 'Nazwa podmiotu nadrzędnego', + 'Liczba uczniów', + 'Tereny sportowe', + 'Języki nauczane', + 'Czy zatrudnia logopedę', + 'Czy zatrudnia psychologa', + 'Czy zatrudnia pedagoga', + 'Oddziały podstawowe wg specyfiki', + 'Oddziały dodatkowe', +] +SCHOOL_FIELDS = [ + field.column for field in School._meta.fields if 'id' not in str(field) +] +SCHOOL_FIELDS_TO_ITERATE = SCHOOL_FIELDS 
+SCHOOL_FIELDS_TO_ITERATE.remove('rspo') +SCHOOL_FIELDS_TO_ITERATE.remove('is_active') +SCHOOL_FIELDS_TO_ITERATE.remove('is_approved') + + +# region utils + + +def prepare_dir(directory): + """Creates dir if not exists.""" + if not os.path.exists(directory): + os.makedirs(directory) + + +def get_object_or_none(klass, *args, **kwargs): + """Uses get() to return an object, or returns None if the object does not exist. + Argument klass must has get() attr.""" + try: + return klass.objects.get(*args, **kwargs) + except klass.DoesNotExist: + return None + + +def prepare_address(school: 'dict[str, str]') -> str: + """Preprocesses an address of a given school in RSPO format.""" + address = school['Ulica'] + if school['Numer budynku'] != '': + address += f" {school['Numer budynku']}" + if school['Numer lokalu'] != '': + address += f" lokal {school['Numer lokalu']}" + address += f"{', ' if address != '' else ''}{school['Miejscowość']}" + return address + + +def translate_rspo_school(school: 'dict[str, str]') -> School: + """Translates school (rspo_school) to a School object (without id).""" + return School( + rspo=school['Numer RSPO'], + type=SchoolType.objects.get(name=school['Typ']), + name=school['Nazwa'], + address=prepare_address(school), + postal_code=school['Kod pocztowy'], + city=school['Miejscowość'], + province=school['Województwo'].lower(), + phone=(school['Telefon'][-9:] if school['Telefon'] != '' else '000000000'), + email=school['E-mail'], + ) + + +def translate_dict_school(school: 'dict[str, str]') -> School: + """Translates school (dict_school) to a School object (without id).""" + return School( + rspo=school['rspo'], + type=SchoolType.objects.get(pk=school['type_id']), + name=school['name'], + address=school['address'], + postal_code=school['postal_code'], + city=school['city'], + province=school['province'], + phone=school['phone'], + email=school['email'], + ) + + +def find_school( + school: 'Union[dict[str, str], School]', schools: 'list[dict[str, 
str]]' +): + """Searches for a particular school in the dictionary list. + Arguments: + - school: db_school or dict_school + - schools: list of dict_schools""" + if isinstance(school, School): + school = school.__dict__ + for s in schools: + if str(s['rspo']) == str(school['rspo']): + return s + return None + + +def are_the_same(school1: Union[dict, School], school2: Union[dict, School]) -> bool: + """Checks if two schools are the same. Two schools must be in the same format.""" + if isinstance(school1, School): + school1 = school1.__dict__ + if isinstance(school2, School): + school2 = school2.__dict__ + for field in SCHOOL_FIELDS: + if str(school1[field]).lower() != str(school2[field]).lower(): + return False + return True + + +def update_school(school_dict: 'dict[str, str]', school_obj: School): + """Updates the school object with data from the school dictionary (dict_school).""" + for field in SCHOOL_FIELDS_TO_ITERATE: + setattr(school_obj, field, school_dict[field]) + school_obj.save() + + +def generate_school_diff_message(s_before: School, s_after: School) -> str: + """Generates a message, about what is different between the two given schools. 
Both schools are objects.""" + message = f'Changes between schools [id: {s_before.pk}, rspo: {s_before.rspo}] and [id: {s_after.pk}, rspo: {s_after.rspo}]:' + for field in SCHOOL_FIELDS_TO_ITERATE: + if getattr(s_before, field) != getattr(s_after, field): + message += f'\n{field}: was {getattr(s_before, field)}, is {getattr(s_after, field)}' + return message + + +def copy_log_file(): + """Copies the log file and calls it 'latest'.""" + with open(LOG_FILENAME, 'r', encoding='utf-8-sig') as log_file: + content = log_file.read() + with open(LATEST_LOG_FILENAME, 'w', encoding='utf-8-sig') as latest_log_file: + latest_log_file.write(content) + + +def set_all_schools_inactive(): + """Sets all schools as inactive.""" + for school in School.objects.all(): + school.is_active = False + school.save() + + +def unify_schools(schools): + unique_tuples = set() + result = [] + for school in schools: + tuple = (school['Nazwa'], school['Kod pocztowy']) + if tuple not in unique_tuples: + unique_tuples.add(tuple) + result.append(school) + return result + + +def fix_file(filename): + with open(filename, 'r', encoding='utf-8-sig') as file: + text = file.read() + text = text.replace('\"', '').replace('=', '') + with open(filename, 'w', encoding='utf-8-sig') as file: + file.write(text) + + +# endregion + + +class Clean: + def __init__(self, dictionary): + self.dictionary = dictionary + + def clean(self): + for key in self.dictionary: + clean_method = getattr(self, f'clean_{key}'.replace(' ', '_'), None) + if callable(clean_method): + self.dictionary[key] = clean_method(self.dictionary[key]) + + def clean_Kod_pocztowy(self, value: str): + return value.replace('=', '') class Command(BaseCommand): - columns_str = ', '.join(COLUMNS) + help = ( + "Imports from the RSPO database (www.rspo.gov.pl) and " + "identifies changes intelligently. Please read the output and logs carefully. 
" + "This script is used to import schools from the RSPO database, " + "which is provided by the Ministry of Education and contains all " + "educational institutions in Poland. Due to the poor quality of this data, " + "it is likely that SIO administrators will modify school data. " + "The task of this script is to detect which corrections from newer " + "and more recent versions of the RSPO database should be accepted and applied, " + "and which should not." + ) - help = _( - "Updates the list of schools from the given CSV file " - ", with the following columns: %(columns)s.\n\n" - "Given CSV file should contain a header row with column names " - "(respectively %(columns)s) separated by commas. Following rows " - "should contain school data." - ) % {'columns': columns_str} + def log( + self, + message: str, + additional_info: Optional[str] = None, + exception: Optional[Type[BaseException]] = None, + school_before: Optional[School] = None, + school_after: Optional[School] = None, + ): + """logs the information to a file and displays it on the screen. + You can add additional message (additional_info), which will appear only in the log file, + and an exception (exception), which will be thrown when the information is displayed on the screen. + In addition, you can specify two schools (school_before, school_after) whose diff will be in the logs. + """ + message_to_log = ( + f'{exception.__name__}: {message}' if exception is not None else message + ) + message_to_log = ( + f'{message_to_log} {additional_info}' + if additional_info is not None + else message_to_log + ) + stdout_message = ( + f'{message} More info in logs!' 
if additional_info is not None else message + ) + if school_before is not None and school_after is not None: + diff = generate_school_diff_message(school_before, school_after) + message_to_log += f'\n{diff}' + if VERBOSITY == 0: + message_to_log = message + stdout_message = '' + with open(LOG_FILENAME, 'a', encoding='utf-8-sig') as file: + file.write(f'{message_to_log}\n') + if exception is not None: + raise exception(stdout_message) + else: + self.stdout.write(stdout_message) - requires_model_validation = True + def read_rspo_csv_file(self, filename): + """Loads schools from CSV file in RSPO format and filters them to match SIO system needs.""" + with open(filename, encoding='utf-8-sig') as file: + reader = csv.DictReader(file, delimiter=';') + if reader.fieldnames is not None and not all( + item in reader.fieldnames for item in RSPO_CSV_COLUMNS + ): + self.log( + 'Missing header or invalid columns.', + f"Excepted a csv file from the www.rspo.gov.pl website with headers: {', '.join(RSPO_CSV_COLUMNS)}.", + exception=CommandError, + ) + SCHOOL_TYPES = [type.name for type in SchoolType.objects.all()] + result = [] + for school in reader: + if ( + school['Typ'] in SCHOOL_TYPES + and school['Kategoria uczniów'] == CHILDREN_OR_YOUTH + and school['Data likwidacji'] == '' + and int(school['Liczba uczniów']) > 0 + ): + cleaner = Clean(school) + cleaner.clean() + result.append(school) + return result + + def get_last_rspo_backup(self, filename=None): + """Searches and returns the contents of the backup file from the last RSPO database. 
The backup is stored in RSPO format.""" + if filename is not None: + return self.read_rspo_csv_file(filename) + files = [file for file in os.listdir(BASE_DIR) if 'rspo_' in file] + sorted_files = sorted( + files, + key=lambda x: os.path.getmtime(os.path.join(BASE_DIR, x)), + reverse=True, + ) + if not sorted_files: + return [] + filename = sorted_files[0] + return self.read_rspo_csv_file(fr'{BASE_DIR}/{filename}') + + def save_rspo_backup(self, schools: list): + """Saves a backup of the currently imported RSPO database to disk.""" + if DRY_RUN: + return + with open(BACKUP_FILENAME, 'w', encoding='utf-8-sig') as file: + writer = csv.DictWriter(file, fieldnames=RSPO_CSV_COLUMNS, delimiter=';') + writer.writeheader() + writer.writerows(schools) def add_arguments(self, parser): - parser.add_argument('filename_or_url', type=str, help='Source CSV file') + parser.add_argument('filename', type=str, help="Source CSV file") + parser.add_argument( + '--dry-run', action='store_true', default=False, help='Run in dry-run mode' + ) + parser.add_argument( + '--first-import', + action='store_true', + default=False, + help='Marks all schools from the DB as inactive', + ) + parser.add_argument( + '--backup-filename', + type=str, + default=None, + help='Backup to be read (default: latest)', + ) + parser.add_argument( + '--ignore-backup', + action='store_true', + default=False, + help='Does not read the RSPO backup file', + ) - def handle(self, *args, **options): - arg = options['filename_or_url'] + @transaction.atomic + def handle(self, *args, **kwargs): + rows_affected = 0 + new_records = 0 - if arg.startswith('http://') or arg.startswith('https://'): - self.stdout.write(_("Fetching %s...\n") % (arg,)) - stream = urllib.request.urlopen(arg) - else: - if not os.path.exists(arg): - raise CommandError(_("File not found: %s") % arg) - stream = open(arg, 'rb') + input_filename = kwargs['filename'] + dry_run = kwargs['dry_run'] + DRY_RUN = dry_run + first_import = kwargs['first_import'] + 
ignore_backup = kwargs['ignore_backup'] + backup_path = kwargs['backup_filename'] + VERBOSITY = kwargs['verbosity'] - reader = unicodecsv.DictReader(stream) - fields = reader.fieldnames - if fields != COLUMNS: - raise CommandError( - _("Missing header or invalid columns: %(h)s. Expected: %(col)s") - % {'h': ', '.join(fields), 'col': ', '.join(COLUMNS)} + fix_file(input_filename) + prepare_dir(BASE_DIR) + + last_rspo_schools = [] + if not ignore_backup: + last_rspo_schools = self.get_last_rspo_backup( + backup_path + ) # List of RSPO school dicts + self.log( + f'INFO: Read {len(last_rspo_schools)} records from the RSPO database backup.' ) - with transaction.atomic(): - ok = True - all_count = 0 - created_count = 0 - for row in reader: - all_count += 1 + if last_rspo_schools == [] and not first_import: + raise CommandError( + 'Make sure to set first-import if you are importing for the first time!' + ) - row['address'] = row['address'].replace('ul.', '') - row['address'] = row['address'].strip(' ') - row['address'] = string.capwords(row['address']) + curr_rspo_schools = unify_schools( + self.read_rspo_csv_file(input_filename) + ) # List of RSPO school dicts + self.log( + f'INFO: Read {len(curr_rspo_schools)} records from the currently imported RSPO database.' 
+ ) - row['postal_code'] = ''.join(row['postal_code'].split()) + if not DRY_RUN: + self.save_rspo_backup(curr_rspo_schools) - for hypen in (' - ', u'\u2010'): - row['city'] = row['city'].replace(hypen, '-') - row['city'] = row['city'].title() + curr_rspo_schools = [ + translate_rspo_school(school).__dict__ for school in curr_rspo_schools + ] # List of school dicts + last_rspo_schools = [ + translate_rspo_school(school).__dict__ for school in last_rspo_schools + ] # List of school dicts - row['province'] = row['province'].lower() + if first_import: + set_all_schools_inactive() - row['phone'] = row['phone'].split(',')[0] - row['phone'] = row['phone'].split(';')[0] - for c in ['tel.', 'fax.', '(', ')', '-', ' ']: - row['phone'] = row['phone'].replace(c, '') - row['phone'] = row['phone'].lstrip('0') + for db_school in School.objects.filter(is_active=True): + db_school_dict = db_school.__dict__ + curr_rspo_school = find_school(db_school_dict, curr_rspo_schools) + last_rspo_school = find_school(db_school_dict, last_rspo_schools) - row['email'] = row['email'].split(',')[0] - row['email'] = row['email'].split(';')[0] + curr_rspo_school_exists = curr_rspo_school is not None + last_rspo_school_exists = last_rspo_school is not None - school, created = School.objects.get_or_create( - name=row['name'], postal_code=row['postal_code'] + if curr_rspo_school_exists and last_rspo_school_exists: + db_and_curr_the_same = are_the_same(db_school_dict, curr_rspo_school) + db_and_last_the_same = are_the_same(db_school_dict, last_rspo_school) + curr_and_last_the_same = are_the_same( + curr_rspo_school, last_rspo_school ) - if created: - created_count += 1 - for column in COLUMNS: - setattr(school, column, row[column]) + # If the MoE has made changes that we have not, then accept MoE changes. + if not db_and_curr_the_same and db_and_last_the_same: + self.log( + f'Note: The school [rspo: {db_school.rspo}] has been changed in the RSPO database. 
The changes have been applied.', + school_before=db_school, + school_after=translate_dict_school(curr_rspo_school), + ) + update_school(curr_rspo_school, db_school) + rows_affected += 1 + # If MoE has made changes and we have also made changes, but different ones, highlight in logs. + elif ( + not db_and_curr_the_same + and not db_and_last_the_same + and not curr_and_last_the_same + ): + self.log( + f'Note: The school [rspo: {db_school.rspo}] has been changed in the SIO database and in the RSPO database. ' + 'The changes have not been applied. Please check the school manually.', + additional_info='Below is the diff between the school in the SIO database and the one in the currently imported RSPO database. ' + 'Keep in mind that the changes HAVE NOT BEEN applied. If you think they should be applied, do it manually.', + school_before=db_school, + school_after=translate_dict_school(curr_rspo_school), + ) - school.is_active = True - school.is_approved = True + curr_rspo_schools.remove(curr_rspo_school) + last_rspo_schools.remove(last_rspo_school) - try: - school.full_clean() - school.save() - except ValidationError as e: - for k, v in e.message_dict.items(): - for msg in v: - if k == '__all__': - self.stdout.write( - _("Line %(lineNum)s: %(msg)s\n") - % {'lineNum': reader.line_num, 'msg': msg} - ) - else: - self.stdout.write( - _("Line %(lineNum)s, field %(field)s: %(msg)s\n") - % { - 'lineNum': reader.line_num, - 'field': k, - 'msg': msg, - } - ) - ok = False - - if ok: - self.stdout.write( - _("Processed %(all_count)d entries (%(new_count)d new)\n") - % {'all_count': all_count, 'new_count': created_count} + # If the school no longer appears in the MoE database, i.e. has been deleted, mark the school as inactive. + elif not curr_rspo_school_exists and last_rspo_school_exists: + self.log( + f'Note: The school [rspo: {db_school.rspo}] no longer appears in the RSPO database.', + additional_info='It has been set as inactive. 
Please check that the school with the ' + 'changed RSPO number does not appear in the currently imported RSPO database.', ) - else: - raise CommandError( - _("There were some errors. Database not changed\n") + db_school.is_active = False + db_school.save() + rows_affected += 1 + last_rspo_schools.remove(last_rspo_school) + + if len(curr_rspo_schools) > 0: + self.log( + f'Warning: There are still {len(curr_rspo_schools)} schools left that did not have an active counterpart in the SIO database.' + ) + + # Check all currently imported schools that do not have counterparts in the SIO database. + for school in curr_rspo_schools: + queried_school = get_object_or_none(School, rspo=school['rspo']) + # If there is inactive school in the SIO database, we inform about it. + if queried_school is not None: + self.log( + f'Note: The school [rspo: {queried_school.rspo}] has been found in the imported RSPO database that is not ' + 'currently active in the SIO database. Please verify that this is the way it is supposed to be.' ) + else: + try: + # If there has been no school with this RSPO number in the past, + # i.e. a new school has appeared in the RSPO database, we import it. + if find_school(school, last_rspo_schools) is None: + s = translate_dict_school(school) + with transaction.atomic(): + s.save() + self.log( + f'Note: A new school [rspo: {school["rspo"]}] has been found, which previously was not in the RSPO database. ' + 'It has been added to the SIO database.' + ) + new_records += 1 + # If an attempt is made to add a school that was present in the last RSPO backup, + # but is not present in the SIO database (probably deleted). + else: + self.log( + f'Warning: An attempt was made to add a school [rspo: {school["rspo"]}] that has probably been deleted from the SIO ' + 'database. 
If you want to add it, do it manually.', + additional_info='Tip: If you want to add schools that have been deleted since the last import, delete the last two backups.', + ) + except utils.IntegrityError: + self.log( + f'Warning: Django found a duplicate school. Please manually check the school {school["rspo"]} data in SIO database, ' + 'current RSPO and the last backup. Something suspicious may be going on.' + ) + + self.log( + 'Warning: Keep in mind that schools with wrong type in RSPO database are not taken into account!' + ) + copy_log_file() + + if DRY_RUN: + transaction.set_rollback(True) + self.log( + 'IMPORTANT: The following data shows the number of queries if dry-run were disabled. Since it was on, NO CHANGES were made.' + ) + self.log(f'Rows affected: {rows_affected}\nNew records: {new_records}')
diff --git a/oioioi/oi/management/commands/import_schools_legacy.py b/oioioi/oi/management/commands/import_schools_legacy.py
new file mode 100644
index 000000000..2e69cc605
--- /dev/null
+++ b/oioioi/oi/management/commands/import_schools_legacy.py
@@ -0,0 +1,143 @@
+# ~*~ coding: utf-8 ~*~
+import os
+import string
+
+import urllib.request
+import unicodecsv
+from django.core.exceptions import ValidationError
+from django.core.management.base import BaseCommand, CommandError
+from django.db import transaction
+from django.utils.translation import gettext as _
+
+from oioioi.oi.models import School
+
+COLUMNS = ['name', 'address', 'postal_code', 'city', 'province', 'phone', 'email']
+
+
+class Command(BaseCommand):
+    """Legacy importer that loads schools from a plain CSV file or URL."""
+
+    columns_str = ', '.join(COLUMNS)
+
+    help = _(
+        "Updates the list of schools from the given CSV file "
+        ", with the following columns: %(columns)s.\n\n"
+        "Given CSV file should contain a header row with column names "
+        "(respectively %(columns)s) separated by commas. Following rows "
+        "should contain school data."
+    ) % {'columns': columns_str}
+
+    requires_model_validation = True
+
+    def add_arguments(self, parser):
+        parser.add_argument('filename_or_url', type=str, help='Source CSV file')
+
+    def handle(self, *args, **options):
+        """Import schools from the CSV file or URL given on the command line.
+
+        Raises CommandError when the file is missing, the header row differs
+        from COLUMNS or any row fails validation (nothing is saved then).
+        """
+        arg = options['filename_or_url']
+
+        if arg.startswith(('http://', 'https://')):
+            self.stdout.write(_("Fetching %s...\n") % (arg,))
+            stream = urllib.request.urlopen(arg)
+        else:
+            if not os.path.exists(arg):
+                raise CommandError(_("File not found: %s") % arg)
+            stream = open(arg, 'rb')
+
+        # Both the HTTP response and the file object are context managers;
+        # close the source even when the import aborts with an exception.
+        with stream:
+            self._import_rows(unicodecsv.DictReader(stream))
+
+    @staticmethod
+    def _normalize_row(row):
+        """Normalize the text fields of a single CSV row in place."""
+        row['address'] = row['address'].replace('ul.', '')
+        row['address'] = row['address'].strip(' ')
+        row['address'] = string.capwords(row['address'])
+
+        row['postal_code'] = ''.join(row['postal_code'].split())
+
+        for hyphen in (' - ', u'\u2010'):
+            row['city'] = row['city'].replace(hyphen, '-')
+        row['city'] = row['city'].title()
+
+        row['province'] = row['province'].lower()
+
+        # Only the first of several listed phone numbers / e-mails is kept.
+        row['phone'] = row['phone'].split(',')[0]
+        row['phone'] = row['phone'].split(';')[0]
+        for c in ['tel.', 'fax.', '(', ')', '-', ' ']:
+            row['phone'] = row['phone'].replace(c, '')
+        row['phone'] = row['phone'].lstrip('0')
+
+        row['email'] = row['email'].split(',')[0]
+        row['email'] = row['email'].split(';')[0]
+
+    def _import_rows(self, reader):
+        """Create or update one School per CSV row in a single transaction."""
+        # fieldnames is None for an empty source; report that as a bad
+        # header instead of crashing inside str.join().
+        fields = reader.fieldnames or []
+        if fields != COLUMNS:
+            raise CommandError(
+                _("Missing header or invalid columns: %(h)s. Expected: %(col)s")
+                % {'h': ', '.join(fields), 'col': ', '.join(COLUMNS)}
+            )
+
+        with transaction.atomic():
+            ok = True
+            all_count = 0
+            created_count = 0
+            for row in reader:
+                all_count += 1
+                self._normalize_row(row)
+
+                school, created = School.objects.get_or_create(
+                    name=row['name'], postal_code=row['postal_code']
+                )
+                if created:
+                    created_count += 1
+
+                for column in COLUMNS:
+                    setattr(school, column, row[column])
+
+                school.is_active = True
+                school.is_approved = True
+
+                try:
+                    school.full_clean()
+                    school.save()
+                except ValidationError as e:
+                    for k, v in e.message_dict.items():
+                        for msg in v:
+                            if k == '__all__':
+                                self.stdout.write(
+                                    _("Line %(lineNum)s: %(msg)s\n")
+                                    % {'lineNum': reader.line_num, 'msg': msg}
+                                )
+                            else:
+                                self.stdout.write(
+                                    _("Line %(lineNum)s, field %(field)s: %(msg)s\n")
+                                    % {
+                                        'lineNum': reader.line_num,
+                                        'field': k,
+                                        'msg': msg,
+                                    }
+                                )
+                    ok = False
+
+            if ok:
+                self.stdout.write(
+                    _("Processed %(all_count)d entries (%(new_count)d new)\n")
+                    % {'all_count': all_count, 'new_count': created_count}
+                )
+            else:
+                # Raising inside the atomic block rolls every change back.
+                raise CommandError(
+                    _("There were some errors. Database not changed\n")
+                )
diff --git a/oioioi/oi/migrations/0007_schooltype_school_rspo_and_more.py b/oioioi/oi/migrations/0007_schooltype_school_rspo_and_more.py
new file mode 100644
index 000000000..bcd164764
--- /dev/null
+++ b/oioioi/oi/migrations/0007_schooltype_school_rspo_and_more.py
@@ -0,0 +1,50 @@
+# Generated by Django 4.2.4 on 2023-08-17 10:29
+
+from django.db import migrations, models
+import django.db.models.deletion
+import oioioi.base.utils.validators
+import oioioi.participants.fields
+
+
+def create_default_school_types(apps, schema_editor):
+    """Insert the default SchoolType rows."""
+    names = [
+        'Szkoła podstawowa',
+        'Liceum ogólnokształcące',
+        'Technikum',
+    ]
+    SchoolType = apps.get_model('oi', 'SchoolType')
+    for name in names:
+        SchoolType.objects.create(name=name)
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('oi', '0006_auto_20210620_1806'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='SchoolType',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('name', models.CharField(max_length=255, validators=[oioioi.base.utils.validators.validate_whitespaces], verbose_name='name')),
+            ],
+        ),
+        migrations.AddField(
+            model_name='school',
+            name='rspo',
+            field=models.PositiveIntegerField(blank=True, null=True, unique=True),
+        ),
+        migrations.AddField(
+            model_name='school',
+            name='type',
+            field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='oi.schooltype'),
+        ),
+        # Reversing CreateModel drops the table together with the seeded rows,
+        # so a no-op reverse is enough to keep this migration reversible.
+        migrations.RunPython(
+            create_default_school_types, migrations.RunPython.noop
+        ),
+    ]
diff --git a/oioioi/oi/models.py b/oioioi/oi/models.py
index b390e0d33..1e19b14f8 100644
--- a/oioioi/oi/models.py
+++ b/oioioi/oi/models.py
@@ -62,7 +62,16 @@
 
 
 
+class SchoolType(models.Model):
+    name = models.CharField(
+        max_length=255, validators=[validate_whitespaces], verbose_name=_("name")
+    )
+
+
+
 class School(models.Model):
+    rspo = models.PositiveIntegerField(blank=True, null=True, unique=True)
+    type = models.ForeignKey(SchoolType, null=True, on_delete=models.SET_NULL)
     name = models.CharField(
         max_length=255, validators=[validate_whitespaces], verbose_name=_("name")
     )
diff --git a/oioioi/oi/tests.py b/oioioi/oi/tests.py
index c62a73b62..01064cc61 100644
--- a/oioioi/oi/tests.py
+++ b/oioioi/oi/tests.py
@@ -2,6 +2,7 @@
 import os
 import re
 from datetime import datetime, timedelta, timezone  # pylint: disable=E0611
+from unittest.mock import patch
 
 from django.contrib.admin.utils import quote
 from django.contrib.auth.models import User
@@ -25,6 +26,7 @@ class TestOIAdmin(TestCase):
         'test_contest',
         'test_oi_registration',
         'test_permissions',
+        'test_schools_import',
     ]
 
     def test_admin_menu(self):
@@ -39,12 +41,27 @@ def test_admin_menu(self):
         self.assertNotContains(response, 'Regions')
 
     def test_schools_import(self):
-        filename = os.path.join(os.path.dirname(__file__), 'files', 'schools.csv')
+        # WARNING: the "schools" directory in the current working directory is
+        # expected to be generated by the command and is deleted by this test,
+        # so it must not exist before the test runs.
+        base_dir = os.path.join(os.getcwd(), 'schools')
+        filename = os.path.join(os.path.dirname(__file__), 'files', 'rspo.csv')
         manager = import_schools.Command()
-        manager.run_from_argv(['manage.py', 'import_schools', filename])
-        self.assertEqual(School.objects.count(), 3)
-        school = School.objects.get(postal_code='02-044')
-        self.assertEqual(school.city, u'Bielsko-Biała Zdrój')
+        try:
+            # patch() only takes effect while used as a context manager.
+            with patch('builtins.input', return_value='y'):
+                manager.run_from_argv(
+                    ['manage.py', 'import_schools', filename, '--first-import']
+                )
+            self.assertEqual(School.objects.count(), 7)
+            school = School.objects.get(postal_code='49-305')
+            self.assertEqual(school.city, u'Brzeg')
+        finally:
+            # Clean up even when an assertion above fails.
+            if os.path.isdir(base_dir):
+                for entry in os.listdir(base_dir):
+                    os.remove(os.path.join(base_dir, entry))
+                os.rmdir(base_dir)
 
     def test_safe_exec_mode(self):
         contest = Contest.objects.get()