diff --git a/README.md b/README.md index 5cd78ea..c17a8ae 100644 --- a/README.md +++ b/README.md @@ -55,21 +55,21 @@ ISTAT codes can be found [here](https://it.wikipedia.org/wiki/Codice_ISTAT) or [ >>> pd.Series(["Torino", "Agliè", "Airasca"]).italy_geopop.from_municipality(population_limits='total') ``` -| | municipality_code | municipality | province_code | province | province_short | region | region_code | geometry | population_F | population_M | population | -| --- | ----------------- | ------------ | ------------- | -------- | -------------- | -------- | ----------- | ------------ | ------------ | ------------ | ---------- | -| 0 | 1272 | Torino | 1 | Torino | TO | Piemonte | 1 | MULTIPOLYGON | 441686.0 | 407062.0 | 848748.0 | -| 1 | 1001 | Agliè | 1 | Torino | TO | Piemonte | 1 | MULTIPOLYGON | 1347.0 | 1215.0 | 2562.0 | -| 2 | 1002 | Airasca | 1 | Torino | TO | Piemonte | 1 | MULTIPOLYGON | 1793.0 | 1867.0 | 3660.0 | +| | municipality_code | municipality | province_code | province | province_short | region | region_code | cadastral_code | geometry | population_F | population_M | population | +| --- | ----------------- | ------------ | ------------- | -------- | -------------- | -------- | ----------- | -------------- | ------------ | ------------ | ------------ | ---------- | +| 0 | 1272 | Torino | 1 | Torino | TO | Piemonte | 1 | L219 | MULTIPOLYGON | 441686.0 | 407062.0 | 848748.0 | +| 1 | 1001 | Agliè | 1 | Torino | TO | Piemonte | 1 | A074 | MULTIPOLYGON | 1347.0 | 1215.0 | 2562.0 | +| 2 | 1002 | Airasca | 1 | Torino | TO | Piemonte | 1 | A109 | MULTIPOLYGON | 1793.0 | 1867.0 | 3660.0 | ``` >>> pd.Series(["Torino", "Milano", "Venezia"]).italy_geopop.from_province(population_limits=[50], population_labels=['below_50', 'above_equal_50']) ``` -| | province_code | province | province_short | municipalities | region | region_code | geometry | below_50_F | above_equal_50_F | below_50_M | above_equal_50_M | below_50 | above_equal_50 | -| --- | ------------- | 
-------- | -------------- | ------------------------- | --------- | ----------- | ------------ | ---------- | ---------------- | ---------- | ---------------- | --------- | -------------- | -| 0 | 1 | Torino | TO | [{'municipality_code': 10 | Piemonte | 1 | POLYGON | 550793.0 | 586366.0 | 572143.0 | 499068.0 | 1122936.0 | 1085434.0 | -| 1 | 15 | Milano | MI | [{'municipality_code': 15 | Lombardia | 3 | MULTIPOLYGON | 857481.0 | 792711.0 | 898004.0 | 666434.0 | 1755485.0 | 1459145.0 | -| 2 | 27 | Venezia | VE | [{'municipality_code': 27 | Veneto | 5 | POLYGON | 205100.0 | 224401.0 | 214116.0 | 193299.0 | 419216.0 | 417700.0 | +| | province_code | province | province_short | municipalities | region | region_code | geometry | below_50_F | above_equal_50_F | below_50_M | above_equal_50_M | below_50 | above_equal_50 | +| --- | ------------- | -------- | -------------- | -------------------------- | --------- | ----------- | ------------ | ---------- | ---------------- | ---------- | ---------------- | --------- | -------------- | +| 0 | 1 | Torino | TO | [{'cadastral_code': 'A074' | Piemonte | 1 | POLYGON | 550793.0 | 586366.0 | 572143.0 | 499068.0 | 1122936.0 | 1085434.0 | +| 1 | 15 | Milano | MI | [{'cadastral_code': 'A010' | Lombardia | 3 | MULTIPOLYGON | 857481.0 | 792711.0 | 898004.0 | 666434.0 | 1755485.0 | 1459145.0 | +| 2 | 27 | Venezia | VE | [{'cadastral_code': 'A302' | Veneto | 5 | POLYGON | 205100.0 | 224401.0 | 214116.0 | 193299.0 | 419216.0 | 417700.0 | ``` @@ -79,9 +79,9 @@ ISTAT codes can be found [here](https://it.wikipedia.org/wiki/Codice_ISTAT) or [ | | region_code | region | provinces | geometry | <3_F | 3-11_F | 11-19_F | 19-25_F | 25-50_F | 50-65_F | 65-75_F | >=75_F | <3_M | 3-11_M | 11-19_M | 19-25_M | 25-50_M | 50-65_M | 65-75_M | >=75_M | <3 | 3-11 | 11-19 | 19-25 | 25-50 | 50-65 | 65-75 | >=75 | | --- | ----------- | --------- | ------------------------- | ------------ | -------- | -------- | -------- | -------- | --------- | --------- | 
-------- | -------- | -------- | -------- | -------- | -------- | --------- | --------- | -------- | -------- | -------- | -------- | -------- | -------- | --------- | --------- | --------- | --------- | -| 0 | 1 | Piemonte | [{'province_code': 1, 'pr | POLYGON | 40122.0 | 131269.0 | 149768.0 | 112474.0 | 614252.0 | 506764.0 | 279224.0 | 348632.0 | 42361.0 | 138788.0 | 159618.0 | 123911.0 | 629878.0 | 490464.0 | 251918.0 | 236907.0 | 82483.0 | 270057.0 | 309386.0 | 236385.0 | 1244130.0 | 997228.0 | 531142.0 | 585539.0 | -| 1 | 3 | Lombardia | [{'province_code': 12, 'p | MULTIPOLYGON | 103867.0 | 336353.0 | 378153.0 | 274455.0 | 1520576.0 | 1144338.0 | 586818.0 | 716916.0 | 109087.0 | 356547.0 | 403719.0 | 303888.0 | 1572013.0 | 1135834.0 | 524720.0 | 475720.0 | 212954.0 | 692900.0 | 781872.0 | 578343.0 | 3092589.0 | 2280172.0 | 1111538.0 | 1192636.0 | -| 2 | 5 | Veneto | [{'province_code': 23, 'p | POLYGON | 48285.0 | 157284.0 | 182441.0 | 136850.0 | 718105.0 | 578543.0 | 291166.0 | 354328.0 | 51390.0 | 166176.0 | 194064.0 | 149055.0 | 737009.0 | 573454.0 | 267403.0 | 242192.0 | 99675.0 | 323460.0 | 376505.0 | 285905.0 | 1455114.0 | 1151997.0 | 558569.0 | 596520.0 | +| 0 | 1 | Piemonte | [{'municipalities': array | POLYGON | 40122.0 | 131269.0 | 149768.0 | 112474.0 | 614252.0 | 506764.0 | 279224.0 | 348632.0 | 42361.0 | 138788.0 | 159618.0 | 123911.0 | 629878.0 | 490464.0 | 251918.0 | 236907.0 | 82483.0 | 270057.0 | 309386.0 | 236385.0 | 1244130.0 | 997228.0 | 531142.0 | 585539.0 | +| 1 | 3 | Lombardia | [{'municipalities': array | MULTIPOLYGON | 103867.0 | 336353.0 | 378153.0 | 274455.0 | 1520576.0 | 1144338.0 | 586818.0 | 716916.0 | 109087.0 | 356547.0 | 403719.0 | 303888.0 | 1572013.0 | 1135834.0 | 524720.0 | 475720.0 | 212954.0 | 692900.0 | 781872.0 | 578343.0 | 3092589.0 | 2280172.0 | 1111538.0 | 1192636.0 | +| 2 | 5 | Veneto | [{'municipalities': array | POLYGON | 48285.0 | 157284.0 | 182441.0 | 136850.0 | 718105.0 | 578543.0 | 291166.0 | 354328.0 | 51390.0 | 
166176.0 | 194064.0 | 149055.0 | 737009.0 | 573454.0 | 267403.0 | 242192.0 | 99675.0 | 323460.0 | 376505.0 | 285905.0 | 1455114.0 | 1151997.0 | 558569.0 | 596520.0 | ``` @@ -91,10 +91,10 @@ ISTAT codes can be found [here](https://it.wikipedia.org/wiki/Codice_ISTAT) or [ | | region_code | region | provinces | geometry | <3_F | 3-11_F | 11-19_F | 19-25_F | 25-50_F | 50-65_F | 65-75_F | >=75_F | <3_M | 3-11_M | 11-19_M | 19-25_M | 25-50_M | 50-65_M | 65-75_M | >=75_M | <3 | 3-11 | 11-19 | 19-25 | 25-50 | 50-65 | 65-75 | >=75 | | --- | ----------- | --------- | ------------------------- | ------------ | -------- | -------- | -------- | -------- | --------- | --------- | -------- | -------- | -------- | -------- | -------- | -------- | --------- | --------- | -------- | -------- | -------- | -------- | -------- | -------- | --------- | --------- | --------- | --------- | -| 0 | 3.0 | Lombardia | [{'province_code': 12, 'p | MULTIPOLYGON | 103867.0 | 336353.0 | 378153.0 | 274455.0 | 1520576.0 | 1144338.0 | 586818.0 | 716916.0 | 109087.0 | 356547.0 | 403719.0 | 303888.0 | 1572013.0 | 1135834.0 | 524720.0 | 475720.0 | 212954.0 | 692900.0 | 781872.0 | 578343.0 | 3092589.0 | 2280172.0 | 1111538.0 | 1192636.0 | -| 1 | 5.0 | Veneto | [{'province_code': 23, 'p | POLYGON | 48285.0 | 157284.0 | 182441.0 | 136850.0 | 718105.0 | 578543.0 | 291166.0 | 354328.0 | 51390.0 | 166176.0 | 194064.0 | 149055.0 | 737009.0 | 573454.0 | 267403.0 | 242192.0 | 99675.0 | 323460.0 | 376505.0 | 285905.0 | 1455114.0 | 1151997.0 | 558569.0 | 596520.0 | +| 0 | 3.0 | Lombardia | [{'municipalities': array | MULTIPOLYGON | 103867.0 | 336353.0 | 378153.0 | 274455.0 | 1520576.0 | 1144338.0 | 586818.0 | 716916.0 | 109087.0 | 356547.0 | 403719.0 | 303888.0 | 1572013.0 | 1135834.0 | 524720.0 | 475720.0 | 212954.0 | 692900.0 | 781872.0 | 578343.0 | 3092589.0 | 2280172.0 | 1111538.0 | 1192636.0 | +| 1 | 5.0 | Veneto | [{'municipalities': array | POLYGON | 48285.0 | 157284.0 | 182441.0 | 136850.0 | 718105.0 | 
578543.0 | 291166.0 | 354328.0 | 51390.0 | 166176.0 | 194064.0 | 149055.0 | 737009.0 | 573454.0 | 267403.0 | 242192.0 | 99675.0 | 323460.0 | 376505.0 | 285905.0 | 1455114.0 | 1151997.0 | 558569.0 | 596520.0 | | 2 | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | -| 3 | 15.0 | Campania | [{'province_code': 61, 'p | MULTIPOLYGON | 65798.0 | 201345.0 | 239653.0 | 185798.0 | 909861.0 | 641838.0 | 320637.0 | 311913.0 | 69298.0 | 213525.0 | 253444.0 | 200452.0 | 907541.0 | 602405.0 | 288497.0 | 212415.0 | 135096.0 | 414870.0 | 493097.0 | 386250.0 | 1817402.0 | 1244243.0 | 609134.0 | 524328.0 | +| 3 | 15.0 | Campania | [{'municipalities': array | MULTIPOLYGON | 65798.0 | 201345.0 | 239653.0 | 185798.0 | 909861.0 | 641838.0 | 320637.0 | 311913.0 | 69298.0 | 213525.0 | 253444.0 | 200452.0 | 907541.0 | 602405.0 | 288497.0 | 212415.0 | 135096.0 | 414870.0 | 493097.0 | 386250.0 | 1817402.0 | 1244243.0 | 609134.0 | 524328.0 | ## 📖 License diff --git a/docs/source/_static/assets/quick_usage_01.csv b/docs/source/_static/assets/quick_usage_01.csv index 2aaaeff..a5f1f00 100644 --- a/docs/source/_static/assets/quick_usage_01.csv +++ b/docs/source/_static/assets/quick_usage_01.csv @@ -1,4 +1,4 @@ -,municipality_code,municipality,province_code,province,province_short,region,region_code,geometry,population_F,population_M,population -0,1272,Torino,1,Torino,TO,Piemonte,1,MULTIPOLYGON ,441686.0,407062.0,848748.0 -1,1001,Agliè,1,Torino,TO,Piemonte,1,MULTIPOLYGON ,1347.0,1215.0,2562.0 -2,1002,Airasca,1,Torino,TO,Piemonte,1,MULTIPOLYGON ,1793.0,1867.0,3660.0 +,municipality_code,municipality,cadastral_code,province_code,province,province_short,region,region_code,geometry,population_F,population_M,population +0,1272,Torino,L219,1,Torino,TO,Piemonte,1,MULTIPOLYGON ,441686.0,407062.0,848748.0 +1,1001,Agliè,A074,1,Torino,TO,Piemonte,1,MULTIPOLYGON ,1347.0,1215.0,2562.0 
+2,1002,Airasca,A109,1,Torino,TO,Piemonte,1,MULTIPOLYGON ,1793.0,1867.0,3660.0 diff --git a/docs/source/_static/assets/quick_usage_02.csv b/docs/source/_static/assets/quick_usage_02.csv index eec10ba..463e065 100644 --- a/docs/source/_static/assets/quick_usage_02.csv +++ b/docs/source/_static/assets/quick_usage_02.csv @@ -1,4 +1,4 @@ ,province_code,province,province_short,municipalities,region,region_code,geometry,below_50_F,above_equal_50_F,below_50_M,above_equal_50_M,below_50,above_equal_50 -0,1,Torino,TO,[{'municipality_code': 10,Piemonte,1,POLYGON ,550793.0,586366.0,572143.0,499068.0,1122936.0,1085434.0 -1,15,Milano,MI,[{'municipality_code': 15,Lombardia,3,MULTIPOLYGON ,857481.0,792711.0,898004.0,666434.0,1755485.0,1459145.0 -2,27,Venezia,VE,[{'municipality_code': 27,Veneto,5,POLYGON ,205100.0,224401.0,214116.0,193299.0,419216.0,417700.0 +0,1,Torino,TO,[{'cadastral_code': 'A074,Piemonte,1,POLYGON ,550793.0,586366.0,572143.0,499068.0,1122936.0,1085434.0 +1,15,Milano,MI,[{'cadastral_code': 'A010,Lombardia,3,MULTIPOLYGON ,857481.0,792711.0,898004.0,666434.0,1755485.0,1459145.0 +2,27,Venezia,VE,[{'cadastral_code': 'A302,Veneto,5,POLYGON ,205100.0,224401.0,214116.0,193299.0,419216.0,417700.0 diff --git a/docs/source/_static/assets/quick_usage_03.csv b/docs/source/_static/assets/quick_usage_03.csv index 4992bce..3004dfd 100644 --- a/docs/source/_static/assets/quick_usage_03.csv +++ b/docs/source/_static/assets/quick_usage_03.csv @@ -1,4 +1,4 @@ ,region_code,region,provinces,geometry,<3_F,3-11_F,11-19_F,19-25_F,25-50_F,50-65_F,65-75_F,>=75_F,<3_M,3-11_M,11-19_M,19-25_M,25-50_M,50-65_M,65-75_M,>=75_M,<3,3-11,11-19,19-25,25-50,50-65,65-75,>=75 -0,1,Piemonte,"[{'province_code': 1, 'pr",POLYGON ,40122.0,131269.0,149768.0,112474.0,614252.0,506764.0,279224.0,348632.0,42361.0,138788.0,159618.0,123911.0,629878.0,490464.0,251918.0,236907.0,82483.0,270057.0,309386.0,236385.0,1244130.0,997228.0,531142.0,585539.0 -1,3,Lombardia,"[{'province_code': 12, 'p",MULTIPOLYGON 
,103867.0,336353.0,378153.0,274455.0,1520576.0,1144338.0,586818.0,716916.0,109087.0,356547.0,403719.0,303888.0,1572013.0,1135834.0,524720.0,475720.0,212954.0,692900.0,781872.0,578343.0,3092589.0,2280172.0,1111538.0,1192636.0 -2,5,Veneto,"[{'province_code': 23, 'p",POLYGON ,48285.0,157284.0,182441.0,136850.0,718105.0,578543.0,291166.0,354328.0,51390.0,166176.0,194064.0,149055.0,737009.0,573454.0,267403.0,242192.0,99675.0,323460.0,376505.0,285905.0,1455114.0,1151997.0,558569.0,596520.0 +0,1,Piemonte,[{'municipalities': array,POLYGON ,40122.0,131269.0,149768.0,112474.0,614252.0,506764.0,279224.0,348632.0,42361.0,138788.0,159618.0,123911.0,629878.0,490464.0,251918.0,236907.0,82483.0,270057.0,309386.0,236385.0,1244130.0,997228.0,531142.0,585539.0 +1,3,Lombardia,[{'municipalities': array,MULTIPOLYGON ,103867.0,336353.0,378153.0,274455.0,1520576.0,1144338.0,586818.0,716916.0,109087.0,356547.0,403719.0,303888.0,1572013.0,1135834.0,524720.0,475720.0,212954.0,692900.0,781872.0,578343.0,3092589.0,2280172.0,1111538.0,1192636.0 +2,5,Veneto,[{'municipalities': array,POLYGON ,48285.0,157284.0,182441.0,136850.0,718105.0,578543.0,291166.0,354328.0,51390.0,166176.0,194064.0,149055.0,737009.0,573454.0,267403.0,242192.0,99675.0,323460.0,376505.0,285905.0,1455114.0,1151997.0,558569.0,596520.0 diff --git a/docs/source/_static/assets/quick_usage_04.csv b/docs/source/_static/assets/quick_usage_04.csv index dcd59de..2e4b8fa 100644 --- a/docs/source/_static/assets/quick_usage_04.csv +++ b/docs/source/_static/assets/quick_usage_04.csv @@ -1,5 +1,5 @@ ,region_code,region,provinces,geometry,<3_F,3-11_F,11-19_F,19-25_F,25-50_F,50-65_F,65-75_F,>=75_F,<3_M,3-11_M,11-19_M,19-25_M,25-50_M,50-65_M,65-75_M,>=75_M,<3,3-11,11-19,19-25,25-50,50-65,65-75,>=75 -0,3.0,Lombardia,"[{'province_code': 12, 'p",MULTIPOLYGON 
,103867.0,336353.0,378153.0,274455.0,1520576.0,1144338.0,586818.0,716916.0,109087.0,356547.0,403719.0,303888.0,1572013.0,1135834.0,524720.0,475720.0,212954.0,692900.0,781872.0,578343.0,3092589.0,2280172.0,1111538.0,1192636.0 -1,5.0,Veneto,"[{'province_code': 23, 'p",POLYGON ,48285.0,157284.0,182441.0,136850.0,718105.0,578543.0,291166.0,354328.0,51390.0,166176.0,194064.0,149055.0,737009.0,573454.0,267403.0,242192.0,99675.0,323460.0,376505.0,285905.0,1455114.0,1151997.0,558569.0,596520.0 +0,3.0,Lombardia,[{'municipalities': array,MULTIPOLYGON ,103867.0,336353.0,378153.0,274455.0,1520576.0,1144338.0,586818.0,716916.0,109087.0,356547.0,403719.0,303888.0,1572013.0,1135834.0,524720.0,475720.0,212954.0,692900.0,781872.0,578343.0,3092589.0,2280172.0,1111538.0,1192636.0 +1,5.0,Veneto,[{'municipalities': array,POLYGON ,48285.0,157284.0,182441.0,136850.0,718105.0,578543.0,291166.0,354328.0,51390.0,166176.0,194064.0,149055.0,737009.0,573454.0,267403.0,242192.0,99675.0,323460.0,376505.0,285905.0,1455114.0,1151997.0,558569.0,596520.0 2,nan,nan,nan,nan,nan,nan,nan,nan,nan,nan,nan,nan,nan,nan,nan,nan,nan,nan,nan,nan,nan,nan,nan,nan,nan,nan,nan,nan -3,15.0,Campania,"[{'province_code': 61, 'p",MULTIPOLYGON ,65798.0,201345.0,239653.0,185798.0,909861.0,641838.0,320637.0,311913.0,69298.0,213525.0,253444.0,200452.0,907541.0,602405.0,288497.0,212415.0,135096.0,414870.0,493097.0,386250.0,1817402.0,1244243.0,609134.0,524328.0 +3,15.0,Campania,[{'municipalities': array,MULTIPOLYGON ,65798.0,201345.0,239653.0,185798.0,909861.0,641838.0,320637.0,311913.0,69298.0,213525.0,253444.0,200452.0,907541.0,602405.0,288497.0,212415.0,135096.0,414870.0,493097.0,386250.0,1817402.0,1244243.0,609134.0,524328.0 diff --git a/docs/source/api.rst b/docs/source/api.rst index cf757a3..427e1cc 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -59,6 +59,7 @@ Municipality data - ``municipality: str`` - municipality name, capitalized, available only if data is retrieved using 
:py:meth:`italy_goepop.pandas_extension.ItalyGeopop.from_municipality`. - ``municipality_code: int`` - municipality istat code, available only if data is retrieved using :py:meth:`italy_goepop.pandas_extension.ItalyGeopop.from_municipality`. +- ``cadastral_code: str`` - municipality cadastral code (also known as Belfiore's code), available only if data is retrieved using :py:meth:`italy_geopop.pandas_extension.ItalyGeopop.from_municipality`. - ``municipalities: list`` - a list of dictionaries with the following structure ``{'municipality_code': , 'municipality': pd.DataFrame: """Get data for municipalities. - Input series can contain municipalities names or municipalities istat codes; *data types can also be mixed*. + Input series can contain municipalities names, municipalities istat codes or municipalities cadastral codes (also known as Belfiore codes); *data types can also be mixed*. If input data is not found in italian data, a row of NaNs is returned, *this behaviour may change in the future.* :param return_cols: used to subset the returned data in order to provide the requested fields. If None, all available fields are returned. If is an instance of re.Pattern or is a string and regex param is True columns will be filtered and returned only if their names match the regular expression. The available fields are listed above, defaults to None. @@ -91,7 +93,7 @@ def from_municipality( :return: Requested data in a 2-dimensional dataframe that has the same index of input data. 
:rtype: pandas.DataFrame """ - str_indexed, code_indexed = self._generate_municipality_dfs( + str_indexed, code_indexed, cadastral_indexed = self._generate_municipality_dfs( population_limits=population_limits, population_labels=population_labels, include_geometry=self.include_geometry, @@ -99,15 +101,20 @@ def from_municipality( empty_serie = self._generate_empty_serie(str_indexed.columns.to_list()) str_indexed = dict(str_indexed.iterrows()) code_indexed = dict(code_indexed.iterrows()) + cadastral_indexed = dict(cadastral_indexed.iterrows()) @cache def get_data(x) -> pd.Series: x = str(x).strip().lower() try: x = int(float(x)) + print('x is int', x) return code_indexed.get(x, empty_serie) except Exception: - return str_indexed.get(x, empty_serie) + if re.fullmatch('[a-z][0-9]{3}', x): + return cadastral_indexed.get(x, empty_serie) + else: + return str_indexed.get(x, empty_serie) return handle_return_cols(self._obj.apply(get_data), return_cols, regex) diff --git a/tests/test_geopop.py b/tests/test_geopop.py index 1b50779..cdacfcf 100644 --- a/tests/test_geopop.py +++ b/tests/test_geopop.py @@ -7,6 +7,7 @@ _municipality_columns = [ 'municipality', + 'cadastral_code', 'province', 'province_code', 'province_short', diff --git a/tests/test_pandas_extension.py b/tests/test_pandas_extension.py index 52d6ba1..0f47761 100644 --- a/tests/test_pandas_extension.py +++ b/tests/test_pandas_extension.py @@ -12,6 +12,7 @@ _municipality_columns = [ 'municipality', 'municipality_code', + 'cadastral_code', ] _province_columns = [ @@ -94,6 +95,21 @@ def municipality(request) -> pd.Series: return pd.Series( [70070, 88007, 60018, 4001, 16131, 17097, 4071, 29026, 26084, 6015] ) + elif request.param == 'cadastral': + return pd.Series( + [ + 'B368', + 'H838', + 'M212', + 'M194', + 'H265', + 'H432', + 'C236', + 'C879', + 'B160', + 'B551', + ] + ) else: return pd.Series( [ @@ -368,15 +384,9 @@ def test_pandas_extension_from_municipality_returns_right_columns( ) df_columns = 
list(output.columns) for c in ( - [ - 'municipality', - 'municipality_code', - 'province', - 'province_code', - 'province_short', - 'region', - 'region_code', - ] + _municipality_columns + + _province_columns + + _region_columns + pop_cols + (['geometry'] if include_geometry else []) ): @@ -386,6 +396,43 @@ def test_pandas_extension_from_municipality_returns_right_columns( assert len(df_columns) == 0 +@pytest.mark.parametrize('include_geometry', [True, False]) +@pytest.mark.parametrize( + 'population_limits,population_labels', + [ + ('auto', None), + ('total', None), + ([50.0, 75], None), + ([50], ['below_50', 'above_50']), + ], +) +def test_pandas_extension_from_municipality_finds_results( + municipality, include_geometry, population_limits, population_labels +): + if population_limits == 'auto': + pop_cols = _auto_population_limits_columns + elif population_limits == 'total': + pop_cols = _total_population_columns + else: + pop_cols = [] + if population_labels is None: + _pop_limits = prepare_limits(population_limits) + for c in generate_labels_for_age_cutoffs(_pop_limits): + pop_cols.append(c) + pop_cols.append(f'{c}_M') + pop_cols.append(f'{c}_F') + else: + for c in population_labels: + pop_cols.append(c) + pop_cols.append(f'{c}_M') + pop_cols.append(f'{c}_F') + with pandas_activate_context(include_geometry=include_geometry): + output = municipality.italy_geopop.from_municipality( + population_limits=population_limits, population_labels=population_labels + ) + assert not len(output[output.municipality_code.isna()]) + + @pytest.mark.parametrize('include_geometry', [True, False]) @pytest.mark.parametrize( 'population_limits,population_labels', @@ -422,14 +469,9 @@ def test_pandas_extension_from_province_returns_right_columns( ) df_columns = list(output.columns) for c in ( - [ - 'province_code', - 'province', - 'province_short', - 'municipalities', - 'region', - 'region_code', - ] + ['municipalities'] + + _province_columns + + _region_columns + pop_cols + 
(['geometry'] if include_geometry else []) ): @@ -439,6 +481,43 @@ def test_pandas_extension_from_province_returns_right_columns( assert len(df_columns) == 0 +@pytest.mark.parametrize('include_geometry', [True, False]) +@pytest.mark.parametrize( + 'population_limits,population_labels', + [ + ('auto', None), + ('total', None), + ([50.0, 75], None), + ([50], ['below_50', 'above_50']), + ], +) +def test_pandas_extension_from_province_finds_results( + province, include_geometry, population_limits, population_labels +): + if population_limits == 'auto': + pop_cols = _auto_population_limits_columns + elif population_limits == 'total': + pop_cols = _total_population_columns + else: + pop_cols = [] + if population_labels is None: + _pop_limits = prepare_limits(population_limits) + for c in generate_labels_for_age_cutoffs(_pop_limits): + pop_cols.append(c) + pop_cols.append(f'{c}_M') + pop_cols.append(f'{c}_F') + else: + for c in population_labels: + pop_cols.append(c) + pop_cols.append(f'{c}_M') + pop_cols.append(f'{c}_F') + with pandas_activate_context(include_geometry=include_geometry): + output = province.italy_geopop.from_province( + population_limits=population_limits, population_labels=population_labels + ) + assert not len(output[output.province_code.isna()]) + + @pytest.mark.parametrize('include_geometry', [True, False]) @pytest.mark.parametrize( 'population_limits,population_labels', @@ -475,11 +554,8 @@ def test_pandas_extension_from_region_returns_right_columns( ) df_columns = list(output.columns) for c in ( - [ - 'provinces', - 'region', - 'region_code', - ] + ['provinces'] + + _region_columns + pop_cols + (['geometry'] if include_geometry else []) ): @@ -489,6 +565,43 @@ def test_pandas_extension_from_region_returns_right_columns( assert len(df_columns) == 0 +@pytest.mark.parametrize('include_geometry', [True, False]) +@pytest.mark.parametrize( + 'population_limits,population_labels', + [ + ('auto', None), + ('total', None), + ([50.0, 75], None), + ([50], 
['below_50', 'above_50']), + ], +) +def test_pandas_extension_from_region_finds_results( + region, include_geometry, population_limits, population_labels +): + if population_limits == 'auto': + pop_cols = _auto_population_limits_columns + elif population_limits == 'total': + pop_cols = _total_population_columns + else: + pop_cols = [] + if population_labels is None: + _pop_limits = prepare_limits(population_limits) + for c in generate_labels_for_age_cutoffs(_pop_limits): + pop_cols.append(c) + pop_cols.append(f'{c}_M') + pop_cols.append(f'{c}_F') + else: + for c in population_labels: + pop_cols.append(c) + pop_cols.append(f'{c}_M') + pop_cols.append(f'{c}_F') + with pandas_activate_context(include_geometry=include_geometry): + output = region.italy_geopop.from_region( + population_limits=population_limits, population_labels=population_labels + ) + assert not len(output[output.region_code.isna()]) + + # Test smart features @pytest.mark.parametrize('include_geometry', [True, False]) def test_pandas_extension_find_correct_municipality_information_from_complex_municipality_name( @@ -516,8 +629,12 @@ def test_pandas_extension_find_correct_province_information_from_complex_provinc province_name_complex, province_name_complex_to_simple, include_geometry ): with pandas_activate_context(include_geometry=include_geometry): - expected = province_name_complex_to_simple.italy_geopop.from_province() - output = province_name_complex.italy_geopop.smart_from_province() + expected = province_name_complex_to_simple.italy_geopop.from_province().drop( + ['municipalities'], axis=1 + ) + output = province_name_complex.italy_geopop.smart_from_province().drop( + ['municipalities'], axis=1 + ) assert (output != expected).sum().sum() == 0 @@ -537,8 +654,12 @@ def test_pandas_extension_find_correct_region_information_from_complex_region_na region_name_complex, region_name_complex_to_simple, include_geometry ): with pandas_activate_context(include_geometry=include_geometry): - expected = 
region_name_complex_to_simple.italy_geopop.from_region() - output = region_name_complex.italy_geopop.smart_from_region() + expected = region_name_complex_to_simple.italy_geopop.from_region().drop( + ['provinces'], axis=1 + ) + output = region_name_complex.italy_geopop.smart_from_region().drop( + ['provinces'], axis=1 + ) assert (output != expected).sum().sum() == 0 diff --git a/tests/test_pandas_extension_from_municipality.py b/tests/test_pandas_extension_from_municipality.py deleted file mode 100644 index 57ac48b..0000000 --- a/tests/test_pandas_extension_from_municipality.py +++ /dev/null @@ -1,184 +0,0 @@ -# import geopandas as gpd -# import pandas as pd -# import pytest - -# from italy_geopop.geopop import ItalyGeopopDataFrame -# from italy_geopop.pandas_extension import pandas_activate_context - - -# _municipality_columns = [ -# 'municipality', -# 'municipality_code', -# ] - -# _province_columns = [ -# 'province_code', -# 'province', -# 'province_short', -# ] - - -# _region_columns = [ -# 'region', -# 'region_code', -# ] - -# _population_columns = [ -# 'population', -# 'population_M', -# 'population_F', -# ] - - -# @pytest.fixture -# def dummy_series() -> pd.Series: -# """ -# Returns a pd.Series with empty string values '' indexed as range(0, 500, 5). -# """ -# index = list(range(0, 500, 5)) -# return pd.Series(['' for _ in index], index=index) - - -# @pytest.fixture -# def municipality_names() -> pd.Series: -# """ -# Returns a pd.Series with some valid municipality names. This function ensures that returned municipalities are unique in Italy municipalities. -# """ -# return pd.Series( -# [ -# 'Ronzo-Chienis', -# 'Castelvisconti', -# 'Scafati', -# 'Gandosso', -# 'Vasto', -# 'Vigolo', -# 'Giffone', -# 'Mezzana Rabattone', -# 'Tissi', -# 'Boara Pisani', -# ] -# ) - - -# @pytest.fixture -# def municipality_name_complex() -> pd.Series: -# """ -# Returns a pd.Series with a valid complex municipality name. 
-# """ -# return pd.Series(['Comune di Abano Terme']) - - -# @pytest.fixture -# def municipality_name_complex_to_simple() -> pd.Series: -# """ -# Returns a pd.Series with a the right simple name for municipality_name_complex above. -# """ -# return pd.Series(['abano terme']) - - -# @pytest.fixture -# def not_unequivocal_municipality_name_complex() -> pd.Series: -# """ -# Returns a pd.Series with a non-unequivocal complex municipality name. -# """ -# return pd.Series(['Verona or Milano']) - - -# @pytest.fixture -# def municipality_codes() -> pd.Series: -# """ -# Returns a pd.Series with some valid municipalities codes. -# """ -# return pd.Series( -# [70070, 88007, 60018, 4001, 16131, 17097, 4071, 29026, 26084, 6015] -# ) - - -# def test_pandas_extension_find_correct_municipality_information_from_names_with_geometry( -# geopop_df, municipality_names -# ): -# with pandas_activate_context(include_geometry=True): -# expected = ( -# geopop_df.set_index('municipality') -# .loc[municipality_names.to_list()] -# .reset_index() -# ) -# expected = pd.merge( -# expected, -# ItalyGeopopDataFrame.get_municipalities_geometry(), -# how='left', -# left_on='municipality_code', -# right_index=True, -# ) -# expected = gpd.GeoDataFrame(expected) - -# output = municipality_names.italy_geopop.from_municipality() -# output = output[expected.columns] - -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_find_correct_municipality_information_from_codes_with_geometry( -# geopop_df, municipality_codes -# ): -# with pandas_activate_context(include_geometry=True): -# expected = ( -# geopop_df.set_index('municipality_code') -# .loc[municipality_codes.to_list()] -# .reset_index() -# ) -# expected = pd.merge( -# expected, -# ItalyGeopopDataFrame.get_municipalities_geometry(), -# how='left', -# left_on='municipality_code', -# right_index=True, -# ) -# expected = gpd.GeoDataFrame(expected) - -# output = municipality_codes.italy_geopop.from_municipality() -# output = 
output[expected.columns] - -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_find_correct_municipality_information_from_mixed_names_and_codes_with_geometry( -# geopop_df, municipality_codes, municipality_names -# ): -# with pandas_activate_context(include_geometry=True): -# expected_names = ( -# geopop_df.set_index('municipality') -# .loc[municipality_names.to_list()] -# .reset_index() -# ) - -# expected_codes = ( -# geopop_df.set_index('municipality_code') -# .loc[municipality_codes.to_list()] -# .reset_index() -# ) -# expected = pd.concat([expected_names, expected_codes], ignore_index=True) -# expected = pd.merge( -# expected, -# ItalyGeopopDataFrame.get_municipalities_geometry(), -# how='left', -# left_on='municipality_code', -# right_index=True, -# ) -# expected = gpd.GeoDataFrame(expected) - -# input = pd.concat([municipality_names, municipality_codes], ignore_index=True) - -# output = input.italy_geopop.from_municipality() -# output = output[expected.columns] - -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_find_correct_municipality_information_from_complex_municipality_name_with_geometry( -# municipality_name_complex, municipality_name_complex_to_simple -# ): -# with pandas_activate_context(include_geometry=True): -# expected = municipality_name_complex_to_simple.italy_geopop.from_municipality() -# output = municipality_name_complex.italy_geopop.smart_from_municipality() -# assert (output != expected).sum().sum() == 0 diff --git a/tests/test_pandas_extension_from_province.py b/tests/test_pandas_extension_from_province.py deleted file mode 100644 index 56b4529..0000000 --- a/tests/test_pandas_extension_from_province.py +++ /dev/null @@ -1,430 +0,0 @@ -# import geopandas as gpd -# import pandas as pd -# import pytest - -# from italy_geopop.geopop import ItalyGeopopDataFrame -# from italy_geopop.pandas_extension import pandas_activate_context - - -# _municipality_columns = [ -# 'municipality', -# 
'municipality_code', -# ] - -# _province_columns = [ -# 'province_code', -# 'province', -# 'province_short', -# ] - - -# _region_columns = [ -# 'region', -# 'region_code', -# ] - -# _population_columns = [ -# 'population', -# 'population_M', -# 'population_F', -# ] - - -# @pytest.fixture -# def geopop_df() -> ItalyGeopopDataFrame: -# return ItalyGeopopDataFrame() - - -# @pytest.fixture -# def dummy_series() -> pd.Series: -# """ -# Returns a pd.Series with empty string values '' indexed as range(0, 500, 5). -# """ -# index = list(range(0, 500, 5)) -# return pd.Series(['' for _ in index], index=index) - - -# @pytest.fixture -# def province_names() -> pd.Series: -# """ -# Returns a pd.Series with some valid provinces names. -# """ -# return pd.Series( -# [ -# 'Verbano-Cusio-Ossola', -# 'Viterbo', -# 'La Spezia', -# 'Frosinone', -# 'Lodi', -# 'Teramo', -# 'Pordenone', -# 'Genova', -# "Reggio nell'Emilia", -# 'Caltanissetta', -# ] -# ) - - -# @pytest.fixture -# def province_name_complex() -> pd.Series: -# """ -# Returns a pd.Series with a valid complex province name. -# """ -# return pd.Series(['Università degli studi di Verona']) - - -# @pytest.fixture -# def province_name_complex_to_simple() -> pd.Series: -# """ -# Returns a pd.Series with a the right simple name for province_name_complex above. -# """ -# return pd.Series(['verona']) - - -# @pytest.fixture -# def not_unequivocal_province_name_complex() -> pd.Series: -# """ -# Returns a pd.Series with a non-unequivocal complex province name. -# """ -# return pd.Series(['Verona or Milano']) - - -# @pytest.fixture -# def province_abbreviations() -> pd.Series: -# """ -# Returns a pd.Series with some valid provinces abbreviations. -# """ -# return pd.Series(['AG', 'RO', 'SA', 'CH', 'CN', 'SS', 'GO', 'LO', 'AQ', 'TS']) - - -# @pytest.fixture -# def province_codes() -> pd.Series: -# """ -# Returns a pd.Series with some valid provinces codes. 
-# """ -# return pd.Series([68, 26, 75, 7, 32, 74, 4, 38, 57, 47]) - - -# def test_pandas_extension_from_province_return_df_with_same_index_as_input_with_geometry( -# dummy_series, -# ): -# with pandas_activate_context(include_geometry=True): -# output = dummy_series.italy_geopop.from_province() -# diffs = output.index.to_series() != dummy_series.index.to_series() -# assert diffs.sum() == 0 - - -# def test_pandas_extension_from_province_returns_all_columns_if_not_differently_specified_with_geometry( -# dummy_series, -# ): -# with pandas_activate_context(include_geometry=True): -# output = dummy_series.italy_geopop.from_province() -# for c in _province_columns + _region_columns + _population_columns: -# pytest.assume(c in output.columns) -# for c in _municipality_columns: -# pytest.assume(c not in output.columns) - - -# def test_pandas_extension_find_correct_province_information_from_names_with_geometry( -# geopop_df, province_names -# ): -# with pandas_activate_context(include_geometry=True): -# expected = ( -# geopop_df.aggregate_province() -# .set_index('province') -# .loc[province_names.to_list()] -# .reset_index() -# ) -# expected = pd.merge( -# expected, -# ItalyGeopopDataFrame.get_provinces_geometry(), -# how='left', -# left_on='province_code', -# right_index=True, -# ) -# expected = gpd.GeoDataFrame(expected) - -# output = province_names.italy_geopop.from_province() -# output = output[expected.columns] - -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_find_correct_province_information_from_codes_with_geometry( -# geopop_df, province_codes -# ): -# with pandas_activate_context(include_geometry=True): -# expected = ( -# geopop_df.aggregate_province() -# .set_index('province_code') -# .loc[province_codes.to_list()] -# .reset_index() -# ) -# expected = pd.merge( -# expected, -# ItalyGeopopDataFrame.get_provinces_geometry(), -# how='left', -# left_on='province_code', -# right_index=True, -# ) -# expected = 
gpd.GeoDataFrame(expected) - -# output = province_codes.italy_geopop.from_province() -# output = output[expected.columns] - -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_find_correct_province_information_from_names_short_with_geometry( -# geopop_df, province_abbreviations -# ): -# with pandas_activate_context(include_geometry=True): -# expected = ( -# geopop_df.aggregate_province() -# .set_index('province_short') -# .loc[province_abbreviations.to_list()] -# .reset_index() -# ) -# expected = pd.merge( -# expected, -# ItalyGeopopDataFrame.get_provinces_geometry(), -# how='left', -# left_on='province_code', -# right_index=True, -# ) -# expected = gpd.GeoDataFrame(expected) - -# output = province_abbreviations.italy_geopop.from_province() -# output = output[expected.columns] - -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_find_correct_province_information_from_mixed_names_and_codes_and_names_short_with_geometry( -# geopop_df, province_names, province_codes, province_abbreviations -# ): -# with pandas_activate_context(include_geometry=True): -# expected_names = ( -# geopop_df.aggregate_province() -# .set_index('province') -# .loc[province_names.to_list()] -# .reset_index() -# ) - -# expected_codes = ( -# geopop_df.aggregate_province() -# .set_index('province_code') -# .loc[province_codes.to_list()] -# .reset_index() -# ) - -# expected_names_short = ( -# geopop_df.aggregate_province() -# .set_index('province_short') -# .loc[province_abbreviations.to_list()] -# .reset_index() -# ) - -# expected = pd.concat( -# [expected_names, expected_codes, expected_names_short], ignore_index=True -# ) -# expected = pd.merge( -# expected, -# ItalyGeopopDataFrame.get_provinces_geometry(), -# how='left', -# left_on='province_code', -# right_index=True, -# ) -# expected = gpd.GeoDataFrame(expected) - -# input = pd.concat( -# [province_names, province_codes, province_abbreviations], ignore_index=True -# ) - -# output = 
input.italy_geopop.from_province() -# output = output[expected.columns] - -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_find_correct_province_information_from_complex_province_name_with_geometry( -# province_name_complex, province_name_complex_to_simple -# ): -# with pandas_activate_context(include_geometry=True): -# expected = province_name_complex_to_simple.italy_geopop.from_province() -# output = province_name_complex.italy_geopop.smart_from_province() -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_return_nan_for_non_unequivocal_province_name_with_geometry( -# not_unequivocal_province_name_complex, -# ): -# with pandas_activate_context(include_geometry=True): -# output = ( -# not_unequivocal_province_name_complex.italy_geopop.smart_from_province() -# ) -# assert output.isna().all().all() - - -# ## without geometry - - -# def test_pandas_extension_from_province_return_df_with_same_index_as_input_without_geometry( -# dummy_series, -# ): -# with pandas_activate_context(include_geometry=False): -# output = dummy_series.italy_geopop.from_province() -# diffs = output.index.to_series() != dummy_series.index.to_series() -# assert diffs.sum() == 0 - - -# def test_pandas_extension_from_province_returns_all_columns_if_not_differently_specified_without_geometry( -# dummy_series, -# ): -# with pandas_activate_context(include_geometry=False): -# output = dummy_series.italy_geopop.from_province() -# for c in _province_columns + _region_columns + _population_columns: -# pytest.assume(c in output.columns) -# for c in _municipality_columns: -# pytest.assume(c not in output.columns) - - -# def test_pandas_extension_find_correct_province_information_from_names_without_geometry( -# geopop_df, province_names -# ): -# with pandas_activate_context(include_geometry=False): -# expected = ( -# geopop_df.aggregate_province() -# .set_index('province') -# .loc[province_names.to_list()] -# .reset_index() -# ) -# expected = 
pd.merge( -# expected, -# ItalyGeopopDataFrame.get_provinces_geometry(), -# how='left', -# left_on='province_code', -# right_index=True, -# ) -# expected = gpd.GeoDataFrame(expected) - -# output = province_names.italy_geopop.from_province() -# output = output[expected.columns] - -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_find_correct_province_information_from_codes_without_geometry( -# geopop_df, province_codes -# ): -# with pandas_activate_context(include_geometry=False): -# expected = ( -# geopop_df.aggregate_province() -# .set_index('province_code') -# .loc[province_codes.to_list()] -# .reset_index() -# ) -# expected = pd.merge( -# expected, -# ItalyGeopopDataFrame.get_provinces_geometry(), -# how='left', -# left_on='province_code', -# right_index=True, -# ) -# expected = gpd.GeoDataFrame(expected) - -# output = province_codes.italy_geopop.from_province() -# output = output[expected.columns] - -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_find_correct_province_information_from_names_short_without_geometry( -# geopop_df, province_abbreviations -# ): -# with pandas_activate_context(include_geometry=False): -# expected = ( -# geopop_df.aggregate_province() -# .set_index('province_short') -# .loc[province_abbreviations.to_list()] -# .reset_index() -# ) -# expected = pd.merge( -# expected, -# ItalyGeopopDataFrame.get_provinces_geometry(), -# how='left', -# left_on='province_code', -# right_index=True, -# ) -# expected = gpd.GeoDataFrame(expected) - -# output = province_abbreviations.italy_geopop.from_province() -# output = output[expected.columns] - -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_find_correct_province_information_from_mixed_names_and_codes_and_names_short_without_geometry( -# geopop_df, province_names, province_codes, province_abbreviations -# ): -# with pandas_activate_context(include_geometry=False): -# expected_names = ( -# 
geopop_df.aggregate_province() -# .set_index('province') -# .loc[province_names.to_list()] -# .reset_index() -# ) - -# expected_codes = ( -# geopop_df.aggregate_province() -# .set_index('province_code') -# .loc[province_codes.to_list()] -# .reset_index() -# ) - -# expected_names_short = ( -# geopop_df.aggregate_province() -# .set_index('province_short') -# .loc[province_abbreviations.to_list()] -# .reset_index() -# ) - -# expected = pd.concat( -# [expected_names, expected_codes, expected_names_short], ignore_index=True -# ) -# expected = pd.merge( -# expected, -# ItalyGeopopDataFrame.get_provinces_geometry(), -# how='left', -# left_on='province_code', -# right_index=True, -# ) -# expected = gpd.GeoDataFrame(expected) - -# input = pd.concat( -# [province_names, province_codes, province_abbreviations], ignore_index=True -# ) - -# output = input.italy_geopop.from_province() -# output = output[expected.columns] - -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_find_correct_province_information_from_complex_province_name_without_geometry( -# province_name_complex, province_name_complex_to_simple -# ): -# with pandas_activate_context(include_geometry=False): -# expected = province_name_complex_to_simple.italy_geopop.from_province() -# output = province_name_complex.italy_geopop.smart_from_province() -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_return_nan_for_non_unequivocal_province_name_without_geometry( -# not_unequivocal_province_name_complex, -# ): -# with pandas_activate_context(include_geometry=False): -# output = ( -# not_unequivocal_province_name_complex.italy_geopop.smart_from_province() -# ) -# assert output.isna().all().all() diff --git a/tests/test_pandas_extension_from_region.py b/tests/test_pandas_extension_from_region.py deleted file mode 100644 index cc5d55a..0000000 --- a/tests/test_pandas_extension_from_region.py +++ /dev/null @@ -1,346 +0,0 @@ -# import geopandas as gpd -# import 
pandas as pd -# import pytest - -# from italy_geopop.geopop import ItalyGeopopDataFrame -# from italy_geopop.pandas_extension import pandas_activate_context - - -# _municipality_columns = [ -# 'municipality', -# 'municipality_code', -# ] - -# _province_columns = [ -# 'province_code', -# 'province', -# 'province_short', -# ] - - -# _region_columns = [ -# 'region', -# 'region_code', -# ] - -# _population_columns = [ -# 'population', -# 'population_M', -# 'population_F', -# ] - - -# @pytest.fixture -# def geopop_df() -> ItalyGeopopDataFrame: -# return ItalyGeopopDataFrame() - - -# @pytest.fixture -# def dummy_series() -> pd.Series: -# """ -# Returns a pd.Series with empty string values '' indexed as range(0, 500, 5). -# """ -# index = list(range(0, 500, 5)) -# return pd.Series(['' for _ in index], index=index) - - -# @pytest.fixture -# def region_names() -> pd.Series: -# """ -# Returns a pd.Series with some valid regions names. -# """ -# return pd.Series( -# [ -# 'Veneto', -# 'Umbria', -# 'Piemonte', -# 'Sicilia', -# 'Friuli-Venezia Giulia', -# 'Emilia-Romagna', -# 'Campania', -# 'Puglia', -# 'Lazio', -# 'Abruzzo', -# ] -# ) - - -# @pytest.fixture -# def region_name_complex() -> pd.Series: -# """ -# Returns a pd.Series with a valid complex region name. -# """ -# return pd.Series(['Regione del Veneto']) - - -# @pytest.fixture -# def region_name_complex_to_simple() -> pd.Series: -# """ -# Returns a pd.Series with a the right simple name for region_name_complex above. -# """ -# return pd.Series(['veneto']) - - -# @pytest.fixture -# def not_unequivocal_region_name_complex() -> pd.Series: -# """ -# Returns a pd.Series with a non-unequivocal complex region name. -# """ -# return pd.Series(['Piemonte o Lombardia']) - - -# @pytest.fixture -# def region_codes() -> pd.Series: -# """ -# Returns a pd.Series with some valid regions codes. 
-# """ -# return pd.Series([18, 15, 6, 1, 3, 13, 10, 14, 5, 19]) - - -# def test_pandas_extension_from_region_return_df_with_same_index_as_input_with_geometry( -# dummy_series, -# ): -# with pandas_activate_context(include_geometry=True): -# output = dummy_series.italy_geopop.from_region() -# diffs = output.index.to_series() != dummy_series.index.to_series() -# assert diffs.sum() == 0 - - -# def test_pandas_extension_from_region_returns_all_columns_if_not_differently_specified_with_geometry( -# dummy_series, -# ): -# with pandas_activate_context(include_geometry=True): -# output = dummy_series.italy_geopop.from_region() -# for c in _region_columns + _population_columns: -# pytest.assume(c in output.columns) -# for c in _municipality_columns + _province_columns: -# pytest.assume(c not in output.columns) - - -# def test_pandas_extension_find_correct_region_information_from_names_with_geometry( -# geopop_df, region_names -# ): -# with pandas_activate_context(include_geometry=True): -# expected = ( -# geopop_df.aggregate_region() -# .set_index('region') -# .loc[region_names.to_list()] -# .reset_index() -# ) -# expected = pd.merge( -# expected, -# ItalyGeopopDataFrame.get_regions_geometry(), -# how='left', -# left_on='region_code', -# right_index=True, -# ) -# expected = gpd.GeoDataFrame(expected) - -# output = region_names.italy_geopop.from_region() -# output = output[expected.columns] - -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_find_correct_region_information_from_codes_with_geometry( -# geopop_df, region_codes -# ): -# with pandas_activate_context(include_geometry=True): -# expected = ( -# geopop_df.aggregate_region() -# .set_index('region_code') -# .loc[region_codes.to_list()] -# .reset_index() -# ) -# expected = pd.merge( -# expected, -# ItalyGeopopDataFrame.get_regions_geometry(), -# how='left', -# left_on='region_code', -# right_index=True, -# ) -# expected = gpd.GeoDataFrame(expected) - -# output = 
region_codes.italy_geopop.from_region() -# output = output[expected.columns] - -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_find_correct_region_information_from_mixed_names_and_codes_with_geometry( -# geopop_df, region_names, region_codes -# ): -# with pandas_activate_context(include_geometry=True): -# expected_names = ( -# geopop_df.aggregate_region() -# .set_index('region') -# .loc[region_names.to_list()] -# .reset_index() -# ) - -# expected_codes = ( -# geopop_df.aggregate_region() -# .set_index('region_code') -# .loc[region_codes.to_list()] -# .reset_index() -# ) - -# expected = pd.concat([expected_names, expected_codes], ignore_index=True) -# expected = pd.merge( -# expected, -# ItalyGeopopDataFrame.get_regions_geometry(), -# how='left', -# left_on='region_code', -# right_index=True, -# ) -# expected = gpd.GeoDataFrame(expected) - -# input = pd.concat([region_names, region_codes], ignore_index=True) - -# output = input.italy_geopop.from_region() -# output = output[expected.columns] - -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_find_correct_region_information_from_complex_region_name_with_geometry( -# region_name_complex, region_name_complex_to_simple -# ): -# with pandas_activate_context(include_geometry=True): -# expected = region_name_complex_to_simple.italy_geopop.from_region() -# output = region_name_complex.italy_geopop.smart_from_region() -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_return_nan_for_non_unequivocal_region_name_with_geometry( -# not_unequivocal_region_name_complex, -# ): -# with pandas_activate_context(include_geometry=True): -# output = not_unequivocal_region_name_complex.italy_geopop.smart_from_region() -# assert output.isna().all().all() - - -# ## without geometry - - -# def test_pandas_extension_from_region_return_df_with_same_index_as_input_without_geometry( -# dummy_series, -# ): -# with 
pandas_activate_context(include_geometry=False): -# output = dummy_series.italy_geopop.from_region() -# diffs = output.index.to_series() != dummy_series.index.to_series() -# assert diffs.sum() == 0 - - -# def test_pandas_extension_from_region_returns_all_columns_if_not_differently_specified_without_geometry( -# dummy_series, -# ): -# with pandas_activate_context(include_geometry=False): -# output = dummy_series.italy_geopop.from_region() -# for c in _region_columns + _population_columns: -# pytest.assume(c in output.columns) -# for c in _municipality_columns + _province_columns: -# pytest.assume(c not in output.columns) - - -# def test_pandas_extension_find_correct_region_information_from_names_without_geometry( -# geopop_df, region_names -# ): -# with pandas_activate_context(include_geometry=False): -# expected = ( -# geopop_df.aggregate_region() -# .set_index('region') -# .loc[region_names.to_list()] -# .reset_index() -# ) -# expected = pd.merge( -# expected, -# ItalyGeopopDataFrame.get_regions_geometry(), -# how='left', -# left_on='region_code', -# right_index=True, -# ) -# expected = gpd.GeoDataFrame(expected) - -# output = region_names.italy_geopop.from_region() -# output = output[expected.columns] - -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_find_correct_region_information_from_codes_without_geometry( -# geopop_df, region_codes -# ): -# with pandas_activate_context(include_geometry=False): -# expected = ( -# geopop_df.aggregate_region() -# .set_index('region_code') -# .loc[region_codes.to_list()] -# .reset_index() -# ) -# expected = pd.merge( -# expected, -# ItalyGeopopDataFrame.get_regions_geometry(), -# how='left', -# left_on='region_code', -# right_index=True, -# ) -# expected = gpd.GeoDataFrame(expected) - -# output = region_codes.italy_geopop.from_region() -# output = output[expected.columns] - -# assert (output != expected).sum().sum() == 0 - - -# def 
test_pandas_extension_find_correct_region_information_from_mixed_names_and_codes_without_geometry( -# geopop_df, region_names, region_codes -# ): -# with pandas_activate_context(include_geometry=False): -# expected_names = ( -# geopop_df.aggregate_region() -# .set_index('region') -# .loc[region_names.to_list()] -# .reset_index() -# ) - -# expected_codes = ( -# geopop_df.aggregate_region() -# .set_index('region_code') -# .loc[region_codes.to_list()] -# .reset_index() -# ) - -# expected = pd.concat([expected_names, expected_codes], ignore_index=True) -# expected = pd.merge( -# expected, -# ItalyGeopopDataFrame.get_regions_geometry(), -# how='left', -# left_on='region_code', -# right_index=True, -# ) -# expected = gpd.GeoDataFrame(expected) - -# input = pd.concat([region_names, region_codes], ignore_index=True) - -# output = input.italy_geopop.from_region() -# output = output[expected.columns] - -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_find_correct_region_information_from_complex_region_name_without_geometry( -# region_name_complex, region_name_complex_to_simple -# ): -# with pandas_activate_context(include_geometry=False): -# expected = region_name_complex_to_simple.italy_geopop.from_region() -# output = region_name_complex.italy_geopop.smart_from_region() -# assert (output != expected).sum().sum() == 0 - - -# def test_pandas_extension_return_nan_for_non_unequivocal_region_name_without_geometry( -# not_unequivocal_region_name_complex, -# ): -# with pandas_activate_context(include_geometry=False): -# output = not_unequivocal_region_name_complex.italy_geopop.smart_from_region() -# assert output.isna().all().all()