Skip to content

Commit 3f68a5e

Browse files
committed
implementando issue #8
1 parent 34a01f9 commit 3f68a5e

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

59 files changed

+1080093
-41
lines changed

assets/plygon-bugExample01.png

64.2 KB

data/dump_osm/AC.geojson

Lines changed: 69426 additions & 1 deletion

data/dump_osm/AL.geojson

Lines changed: 114150 additions & 0 deletions

data/dump_osm/AP.geojson

Lines changed: 14902 additions & 0 deletions

data/dump_osm/BA.geojson

Lines changed: 79546 additions & 0 deletions

data/dump_osm/CE.geojson

Lines changed: 11454 additions & 0 deletions

data/dump_osm/ES.geojson

Lines changed: 24550 additions & 0 deletions

data/dump_osm/GO.geojson

Lines changed: 12794 additions & 0 deletions

data/dump_osm/MG.geojson

Lines changed: 119082 additions & 0 deletions

data/dump_osm/MS.geojson

Lines changed: 62358 additions & 0 deletions

data/dump_osm/MT.geojson

Lines changed: 70838 additions & 0 deletions

data/dump_osm/PA.geojson

Lines changed: 34602 additions & 0 deletions

data/dump_osm/PB.geojson

Lines changed: 20794 additions & 0 deletions

data/dump_osm/PE.geojson

Lines changed: 26578 additions & 0 deletions

data/dump_osm/PI.geojson

Lines changed: 17146 additions & 0 deletions

data/dump_osm/PR.geojson

Lines changed: 98706 additions & 0 deletions

data/dump_osm/RJ.geojson

Lines changed: 31354 additions & 0 deletions

data/dump_osm/RN.geojson

Lines changed: 11526 additions & 0 deletions

data/dump_osm/RO.geojson

Lines changed: 24362 additions & 0 deletions

data/dump_osm/RR.geojson

Lines changed: 30414 additions & 0 deletions

data/dump_osm/RS.geojson

Lines changed: 59770 additions & 0 deletions

data/dump_osm/SC.geojson

Lines changed: 87770 additions & 0 deletions

data/dump_osm/SE.geojson

Lines changed: 16858 additions & 0 deletions

data/dump_osm/SP.geojson

Lines changed: 13986 additions & 0 deletions

data/dump_osm/TO.geojson

Lines changed: 27050 additions & 0 deletions
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

src/dumpWikidata.php

Lines changed: 77 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -1,56 +1,91 @@
11
-- Generating backups of JSON-Wikidata --
22

33
<?php
4-
// usage: php dumpWikidata.php flagOpcionalQuandoFixErr
4+
// usage: php dumpWikidata.php [geo][err]
55

66
// CONFIGS
7-
$url_tpl = 'https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&ids=';
8-
// cols 0=subdivision, 1=name_prefix, 2=name, 3=id, 4=idIBGE, 5=wdId, 6=lexLabel
9-
$uf_idx=0; $wdId_idx = 5; $lexLabel_idx = 6;
10-
$UF=''; $localCsv = false; $stopAt=0;
7+
$urlWd_tpl = 'https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&ids=';
8+
$urlOsm_tpl = 'http://polygons.openstreetmap.fr/get_geojson.py?id=';
9+
$UF=''; $localCsv = false; $stopAt=0;
1110

12-
$saveFolder = realpath( dirname(__FILE__)."/../data/wikidata" );
11+
$saveFolder = realpath( dirname(__FILE__)."/../data" );
1312
$url = $localCsv
14-
? "$saveFolder/../br-state-codes.csv"
13+
? "$saveFolder/br-state-codes.csv"
1514
: 'https://github.com/datasets-br/state-codes/raw/master/data/br-state-codes.csv'
1615
;
17-
$fixErr = ($argc>=2)? 'MODO FIX-ERR': '';
18-
print "\n USANDO $fixErr $url";
16+
// cols 0=subdivision, 1=name_prefix, 2=name, 3=id, 4=idIBGE, 5=wdId, 6=lexLabel
17+
$uf_idx=0; $wdId_idx = 5; $lexLabel_idx = 6;
18+
19+
20+
$modo = ($argc>=2)? ( ($argv[1]=='geo')? 'GEO': 'FIX-ERR' ): '';
21+
$ext = ($modo=='GEO')? 'geojson': 'json';
22+
print "\n USANDO $modo $url";
1923

2024

2125
// LOAD DATA:
2226
$R = []; // [fname]= wdId
2327
if (($handle = fopen($url, "r")) !== FALSE) {
2428
for($i=0; ($row=fgetcsv($handle)) && (!$stopAt || $i<$stopAt); $i++)
2529
if ( $i && isset($row[1]) )
26-
$R[ lex2filename($row[$lexLabel_idx]) ] = $row[$wdId_idx];
30+
$R[ $row[$uf_idx] ] = $row[$wdId_idx];
2731
} else
2832
exit("\nERRO ao abrir planilha das cidades em \n\t$url\n");
2933

3034

31-
if ($fixErr) foreach($R as $fname=>$wdId) {
32-
$fs = splitFilename($fname,true);
33-
if ($fs[2]>50) unset($R[$fname]);
35+
if ($modo=='FIX-ERR') foreach($R as $fname=>$wdId) {
36+
if ( filesize("$saveFolder/dump_wikidata/$fname.$ext")>50 ) unset($R[$fname]);
3437
}
3538

3639
// WGET AND SAVE JSON:
3740
$i=1;
3841
$n=count($R);
3942
$ERR=[];
40-
foreach($R as $fname=>$wdId) {
41-
print "\n\t($i of $n) $fname: $wdId ";
42-
$json = file_get_contents("$url_tpl$wdId");
43-
if ($json) {
44-
$out = json_stdWikidata($json);
45-
if ($out) {
46-
$savedBytes = file_put_contents( "$saveFolder/$fname.json", $out );
47-
print "saved ($savedBytes bytes) with fresh $wdId";
48-
} else
49-
ERRset($fname,"invalid Wikidata structure");
50-
} else
51-
ERRset($fname,"empty json");
52-
$i++;
53-
}
43+
44+
switch($modo) {
45+
46+
case '':
47+
case 'FIX-ERR':
48+
foreach($R as $fname=>$wdId) {
49+
print "\n\t($i of $n) $fname: $wdId ";
50+
$json = file_get_contents("$urlWd_tpl$wdId");
51+
if ($json) {
52+
$out = json_stdWikidata($json);
53+
if ($out) {
54+
$savedBytes = file_put_contents( "$saveFolder/dump_wikidata/$fname.$ext", $out );
55+
print "saved ($savedBytes bytes) with fresh $wdId";
56+
} else
57+
ERRset($fname,"invalid Wikidata structure");
58+
} else
59+
ERRset($fname,"empty json");
60+
$i++;
61+
}
62+
break;
63+
64+
case 'GEO':
65+
foreach($R as $fname=>$wdId) {
66+
print "\n\t($i of $n) $fname: $wdId ";
67+
$osmId= getOsmId($fname,$wdId); // usa wdId?
68+
$json='';
69+
if ($osmId) $json = file_get_contents("$urlOsm_tpl$osmId");
70+
else ERRset($fname,"no osmId or P402");
71+
if ($json) {
72+
$out = json_stdOsm($json);
73+
if ($out) {
74+
$savedBytes = file_put_contents( "$saveFolder/dump_osm/$fname.$ext", $out );
75+
print "saved ($savedBytes bytes) with fresh OSM/$osmId";
76+
} else
77+
ERRset($fname,"invalid OSM structure");
78+
} else
79+
ERRset($fname,"empty json");
80+
$i++;
81+
}
82+
break;
83+
84+
default:
85+
die("\n Modo $modo DESCONHECIDO.\n");
86+
87+
} // end switch
88+
5489

5590
if (count($ERR)) { print "\n ----------- ERRORS ---------\n"; foreach($ERR as $msg) print "\n * $msg"; }
5691

@@ -64,6 +99,13 @@ function ERRset($fname,$msg) {
6499
$ERR[] = $msg;
65100
}
66101

102+
function json_stdOsm($jstr) {
103+
if (!trim($jstr)) return '';
104+
$j = json_decode($jstr,JSON_BIGINT_AS_STRING|JSON_OBJECT_AS_ARRAY);
105+
if ( !isset($j['type']) ) return '';
106+
return json_encode($j,JSON_PRETTY_PRINT|JSON_UNESCAPED_UNICODE|JSON_UNESCAPED_SLASHES);
107+
}
108+
67109
function json_stdWikidata($jstr) {
68110
if (!trim($jstr)) return '';
69111
$j = json_decode($jstr,JSON_BIGINT_AS_STRING|JSON_OBJECT_AS_ARRAY);
@@ -82,19 +124,14 @@ function json_stdWikidata($jstr) {
82124
return json_encode($j,JSON_PRETTY_PRINT|JSON_UNESCAPED_UNICODE|JSON_UNESCAPED_SLASHES);
83125
}
84126

85-
function lex2filename($s) {
86-
$s=ucwords( str_replace('.',' ',$s) );
87-
return preg_replace('/ D | /','',$s); // elimina preposicao contraida (bug norma lexml)
88-
}
89-
90-
function splitFilename($f,$checkSize=false) {
91-
global $saveFolder;
92-
$uf = substr($f,0,2);
93-
$fname2 = substr($f,3);
94-
$saveFolder2 = "$saveFolder/$uf";
95-
$fp = "$saveFolder2/$fname2.json";
96-
$size = $checkSize? (file_exists($fp)? filesize($fp): 0): null;
97-
return [$fp,$saveFolder2,$size];
127+
function getOsmId($fname) {
128+
global $saveFolder;
129+
$f = "$saveFolder/dump_wikidata/$fname.json";
130+
$j = json_decode( file_get_contents($f), JSON_BIGINT_AS_STRING|JSON_OBJECT_AS_ARRAY);
131+
if (isset($j['claims']['P402'][0]['value']) )
132+
return $j['claims']['P402'][0]['value'];
133+
else
134+
return 0;
98135
}
99136

100137
?>

0 commit comments

Comments
 (0)