Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(*): Add cities list to make label #13

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 30 additions & 23 deletions addok_france/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,11 @@
# Match "rue", "boulevard", "bd", etc.
TYPES_PATTERN = re.compile(r'\b(' + TYPES_REGEX + r')\b', flags=re.IGNORECASE)


# Match number + ordinal, once glued by glue_ordinal (or typed like this in the
# search string, for example "6bis", "234ter").
FOLD_PATTERN = re.compile(r'^(\d{1,4})(' + ORDINAL_REGEX + ')$',
flags=re.IGNORECASE)


# Match number once cleaned by glue_ordinal and fold_ordinal (for example
# "6b", "234t"…)
NUMBER_PATTERN = re.compile(r'\b\d{1,4}[a-z]?\b', flags=re.IGNORECASE)
Expand Down Expand Up @@ -139,28 +137,37 @@ def make_labels(helper, result):
housenumber = getattr(result, 'housenumber', None)

def add(labels, label):
labels.insert(0, label)
labels.add(label)
if housenumber:
label = '{} {}'.format(housenumber, label)
labels.insert(0, label)
labels.add(label)

raw_cities = result._rawattr("city")

if isinstance(raw_cities, list):
cities = raw_cities
else:
cities = [raw_cities]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Il faut sûrement vérifier plus loin dans la chaîne que le fait d'avoir une liste pour city est bien pris en charge, notamment pour le libellé final du résultat, y compris dans les clients JS.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hello,

Nous avons ce code en production depuis 2 ans. J'avoue qu'il répond aux besoins, mais la performance en soi n'est pas très importante dans notre cas.


city = result.city
postcode = result.postcode
names = result._rawattr('name')
if not isinstance(names, (list, tuple)):
names = [names]
for name in names:
labels = []
label = name
if postcode and result.type == 'municipality':
add(labels, '{} {}'.format(label, postcode))
add(labels, '{} {}'.format(postcode, label))
add(labels, label)
if city and city != label:
add(labels, '{} {}'.format(label, city))
if postcode:
label = '{} {}'.format(label, postcode)
add(labels, label)
label = '{} {}'.format(label, city)
add(labels, label)
result.labels.extend(labels)
names = getattr(result, 'name', None)
labels = set()

for city in cities:
if not isinstance(names, (list, tuple)):
names = [names]
for name in names:
label = name
if postcode and result.type == 'municipality':
add(labels, '{} {}'.format(label, postcode))
add(labels, '{} {}'.format(postcode, label))
add(labels, label)
if city and city != label:
add(labels, '{} {}'.format(label, city))
if postcode:
label = '{} {}'.format(label, postcode)
add(labels, label)
label = '{} {}'.format(label, city)
add(labels, label)

result.labels.extend(sorted(list(labels), key=lambda item: (len(item), item), reverse=True))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ici je pense qu'on perd en efficacité: il faut trouver la bonne heuristique pour mettre en premier les labels les plus standards, ce qui était plus ou moins ce que le code essayait de faire.

70 changes: 65 additions & 5 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import json

import pytest

from addok.batch import process_documents
from addok.core import search, Result
from addok.ds import get_document
from addok.helpers.text import Token

from addok_france.utils import (clean_query, extract_address, flag_housenumber,
fold_ordinal, glue_ordinal, make_labels,
remove_leading_zeros)
Expand Down Expand Up @@ -303,15 +303,75 @@ def test_make_labels(config):
assert result.labels == [
'1 bis rue des Lilas 75010 Paris',
'rue des Lilas 75010 Paris',
'1 bis rue des Lilas 75010',
'rue des Lilas 75010',
'1 bis rue des Lilas Paris',
'1 bis rue des Lilas 75010',
'rue des Lilas Paris',
'rue des Lilas 75010',
'1 bis rue des Lilas',
'rue des Lilas'
]


def test_make_labels_merged_cities(config):
    """Check the labels built for a street whose ``city`` field is a list.

    The indexed document carries four spellings of the city (the merged
    municipality and its aliases). After simulating a housenumber match,
    the assertion pins the exact content and order of ``result.labels``.
    """
    street_id = "53543a313139353538390000"
    city_spellings = [
        "ST GEORGES DES GARDES (CHEMILLE EN ANJOU)",
        "ST GEORGES DES GARDES",
        "CHEMILLE EN ANJOU",
        "SAINT GEORGES DES GARDES",
    ]
    numbers = {
        "2 BIS": {
            "hexacleNumber": "49281222UE",
            "lat": "47.1504",
            "lon": "-0.757414",
        },
    }
    # Key order is kept as-is so the serialized payload stays the same.
    payload = {
        "_id": street_id,
        "id": street_id,
        "type": "street",
        "postcode": "49120",
        "hexacleStreet": "492812226P",
        "lat": "47.1469",
        "lon": "-0.75745",
        "context": "49, Maine-et-Loire, Pays de la Loire",
        "importance": 1,
        "userLabel": "RUE PIERRE LEPOUREAU",
        "name": "RUE PIERRE LEPOUREAU",
        "housenumbers": numbers,
        "cityAfnorLabel": "CHEMILLE EN ANJOU",
        "userCityLabel": "ST GEORGES DES GARDES (CHEMILLE EN ANJOU)",
        "cityAliasAfnorLabel": "ST GEORGES DES GARDES",
        "city": city_spellings,
    }
    process_documents(json.dumps(payload))

    street = Result(get_document('d|' + street_id))
    street.housenumber = '2 bis'  # Simulate match_housenumber
    make_labels(None, street)

    expected_labels = [
        '2 bis RUE PIERRE LEPOUREAU 49120 ST GEORGES DES GARDES (CHEMILLE EN ANJOU)',
        'RUE PIERRE LEPOUREAU 49120 ST GEORGES DES GARDES (CHEMILLE EN ANJOU)',
        '2 bis RUE PIERRE LEPOUREAU ST GEORGES DES GARDES (CHEMILLE EN ANJOU)',
        'RUE PIERRE LEPOUREAU ST GEORGES DES GARDES (CHEMILLE EN ANJOU)',
        '2 bis RUE PIERRE LEPOUREAU 49120 SAINT GEORGES DES GARDES',
        '2 bis RUE PIERRE LEPOUREAU 49120 ST GEORGES DES GARDES',
        'RUE PIERRE LEPOUREAU 49120 SAINT GEORGES DES GARDES',
        '2 bis RUE PIERRE LEPOUREAU SAINT GEORGES DES GARDES',
        '2 bis RUE PIERRE LEPOUREAU 49120 CHEMILLE EN ANJOU',
        'RUE PIERRE LEPOUREAU 49120 ST GEORGES DES GARDES',
        '2 bis RUE PIERRE LEPOUREAU ST GEORGES DES GARDES',
        'RUE PIERRE LEPOUREAU SAINT GEORGES DES GARDES',
        'RUE PIERRE LEPOUREAU 49120 CHEMILLE EN ANJOU',
        '2 bis RUE PIERRE LEPOUREAU CHEMILLE EN ANJOU',
        'RUE PIERRE LEPOUREAU ST GEORGES DES GARDES',
        'RUE PIERRE LEPOUREAU CHEMILLE EN ANJOU',
        '2 bis RUE PIERRE LEPOUREAU 49120',
        'RUE PIERRE LEPOUREAU 49120',
        '2 bis RUE PIERRE LEPOUREAU',
        'RUE PIERRE LEPOUREAU',
    ]
    assert street.labels == expected_labels


def test_make_municipality_labels(config):
doc = {
'id': 'xxxx',
Expand All @@ -327,7 +387,7 @@ def test_make_municipality_labels(config):
result = Result(get_document('d|yyyy'))
make_labels(None, result)
assert result.labels == [
'Lille',
'59000 Lille',
'Lille 59000',
'59000 Lille',
'Lille'
]