From a1e0b9fce74b168097def329f97a6e106309a3b4 Mon Sep 17 00:00:00 2001 From: BrindusaN Date: Tue, 21 Dec 2021 15:16:28 +0200 Subject: [PATCH 1/3] Replace char U+0163 with U+021B --- resources/libpostal/dictionaries/all/surnames.txt | 10 +++++----- .../libpostal/dictionaries/ro/street_types.txt | 4 ++-- .../dictionaries/locality/name:eng_x_preferred.txt | 14 +++++++------- .../dictionaries/locality/name:fra_x_preferred.txt | 12 ++++++------ .../dictionaries/region/name:eng_x_preferred.txt | 6 +++--- test/address.rom.test.js | 4 ++++ 6 files changed, 27 insertions(+), 23 deletions(-) diff --git a/resources/libpostal/dictionaries/all/surnames.txt b/resources/libpostal/dictionaries/all/surnames.txt index 3c18d260..32299c01 100644 --- a/resources/libpostal/dictionaries/all/surnames.txt +++ b/resources/libpostal/dictionaries/all/surnames.txt @@ -4735,7 +4735,7 @@ Văcăroiu Vădrariu Văduva Vălean -Văluţă +Văluță Văideanu Văsie Vătăşoiu @@ -38654,7 +38654,7 @@ Tonge Tonguç Toninelli Tonini -Toniţa +Tonița Tonokura Tonon Tondre @@ -44366,14 +44366,14 @@ Negro Negron Negroni Negru -Negruţă +Negruță Negi Negishi Negus Negussie Negash Negedu -Negoiţescu +Negoițescu Neil Neild Neill @@ -49372,4 +49372,4 @@ van Leeuwenhoek van Rossum 't Hoen Μαγουλάς -Ḥazzan \ No newline at end of file +Ḥazzan diff --git a/resources/libpostal/dictionaries/ro/street_types.txt b/resources/libpostal/dictionaries/ro/street_types.txt index 46758ffa..e9ec0224 100644 --- a/resources/libpostal/dictionaries/ro/street_types.txt +++ b/resources/libpostal/dictionaries/ro/street_types.txt @@ -5,9 +5,9 @@ drumul fundătura|fundatura|fnd fundacul|fdc intrarea|int|intr -piaţa|piata|piață|pta|pţa|p-ta|p-ţa +piața|piata|piață|pta|pța|p-ta|p-ța strada|str stradela|str-la|sdla șoseaua|soseaua|sos|șos splaiul|sp|spl -vârful|varful|virful|vîrful|varf|vf \ No newline at end of file +vârful|varful|virful|vîrful|varf|vf diff --git a/resources/whosonfirst/dictionaries/locality/name:eng_x_preferred.txt b/resources/whosonfirst/dictionaries/locality/name:eng_x_preferred.txt index 514de73d..cd92f81e 100644 --- a/resources/whosonfirst/dictionaries/locality/name:eng_x_preferred.txt +++ b/resources/whosonfirst/dictionaries/locality/name:eng_x_preferred.txt @@ -26183,7 +26183,7 @@ tigina chișinău tiraspol byelcy -rîbniţa +rîbnița maastricht helmond oss @@ -26241,7 +26241,7 @@ slatina călărași targu jiu slobozia -reşiţa +reşița constanța târgovişte ramnicu valcea @@ -26271,8 +26271,8 @@ miercurea-ciuc oradea vaslui barlad -piatra neamţ -bistriţa +piatra neamț +bistrița satu mare baia mare iași @@ -37206,7 +37206,7 @@ ricse Șerbănești grănicești zvoriștea -rădăuţi +rădăuți calafindești zamostea dornești @@ -55989,7 +55989,7 @@ rozavlea sălsig gârdani iapa -sighetu marmaţiei +sighetu marmației săpânța jeud băile borșa @@ -261362,4 +261362,4 @@ camden haven rockhampton central coast batemans bay -alice springs \ No newline at end of file +alice springs diff --git a/resources/whosonfirst/dictionaries/locality/name:fra_x_preferred.txt b/resources/whosonfirst/dictionaries/locality/name:fra_x_preferred.txt index d559ae5c..97af2d13 100644 --- a/resources/whosonfirst/dictionaries/locality/name:fra_x_preferred.txt +++ b/resources/whosonfirst/dictionaries/locality/name:fra_x_preferred.txt @@ -9021,7 +9021,7 @@ riga bender chişinău tiraspol -municipalité de bălţi +municipalité de bălți rîbnița maastricht helmond @@ -9083,7 +9083,7 @@ drobeta turnu-severin slatina târgu jiu slobozia -reşiţa +reşița constanța târgoviste râmnicu vâlcea @@ -9111,8 +9111,8 @@ turda oradea vaslui bârlad -piatra neamţ -bistriţa +piatra neamț +bistrița satu mare baia mare iași @@ -19673,7 +19673,7 @@ pálháza vilmány sátoraljaújhely ricse -rădăuţi +rădăuți cândești malá lehota hriňová @@ -77628,4 +77628,4 @@ chézeaux avrecourt lavilleneuve-au-roi l'Île-saint-denis -laneuville-à-rémy \ No newline at end of file +laneuville-à-rémy diff --git a/resources/whosonfirst/dictionaries/region/name:eng_x_preferred.txt b/resources/whosonfirst/dictionaries/region/name:eng_x_preferred.txt index 6e53bf3a..cb48a0e7 100644 --- a/resources/whosonfirst/dictionaries/region/name:eng_x_preferred.txt +++ b/resources/whosonfirst/dictionaries/region/name:eng_x_preferred.txt @@ -3797,11 +3797,11 @@ rezina Șoldănești district florești district drocia -raionul edineţ +raionul edineț briceni soroca dondușeni district -raionul ocniţa +raionul ocnița ciudad de méxico guerrero méxico @@ -4514,4 +4514,4 @@ khyber pakhtunkhwa south sardinia baalbek-hermel akkar -al-shahaniya‎ \ No newline at end of file +al-shahaniya‎ diff --git a/test/address.rom.test.js b/test/address.rom.test.js index 6485f2fe..8e2e4a8a 100644 --- a/test/address.rom.test.js +++ b/test/address.rom.test.js @@ -20,6 +20,10 @@ const testcase = (test, common) => { assert('Calea Victoriei 54 Bucharest ', [ { street: 'Calea Victoriei' }, { housenumber: '54' }, { locality: 'Bucharest' } ]) + + assert('Piața Montreal 1', [ + { street: 'Piața Montreal' }, { housenumber: '1' } + ]) } module.exports.all = (tape, common) => { From 609db06a1ec78fb3aabc7849b35260a0d3625fcb Mon Sep 17 00:00:00 2001 From: BrindusaN Date: Tue, 21 Dec 2021 15:20:53 +0200 Subject: [PATCH 2/3] Replace char U+015F with U+0219 for romanian names --- .../dictionaries/all/given_names.txt | 4 ++-- .../libpostal/dictionaries/all/surnames.txt | 6 ++--- .../locality/name:fra_x_preferred.txt | 22 +++++++++---------- .../region/name:eng_x_preferred.txt | 2 +- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/resources/libpostal/dictionaries/all/given_names.txt b/resources/libpostal/dictionaries/all/given_names.txt index cfb4df0c..3c21a3f4 100644 --- a/resources/libpostal/dictionaries/all/given_names.txt +++ b/resources/libpostal/dictionaries/all/given_names.txt @@ -8403,7 +8403,7 @@ Dragoje Dragoljub Dragomir Dragoslav -Dragoş +Dragoș Draga Dragan Dragana @@ -12712,4 +12712,4 @@ Xóchitl Ștefan Ștefania thị -ʻŌpūnui \ No newline at end of file +ʻŌpūnui diff --git a/resources/libpostal/dictionaries/all/surnames.txt b/resources/libpostal/dictionaries/all/surnames.txt index 32299c01..9ed82525 100644 --- a/resources/libpostal/dictionaries/all/surnames.txt +++ b/resources/libpostal/dictionaries/all/surnames.txt @@ -4085,7 +4085,7 @@ Vormer Vorganov Vorgrimler Vornholt -Vornişel +Vornișel Voráček Voráčová Vorbe @@ -4349,7 +4349,7 @@ Vulević Vulliamy Vullo Vulpe -Vulpeş +Vulpeș Vulović Vultaggio Vujanić @@ -4738,7 +4738,7 @@ Vălean Văluță Văideanu Văsie -Vătăşoiu +Vătășoiu Vēja Vējonis Vētra diff --git a/resources/whosonfirst/dictionaries/locality/name:fra_x_preferred.txt b/resources/whosonfirst/dictionaries/locality/name:fra_x_preferred.txt index 97af2d13..9163499a 100644 --- a/resources/whosonfirst/dictionaries/locality/name:fra_x_preferred.txt +++ b/resources/whosonfirst/dictionaries/locality/name:fra_x_preferred.txt @@ -9019,7 +9019,7 @@ jelgava jurmala riga bender -chişinău +chișinău tiraspol municipalité de bălți rîbnița @@ -9083,28 +9083,28 @@ drobeta turnu-severin slatina târgu jiu slobozia -reşița +reșița constanța târgoviste râmnicu vâlcea -piteşti -timişoara -ploieşti +pitești +timișoara +ploiești brăila buzău deva hunedoara tulcea -braşov -mediaş +brașov +mediaș sibiu arad alba iulia -focşani +focșani galați bacău -oneşti -târgu mureş +onești +târgu mureș zalău cluj-napoca turda @@ -9117,7 +9117,7 @@ satu mare baia mare iași suceava -botoşani +botoșani nitra trnava banská bystrica diff --git a/resources/whosonfirst/dictionaries/region/name:eng_x_preferred.txt b/resources/whosonfirst/dictionaries/region/name:eng_x_preferred.txt index cb48a0e7..88313141 100644 --- a/resources/whosonfirst/dictionaries/region/name:eng_x_preferred.txt +++ b/resources/whosonfirst/dictionaries/region/name:eng_x_preferred.txt @@ -4043,7 +4043,7 @@ vaslui neamt bistrita-nasaud satu mare -maramureş county +maramureș county iasi suceava botosani From c637753d2dcc72eb1577003b2fb5f9b9fd3dad4a Mon Sep 17 00:00:00 2001 From: BrindusaN Date: Tue, 21 Dec 2021 15:23:44 +0200 Subject: [PATCH 3/3] Replace char U+015E with U+0218 for romanian names --- resources/libpostal/dictionaries/all/surnames.txt | 2 +- .../whosonfirst/dictionaries/locality/name:eng_x_preferred.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/libpostal/dictionaries/all/surnames.txt b/resources/libpostal/dictionaries/all/surnames.txt index 9ed82525..fa423d4e 100644 --- a/resources/libpostal/dictionaries/all/surnames.txt +++ b/resources/libpostal/dictionaries/all/surnames.txt @@ -49039,7 +49039,7 @@ Uğurlu Şipal Şirin Şişli -Ştefănescu +Ștefănescu Žigeranović Žigić Živanović diff --git a/resources/whosonfirst/dictionaries/locality/name:eng_x_preferred.txt b/resources/whosonfirst/dictionaries/locality/name:eng_x_preferred.txt index cd92f81e..beee4a56 100644 --- a/resources/whosonfirst/dictionaries/locality/name:eng_x_preferred.txt +++ b/resources/whosonfirst/dictionaries/locality/name:eng_x_preferred.txt @@ -65351,7 +65351,7 @@ lipoven' sagaydaki satu-nou selemet -Ştefan vodă +Ștefan vodă antonesht' brezoya carahasani