[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[robocop] 09/37: Improve address matching
From: |
Admin |
Subject: |
[robocop] 09/37: Improve address matching |
Date: |
Thu, 05 Jun 2025 10:20:54 +0200 |
This is an automated email from the git hooks/post-receive script.
grothoff pushed a commit to branch master
in repository robocop.
commit b9bab0a42664a81dbcdd15d793050050555544ae
Author: Vint Leenaars <vl.software@leenaa.rs>
AuthorDate: Mon May 5 12:37:05 2025 +0200
Improve address matching
---
src/KYC/Check.hs | 51 ++++++++++++++++++++++++++++------------------
test/Tests/Check.hs | 2 +-
test/data/target_38925.xml | 3 +++
test/data/target_68815.xml | 6 +++---
4 files changed, 38 insertions(+), 24 deletions(-)
diff --git a/src/KYC/Check.hs b/src/KYC/Check.hs
index 500ed21..444e3eb 100644
--- a/src/KYC/Check.hs
+++ b/src/KYC/Check.hs
@@ -204,32 +204,40 @@ checkAddress :: Map Int (Quality SSL.Address) ->
GLS.Address -> [WithSSID Qualit
checkAddress addresses' address' = catMaybes $ map (compareAddress address') $
toList addresses'
compareAddress :: GLS.Address -> (Int, Quality SSL.Address) -> Maybe (WithSSID
QualityFloat)
-compareAddress gls_address (ssid, quality) = if total_score >= threshold_float
- then Just $ WithSSID ssid $
liftQuality (\_ -> total_score) quality
+compareAddress gls_address (ssid, quality) = if country_score >= 0.75
+ then Just $ WithSSID ssid $
liftQuality (\_ -> country_score) quality
else Nothing
where ssl_address = removeQuality quality
- total_score = totalFromMaybes [ country_score
- , details_score
+ total_score = totalFromMaybes [ details_score
, area_score
, location_score
, zip_code_score
]
country_score = case SSL.country ssl_address of
- Just c -> Just $ if c == GLS.country gls_address
then 1 else 0
- Nothing -> Nothing
+ Just c -> if c == GLS.country gls_address then 0.5
* (total_score + 1) else 0
+ Nothing -> total_score
details_score = case details ssl_address of
Just det -> let
- perms = permutateText $ catMaybes [
Just $ street_name gls_address
- ,
Just $ street_number gls_address
- ,
country_subdivision gls_address
- ,
lines gls_address
- ,
town_district gls_address
- ,
town_location gls_address
- ,
Just $ zipcode gls_address
- ]
- ratio = compareTexts (cleanText det)
0 perms
+ possible_numbers = catMaybes [ Just $
street_number gls_address
+ ,
building_number gls_address
+ , lines
gls_address
+ ]
+ possible_info = catMaybes [ Just $
street_name gls_address
+ ,
building_name gls_address
+ ,
country_subdivision gls_address
+ ,
town_district gls_address
+ ,
town_location gls_address
+ , Just $
zipcode gls_address
+ ]
+ possible_details = possible_numbers
++ possible_info
+ perms = permutateText possible_details
+ clean_details = cleanText det
+ ratio = compareTexts clean_details 0
perms
+ perfect_matches = foldl (\c i -> if
foldl (\b d -> b || i `T.isInfixOf` d) False clean_details then c + 1 else c) 0
possible_info
in
- Just $ if ratio >= threshold_ratio
then ratioToFloat ratio else 0
+ Just $ if ratio >= threshold_ratio
+ then ratioToFloat ratio
+ else if perfect_matches /= 0
then 0.5 else 0
Nothing -> Nothing
area_score = case area ssl_address of
Just areas -> let
@@ -239,7 +247,7 @@ compareAddress gls_address (ssid, quality) = if total_score
>= threshold_float
]
ratio = compareTexts areas 0 perms
in
- Just $ if ratio >= threshold_ratio
then ratioToFloat ratio else 0
+ if ratio >= threshold_ratio then
Just $ ratioToFloat ratio else Nothing
Nothing -> Nothing
location_score = case location ssl_address of
Just lcs -> let
@@ -249,18 +257,21 @@ compareAddress gls_address (ssid, quality) = if
total_score >= threshold_float
]
ratio = compareTexts lcs 0 perms
in
- Just $ if ratio >= threshold_ratio
then ratioToFloat ratio else 0
+ if ratio >= threshold_ratio then Just
$ ratioToFloat ratio else Nothing
Nothing -> Nothing
zip_code_score = case zip_code ssl_address of
Just zc -> let
ratio = compareText zc 0 [zipcode
gls_address]
in
- Just $ if ratio >= threshold_ratio
then ratioToFloat ratio else 0
+ if ratio >= threshold_ratio then Just
$ ratioToFloat ratio else Nothing
Nothing -> Nothing
cleanText :: [Text] -> [Text]
-cleanText text = map (T.foldl (\new_text char -> if char `elem` chars_to_rm
then new_text else snoc new_text char) start_text) text
+cleanText text = map (T.foldl (\new_text char -> if char `elem` chars_to_rm
+ then new_text
+ else snoc new_text char
+ ) start_text) text
where chars_to_rm = ".,-" :: String
start_text = "" :: Text
diff --git a/test/Tests/Check.hs b/test/Tests/Check.hs
index 72e0007..ffc0837 100644
--- a/test/Tests/Check.hs
+++ b/test/Tests/Check.hs
@@ -139,7 +139,7 @@ personTests sanction_list =
, testTarget False 38925 target_38925_v3 sanction_list $ distribution
0 100 0 0 0 0.75
, testTarget False 38925 target_38925_v4 sanction_list $ distribution
100 0 0 0 0 0.75
, testTarget False 38925 target_38925_v5 sanction_list $ distribution
100 100 0 0 0 0.75
- , testTarget False 68815 target_68815 sanction_list $ distribution
100 100 0 125 0 0.75
+ , testTarget False 68815 target_68815 sanction_list $ distribution
75 100 0 125 50 0.75
]
, testGroup "Fake target with XML file"
diff --git a/test/data/target_38925.xml b/test/data/target_38925.xml
index 8711ce1..ba2ea54 100644
--- a/test/data/target_38925.xml
+++ b/test/data/target_38925.xml
@@ -35,4 +35,7 @@
<justification ssid="38923">Secretary of the Crimea
Electoral Commission. In this capacity she participated in the organisation of
the Russian presidential elections of 18 Mar 2018 in the illegally annexed
Crimea and Sevastopol, and thereby actively supported and implemented policies
that undermine the territorial integrity, sovereignty and independence of
Ukraine.</justification>
</individual>
</target>
+ <place ssid="38924">
+ <location>Autonomous Republic of Crimea</location>
+ </place>
</swiss-sanctions-list>
diff --git a/test/data/target_68815.xml b/test/data/target_68815.xml
index dcb3bb1..7448542 100644
--- a/test/data/target_68815.xml
+++ b/test/data/target_68815.xml
@@ -24,8 +24,8 @@
<other-information ssid="68814">National identification
no: Haiti 004-341-263-3</other-information>
</individual>
</target>
- <place ssid="49815">
- <location>Saint-Petersburg</location>
- <country iso-code="RU">Russian Federation</country>
+ <place ssid="66660">
+ <location>Port-au-Prince</location>
+ <country iso-code="HT">Haiti</country>
</place>
</swiss-sanctions-list>
--
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.
- [robocop] branch master created (now 4c475a4), Admin, 2025/06/05
- [robocop] 03/37: Improve testsuite, Admin, 2025/06/05
- [robocop] 06/37: Add more tests, Admin, 2025/06/05
- [robocop] 05/37: Add tests, Admin, 2025/06/05
- [robocop] 02/37: Initialise repository, Admin, 2025/06/05
- [robocop] 04/37: More tests, update config & licenses, Admin, 2025/06/05
- [robocop] 01/37: init w/ instruction files, Admin, 2025/06/05
- [robocop] 10/37: Update testfunctions, Admin, 2025/06/05
- [robocop] 09/37: Improve address matching,
Admin <=
- [robocop] 11/37: Re-order tests, Admin, 2025/06/05
- [robocop] 08/37: Add more tests, Admin, 2025/06/05
- [robocop] 12/37: Add new tests, Admin, 2025/06/05
- [robocop] 07/37: Improve checks + tests, Admin, 2025/06/05