gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[robocop] 09/37: Improve address matching


From: Admin
Subject: [robocop] 09/37: Improve address matching
Date: Thu, 05 Jun 2025 10:20:54 +0200

This is an automated email from the git hooks/post-receive script.

grothoff pushed a commit to branch master
in repository robocop.

commit b9bab0a42664a81dbcdd15d793050050555544ae
Author: Vint Leenaars <vl.software@leenaa.rs>
AuthorDate: Mon May 5 12:37:05 2025 +0200

    Improve address matching
---
 src/KYC/Check.hs           | 51 ++++++++++++++++++++++++++++------------------
 test/Tests/Check.hs        |  2 +-
 test/data/target_38925.xml |  3 +++
 test/data/target_68815.xml |  6 +++---
 4 files changed, 38 insertions(+), 24 deletions(-)

diff --git a/src/KYC/Check.hs b/src/KYC/Check.hs
index 500ed21..444e3eb 100644
--- a/src/KYC/Check.hs
+++ b/src/KYC/Check.hs
@@ -204,32 +204,40 @@ checkAddress :: Map Int (Quality SSL.Address) -> 
GLS.Address -> [WithSSID Qualit
 checkAddress addresses' address' = catMaybes $ map (compareAddress address') $ 
toList addresses'
 
 compareAddress :: GLS.Address -> (Int, Quality SSL.Address) -> Maybe (WithSSID 
QualityFloat)
-compareAddress gls_address (ssid, quality) = if total_score >= threshold_float 
-                                             then Just $ WithSSID ssid $ 
liftQuality (\_ -> total_score) quality
+compareAddress gls_address (ssid, quality) = if country_score >= 0.75 
+                                             then Just $ WithSSID ssid $ 
liftQuality (\_ -> country_score) quality
                                              else Nothing 
   where ssl_address    = removeQuality quality
-        total_score    = totalFromMaybes [ country_score
-                                         , details_score
+        total_score    = totalFromMaybes [ details_score
                                          , area_score
                                          , location_score 
                                          , zip_code_score
                                          ]
         country_score  = case SSL.country ssl_address of
-                           Just c  -> Just $ if c == GLS.country gls_address 
then 1 else 0
-                           Nothing -> Nothing
+                           Just c  -> if c == GLS.country gls_address then 0.5 
* (total_score + 1) else 0 
+                           Nothing -> total_score 
         details_score  = case details ssl_address of
                            Just det -> let
-                                         perms = permutateText $ catMaybes [ 
Just $ street_name   gls_address
-                                                                           , 
Just $ street_number gls_address
-                                                                           , 
country_subdivision  gls_address
-                                                                           , 
lines                gls_address
-                                                                           , 
town_district        gls_address
-                                                                           , 
town_location        gls_address
-                                                                           , 
Just $ zipcode       gls_address
-                                                                           ]
-                                         ratio = compareTexts (cleanText det) 
0 perms
+                                         possible_numbers = catMaybes [ Just $ 
street_number gls_address
+                                                                      , 
building_number      gls_address
+                                                                      , lines  
              gls_address
+                                                                      ]
+                                         possible_info    = catMaybes [ Just $ 
street_name   gls_address
+                                                                      , 
building_name        gls_address
+                                                                      , 
country_subdivision  gls_address
+                                                                      , 
town_district        gls_address
+                                                                      , 
town_location        gls_address
+                                                                      , Just $ 
zipcode       gls_address
+                                                                      ]
+                                         possible_details = possible_numbers 
++ possible_info
+                                         perms = permutateText possible_details
+                                         clean_details = cleanText det
+                                         ratio = compareTexts clean_details 0 
perms
+                                         perfect_matches = foldl (\c i -> if 
foldl (\b d -> b || i `T.isInfixOf` d) False clean_details then c + 1 else c) 0 
possible_info
                                        in
-                                         Just $ if ratio >= threshold_ratio 
then ratioToFloat ratio else 0
+                                         Just $ if ratio >= threshold_ratio
+                                                then ratioToFloat ratio
+                                                else if perfect_matches /= 0 
then 0.5 else 0 
                            Nothing  -> Nothing
         area_score     = case area ssl_address of
                            Just areas -> let
@@ -239,7 +247,7 @@ compareAddress gls_address (ssid, quality) = if total_score 
>= threshold_float
                                                                              ]
                                            ratio = compareTexts areas 0 perms
                                          in
-                                           Just $ if ratio >= threshold_ratio 
then ratioToFloat ratio else 0
+                                           if ratio >= threshold_ratio then 
Just $ ratioToFloat ratio else Nothing
                            Nothing    -> Nothing
         location_score = case location ssl_address of
                            Just lcs -> let
@@ -249,18 +257,21 @@ compareAddress gls_address (ssid, quality) = if 
total_score >= threshold_float
                                                                            ]
                                          ratio = compareTexts lcs 0 perms
                                        in
-                                         Just $ if ratio >= threshold_ratio 
then ratioToFloat ratio else 0
+                                        if ratio >= threshold_ratio then Just 
$ ratioToFloat ratio else Nothing
                            Nothing  -> Nothing
         zip_code_score = case zip_code ssl_address of
                            Just zc -> let
                                         ratio = compareText zc 0 [zipcode 
gls_address]
                                       in
-                                        Just $ if ratio >= threshold_ratio 
then ratioToFloat ratio else 0
+                                        if ratio >= threshold_ratio then Just 
$ ratioToFloat ratio else Nothing 
                            Nothing -> Nothing
 
 
 cleanText :: [Text] -> [Text]
-cleanText text = map (T.foldl (\new_text char -> if char `elem` chars_to_rm 
then new_text else snoc new_text char) start_text) text
+cleanText text = map (T.foldl (\new_text char -> if char `elem` chars_to_rm
+                                                 then new_text
+                                                 else snoc new_text char
+                              ) start_text) text
   where chars_to_rm = ".,-" :: String 
         start_text  = ""    :: Text
 
diff --git a/test/Tests/Check.hs b/test/Tests/Check.hs
index 72e0007..ffc0837 100644
--- a/test/Tests/Check.hs
+++ b/test/Tests/Check.hs
@@ -139,7 +139,7 @@ personTests sanction_list =
         , testTarget False 38925 target_38925_v3 sanction_list $ distribution 
0       100  0   0    0           0.75
         , testTarget False 38925 target_38925_v4 sanction_list $ distribution 
100     0    0   0    0           0.75
         , testTarget False 38925 target_38925_v5 sanction_list $ distribution 
100     100  0   0    0           0.75
-        , testTarget False 68815 target_68815    sanction_list $ distribution 
100     100  0   125  0           0.75
+        , testTarget False 68815 target_68815    sanction_list $ distribution 
75      100  0   125  50          0.75
         ]
 
     , testGroup "Fake target with XML file"
diff --git a/test/data/target_38925.xml b/test/data/target_38925.xml
index 8711ce1..ba2ea54 100644
--- a/test/data/target_38925.xml
+++ b/test/data/target_38925.xml
@@ -35,4 +35,7 @@
                        <justification ssid="38923">Secretary of the Crimea 
Electoral Commission. In this capacity she participated in the organisation of 
the Russian presidential elections of 18 Mar 2018 in the illegally annexed 
Crimea and Sevastopol, and thereby actively supported and implemented policies 
that undermine the territorial integrity, sovereignty and independence of 
Ukraine.</justification>
                </individual>
        </target>
+       <place ssid="38924">
+               <location>Autonomous Republic of Crimea</location>
+       </place>
 </swiss-sanctions-list>
diff --git a/test/data/target_68815.xml b/test/data/target_68815.xml
index dcb3bb1..7448542 100644
--- a/test/data/target_68815.xml
+++ b/test/data/target_68815.xml
@@ -24,8 +24,8 @@
                        <other-information ssid="68814">National identification 
no: Haiti 004-341-263-3</other-information>
                </individual>
        </target>
-       <place ssid="49815">
-               <location>Saint-Petersburg</location>
-               <country iso-code="RU">Russian Federation</country>
+       <place ssid="66660">
+               <location>Port-au-Prince</location>
+               <country iso-code="HT">Haiti</country>
        </place>
 </swiss-sanctions-list>

-- 
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.



reply via email to

[Prev in Thread] Current Thread [Next in Thread]