gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[robocop] branch master updated: rewrite robocop in rust


From: Admin
Subject: [robocop] branch master updated: rewrite robocop in rust
Date: Sun, 08 Jun 2025 01:13:01 +0200

This is an automated email from the git hooks/post-receive script.

grothoff pushed a commit to branch master
in repository robocop.

The following commit(s) were added to refs/heads/master by this push:
     new 09e6796  rewrite robocop in rust
09e6796 is described below

commit 09e679666a8461e79a5f32eec3341947e9624985
Author: Christian Grothoff <christian@grothoff.org>
AuthorDate: Sun Jun 8 01:12:55 2025 +0200

    rewrite robocop in rust
---
 Cargo.toml  |   7 ++
 src/main.rs | 281 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 288 insertions(+)

diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..819651f
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "robocop"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+serde_json = "1.0"
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..901cc04
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,281 @@
+// This file is part of Robocop
+//
+// Robocop is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// Robocop is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+//
+// Copyright (C) 2025 Taler Systems SA
+
+use std::collections::HashMap;
+use std::env;
+use std::fs;
+use std::io::{self, BufRead, BufReader};
+use serde_json::{Value, Map};
+
+// Finite State Machine for efficient string matching
+#[derive(Debug, Clone)]
+struct Matching {
+    // Maps (state, char) -> new_state
+    transitions: HashMap<(usize, char), usize>,
+    // Final states with their associated values and costs
+    final_states: HashMap<usize, (String, usize)>,
+    max_state: usize,
+}
+
+impl Matching {
+    fn new() -> Self {
+        Self {
+            transitions: HashMap::new(),
+            final_states: HashMap::new(),
+            max_state: 0,
+        }
+    }
+
+
+    fn add_string(&mut self, s: &str) {
+        let chars: Vec<char> = s.chars().collect();
+        let mut state_ids = Vec::new();
+
+        // Pre-allocate state IDs to avoid multiple mutable borrows
+        for _ in 0..=chars.len() {
+            let state_id = self.max_state;
+            self.max_state += 1;
+            state_ids.push(state_id);
+        }
+
+        // Add final states
+        for (i, &state_id) in state_ids.iter().enumerate() {
+            self.final_states.insert(
+                state_id,
+                (s.to_string(), chars.len() - i)
+            );
+        }
+
+        // Build transitions for exact matches
+        for (i, &ch) in chars.iter().enumerate() {
+            let current_state = state_ids[i];
+            let next_state = state_ids[i + 1];
+            self.transitions.insert((current_state, ch), next_state);
+        }
+    }
+
+
+    fn find_best_match(&self, input: &str) -> Option<(String, f64)> {
+        let mut best_match = None;
+        let mut best_score = 0.0;
+
+        for (_, (candidate, _)) in &self.final_states {
+            let distance = levenshtein_distance(input, candidate);
+            let max_len = input.len().max(candidate.len());
+            let score = if max_len == 0 {
+                1.0
+            } else {
+                1.0 - (distance as f64 / max_len as f64)
+            };
+
+            if score > best_score {
+                best_score = score;
+                best_match = Some((candidate.clone(), score));
+            }
+        }
+
+        best_match
+    }
+}
+
+// Record structure for matching
+#[derive(Debug, Clone)]
+struct Record {
+    ssid: String,
+    fields: HashMap<String, Matching>,
+}
+
+impl Record {
+    fn new(ssid: String) -> Self {
+        Self {
+            ssid,
+            fields: HashMap::new(),
+        }
+    }
+
+    fn add_field_values(&mut self, key: &str, values: &[String]) {
+        let mut fsm = Matching::new();
+        for value in values {
+            fsm.add_string(value);
+        }
+        self.fields.insert(key.to_string(), fsm);
+    }
+}
+
+// Matching engine
+struct MatchingEngine {
+    records: Vec<Record>,
+}
+
+impl MatchingEngine {
+    fn new() -> Self {
+        Self {
+            records: Vec::new(),
+        }
+    }
+
+    fn load_from_json(&mut self, filename: &str) -> Result<(), Box<dyn std::error::Error>> {
+        let content = fs::read_to_string(filename)?;
+        let json_array: Vec<Value> = serde_json::from_str(&content)?;
+
+        for (idx, item) in json_array.iter().enumerate() {
+            if let Value::Object(obj) = item {
+                let ssid = obj.get("ssid")
+                    .and_then(|v| v.as_str())
+                    .unwrap_or(&format!("record_{}", idx))
+                    .to_string();
+
+                let mut record = Record::new(ssid);
+
+                for (key, value) in obj {
+                    if key == "ssid" {
+                        continue;
+                    }
+
+                    // Only process arrays
+                    if let Value::Array(arr) = value {
+                        let string_values: Vec<String> = arr
+                            .iter()
+                            .filter_map(|v| v.as_str().map(|s| s.to_string()))
+                            .collect();
+
+                        if !string_values.is_empty() {
+                            record.add_field_values(key, &string_values);
+                        }
+                    }
+                }
+
+                self.records.push(record);
+            }
+        }
+
+        Ok(())
+    }
+
+    fn find_best_match(&self, input: &Map<String, Value>) -> (f64, f64, String) {
+        let mut best_overall_score = 0.0;
+        let mut best_ssid = String::new();
+        let mut best_avg_score = 0.0;
+        let mut best_confidence = 0;
+        let mut max_fields = 0;
+
+        for record in &self.records {
+            let mut total_score = 0.0;
+            let mut matching_fields = 0;
+            let total_fields = record.fields.len();
+
+            for (key, input_value) in input {
+                if let Some(input_str) = input_value.as_str() {
+
+                    if let Some(fsm) = record.fields.get(key) {
+                        if let Some((_, score)) = fsm.find_best_match(input_str) {
+                            total_score += score;
+                            matching_fields += 1;
+                        }
+                    }
+                }
+            }
+            max_fields = max_fields.max(total_fields);
+            if total_fields > 0 {
+                if total_score > best_overall_score {
+                    best_overall_score = total_score;
+                    best_avg_score = total_score / matching_fields as f64;
+                    best_confidence = matching_fields;
+                    best_ssid = record.ssid.clone();
+                }
+            }
+        }
+
+        (best_avg_score, best_confidence as f64 / max_fields as f64, best_ssid)
+    }
+}
+
+// Levenshtein distance implementation
+fn levenshtein_distance(s1: &str, s2: &str) -> usize {
+    let chars1: Vec<char> = s1.chars().collect();
+    let chars2: Vec<char> = s2.chars().collect();
+    let len1 = chars1.len();
+    let len2 = chars2.len();
+
+    let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
+
+    // Initialize first row and column
+    for i in 0..=len1 {
+        matrix[i][0] = i;
+    }
+    for j in 0..=len2 {
+        matrix[0][j] = j;
+    }
+
+    // Fill the matrix
+    for i in 1..=len1 {
+        for j in 1..=len2 {
+            let cost = if chars1[i - 1] == chars2[j - 1] { 0 } else { 1 };
+            matrix[i][j] = std::cmp::min(
+                std::cmp::min(
+                    matrix[i - 1][j] + 1,      // deletion
+                    matrix[i][j - 1] + 1       // insertion
+                ),
+                matrix[i - 1][j - 1] + cost    // substitution
+            );
+        }
+    }
+
+    matrix[len1][len2]
+}
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let args: Vec<String> = env::args().collect();
+    if args.len() != 2 {
+        eprintln!("Usage: {} <json_file>", args[0]);
+        std::process::exit(1);
+    }
+
+    let filename = &args[1];
+
+    // Load and pre-process the JSON database
+    let mut engine = MatchingEngine::new();
+    engine.load_from_json(filename)?;
+
+    // Read JSON objects from stdin
+    let stdin = io::stdin();
+    let reader = BufReader::new(stdin);
+
+    for line in reader.lines() {
+        let line = line?;
+        if line.trim().is_empty() {
+            continue;
+        }
+
+        match serde_json::from_str::<Value>(&line) {
+            Ok(Value::Object(obj)) => {
+                let (quality, confidence, ssid) = engine.find_best_match(&obj);
+                println!("{:.6} {:.6} {}", quality, confidence, ssid);
+            }
+            Ok(_) => {
+                eprintln!("Warning: Skipping non-object JSON: {}", line);
+                std::process::exit(1);
+            }
+            Err(e) => {
+                eprintln!("Warning: Failed to parse JSON: {} - {}", line, e);
+                std::process::exit(1);
+            }
+        }
+    }
+
+    Ok(())
+}

-- 
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.



reply via email to

[Prev in Thread] Current Thread [Next in Thread]