[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[robocop] branch master updated: rewrite robocop in rust
From: |
Admin |
Subject: |
[robocop] branch master updated: rewrite robocop in rust |
Date: |
Sun, 08 Jun 2025 01:13:01 +0200 |
This is an automated email from the git hooks/post-receive script.
grothoff pushed a commit to branch master
in repository robocop.
The following commit(s) were added to refs/heads/master by this push:
new 09e6796 rewrite robocop in rust
09e6796 is described below
commit 09e679666a8461e79a5f32eec3341947e9624985
Author: Christian Grothoff <christian@grothoff.org>
AuthorDate: Sun Jun 8 01:12:55 2025 +0200
rewrite robocop in rust
---
Cargo.toml | 7 ++
src/main.rs | 281 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 288 insertions(+)
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..819651f
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "robocop"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+serde_json = "1.0"
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..901cc04
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,281 @@
+// This file is part of Robocop
+//
+// Robocop is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// Robocop is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+//
+// Copyright (C) 2025 Taler Systems SA
+
+use std::collections::HashMap;
+use std::env;
+use std::fs;
+use std::io::{self, BufRead, BufReader};
+use serde_json::{Value, Map};
+
+/// Per-field matcher holding every accepted spelling of one field.
+///
+/// Each string registered through [`Matching::add_string`] becomes a linear
+/// chain of states, one state per character boundary. The chain data
+/// (`transitions` and the per-state remaining cost) is recorded, but
+/// [`Matching::find_best_match`] currently only needs the stored strings and
+/// ranks them by normalised Levenshtein similarity.
+#[derive(Debug, Clone)]
+struct Matching {
+    /// Maps `(state, char)` to the next state of the chain.
+    transitions: HashMap<(usize, char), usize>,
+    /// For every state: the string its chain spells and the number of
+    /// characters still missing to complete it (0 marks the end of a chain).
+    final_states: HashMap<usize, (String, usize)>,
+    /// Next unused state id.
+    max_state: usize,
+}
+
+impl Matching {
+    /// Creates a matcher with no registered strings.
+    fn new() -> Self {
+        Self {
+            transitions: HashMap::new(),
+            final_states: HashMap::new(),
+            max_state: 0,
+        }
+    }
+
+    /// Registers `s` as an accepted value.
+    ///
+    /// Allocates `s.chars().count() + 1` fresh consecutive state ids, links
+    /// them with one transition per character and tags every state with `s`
+    /// and the number of characters remaining until the end of the string.
+    fn add_string(&mut self, s: &str) {
+        let chars: Vec<char> = s.chars().collect();
+        let first = self.max_state;
+        self.max_state += chars.len() + 1;
+
+        for (i, &ch) in chars.iter().enumerate() {
+            self.final_states
+                .insert(first + i, (s.to_string(), chars.len() - i));
+            self.transitions.insert((first + i, ch), first + i + 1);
+        }
+        // Terminal state of the chain: nothing left to consume.
+        self.final_states
+            .insert(first + chars.len(), (s.to_string(), 0));
+    }
+
+    /// Returns the registered string most similar to `input` together with
+    /// its similarity score, or `None` when no string scores above zero.
+    ///
+    /// The score is `1 - distance / max_len`, where `distance` is the
+    /// Levenshtein distance and `max_len` the longer of the two strings.
+    /// Both are measured in characters (not bytes), so multi-byte characters
+    /// do not inflate the score. Two empty strings score `1.0`.
+    fn find_best_match(&self, input: &str) -> Option<(String, f64)> {
+        let input_len = input.chars().count();
+        let mut best: Option<(&str, f64)> = None;
+
+        for (candidate, remaining) in self.final_states.values() {
+            // Every string is stored once per prefix state; looking only at
+            // the terminal state (remaining == 0) evaluates it exactly once.
+            if *remaining != 0 {
+                continue;
+            }
+
+            let max_len = input_len.max(candidate.chars().count());
+            let score = if max_len == 0 {
+                1.0
+            } else {
+                1.0 - levenshtein_distance(input, candidate) as f64 / max_len as f64
+            };
+
+            if score > best.map_or(0.0, |(_, s)| s) {
+                best = Some((candidate.as_str(), score));
+            }
+        }
+
+        // Allocate only for the winning candidate.
+        best.map(|(candidate, score)| (candidate.to_string(), score))
+    }
+}
+
+/// One database entry: an identifier plus one [`Matching`] per field name.
+#[derive(Debug, Clone)]
+struct Record {
+    /// Identifier reported when this record is the best match.
+    ssid: String,
+    /// Field name -> matcher over all values accepted for that field.
+    fields: HashMap<String, Matching>,
+}
+
+impl Record {
+    /// Creates a record identified by `ssid` with no fields yet.
+    fn new(ssid: String) -> Self {
+        Record {
+            ssid,
+            fields: HashMap::new(),
+        }
+    }
+
+    /// Builds a fresh matcher from `values` and stores it under `key`,
+    /// replacing any matcher previously stored for that key.
+    fn add_field_values(&mut self, key: &str, values: &[String]) {
+        let mut matcher = Matching::new();
+        values.iter().for_each(|value| matcher.add_string(value));
+        self.fields.insert(key.to_owned(), matcher);
+    }
+}
+
+/// Holds all database records and scores incoming objects against them.
+struct MatchingEngine {
+    records: Vec<Record>,
+}
+
+impl MatchingEngine {
+    /// Creates an engine with an empty database.
+    fn new() -> Self {
+        Self {
+            records: Vec::new(),
+        }
+    }
+
+    /// Loads the database from `filename`, which must contain a JSON array.
+    ///
+    /// Every array element that is an object becomes one [`Record`]:
+    /// * `"ssid"` (if it is a string) names the record, otherwise the name
+    ///   falls back to `record_<index>`;
+    /// * every other key whose value is an array contributes its string
+    ///   elements as accepted values for that field. Non-array values,
+    ///   non-string elements and arrays without any string are ignored.
+    ///
+    /// # Errors
+    ///
+    /// Returns the I/O error if the file cannot be read, or the parse error
+    /// if it is not a JSON array.
+    fn load_from_json(&mut self, filename: &str) -> Result<(), Box<dyn std::error::Error>> {
+        let content = fs::read_to_string(filename)?;
+        let json_array: Vec<Value> = serde_json::from_str(&content)?;
+
+        for (idx, item) in json_array.iter().enumerate() {
+            let Value::Object(obj) = item else {
+                continue;
+            };
+
+            // Build the fallback name only when it is actually needed.
+            let ssid = obj
+                .get("ssid")
+                .and_then(Value::as_str)
+                .map_or_else(|| format!("record_{}", idx), str::to_owned);
+            let mut record = Record::new(ssid);
+
+            for (key, value) in obj {
+                if key == "ssid" {
+                    continue;
+                }
+
+                // Only arrays carry field values.
+                if let Value::Array(arr) = value {
+                    let string_values: Vec<String> = arr
+                        .iter()
+                        .filter_map(Value::as_str)
+                        .map(str::to_owned)
+                        .collect();
+
+                    if !string_values.is_empty() {
+                        record.add_field_values(key, &string_values);
+                    }
+                }
+            }
+
+            self.records.push(record);
+        }
+
+        Ok(())
+    }
+
+    /// Finds the record that best matches `input`.
+    ///
+    /// For each record, every string-valued input field that the record also
+    /// has is scored with [`Matching::find_best_match`]; the record with the
+    /// highest sum of scores wins (the first one seen on ties).
+    ///
+    /// Returns `(quality, confidence, ssid)`:
+    /// * `quality` - the winner's average score over its matched fields;
+    /// * `confidence` - the winner's number of matched fields divided by the
+    ///   largest field count of any record in the database;
+    /// * `ssid` - the winner's identifier.
+    ///
+    /// When nothing matches, the result is `(0.0, 0.0, "")`. This includes
+    /// an empty database, where the confidence ratio would otherwise be
+    /// `0 / 0` and come out as NaN.
+    fn find_best_match(&self, input: &Map<String, Value>) -> (f64, f64, String) {
+        let mut best_overall_score = 0.0;
+        let mut best_ssid = String::new();
+        let mut best_avg_score = 0.0;
+        let mut best_confidence = 0;
+        let mut max_fields = 0;
+
+        for record in &self.records {
+            let mut total_score = 0.0;
+            let mut matching_fields = 0;
+            let total_fields = record.fields.len();
+
+            for (key, input_value) in input {
+                let Some(input_str) = input_value.as_str() else {
+                    continue;
+                };
+                let Some(fsm) = record.fields.get(key) else {
+                    continue;
+                };
+                if let Some((_, score)) = fsm.find_best_match(input_str) {
+                    total_score += score;
+                    matching_fields += 1;
+                }
+            }
+
+            max_fields = max_fields.max(total_fields);
+            // A total above zero implies at least one matched field, so the
+            // average below never divides by zero.
+            if total_fields > 0 && total_score > best_overall_score {
+                best_overall_score = total_score;
+                best_avg_score = total_score / matching_fields as f64;
+                best_confidence = matching_fields;
+                best_ssid = record.ssid.clone();
+            }
+        }
+
+        // Guard the ratio: with no fields anywhere it would be 0/0 = NaN.
+        let confidence = if max_fields == 0 {
+            0.0
+        } else {
+            best_confidence as f64 / max_fields as f64
+        };
+
+        (best_avg_score, confidence, best_ssid)
+    }
+}
+
+/// Computes the Levenshtein (edit) distance between `s1` and `s2`.
+///
+/// The distance is the minimum number of single-character insertions,
+/// deletions and substitutions turning `s1` into `s2`, counted over Unicode
+/// scalar values (`char`s), not bytes.
+fn levenshtein_distance(s1: &str, s2: &str) -> usize {
+    let source: Vec<char> = s1.chars().collect();
+    let target: Vec<char> = s2.chars().collect();
+
+    // `previous` is the DP row for the source prefix handled so far; it
+    // starts as the row for the empty prefix (j insertions for column j).
+    let mut previous: Vec<usize> = (0..=target.len()).collect();
+    let mut current = vec![0; target.len() + 1];
+
+    for (row, &from) in source.iter().enumerate() {
+        // Column 0: delete every source character seen so far.
+        current[0] = row + 1;
+
+        for (col, &to) in target.iter().enumerate() {
+            let substitution = previous[col] + usize::from(from != to);
+            let deletion = previous[col + 1] + 1;
+            let insertion = current[col] + 1;
+            current[col + 1] = substitution.min(deletion).min(insertion);
+        }
+
+        std::mem::swap(&mut previous, &mut current);
+    }
+
+    previous[target.len()]
+}
+
+/// Entry point: `robocop <json_file>`.
+///
+/// Loads the record database from the given JSON file, then reads stdin line
+/// by line. Blank lines are skipped; every other line must be a JSON object
+/// and produces one output line `<quality> <confidence> <ssid>` with the two
+/// numbers printed to six decimals.
+///
+/// Exits with status 1 on a wrong argument count and on the first input line
+/// that is not a JSON object. Database loading and stdin read errors are
+/// returned to the caller.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let args: Vec<String> = env::args().collect();
+    let filename = match args.as_slice() {
+        [_, path] => path,
+        _ => {
+            eprintln!("Usage: {} <json_file>", args[0]);
+            std::process::exit(1);
+        }
+    };
+
+    // Load and pre-process the JSON database.
+    let mut engine = MatchingEngine::new();
+    engine.load_from_json(filename)?;
+
+    // One JSON object per stdin line.
+    for line in BufReader::new(io::stdin()).lines() {
+        let line = line?;
+        if line.trim().is_empty() {
+            continue;
+        }
+
+        // NOTE(review): both messages below say "Warning" (one even says
+        // "Skipping"), yet the process terminates with status 1 - confirm
+        // which behaviour is intended.
+        let obj = match serde_json::from_str::<Value>(&line) {
+            Ok(Value::Object(obj)) => obj,
+            Ok(_) => {
+                eprintln!("Warning: Skipping non-object JSON: {}", line);
+                std::process::exit(1);
+            }
+            Err(e) => {
+                eprintln!("Warning: Failed to parse JSON: {} - {}", line, e);
+                std::process::exit(1);
+            }
+        };
+
+        let (quality, confidence, ssid) = engine.find_best_match(&obj);
+        println!("{:.6} {:.6} {}", quality, confidence, ssid);
+    }
+
+    Ok(())
+}
--
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [robocop] branch master updated: rewrite robocop in rust,
Admin <=