From a423181451eb2fdbd130b51134f824f50542f6e3 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Wed, 13 May 2026 10:18:51 -0600 Subject: [PATCH] feat: Added graph validation --- src/graph/mod.rs | 2 + src/graph/state.rs | 8 +- src/graph/validator.rs | 640 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 644 insertions(+), 6 deletions(-) create mode 100644 src/graph/validator.rs diff --git a/src/graph/mod.rs b/src/graph/mod.rs index 385b6b9..3a0655c 100644 --- a/src/graph/mod.rs +++ b/src/graph/mod.rs @@ -4,6 +4,7 @@ pub mod parser; pub mod state; pub mod types; +pub mod validator; pub use parser::{GraphParser, agent_has_graph, load_agent_graph}; pub use state::{StateManager, StateRepresentation}; @@ -11,6 +12,7 @@ pub use types::{ AgentNode, ApprovalNode, EndNode, Graph, GraphSettings, GraphState, InputNode, Node, NodeType, ScriptNode, }; +pub use validator::{GraphValidator, ValidationError, ValidationResult}; pub const GRAPH_SCHEMA_VERSION: &str = "1.0"; diff --git a/src/graph/state.rs b/src/graph/state.rs index 7ac35e7..0ec7396 100644 --- a/src/graph/state.rs +++ b/src/graph/state.rs @@ -209,12 +209,8 @@ fn value_to_string(value: &Value) -> String { Value::Number(n) => n.to_string(), Value::Bool(b) => b.to_string(), Value::Null => "null".to_string(), - Value::Array(_) => { - serde_json::to_string(value).unwrap_or_else(|_| String::from("[]")) - } - Value::Object(_) => { - serde_json::to_string(value).unwrap_or_else(|_| String::from("{}")) - } + Value::Array(_) => serde_json::to_string(value).unwrap_or_else(|_| String::from("[]")), + Value::Object(_) => serde_json::to_string(value).unwrap_or_else(|_| String::from("{}")), } } diff --git a/src/graph/validator.rs b/src/graph/validator.rs new file mode 100644 index 0000000..bb0c590 --- /dev/null +++ b/src/graph/validator.rs @@ -0,0 +1,640 @@ +//! Static validation for graph definitions: reference integrity, cycles, +//! reachability, terminal nodes, script/agent existence, and approval +//! routes-vs-options consistency. +//! +//! The validator only follows **declared static edges** (`next`, approval +//! `routes`, script `fallback`, `on_timeout`). Script nodes can also route +//! dynamically via `_next` in their JSON output at runtime; those edges are +//! invisible here. As a result, unreachable-node detection and "no reachable +//! End node" are reported as warnings (not errors) to avoid false positives +//! against dynamically-routed graphs. + +use super::types::{Graph, Node, NodeType}; +use crate::config::paths; +use anyhow::{Result, bail}; +use std::collections::{HashSet, VecDeque}; +use std::path::PathBuf; + +/// A single validation finding, optionally scoped to a node. +#[derive(Debug, Clone)] +pub struct ValidationError { + pub node_id: Option, + pub message: String, +} + +impl ValidationError { + fn new(message: impl Into) -> Self { + Self { + node_id: None, + message: message.into(), + } + } + + fn with_node(node_id: impl Into, message: impl Into) -> Self { + Self { + node_id: Some(node_id.into()), + message: message.into(), + } + } +} + +/// Aggregated validation findings: blocking `errors` and informational +/// `warnings`. +#[derive(Debug, Default)] +pub struct ValidationResult { + pub errors: Vec, + pub warnings: Vec, +} + +impl ValidationResult { + pub fn is_valid(&self) -> bool { + self.errors.is_empty() + } + + fn error(&mut self, e: ValidationError) { + self.errors.push(e); + } + + fn warning(&mut self, w: ValidationError) { + self.warnings.push(w); + } + + /// Consume into a `Result`, aggregating all errors into a single message. + /// Warnings are dropped. + pub fn into_result(self) -> Result<()> { + if self.is_valid() { + return Ok(()); + } + let lines: Vec = self + .errors + .iter() + .map(|e| match &e.node_id { + Some(id) => format!(" [{id}] {}", e.message), + None => format!(" {}", e.message), + }) + .collect(); + bail!( + "Graph validation failed with {} error(s):\n{}", + self.errors.len(), + lines.join("\n") + ); + } +} + +/// Validator for graph structures. `base_dir` is used to resolve relative +/// script paths (typically the owning agent's data directory). +pub struct GraphValidator { + base_dir: PathBuf, +} + +impl GraphValidator { + pub fn new(base_dir: impl Into) -> Self { + Self { + base_dir: base_dir.into(), + } + } + + pub fn validate(&self, graph: &Graph) -> ValidationResult { + let mut result = ValidationResult::default(); + self.validate_node_references(graph, &mut result); + self.validate_cycles(graph, &mut result); + self.validate_reachability(graph, &mut result); + self.validate_terminal_nodes(graph, &mut result); + self.validate_scripts(graph, &mut result); + self.validate_agents(graph, &mut result); + self.validate_approval_routes(graph, &mut result); + result + } + + fn validate_node_references(&self, graph: &Graph, result: &mut ValidationResult) { + for (node_id, node) in &graph.nodes { + for (target, label) in declared_targets(node) { + if !graph.has_node(&target) { + result.error(ValidationError::with_node( + node_id, + format!("References non-existent node '{target}' in {label}"), + )); + } + } + } + } + + fn validate_cycles(&self, graph: &Graph, result: &mut ValidationResult) { + let mut visited: HashSet = HashSet::new(); + let mut rec_stack: HashSet = HashSet::new(); + let mut path: Vec = Vec::new(); + + for node_id in graph.node_ids() { + if !visited.contains(node_id) + && let Some(cycle) = + detect_cycle_dfs(graph, node_id, &mut visited, &mut rec_stack, &mut path) + { + result.error(ValidationError::new(format!( + "Cycle detected: {}", + cycle.join(" -> ") + ))); + return; + } + } + } + + fn validate_reachability(&self, graph: &Graph, result: &mut ValidationResult) { + let reachable = find_reachable_nodes(graph); + for node_id in graph.node_ids() { + if !reachable.contains(node_id) { + result.warning(ValidationError::with_node( + node_id, + "Node is unreachable from the start node via declared edges \ + (script `_next` routing is not analyzed)", + )); + } + } + } + + fn validate_terminal_nodes(&self, graph: &Graph, result: &mut ValidationResult) { + let has_any_end = graph + .nodes + .values() + .any(|n| matches!(n.node_type, NodeType::End(_))); + + if !has_any_end { + result.error(ValidationError::new( + "Graph has no end nodes; execution would never terminate", + )); + return; + } + + let reachable = find_reachable_nodes(graph); + let reachable_end = graph + .nodes + .iter() + .any(|(id, n)| matches!(n.node_type, NodeType::End(_)) && reachable.contains(id)); + if !reachable_end { + result.warning(ValidationError::new( + "No end node is reachable from the start node via declared edges \ + (a script's `_next` may still route to one)", + )); + } + } + + fn validate_scripts(&self, graph: &Graph, result: &mut ValidationResult) { + for (node_id, node) in &graph.nodes { + if let NodeType::Script(s) = &node.node_type { + let script_path = self.base_dir.join(&s.script); + if !script_path.exists() { + result.error(ValidationError::with_node( + node_id, + format!("Script file not found: '{}'", script_path.display()), + )); + } + } + } + } + + fn validate_agents(&self, graph: &Graph, result: &mut ValidationResult) { + for (node_id, node) in &graph.nodes { + if let NodeType::Agent(a) = &node.node_type { + let agent_dir = paths::agent_data_dir(&a.agent); + let config_path = paths::agent_config_file(&a.agent); + if !agent_dir.exists() { + result.error(ValidationError::with_node( + node_id, + format!("Agent '{}' not found (directory missing)", a.agent), + )); + } else if !config_path.exists() { + result.error(ValidationError::with_node( + node_id, + format!("Agent '{}' has no config.yaml", a.agent), + )); + } + } + } + } + + fn validate_approval_routes(&self, graph: &Graph, result: &mut ValidationResult) { + for (node_id, node) in &graph.nodes { + if let NodeType::Approval(a) = &node.node_type { + for option in &a.options { + if !a.routes.contains_key(option) { + result.error(ValidationError::with_node( + node_id, + format!("Approval option '{option}' has no route defined"), + )); + } + } + for key in a.routes.keys() { + if !a.options.contains(key) { + result.warning(ValidationError::with_node( + node_id, + format!("Route '{key}' has no corresponding option"), + )); + } + } + } + } + } +} + +/// All declared outgoing targets from a node, paired with a human-readable +/// label for use in error messages. Used both for cycle detection and +/// reference validation. +fn declared_targets(node: &Node) -> Vec<(String, &'static str)> { + let mut out = Vec::new(); + if let Some(n) = &node.next { + out.push((n.clone(), "'next'")); + } + match &node.node_type { + NodeType::Approval(a) => { + for v in a.routes.values() { + out.push((v.clone(), "approval 'routes'")); + } + if let Some(t) = &a.on_timeout { + out.push((t.clone(), "'on_timeout'")); + } + } + NodeType::Script(s) => { + if let Some(t) = &s.fallback { + out.push((t.clone(), "script 'fallback'")); + } + } + NodeType::Input(i) => { + if let Some(t) = &i.on_timeout { + out.push((t.clone(), "'on_timeout'")); + } + } + NodeType::Agent(_) | NodeType::End(_) => {} + } + out +} + +fn outgoing_node_ids(node: &Node) -> Vec { + declared_targets(node).into_iter().map(|(t, _)| t).collect() +} + +fn find_reachable_nodes(graph: &Graph) -> HashSet { + let mut reachable: HashSet = HashSet::new(); + let mut queue: VecDeque = VecDeque::new(); + + if !graph.has_node(&graph.start) { + return reachable; + } + + reachable.insert(graph.start.clone()); + queue.push_back(graph.start.clone()); + + while let Some(id) = queue.pop_front() { + if let Some(node) = graph.get_node(&id) { + for next in outgoing_node_ids(node) { + if graph.has_node(&next) && reachable.insert(next.clone()) { + queue.push_back(next); + } + } + } + } + reachable +} + +fn detect_cycle_dfs( + graph: &Graph, + node_id: &str, + visited: &mut HashSet, + rec_stack: &mut HashSet, + path: &mut Vec, +) -> Option> { + visited.insert(node_id.to_string()); + rec_stack.insert(node_id.to_string()); + path.push(node_id.to_string()); + + if let Some(node) = graph.get_node(node_id) { + for next in outgoing_node_ids(node) { + if !graph.has_node(&next) { + continue; + } + if !visited.contains(&next) { + if let Some(cycle) = detect_cycle_dfs(graph, &next, visited, rec_stack, path) { + return Some(cycle); + } + } else if rec_stack.contains(&next) { + let start = path.iter().position(|n| n == &next).unwrap_or(0); + let mut cycle: Vec = path[start..].to_vec(); + cycle.push(next.clone()); + return Some(cycle); + } + } + } + + path.pop(); + rec_stack.remove(node_id); + None +} + +#[cfg(test)] +mod tests { + use super::super::types::*; + use super::*; + use indexmap::IndexMap; + use std::collections::HashMap; + use std::env; + + fn graph_with(nodes: Vec<(&str, Node)>, start: &str) -> Graph { + let mut map: IndexMap = IndexMap::new(); + for (id, node) in nodes { + map.insert(id.to_string(), node); + } + Graph { + name: "t".into(), + description: String::new(), + version: "1.0".into(), + settings: GraphSettings::default(), + initial_state: HashMap::new(), + start: start.into(), + nodes: map, + } + } + + fn end_node(id: &str) -> Node { + Node { + id: id.into(), + description: String::new(), + node_type: NodeType::End(EndNode { + output: String::new(), + state_updates: None, + }), + next: None, + } + } + + fn approval_node(id: &str, options: &[&str], routes: &[(&str, &str)]) -> Node { + let mut r: HashMap = HashMap::new(); + for (k, v) in routes { + r.insert((*k).into(), (*v).into()); + } + Node { + id: id.into(), + description: String::new(), + node_type: NodeType::Approval(ApprovalNode { + question: "?".into(), + options: options.iter().map(|s| (*s).into()).collect(), + routes: r, + state_updates: None, + timeout: None, + on_timeout: None, + }), + next: None, + } + } + + fn script_node(id: &str, script: &str, fallback: Option<&str>) -> Node { + Node { + id: id.into(), + description: String::new(), + node_type: NodeType::Script(ScriptNode { + script: script.into(), + state_updates: None, + fallback: fallback.map(String::from), + timeout: 30, + }), + next: None, + } + } + + fn agent_node(id: &str, agent: &str, next: Option<&str>) -> Node { + Node { + id: id.into(), + description: String::new(), + node_type: NodeType::Agent(AgentNode { + agent: agent.into(), + prompt: "hi".into(), + state_updates: None, + timeout: None, + }), + next: next.map(String::from), + } + } + + fn validator() -> GraphValidator { + GraphValidator::new(env::current_dir().unwrap()) + } + + #[test] + fn valid_simple_graph_passes() { + let mut start = end_node("start"); + start.next = Some("end".into()); + let graph = graph_with(vec![("start", start), ("end", end_node("end"))], "start"); + let result = validator().validate(&graph); + assert!(result.is_valid(), "errors: {:?}", result.errors); + } + + #[test] + fn flags_missing_node_reference_in_next() { + let mut n = end_node("n1"); + n.next = Some("nope".into()); + let graph = graph_with(vec![("n1", n), ("end", end_node("end"))], "n1"); + let result = validator().validate(&graph); + assert!(!result.is_valid()); + assert!( + result + .errors + .iter() + .any(|e| e.message.contains("non-existent node 'nope'") + && e.node_id.as_deref() == Some("n1")) + ); + } + + #[test] + fn flags_missing_approval_route_target() { + let approval = approval_node("ap", &["yes", "no"], &[("yes", "end"), ("no", "missing")]); + let graph = graph_with(vec![("ap", approval), ("end", end_node("end"))], "ap"); + let result = validator().validate(&graph); + assert!(!result.is_valid()); + assert!( + result + .errors + .iter() + .any(|e| e.message.contains("non-existent node 'missing'")) + ); + } + + #[test] + fn flags_missing_script_fallback_target() { + let scr = script_node("s", "does-not-exist.py", Some("nowhere")); + let graph = graph_with(vec![("s", scr), ("end", end_node("end"))], "s"); + let result = validator().validate(&graph); + assert!( + result + .errors + .iter() + .any(|e| e.message.contains("non-existent node 'nowhere'")) + ); + } + + #[test] + fn detects_two_node_cycle() { + let mut a = end_node("a"); + a.next = Some("b".into()); + let mut b = end_node("b"); + b.next = Some("a".into()); + let graph = graph_with(vec![("a", a), ("b", b)], "a"); + let result = validator().validate(&graph); + assert!(!result.is_valid()); + assert!( + result + .errors + .iter() + .any(|e| e.message.contains("Cycle detected")) + ); + } + + #[test] + fn detects_self_loop_as_cycle() { + let mut a = end_node("a"); + a.next = Some("a".into()); + let graph = graph_with(vec![("a", a)], "a"); + let result = validator().validate(&graph); + assert!( + result + .errors + .iter() + .any(|e| e.message.contains("Cycle detected")) + ); + } + + #[test] + fn warns_on_unreachable_node() { + let graph = graph_with( + vec![("start", end_node("start")), ("orphan", end_node("orphan"))], + "start", + ); + let result = validator().validate(&graph); + assert!( + result.warnings.iter().any( + |w| w.node_id.as_deref() == Some("orphan") && w.message.contains("unreachable") + ) + ); + } + + #[test] + fn errors_when_graph_has_no_end_node_at_all() { + let mut a = agent_node("a", "__no_such_agent__", Some("b")); + let b = agent_node("b", "__no_such_agent__", None); + a.next = Some("b".into()); + let graph = graph_with(vec![("a", a), ("b", b)], "a"); + let result = validator().validate(&graph); + assert!( + result + .errors + .iter() + .any(|e| e.message.contains("no end nodes")), + "errors: {:?}", + result.errors + ); + } + + #[test] + fn warns_when_end_exists_but_not_reachable() { + let start = Node { + id: "start".into(), + description: String::new(), + node_type: NodeType::Input(InputNode { + question: "?".into(), + default: None, + validation: None, + state_updates: None, + timeout: None, + on_timeout: None, + }), + next: None, + }; + let graph = graph_with( + vec![("start", start), ("orphan_end", end_node("orphan_end"))], + "start", + ); + let result = validator().validate(&graph); + assert!(result.is_valid(), "unexpected errors: {:?}", result.errors); + assert!( + result + .warnings + .iter() + .any(|w| w.message.contains("No end node is reachable")) + ); + } + + #[test] + fn errors_when_script_file_missing() { + let scr = script_node("s", "definitely-not-here.py", None); + let mut start = end_node("start"); + start.next = Some("s".into()); + let graph = graph_with( + vec![("start", start), ("s", scr), ("end", end_node("end"))], + "start", + ); + let result = validator().validate(&graph); + assert!( + result + .errors + .iter() + .any(|e| e.message.contains("Script file not found") + && e.node_id.as_deref() == Some("s")) + ); + } + + #[test] + fn errors_when_referenced_agent_missing() { + let agent = agent_node("a", "__definitely_no_such_agent__", Some("end")); + let graph = graph_with(vec![("a", agent), ("end", end_node("end"))], "a"); + let result = validator().validate(&graph); + assert!(result.errors.iter().any(|e| { + e.message + .contains("Agent '__definitely_no_such_agent__' not found") + })); + } + + #[test] + fn errors_when_approval_option_has_no_route() { + let approval = approval_node("ap", &["yes", "no"], &[("yes", "end")]); + let graph = graph_with(vec![("ap", approval), ("end", end_node("end"))], "ap"); + let result = validator().validate(&graph); + assert!( + result + .errors + .iter() + .any(|e| e.message.contains("'no' has no route defined")) + ); + } + + #[test] + fn warns_when_approval_has_extra_route() { + let approval = approval_node("ap", &["yes"], &[("yes", "end"), ("maybe", "end")]); + let graph = graph_with(vec![("ap", approval), ("end", end_node("end"))], "ap"); + let result = validator().validate(&graph); + assert!(result.warnings.iter().any(|w| { + w.message + .contains("Route 'maybe' has no corresponding option") + })); + } + + #[test] + fn into_result_aggregates_all_errors() { + let mut a = end_node("a"); + a.next = Some("missing1".into()); + let mut b = end_node("b"); + b.next = Some("missing2".into()); + let graph = graph_with(vec![("a", a), ("b", b)], "a"); + let err = validator() + .validate(&graph) + .into_result() + .unwrap_err() + .to_string(); + assert!(err.contains("missing1"), "got: {err}"); + assert!(err.contains("missing2"), "got: {err}"); + assert!(err.contains("validation failed"), "got: {err}"); + } + + #[test] + fn into_result_returns_ok_when_no_errors() { + let mut start = end_node("start"); + start.next = Some("end".into()); + let graph = graph_with(vec![("start", start), ("end", end_node("end"))], "start"); + assert!(validator().validate(&graph).into_result().is_ok()); + } +}