From 1dff08893a33ff69a3047823a17afed83c4b6a86 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 10:22:46 -0600 Subject: [PATCH 01/85] feat: scaffold skill module --- src/config/mod.rs | 5 + src/config/skill.rs | 274 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 279 insertions(+) create mode 100644 src/config/skill.rs diff --git a/src/config/mod.rs b/src/config/mod.rs index 68339c3..1bdd158 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -11,6 +11,7 @@ mod rag_cache; mod request_context; mod role; mod session; +mod skill; pub(crate) mod todo; mod tool_scope; mod update; @@ -30,6 +31,8 @@ pub use self::role::{ CODE_ROLE, CREATE_TITLE_ROLE, EXPLAIN_SHELL_ROLE, Role, RoleLike, SHELL_ROLE, }; use self::session::Session; +#[allow(unused_imports)] +pub use self::skill::Skill; pub use self::update::run_self_update; use crate::client::{ ClientConfig, MessageContentToolCalls, Model, ModelType, OPENAI_COMPATIBLE_PROVIDERS, @@ -74,6 +77,8 @@ const LIGHT_THEME: &[u8] = include_bytes!("../../assets/monokai-extended-light.t const CONFIG_FILE_NAME: &str = "config.yaml"; const AGENT_GRAPH_FILE_NAME: &str = "graph.yaml"; const ROLES_DIR_NAME: &str = "roles"; +#[allow(dead_code)] +const SKILLS_DIR_NAME: &str = "skills"; const MACROS_DIR_NAME: &str = "macros"; const ENV_FILE_NAME: &str = ".env"; const MESSAGES_FILE_NAME: &str = "messages.md"; diff --git a/src/config/skill.rs b/src/config/skill.rs new file mode 100644 index 0000000..b80fb70 --- /dev/null +++ b/src/config/skill.rs @@ -0,0 +1,274 @@ +use super::*; + +use anyhow::Result; +use fancy_regex::Regex; +use rust_embed::Embed; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::sync::LazyLock; + +#[derive(Embed)] +#[folder = "assets/skills/"] +struct SkillsAsset; + +static RE_METADATA: LazyLock = + LazyLock::new(|| Regex::new(r"(?s)-{3,}\s*(.*?)\s*-{3,}\s*(.*)").unwrap()); + +#[derive(Debug, Clone, Default, Deserialize, Serialize)] +pub struct Skill { + name: String, + #[serde(default)] + description: String, + #[serde(default)] + body: String, + #[serde(skip_serializing_if = "Option::is_none")] + enabled_tools: Option, + #[serde(skip_serializing_if = "Option::is_none")] + enabled_mcp_servers: Option, + #[serde(skip_serializing_if = "Option::is_none")] + auto_unload: Option, +} + +#[allow(dead_code)] +impl Skill { + pub fn new(name: &str, content: &str) -> Self { + let mut metadata = ""; + let mut body = content.trim(); + if let Ok(Some(caps)) = RE_METADATA.captures(content) + && let (Some(metadata_value), Some(body_value)) = (caps.get(1), caps.get(2)) + { + metadata = metadata_value.as_str().trim(); + body = body_value.as_str().trim(); + } + let mut body = body.to_string(); + interpolate_variables(&mut body); + let mut skill = Self { + name: name.to_string(), + body, + ..Default::default() + }; + if !metadata.is_empty() + && let Ok(value) = serde_yaml::from_str::(metadata) + && let Some(value) = value.as_object() + { + for (key, value) in value { + match key.as_str() { + "description" => { + if let Some(v) = value.as_str() { + skill.description = v.to_string(); + } + } + "enabled_tools" => { + skill.enabled_tools = value.as_str().map(|v| v.to_string()); + } + "enabled_mcp_servers" => { + skill.enabled_mcp_servers = value.as_str().map(|v| v.to_string()); + } + "auto_unload" => { + skill.auto_unload = value.as_bool(); + } + _ => (), + } + } + } + skill + } + + pub fn builtin(name: &str) -> Result { + let content = SkillsAsset::get(&format!("{name}/SKILL.md")) + .ok_or_else(|| anyhow!("Unknown skill `{name}`"))?; + let content = unsafe { std::str::from_utf8_unchecked(&content.data) }; + Ok(Skill::new(name, content)) + } + + pub fn list_builtin_skill_names() -> Vec { + SkillsAsset::iter() + .filter_map(|v| v.strip_suffix("/SKILL.md").map(|v| v.to_string())) + .collect() + } + + pub fn name(&self) -> &str { + &self.name + } + + pub fn description(&self) -> &str { + &self.description + } + + pub fn body(&self) -> &str { + &self.body + } + + pub fn enabled_tools(&self) -> Option<&str> { + self.enabled_tools.as_deref() + } + + pub fn enabled_mcp_servers(&self) -> Option<&str> { + self.enabled_mcp_servers.as_deref() + } + + pub fn auto_unload(&self) -> bool { + self.auto_unload.unwrap_or(false) + } + + pub fn is_compatible(&self, function_calling_enabled: bool, mcp_enabled: bool) -> bool { + if self.declares_tools() && !function_calling_enabled { + return false; + } + if self.declares_mcp_servers() && !mcp_enabled { + return false; + } + true + } + + fn declares_tools(&self) -> bool { + self.enabled_tools + .as_deref() + .map(|s| !s.trim().is_empty()) + .unwrap_or(false) + } + + fn declares_mcp_servers(&self) -> bool { + self.enabled_mcp_servers + .as_deref() + .map(|s| !s.trim().is_empty()) + .unwrap_or(false) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn skill_new_parses_body() { + let skill = Skill::new("test", "You are a git expert"); + assert_eq!(skill.name(), "test"); + assert_eq!(skill.body(), "You are a git expert"); + assert_eq!(skill.description(), ""); + } + + #[test] + fn skill_new_parses_full_metadata() { + let content = "---\n\ + description: Atomic commits, rebase surgery\n\ + enabled_tools: shell,fs\n\ + enabled_mcp_servers: github\n\ + auto_unload: true\n\ + ---\n\ + You are a git expert"; + let skill = Skill::new("git-master", content); + assert_eq!(skill.name(), "git-master"); + assert_eq!(skill.description(), "Atomic commits, rebase surgery"); + assert_eq!(skill.enabled_tools(), Some("shell,fs")); + assert_eq!(skill.enabled_mcp_servers(), Some("github")); + assert!(skill.auto_unload()); + assert_eq!(skill.body(), "You are a git expert"); + } + + #[test] + fn skill_new_no_metadata_has_defaults() { + let skill = Skill::new("test", "Just a body"); + assert_eq!(skill.description(), ""); + assert_eq!(skill.enabled_tools(), None); + assert_eq!(skill.enabled_mcp_servers(), None); + assert!(!skill.auto_unload()); + } + + #[test] + fn skill_new_metadata_only() { + let content = "---\ndescription: Just metadata\n---"; + let skill = Skill::new("test", content); + assert_eq!(skill.description(), "Just metadata"); + assert_eq!(skill.body(), ""); + } + + #[test] + fn skill_new_partial_metadata_leaves_others_none() { + let content = "---\ndescription: Partial\n---\nthe body"; + let skill = Skill::new("test", content); + assert_eq!(skill.description(), "Partial"); + assert_eq!(skill.enabled_tools(), None); + assert_eq!(skill.enabled_mcp_servers(), None); + assert!(!skill.auto_unload()); + assert_eq!(skill.body(), "the body"); + } + + #[test] + fn skill_new_ignores_unknown_keys() { + let content = "---\ndescription: D\nbogus_field: 42\n---\nbody"; + let skill = Skill::new("test", content); + assert_eq!(skill.description(), "D"); + assert_eq!(skill.body(), "body"); + } + + #[test] + fn skill_new_trims_body_whitespace() { + let content = "---\ndescription: D\n---\n\n\n body content \n\n"; + let skill = Skill::new("test", content); + assert_eq!(skill.body(), "body content"); + } + + #[test] + fn skill_default_has_empty_fields() { + let skill = Skill::default(); + assert_eq!(skill.name(), ""); + assert_eq!(skill.body(), ""); + assert_eq!(skill.description(), ""); + assert_eq!(skill.enabled_tools(), None); + assert_eq!(skill.enabled_mcp_servers(), None); + assert!(!skill.auto_unload()); + } + + #[test] + fn is_compatible_knowledge_only_passes_all_combinations() { + let skill = Skill::new("test", "Just knowledge"); + assert!(skill.is_compatible(false, false)); + assert!(skill.is_compatible(true, false)); + assert!(skill.is_compatible(false, true)); + assert!(skill.is_compatible(true, true)); + } + + #[test] + fn is_compatible_with_tools_requires_function_calling() { + let content = "---\nenabled_tools: shell\n---\nbody"; + let skill = Skill::new("test", content); + assert!(!skill.is_compatible(false, true)); + assert!(!skill.is_compatible(false, false)); + assert!(skill.is_compatible(true, true)); + assert!(skill.is_compatible(true, false)); + } + + #[test] + fn is_compatible_with_mcp_requires_mcp_enabled() { + let content = "---\nenabled_mcp_servers: github\n---\nbody"; + let skill = Skill::new("test", content); + assert!(!skill.is_compatible(true, false)); + assert!(!skill.is_compatible(false, false)); + assert!(skill.is_compatible(true, true)); + } + + #[test] + fn is_compatible_requires_both_when_both_declared() { + let content = + "---\nenabled_tools: shell\nenabled_mcp_servers: github\n---\nbody"; + let skill = Skill::new("test", content); + assert!(!skill.is_compatible(true, false)); + assert!(!skill.is_compatible(false, true)); + assert!(!skill.is_compatible(false, false)); + assert!(skill.is_compatible(true, true)); + } + + #[test] + fn is_compatible_empty_string_tools_is_knowledge_only() { + let content = "---\nenabled_tools: \"\"\n---\nbody"; + let skill = Skill::new("test", content); + assert!(skill.is_compatible(false, false)); + } + + #[test] + fn builtin_returns_err_for_unknown_skill() { + let result = Skill::builtin("nonexistent_skill_xyz"); + assert!(result.is_err()); + } +} From 3239c5d990b6360ac653f17c0946bfe182e38c3b Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 11:17:55 -0600 Subject: [PATCH 02/85] feat: decided to make skills persist to disk like agents and not in-memory like built-in roles --- src/config/mod.rs | 1 - src/config/paths.rs | 44 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index 1bdd158..8b94b42 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -77,7 +77,6 @@ const LIGHT_THEME: &[u8] = include_bytes!("../../assets/monokai-extended-light.t const CONFIG_FILE_NAME: &str = "config.yaml"; const AGENT_GRAPH_FILE_NAME: &str = "graph.yaml"; const ROLES_DIR_NAME: &str = "roles"; -#[allow(dead_code)] const SKILLS_DIR_NAME: &str = "skills"; const MACROS_DIR_NAME: &str = "macros"; const ENV_FILE_NAME: &str = ".env"; diff --git a/src/config/paths.rs b/src/config/paths.rs index b1df31e..c98c5a8 100644 --- a/src/config/paths.rs +++ b/src/config/paths.rs @@ -3,7 +3,7 @@ use super::{ AGENT_GRAPH_FILE_NAME, AGENTS_DIR_NAME, BASH_PROMPT_UTILS_FILE_NAME, CONFIG_FILE_NAME, ENV_FILE_NAME, FUNCTIONS_BIN_DIR_NAME, FUNCTIONS_DIR_NAME, GLOBAL_TOOLS_DIR_NAME, GLOBAL_TOOLS_UTILS_DIR_NAME, MACROS_DIR_NAME, MCP_FILE_NAME, ModelsOverride, RAGS_DIR_NAME, - ROLES_DIR_NAME, + ROLES_DIR_NAME, SKILLS_DIR_NAME, }; use crate::client::ProviderModels; use crate::utils::{get_env_name, list_file_names, normalize_env_name}; @@ -65,6 +65,24 @@ pub fn role_file(name: &str) -> PathBuf { roles_dir().join(format!("{name}.md")) } +#[allow(dead_code)] +pub fn skills_dir() -> PathBuf { + match env::var(get_env_name("skills_dir")) { + Ok(value) => PathBuf::from(value), + Err(_) => local_path(SKILLS_DIR_NAME), + } +} + +#[allow(dead_code)] +pub fn skill_dir(name: &str) -> PathBuf { + skills_dir().join(name) +} + +#[allow(dead_code)] +pub fn skill_file(name: &str) -> PathBuf { + skill_dir(name).join("SKILL.md") +} + pub fn macros_dir() -> PathBuf { match env::var(get_env_name("macros_dir")) { Ok(value) => PathBuf::from(value), @@ -234,6 +252,30 @@ pub fn has_macro(name: &str) -> bool { names.contains(&name.to_string()) } +#[allow(dead_code)] +pub fn list_skills() -> Vec { + let mut names = Vec::new(); + if let Ok(rd) = read_dir(skills_dir()) { + for entry in rd.flatten() { + if let Ok(file_type) = entry.file_type() + && file_type.is_dir() + && let Some(name) = entry.file_name().to_str() + && entry.path().join("SKILL.md").is_file() + { + names.push(name.to_string()); + } + } + } + + names.sort_unstable(); + names +} + +#[allow(dead_code)] +pub fn has_skill(name: &str) -> bool { + skill_file(name).is_file() +} + pub fn local_models_override() -> Result> { let model_override_path = models_override_file(); let err = || { From f401c637cc1b3f4453b0a7e48462b2bb6cdf8fb5 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 11:19:02 -0600 Subject: [PATCH 03/85] tests: update skill tests --- src/config/skill.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/config/skill.rs b/src/config/skill.rs index b80fb70..9d7ed3c 100644 --- a/src/config/skill.rs +++ b/src/config/skill.rs @@ -143,6 +143,7 @@ mod tests { #[test] fn skill_new_parses_body() { let skill = Skill::new("test", "You are a git expert"); + assert_eq!(skill.name(), "test"); assert_eq!(skill.body(), "You are a git expert"); assert_eq!(skill.description(), ""); @@ -157,7 +158,9 @@ mod tests { auto_unload: true\n\ ---\n\ You are a git expert"; + let skill = Skill::new("git-master", content); + assert_eq!(skill.name(), "git-master"); assert_eq!(skill.description(), "Atomic commits, rebase surgery"); assert_eq!(skill.enabled_tools(), Some("shell,fs")); @@ -169,6 +172,7 @@ mod tests { #[test] fn skill_new_no_metadata_has_defaults() { let skill = Skill::new("test", "Just a body"); + assert_eq!(skill.description(), ""); assert_eq!(skill.enabled_tools(), None); assert_eq!(skill.enabled_mcp_servers(), None); @@ -178,7 +182,9 @@ mod tests { #[test] fn skill_new_metadata_only() { let content = "---\ndescription: Just metadata\n---"; + let skill = Skill::new("test", content); + assert_eq!(skill.description(), "Just metadata"); assert_eq!(skill.body(), ""); } @@ -186,7 +192,9 @@ mod tests { #[test] fn skill_new_partial_metadata_leaves_others_none() { let content = "---\ndescription: Partial\n---\nthe body"; + let skill = Skill::new("test", content); + assert_eq!(skill.description(), "Partial"); assert_eq!(skill.enabled_tools(), None); assert_eq!(skill.enabled_mcp_servers(), None); @@ -197,7 +205,9 @@ mod tests { #[test] fn skill_new_ignores_unknown_keys() { let content = "---\ndescription: D\nbogus_field: 42\n---\nbody"; + let skill = Skill::new("test", content); + assert_eq!(skill.description(), "D"); assert_eq!(skill.body(), "body"); } @@ -205,13 +215,16 @@ mod tests { #[test] fn skill_new_trims_body_whitespace() { let content = "---\ndescription: D\n---\n\n\n body content \n\n"; + let skill = Skill::new("test", content); + assert_eq!(skill.body(), "body content"); } #[test] fn skill_default_has_empty_fields() { let skill = Skill::default(); + assert_eq!(skill.name(), ""); assert_eq!(skill.body(), ""); assert_eq!(skill.description(), ""); @@ -223,6 +236,7 @@ mod tests { #[test] fn is_compatible_knowledge_only_passes_all_combinations() { let skill = Skill::new("test", "Just knowledge"); + assert!(skill.is_compatible(false, false)); assert!(skill.is_compatible(true, false)); assert!(skill.is_compatible(false, true)); @@ -232,7 +246,9 @@ mod tests { #[test] fn is_compatible_with_tools_requires_function_calling() { let content = "---\nenabled_tools: shell\n---\nbody"; + let skill = Skill::new("test", content); + assert!(!skill.is_compatible(false, true)); assert!(!skill.is_compatible(false, false)); assert!(skill.is_compatible(true, true)); @@ -242,7 +258,9 @@ mod tests { #[test] fn is_compatible_with_mcp_requires_mcp_enabled() { let content = "---\nenabled_mcp_servers: github\n---\nbody"; + let skill = Skill::new("test", content); + assert!(!skill.is_compatible(true, false)); assert!(!skill.is_compatible(false, false)); assert!(skill.is_compatible(true, true)); @@ -252,7 +270,9 @@ mod tests { fn is_compatible_requires_both_when_both_declared() { let content = "---\nenabled_tools: shell\nenabled_mcp_servers: github\n---\nbody"; + let skill = Skill::new("test", content); + assert!(!skill.is_compatible(true, false)); assert!(!skill.is_compatible(false, true)); assert!(!skill.is_compatible(false, false)); @@ -262,13 +282,16 @@ mod tests { #[test] fn is_compatible_empty_string_tools_is_knowledge_only() { let content = "---\nenabled_tools: \"\"\n---\nbody"; + let skill = Skill::new("test", content); + assert!(skill.is_compatible(false, false)); } #[test] fn builtin_returns_err_for_unknown_skill() { let result = Skill::builtin("nonexistent_skill_xyz"); + assert!(result.is_err()); } } From 84c6f88cf2786371fe2bca551a75ffce81c2667b Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 11:41:04 -0600 Subject: [PATCH 04/85] feat: created the skill registry --- src/config/mod.rs | 3 + src/config/skill.rs | 8 + src/config/skill_registry.rs | 320 +++++++++++++++++++++++++++++++++++ 3 files changed, 331 insertions(+) create mode 100644 src/config/skill_registry.rs diff --git a/src/config/mod.rs b/src/config/mod.rs index 8b94b42..a77572f 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -12,6 +12,7 @@ mod request_context; mod role; mod session; mod skill; +mod skill_registry; pub(crate) mod todo; mod tool_scope; mod update; @@ -33,6 +34,8 @@ pub use self::role::{ use self::session::Session; #[allow(unused_imports)] pub use self::skill::Skill; +#[allow(unused_imports)] +pub use self::skill_registry::SkillRegistry; pub use self::update::run_self_update; use crate::client::{ ClientConfig, MessageContentToolCalls, Model, ModelType, OPENAI_COMPATIBLE_PROVIDERS, diff --git a/src/config/skill.rs b/src/config/skill.rs index 9d7ed3c..23f85e6 100644 --- a/src/config/skill.rs +++ b/src/config/skill.rs @@ -81,6 +81,14 @@ impl Skill { Ok(Skill::new(name, content)) } + pub fn load(name: &str) -> Result { + let path = paths::skill_file(name); + let content = read_to_string(&path).with_context(|| { + format!("Failed to read skill '{name}' at {}", path.display()) + })?; + Ok(Skill::new(name, &content)) + } + pub fn list_builtin_skill_names() -> Vec { SkillsAsset::iter() .filter_map(|v| v.strip_suffix("/SKILL.md").map(|v| v.to_string())) diff --git a/src/config/skill_registry.rs b/src/config/skill_registry.rs new file mode 100644 index 0000000..0cd2001 --- /dev/null +++ b/src/config/skill_registry.rs @@ -0,0 +1,320 @@ +use super::role::{Role, RoleLike}; +use super::skill::Skill; + +use anyhow::{Result, bail}; +use indexmap::IndexMap; +use std::collections::BTreeSet; + +#[allow(dead_code)] +pub struct SkillRegistry { + loaded: IndexMap, +} + +#[allow(dead_code)] +impl SkillRegistry { + pub fn new() -> Self { + Self { + loaded: IndexMap::new(), + } + } + + pub fn load(&mut self, name: &str) -> Result<()> { + if self.loaded.contains_key(name) { + bail!("Skill '{name}' is already loaded"); + } + + let skill = Skill::load(name)?; + self.loaded.insert(name.to_string(), skill); + + Ok(()) + } + + pub fn unload(&mut self, name: &str) -> Result<()> { + if self.loaded.shift_remove(name).is_none() { + bail!("Skill '{name}' is not loaded"); + } + + Ok(()) + } + + pub fn loaded_names(&self) -> Vec { + self.loaded.keys().cloned().collect() + } + + pub fn is_loaded(&self, name: &str) -> bool { + self.loaded.contains_key(name) + } + + pub fn sweep_auto_unload(&mut self) { + self.loaded.retain(|_, skill| !skill.auto_unload()); + } + + pub fn effective_role(&self, base: &Role) -> Role { + if self.loaded.is_empty() { + return base.clone(); + } + + let mut effective = base.clone(); + let skip_body = effective.is_embedded_prompt(); + + let base_tools_set = effective.enabled_tools().is_some(); + let base_mcps_set = effective.enabled_mcp_servers().is_some(); + + let mut tools = parse_csv(effective.enabled_tools().as_deref()); + let mut mcps = parse_csv(effective.enabled_mcp_servers().as_deref()); + + for (_, skill) in &self.loaded { + tools.extend(parse_csv(skill.enabled_tools())); + mcps.extend(parse_csv(skill.enabled_mcp_servers())); + if !skip_body && !skill.body().is_empty() { + let separator = if effective.is_empty_prompt() { "" } else { "\n\n" }; + effective.append_to_prompt(separator); + effective.append_to_prompt(skill.body()); + } + } + + if base_tools_set || !tools.is_empty() { + effective.set_enabled_tools(Some(join_csv(&tools))); + } + + if base_mcps_set || !mcps.is_empty() { + effective.set_enabled_mcp_servers(Some(join_csv(&mcps))); + } + + effective + } +} + +impl Default for SkillRegistry { + fn default() -> Self { + Self::new() + } +} + +fn parse_csv(s: Option<&str>) -> BTreeSet { + let mut set = BTreeSet::new(); + if let Some(raw) = s { + for token in raw.split(',') { + let trimmed = token.trim(); + if !trimmed.is_empty() { + set.insert(trimmed.to_string()); + } + } + } + set +} + +fn join_csv(set: &BTreeSet) -> String { + set.iter().cloned().collect::>().join(",") +} + +#[cfg(test)] +impl SkillRegistry { + fn insert_for_test(&mut self, skill: Skill) { + self.loaded.insert(skill.name().to_string(), skill); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_skill(name: &str, frontmatter: &str, body: &str) -> Skill { + let content = if frontmatter.is_empty() { + body.to_string() + } else { + format!("---\n{frontmatter}\n---\n{body}") + }; + Skill::new(name, &content) + } + + #[test] + fn empty_registry_returns_base_clone() { + let base = Role::new("test", "You are a helper"); + let registry = SkillRegistry::new(); + + let effective = registry.effective_role(&base); + + assert_eq!(effective.prompt(), base.prompt()); + } + + #[test] + fn one_skill_appends_body_after_base_with_separator() { + let mut registry = SkillRegistry::new(); + registry.insert_for_test(make_skill("git-master", "description: D", "Git knowledge")); + + let base = Role::new("test", "You are a helper"); + let effective = registry.effective_role(&base); + + assert_eq!(effective.prompt(), "You are a helper\n\nGit knowledge"); + } + + #[test] + fn two_skills_compose_bodies_in_insertion_order() { + let mut registry = SkillRegistry::new(); + registry.insert_for_test(make_skill("a", "", "Alpha body")); + registry.insert_for_test(make_skill("b", "", "Beta body")); + + let base = Role::new("test", "Base"); + let effective = registry.effective_role(&base); + + assert_eq!(effective.prompt(), "Base\n\nAlpha body\n\nBeta body"); + } + + #[test] + fn empty_base_prompt_omits_leading_separator() { + let mut registry = SkillRegistry::new(); + registry.insert_for_test(make_skill("a", "", "Alpha")); + registry.insert_for_test(make_skill("b", "", "Beta")); + + let base = Role::new("test", ""); + let effective = registry.effective_role(&base); + + assert_eq!(effective.prompt(), "Alpha\n\nBeta"); + } + + #[test] + fn embedded_prompt_base_skips_body_composition() { + let mut registry = SkillRegistry::new(); + registry.insert_for_test(make_skill( + "git-master", + "enabled_tools: shell", + "should not appear", + )); + + let base = Role::new("test", "Process: __INPUT__"); + let effective = registry.effective_role(&base); + + assert_eq!(effective.prompt(), "Process: __INPUT__"); + let tools = effective.enabled_tools().expect("tools set by skill"); + assert!(tools.contains("shell")); + } + + #[test] + fn skills_with_empty_body_do_not_inject_separator() { + let mut registry = SkillRegistry::new(); + registry.insert_for_test(make_skill("knowledge", "enabled_tools: fs", "")); + + let base = Role::new("test", "Base"); + let effective = registry.effective_role(&base); + + assert_eq!(effective.prompt(), "Base"); + } + + #[test] + fn tools_and_mcps_are_unioned_and_deduplicated() { + let mut registry = SkillRegistry::new(); + registry.insert_for_test(make_skill( + "a", + "enabled_tools: shell,fs\nenabled_mcp_servers: github", + "body", + )); + registry.insert_for_test(make_skill( + "b", + "enabled_tools: fs,git\nenabled_mcp_servers: github,jira", + "body", + )); + + let mut base = Role::new("test", "body"); + base.set_enabled_tools(Some("web_search".to_string())); + + let effective = registry.effective_role(&base); + + let tools_str = effective.enabled_tools().unwrap(); + let tools: BTreeSet<&str> = tools_str.split(',').collect(); + assert_eq!( + tools, + BTreeSet::from(["fs", "git", "shell", "web_search"]) + ); + + let mcps_str = effective.enabled_mcp_servers().unwrap(); + let mcps: BTreeSet<&str> = mcps_str.split(',').collect(); + assert_eq!(mcps, BTreeSet::from(["github", "jira"])); + } + + #[test] + fn no_skill_tool_contributions_preserves_base_none() { + let mut registry = SkillRegistry::new(); + registry.insert_for_test(make_skill("knowledge", "", "Pure knowledge")); + + let base = Role::new("test", "Base"); + let effective = registry.effective_role(&base); + + assert!(effective.enabled_tools().is_none()); + assert!(effective.enabled_mcp_servers().is_none()); + } + + #[test] + fn base_some_empty_tools_is_preserved() { + let mut registry = SkillRegistry::new(); + registry.insert_for_test(make_skill("knowledge", "", "Pure knowledge")); + + let mut base = Role::new("test", "Base"); + base.set_enabled_tools(Some(String::new())); + let effective = registry.effective_role(&base); + + assert_eq!(effective.enabled_tools().as_deref(), Some("")); + } + + #[test] + fn load_already_loaded_returns_error() { + let mut registry = SkillRegistry::new(); + registry.insert_for_test(make_skill("git-master", "", "body")); + + let err = registry.load("git-master").unwrap_err(); + + assert!(err.to_string().contains("already loaded")); + } + + #[test] + fn unload_not_loaded_returns_error() { + let mut registry = SkillRegistry::new(); + + let err = registry.unload("missing").unwrap_err(); + + assert!(err.to_string().contains("not loaded")); + } + + #[test] + fn unload_existing_succeeds_and_removes() { + let mut registry = SkillRegistry::new(); + registry.insert_for_test(make_skill("git-master", "", "body")); + assert!(registry.is_loaded("git-master")); + + registry.unload("git-master").unwrap(); + assert!(!registry.is_loaded("git-master")); + } + + #[test] + fn loaded_names_returns_insertion_order() { + let mut registry = SkillRegistry::new(); + + registry.insert_for_test(make_skill("zulu", "", "body")); + registry.insert_for_test(make_skill("alpha", "", "body")); + registry.insert_for_test(make_skill("mike", "", "body")); + + assert_eq!( + registry.loaded_names(), + vec!["zulu".to_string(), "alpha".to_string(), "mike".to_string()] + ); + } + + #[test] + fn sweep_removes_only_auto_unload_skills() { + let mut registry = SkillRegistry::new(); + registry.insert_for_test(make_skill("ephemeral", "auto_unload: true", "body")); + registry.insert_for_test(make_skill("persistent", "", "body")); + + registry.sweep_auto_unload(); + + assert!(!registry.is_loaded("ephemeral")); + assert!(registry.is_loaded("persistent")); + } + + #[test] + fn is_loaded_returns_false_for_unknown() { + let registry = SkillRegistry::new(); + + assert!(!registry.is_loaded("nothing")); + } +} From 42c88fa2a3be67eebf210f37e562fc1a3c41ec90 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 11:58:35 -0600 Subject: [PATCH 05/85] feat: added remote install and install support for skills --- src/config/install_remote.rs | 76 +++++++++++++++++++++++++++++++++--- src/config/mod.rs | 11 +++++- src/config/paths.rs | 1 - 3 files changed, 81 insertions(+), 7 deletions(-) diff --git a/src/config/install_remote.rs b/src/config/install_remote.rs index 7ec7184..d900bb1 100644 --- a/src/config/install_remote.rs +++ b/src/config/install_remote.rs @@ -24,7 +24,7 @@ pub fn install_remote(git_url: &str, filter: Option, force: bool) if layout.is_empty() { println!( "No recognized assets found in {git_url}. Expected one or more of: \ - agents/, roles/, macros/, functions/tools/, functions/mcp.json" + agents/, roles/, skills/, macros/, functions/tools/, functions/mcp.json" ); return Ok(()); } @@ -193,6 +193,7 @@ fn run_git(args: Vec) -> Result<()> { struct RemoteLayout { agents: Option, roles: Option, + skills: Option, macros: Option, functions_tools: Option, mcp_json: Option, @@ -202,6 +203,7 @@ impl RemoteLayout { fn is_empty(&self) -> bool { self.agents.is_none() && self.roles.is_none() + && self.skills.is_none() && self.macros.is_none() && self.functions_tools.is_none() && self.mcp_json.is_none() @@ -215,20 +217,29 @@ fn scan_remote_layout(root: &Path) -> Result { if agents.is_dir() { layout.agents = Some(agents); } + let roles = root.join("roles"); if roles.is_dir() { layout.roles = Some(roles); } + + let skills = root.join("skills"); + if skills.is_dir() { + layout.skills = Some(skills); + } + let macros = root.join("macros"); if macros.is_dir() { layout.macros = Some(macros); } + let functions = root.join("functions"); if functions.is_dir() { let tools = functions.join("tools"); if tools.is_dir() { layout.functions_tools = Some(tools); } + let mcp = functions.join("mcp.json"); if mcp.is_file() { layout.mcp_json = Some(mcp); @@ -251,6 +262,10 @@ fn apply_filter(mut layout: RemoteLayout, filter: Option) -> Remo roles: layout.roles.take(), ..RemoteLayout::default() }, + InstallFilter::Skills => RemoteLayout { + skills: layout.skills.take(), + ..RemoteLayout::default() + }, InstallFilter::Macros => RemoteLayout { macros: layout.macros.take(), ..RemoteLayout::default() @@ -308,6 +323,7 @@ fn walk_files_inner(dir: &Path, out: &mut Vec) -> Result<()> { enum TopCategory { Agents, Roles, + Skills, Macros, FunctionsTools, } @@ -317,6 +333,7 @@ impl TopCategory { match self { TopCategory::Agents => "agents", TopCategory::Roles => "roles", + TopCategory::Skills => "skills", TopCategory::Macros => "macros", TopCategory::FunctionsTools => "functions/tools", } @@ -356,6 +373,11 @@ fn plan_changes(layout: &RemoteLayout) -> Result { if let Some(src_dir) = &layout.roles { plan_dir_into(src_dir, &paths::roles_dir(), TopCategory::Roles, &mut files)?; } + + if let Some(src_dir) = &layout.skills { + plan_dir_into(src_dir, &paths::skills_dir(), TopCategory::Skills, &mut files)?; + } + if let Some(src_dir) = &layout.macros { plan_dir_into( src_dir, @@ -457,6 +479,7 @@ fn print_plan_summary(plan: &InstallPlan) { for cat in [ TopCategory::Agents, TopCategory::Roles, + TopCategory::Skills, TopCategory::Macros, TopCategory::FunctionsTools, ] { @@ -982,6 +1005,7 @@ mod tests { let l = RemoteLayout { agents: Some(PathBuf::from("a")), roles: Some(PathBuf::from("r")), + skills: Some(PathBuf::from("s")), macros: Some(PathBuf::from("m")), functions_tools: Some(PathBuf::from("f")), mcp_json: Some(PathBuf::from("j")), @@ -989,8 +1013,8 @@ mod tests { let out = apply_filter(l, None); - assert!(out.agents.is_some() && out.roles.is_some() && out.macros.is_some()); - assert!(out.functions_tools.is_some() && out.mcp_json.is_some()); + assert!(out.agents.is_some() && out.roles.is_some() && out.skills.is_some()); + assert!(out.macros.is_some() && out.functions_tools.is_some() && out.mcp_json.is_some()); } #[test] @@ -998,6 +1022,7 @@ mod tests { let l = RemoteLayout { agents: Some(PathBuf::from("a")), roles: None, + skills: Some(PathBuf::from("s")), macros: None, functions_tools: Some(PathBuf::from("f")), mcp_json: Some(PathBuf::from("j")), @@ -1006,6 +1031,7 @@ mod tests { let out = apply_filter(l, Some(InstallFilter::Functions)); assert!(out.agents.is_none()); + assert!(out.skills.is_none()); assert_eq!(out.functions_tools, Some(PathBuf::from("f"))); assert!(out.mcp_json.is_none()); } @@ -1015,6 +1041,7 @@ mod tests { let l = RemoteLayout { agents: Some(PathBuf::from("a")), roles: None, + skills: Some(PathBuf::from("s")), macros: None, functions_tools: Some(PathBuf::from("f")), mcp_json: Some(PathBuf::from("j")), @@ -1022,7 +1049,7 @@ mod tests { let out = apply_filter(l, Some(InstallFilter::McpConfig)); - assert!(out.agents.is_none() && out.functions_tools.is_none()); + assert!(out.agents.is_none() && out.skills.is_none() && out.functions_tools.is_none()); assert_eq!(out.mcp_json, Some(PathBuf::from("j"))); } @@ -1031,6 +1058,7 @@ mod tests { let l = RemoteLayout { agents: Some(PathBuf::from("a")), roles: Some(PathBuf::from("r")), + skills: Some(PathBuf::from("s")), macros: Some(PathBuf::from("m")), functions_tools: Some(PathBuf::from("f")), mcp_json: Some(PathBuf::from("j")), @@ -1039,7 +1067,25 @@ mod tests { let out = apply_filter(l, Some(InstallFilter::Roles)); assert_eq!(out.roles, Some(PathBuf::from("r"))); - assert!(out.agents.is_none() && out.macros.is_none()); + assert!(out.agents.is_none() && out.skills.is_none() && out.macros.is_none()); + assert!(out.functions_tools.is_none() && out.mcp_json.is_none()); + } + + #[test] + fn apply_filter_skills_keeps_only_skills() { + let l = RemoteLayout { + agents: Some(PathBuf::from("a")), + roles: Some(PathBuf::from("r")), + skills: Some(PathBuf::from("s")), + macros: Some(PathBuf::from("m")), + functions_tools: Some(PathBuf::from("f")), + mcp_json: Some(PathBuf::from("j")), + }; + + let out = apply_filter(l, Some(InstallFilter::Skills)); + + assert_eq!(out.skills, Some(PathBuf::from("s"))); + assert!(out.agents.is_none() && out.roles.is_none() && out.macros.is_none()); assert!(out.functions_tools.is_none() && out.mcp_json.is_none()); } @@ -1084,8 +1130,10 @@ mod tests { #[test] fn scan_remote_layout_finds_known_subdirs() { let root = fresh_temp_dir("scan-test-"); + fs::create_dir_all(root.join("agents/sample")).unwrap(); fs::create_dir_all(root.join("roles")).unwrap(); + fs::create_dir_all(root.join("skills")).unwrap(); fs::create_dir_all(root.join("macros")).unwrap(); fs::create_dir_all(root.join("functions/tools")).unwrap(); touch(&root.join("functions/mcp.json")); @@ -1094,12 +1142,30 @@ mod tests { let layout = scan_remote_layout(&root).unwrap(); assert!(layout.agents.is_some()); assert!(layout.roles.is_some()); + assert!(layout.skills.is_some()); assert!(layout.macros.is_some()); assert!(layout.functions_tools.is_some()); assert!(layout.mcp_json.is_some()); let _ = fs::remove_dir_all(&root); } + #[test] + fn scan_remote_layout_finds_skills_only() { + let root = fresh_temp_dir("scan-skills-only-"); + fs::create_dir_all(root.join("skills/git-master")).unwrap(); + touch(&root.join("skills/git-master/SKILL.md")); + + let layout = scan_remote_layout(&root).unwrap(); + + assert!(layout.skills.is_some()); + assert!(layout.agents.is_none()); + assert!(layout.roles.is_none()); + assert!(layout.macros.is_none()); + assert!(layout.functions_tools.is_none()); + assert!(layout.mcp_json.is_none()); + let _ = fs::remove_dir_all(&root); + } + #[test] fn scan_remote_layout_ignores_unrelated_files() { let root = fresh_temp_dir("scan-unrelated-"); diff --git a/src/config/mod.rs b/src/config/mod.rs index a77572f..53af22b 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -287,6 +287,7 @@ impl AssetCategory { pub enum InstallFilter { Agents, Roles, + Skills, Macros, Functions, #[value(name = "mcp_config")] @@ -294,12 +295,20 @@ pub enum InstallFilter { } impl InstallFilter { - pub const NAMES: [&'static str; 5] = ["agents", "roles", "macros", "functions", "mcp_config"]; + pub const NAMES: [&'static str; 6] = [ + "agents", + "roles", + "skills", + "macros", + "functions", + "mcp_config", + ]; pub fn parse(name: &str) -> Option { match name { "agents" => Some(Self::Agents), "roles" => Some(Self::Roles), + "skills" => Some(Self::Skills), "macros" => Some(Self::Macros), "functions" => Some(Self::Functions), "mcp_config" => Some(Self::McpConfig), diff --git a/src/config/paths.rs b/src/config/paths.rs index c98c5a8..59d3724 100644 --- a/src/config/paths.rs +++ b/src/config/paths.rs @@ -65,7 +65,6 @@ pub fn role_file(name: &str) -> PathBuf { roles_dir().join(format!("{name}.md")) } -#[allow(dead_code)] pub fn skills_dir() -> PathBuf { match env::var(get_env_name("skills_dir")) { Ok(value) => PathBuf::from(value), From fa424bde34aa2b6f426fc7fec81a81b663e2b2cd Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 12:26:30 -0600 Subject: [PATCH 06/85] feat: implemented the skills policy to track available skills per context --- src/config/agent.rs | 14 ++ src/config/app_config.rs | 20 ++ src/config/mod.rs | 11 ++ src/config/role.rs | 24 +++ src/config/session.rs | 14 ++ src/config/skill_policy.rs | 367 +++++++++++++++++++++++++++++++++++++ 6 files changed, 450 insertions(+) create mode 100644 src/config/skill_policy.rs diff --git a/src/config/agent.rs b/src/config/agent.rs index fea7baf..557343d 100644 --- a/src/config/agent.rs +++ b/src/config/agent.rs @@ -337,6 +337,16 @@ impl Agent { &self.config.mcp_servers } + #[allow(dead_code)] + pub fn skills_enabled(&self) -> Option { + self.config.skills_enabled + } + + #[allow(dead_code)] + pub fn enabled_skills(&self) -> Option<&[String]> { + self.config.enabled_skills.as_deref() + } + pub fn conversation_starters(&self) -> Vec { self.config .conversation_starters @@ -615,6 +625,10 @@ pub struct AgentConfig { #[serde(default)] pub global_tools: Vec, #[serde(skip_serializing_if = "Option::is_none")] + pub skills_enabled: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub enabled_skills: Option>, + #[serde(skip_serializing_if = "Option::is_none")] pub continuation_prompt: Option, #[serde(default)] pub instructions: String, diff --git a/src/config/app_config.rs b/src/config/app_config.rs index 41fb193..6450d61 100644 --- a/src/config/app_config.rs +++ b/src/config/app_config.rs @@ -35,6 +35,10 @@ pub struct AppConfig { pub enabled_tools: Option, pub visible_tools: Option>, + pub skills_enabled: bool, + pub enabled_skills: Option, + pub visible_skills: Option>, + pub mcp_server_support: bool, pub mapping_mcp_servers: IndexMap, pub enabled_mcp_servers: Option, @@ -96,6 +100,10 @@ impl Default for AppConfig { enabled_tools: None, visible_tools: None, + skills_enabled: true, + enabled_skills: None, + visible_skills: None, + mcp_server_support: true, mapping_mcp_servers: Default::default(), enabled_mcp_servers: None, @@ -158,6 +166,10 @@ impl AppConfig { enabled_tools: config.enabled_tools, visible_tools: config.visible_tools, + skills_enabled: config.skills_enabled, + enabled_skills: config.enabled_skills, + visible_skills: config.visible_skills, + mcp_server_support: config.mcp_server_support, mapping_mcp_servers: config.mapping_mcp_servers, enabled_mcp_servers: config.enabled_mcp_servers, @@ -379,6 +391,14 @@ impl AppConfig { self.enabled_tools = v; } + if let Some(Some(v)) = super::read_env_bool(&get_env_name("skills_enabled")) { + self.skills_enabled = v; + } + + if let Some(v) = super::read_env_value::(&get_env_name("enabled_skills")) { + self.enabled_skills = v; + } + if let Some(Some(v)) = super::read_env_bool(&get_env_name("mcp_server_support")) { self.mcp_server_support = v; } diff --git a/src/config/mod.rs b/src/config/mod.rs index 53af22b..99d88a9 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -12,6 +12,7 @@ mod request_context; mod role; mod session; mod skill; +mod skill_policy; mod skill_registry; pub(crate) mod todo; mod tool_scope; @@ -35,6 +36,8 @@ use self::session::Session; #[allow(unused_imports)] pub use self::skill::Skill; #[allow(unused_imports)] +pub use self::skill_policy::SkillPolicy; +#[allow(unused_imports)] pub use self::skill_registry::SkillRegistry; pub use self::update::run_self_update; use crate::client::{ @@ -151,6 +154,10 @@ pub struct Config { pub enabled_tools: Option, pub visible_tools: Option>, + pub skills_enabled: bool, + pub enabled_skills: Option, + pub visible_skills: Option>, + pub mcp_server_support: bool, pub mapping_mcp_servers: IndexMap, pub enabled_mcp_servers: Option, @@ -212,6 +219,10 @@ impl Default for Config { enabled_tools: None, visible_tools: None, + skills_enabled: true, + enabled_skills: None, + visible_skills: None, + mcp_server_support: true, mapping_mcp_servers: Default::default(), enabled_mcp_servers: None, diff --git a/src/config/role.rs b/src/config/role.rs index bcf1e2d..faad211 100644 --- a/src/config/role.rs +++ b/src/config/role.rs @@ -56,6 +56,10 @@ pub struct Role { #[serde(skip_serializing_if = "Option::is_none")] enabled_mcp_servers: Option, #[serde(skip_serializing_if = "Option::is_none")] + skills_enabled: Option, + #[serde(skip_serializing_if = "Option::is_none")] + enabled_skills: Option, + #[serde(skip_serializing_if = "Option::is_none")] auto_continue: Option, #[serde(skip_serializing_if = "Option::is_none")] max_auto_continues: Option, @@ -98,6 +102,10 @@ impl Role { "enabled_mcp_servers" => { role.enabled_mcp_servers = value.as_str().map(|v| v.to_string()) } + "skills_enabled" => role.skills_enabled = value.as_bool(), + "enabled_skills" => { + role.enabled_skills = value.as_str().map(|v| v.to_string()) + } "auto_continue" => role.auto_continue = value.as_bool(), "max_auto_continues" => { role.max_auto_continues = value.as_u64().map(|v| v as usize) @@ -147,6 +155,12 @@ impl Role { if let Some(enabled_mcp_servers) = self.enabled_mcp_servers() { metadata.push(format!("enabled_mcp_servers: {enabled_mcp_servers}")); } + if let Some(skills_enabled) = self.skills_enabled { + metadata.push(format!("skills_enabled: {skills_enabled}")); + } + if let Some(enabled_skills) = &self.enabled_skills { + metadata.push(format!("enabled_skills: {enabled_skills}")); + } if let Some(auto_continue) = self.auto_continue { metadata.push(format!("auto_continue: {auto_continue}")); } @@ -271,6 +285,16 @@ impl Role { self.continuation_prompt.as_deref() } + #[allow(dead_code)] + pub fn skills_enabled(&self) -> Option { + self.skills_enabled + } + + #[allow(dead_code)] + pub fn enabled_skills(&self) -> Option<&str> { + self.enabled_skills.as_deref() + } + pub fn append_to_prompt(&mut self, text: &str) { self.prompt.push_str(text); } diff --git a/src/config/session.rs b/src/config/session.rs index 254334f..7401d25 100644 --- a/src/config/session.rs +++ b/src/config/session.rs @@ -29,6 +29,10 @@ pub struct Session { #[serde(skip_serializing_if = "Option::is_none")] enabled_mcp_servers: Option, #[serde(skip_serializing_if = "Option::is_none")] + skills_enabled: Option, + #[serde(skip_serializing_if = "Option::is_none")] + enabled_skills: Option, + #[serde(skip_serializing_if = "Option::is_none")] save_session: Option, #[serde(skip_serializing_if = "Option::is_none")] compression_threshold: Option, @@ -75,6 +79,16 @@ pub struct Session { } impl Session { + #[allow(dead_code)] + pub fn skills_enabled(&self) -> Option { + self.skills_enabled + } + + #[allow(dead_code)] + pub fn enabled_skills(&self) -> Option<&str> { + self.enabled_skills.as_deref() + } + pub fn new_from_ctx(ctx: &RequestContext, app: &AppConfig, name: &str) -> Self { let role = ctx.extract_role(app); let mut session = Self { diff --git a/src/config/skill_policy.rs b/src/config/skill_policy.rs new file mode 100644 index 0000000..0672198 --- /dev/null +++ b/src/config/skill_policy.rs @@ -0,0 +1,367 @@ +use super::agent::Agent; +use super::app_config::AppConfig; +use super::paths; +use super::role::Role; +use super::session::Session; + +use anyhow::{Result, bail}; +use std::collections::HashSet; + +#[allow(dead_code)] +#[derive(Debug)] +pub struct SkillPolicy { + pub skills_enabled: bool, + pub visible: Option>, + pub enabled: HashSet, +} + +#[allow(dead_code)] +impl SkillPolicy { + pub fn effective( + global: &AppConfig, + role: Option<&Role>, + agent: Option<&Agent>, + session: Option<&Session>, + ) -> Result { + Self::effective_with( + global, + role, + agent, + session, + &paths::has_skill, + &paths::list_skills, + ) + } + + fn effective_with( + global: &AppConfig, + role: Option<&Role>, + agent: Option<&Agent>, + session: Option<&Session>, + skill_exists: &F, + list_installed: &G, + ) -> Result + where + F: Fn(&str) -> bool, + G: Fn() -> Vec, + { + let mut skills_enabled = global.skills_enabled; + if let Some(r) = role + && let Some(false) = r.skills_enabled() + { + skills_enabled = false; + } + + if let Some(a) = agent + && let Some(false) = a.skills_enabled() + { + skills_enabled = false; + } + + if let Some(s) = session + && let Some(false) = s.skills_enabled() + { + skills_enabled = false; + } + + let visible: Option> = global + .visible_skills + .as_ref() + .map(|v| v.iter().cloned().collect()); + + let enabled_raw: Option> = session + .and_then(|s| parse_csv_opt(s.enabled_skills())) + .or_else(|| agent.and_then(|a| a.enabled_skills().map(|v| v.to_vec()))) + .or_else(|| role.and_then(|r| parse_csv_opt(r.enabled_skills()))) + .or_else(|| parse_csv_opt(global.enabled_skills.as_deref())); + + let enabled: HashSet = match enabled_raw { + Some(explicit) => { + let set: HashSet = explicit.into_iter().collect(); + for name in &set { + if !skill_exists(name) { + bail!("enabled_skills references skill '{name}' which is not installed"); + } + + if let Some(vs) = &visible + && !vs.contains(name) + { + bail!( + "enabled_skills references skill '{name}' which is not in visible_skills" + ); + } + } + set + } + None => match &visible { + Some(v) => v.clone(), + None => list_installed().into_iter().collect(), + }, + }; + + Ok(Self { + skills_enabled, + visible, + enabled, + }) + } + + pub fn allows(&self, name: &str) -> bool { + self.skills_enabled && self.enabled.contains(name) + } +} + +fn parse_csv_opt(s: Option<&str>) -> Option> { + s.map(|raw| { + raw.split(',') + .map(|t| t.trim().to_string()) + .filter(|t| !t.is_empty()) + .collect() + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn always_true(_: &str) -> bool { + true + } + + fn empty_installed() -> Vec { + Vec::new() + } + + fn make_app_config( + skills_enabled: bool, + enabled: Option<&str>, + visible: Option<&[&str]>, + ) -> AppConfig { + AppConfig { + skills_enabled, + enabled_skills: enabled.map(|s| s.to_string()), + visible_skills: visible.map(|v| v.iter().map(|s| s.to_string()).collect()), + ..AppConfig::default() + } + } + + #[test] + fn defaults_yield_skills_enabled_with_empty_universe() { + let global = AppConfig::default(); + + let policy = SkillPolicy::effective_with( + &global, + None, + None, + None, + &always_true, + &empty_installed, + ) + .unwrap(); + + assert!(policy.skills_enabled); + assert!(policy.visible.is_none()); + assert!(policy.enabled.is_empty()); + } + + #[test] + fn falls_back_to_all_installed_when_no_level_sets_enabled_skills() { + let global = AppConfig::default(); + let installed = || vec!["alpha".to_string(), "beta".to_string()]; + + let policy = + SkillPolicy::effective_with(&global, None, None, None, &always_true, &installed) + .unwrap(); + + assert_eq!(policy.enabled.len(), 2); + assert!(policy.enabled.contains("alpha")); + assert!(policy.enabled.contains("beta")); + } + + #[test] + fn falls_back_to_visible_when_visible_set_but_no_enabled() { + let global = make_app_config(true, None, Some(&["alpha", "beta"])); + + let policy = SkillPolicy::effective_with( + &global, + None, + None, + None, + &always_true, + &empty_installed, + ) + .unwrap(); + + assert_eq!(policy.enabled.len(), 2); + assert!(policy.enabled.contains("alpha")); + assert!(policy.enabled.contains("beta")); + } + + #[test] + fn global_enabled_skills_is_effective_when_no_other_levels() { + let global = make_app_config(true, Some("alpha,beta"), Some(&["alpha", "beta", "gamma"])); + + let policy = SkillPolicy::effective_with( + &global, + None, + None, + None, + &always_true, + &empty_installed, + ) + .unwrap(); + + assert!(policy.enabled.contains("alpha")); + assert!(policy.enabled.contains("beta")); + assert!(!policy.enabled.contains("gamma")); + } + + #[test] + fn role_overrides_global_enabled_skills() { + let global = make_app_config(true, Some("alpha"), Some(&["alpha", "beta"])); + let role = Role::new( + "test", + "---\nenabled_skills: beta\n---\nbody", + ); + + let policy = SkillPolicy::effective_with( + &global, + Some(&role), + None, + None, + &always_true, + &empty_installed, + ) + .unwrap(); + + assert!(policy.enabled.contains("beta")); + assert!(!policy.enabled.contains("alpha")); + } + + #[test] + fn any_skills_enabled_false_disables_globally() { + let global = make_app_config(true, None, None); + let role = Role::new("test", "---\nskills_enabled: false\n---\nbody"); + + let policy = SkillPolicy::effective_with( + &global, + Some(&role), + None, + None, + &always_true, + &empty_installed, + ) + .unwrap(); + + assert!(!policy.skills_enabled); + } + + #[test] + fn allows_returns_false_when_skills_disabled() { + let global = AppConfig { + skills_enabled: false, + ..AppConfig::default() + }; + + let policy = SkillPolicy::effective_with( + &global, + None, + None, + None, + &always_true, + &|| vec!["alpha".to_string()], + ) + .unwrap(); + + assert!(!policy.allows("alpha")); + } + + #[test] + fn allows_returns_true_when_skill_in_enabled_set() { + let global = make_app_config(true, Some("alpha"), None); + + let policy = SkillPolicy::effective_with( + &global, + None, + None, + None, + &always_true, + &empty_installed, + ) + .unwrap(); + + assert!(policy.allows("alpha")); + assert!(!policy.allows("beta")); + } + + #[test] + fn validation_rejects_uninstalled_skill_reference() { + let global = make_app_config(true, Some("ghost"), None); + + let err = SkillPolicy::effective_with( + &global, + None, + None, + None, + &|_| false, + &empty_installed, + ) + .unwrap_err(); + + assert!(err.to_string().contains("not installed")); + assert!(err.to_string().contains("ghost")); + } + + #[test] + fn validation_rejects_skill_not_in_visible_set() { + let global = make_app_config(true, Some("beta"), Some(&["alpha"])); + + let err = SkillPolicy::effective_with( + &global, + None, + None, + None, + &always_true, + &empty_installed, + ) + .unwrap_err(); + + assert!(err.to_string().contains("not in visible_skills")); + assert!(err.to_string().contains("beta")); + } + + #[test] + fn validation_skipped_when_no_explicit_enabled_skills() { + let global = make_app_config(true, None, None); + + let policy = SkillPolicy::effective_with( + &global, + None, + None, + None, + &|_| false, + &empty_installed, + ) + .unwrap(); + + assert!(policy.enabled.is_empty()); + } + + #[test] + fn empty_string_enabled_skills_resolves_to_empty_override() { + let global = make_app_config(true, Some("alpha,beta"), Some(&["alpha", "beta"])); + let role = Role::new("test", "---\nenabled_skills: \"\"\n---\nbody"); + + let policy = SkillPolicy::effective_with( + &global, + Some(&role), + None, + None, + &always_true, + &empty_installed, + ) + .unwrap(); + + assert!(policy.enabled.is_empty()); + } +} From 2e224948d4c22831f174ee557ea43f8e0bcc0433 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 12:58:42 -0600 Subject: [PATCH 07/85] feat: created built-in functions for listing, loading, and unloading skills --- src/config/request_context.rs | 6 + src/config/skill.rs | 2 + src/config/skill_registry.rs | 20 ++- src/function/mod.rs | 15 ++ src/function/skill.rs | 298 ++++++++++++++++++++++++++++++++++ 5 files changed, 334 insertions(+), 7 deletions(-) create mode 100644 src/function/skill.rs diff --git a/src/config/request_context.rs b/src/config/request_context.rs index c12c7eb..e7a08ce 100644 --- a/src/config/request_context.rs +++ b/src/config/request_context.rs @@ -1,5 +1,6 @@ use super::rag_cache::{RagCache, RagKey}; use super::session::Session; +use super::skill_registry::SkillRegistry; use super::todo::TodoList; use super::tool_scope::{McpRuntime, ToolScope}; use super::{ @@ -82,6 +83,7 @@ pub struct RequestContext { pub current_depth: usize, pub auto_continue_count: usize, pub todo_list: TodoList, + pub skill_registry: SkillRegistry, pub last_continuation_response: Option, pub render_mode: RenderMode, @@ -110,6 +112,7 @@ impl RequestContext { current_depth: 0, auto_continue_count: 0, todo_list: TodoList::default(), + skill_registry: SkillRegistry::default(), last_continuation_response: None, render_mode: RenderMode::default(), } @@ -157,6 +160,7 @@ impl RequestContext { current_depth: 0, auto_continue_count: 0, todo_list: TodoList::default(), + skill_registry: SkillRegistry::default(), last_continuation_response: None, render_mode: RenderMode::default(), }) @@ -198,6 +202,7 @@ impl RequestContext { current_depth: self.current_depth, auto_continue_count: 0, todo_list: self.todo_list.clone(), + skill_registry: self.skill_registry.clone(), last_continuation_response: None, render_mode: self.render_mode, } @@ -237,6 +242,7 @@ impl RequestContext { current_depth, auto_continue_count: 0, todo_list: TodoList::default(), + skill_registry: SkillRegistry::default(), last_continuation_response: None, render_mode: parent.render_mode, } diff --git a/src/config/skill.rs b/src/config/skill.rs index 23f85e6..df938ec 100644 --- a/src/config/skill.rs +++ b/src/config/skill.rs @@ -123,9 +123,11 @@ impl Skill { if self.declares_tools() && !function_calling_enabled { return false; } + if self.declares_mcp_servers() && !mcp_enabled { return false; } + true } diff --git a/src/config/skill_registry.rs b/src/config/skill_registry.rs index 0cd2001..7cd5f49 100644 --- a/src/config/skill_registry.rs +++ b/src/config/skill_registry.rs @@ -6,6 +6,7 @@ use indexmap::IndexMap; use std::collections::BTreeSet; #[allow(dead_code)] +#[derive(Clone, Default)] pub struct SkillRegistry { loaded: IndexMap, } @@ -22,13 +23,24 @@ impl SkillRegistry { if self.loaded.contains_key(name) { bail!("Skill '{name}' is already loaded"); } - let skill = Skill::load(name)?; self.loaded.insert(name.to_string(), skill); Ok(()) } + pub fn insert(&mut self, skill: Skill) -> Result<()> { + let name = skill.name().to_string(); + + if self.loaded.contains_key(&name) { + bail!("Skill '{name}' is already loaded"); + } + + self.loaded.insert(name, skill); + + Ok(()) + } + pub fn unload(&mut self, name: &str) -> Result<()> { if self.loaded.shift_remove(name).is_none() { bail!("Skill '{name}' is not loaded"); @@ -85,12 +97,6 @@ impl SkillRegistry { } } -impl Default for SkillRegistry { - fn default() -> Self { - Self::new() - } -} - fn parse_csv(s: Option<&str>) -> BTreeSet { let mut set = BTreeSet::new(); if let Some(raw) = s { diff --git a/src/function/mod.rs b/src/function/mod.rs index 517745d..cc858e4 100644 --- a/src/function/mod.rs +++ b/src/function/mod.rs @@ -1,3 +1,4 @@ +pub(crate) mod skill; pub(crate) mod supervisor; pub(crate) mod todo; pub(crate) mod user_interaction; @@ -32,6 +33,7 @@ use std::{ process::{Command, Stdio}, }; use strum_macros::AsRefStr; +use skill::SKILL_FUNCTION_PREFIX; use supervisor::SUPERVISOR_FUNCTION_PREFIX; use todo::TODO_FUNCTION_PREFIX; use user_interaction::USER_FUNCTION_PREFIX; @@ -353,6 +355,12 @@ impl Functions { self.declarations.extend(todo::todo_function_declarations()); } + #[allow(dead_code)] + pub fn append_skill_functions(&mut self) { + self.declarations + .extend(skill::skill_function_declarations()); + } + pub fn append_supervisor_functions(&mut self) { self.declarations .extend(supervisor::supervisor_function_declarations()); @@ -1039,6 +1047,13 @@ impl ToolCall { json!({"tool_call_error": error_msg}) }) } + _ if cmd_name.starts_with(SKILL_FUNCTION_PREFIX) => { + skill::handle_skill_tool(ctx, &cmd_name, &json_data).unwrap_or_else(|e| { + let error_msg = format!("Skill tool failed: {e}"); + eprintln!("{}", warning_text(&format!("⚠️ {error_msg} ⚠️"))); + json!({"tool_call_error": error_msg}) + }) + } _ if cmd_name.starts_with(SUPERVISOR_FUNCTION_PREFIX) => { supervisor::handle_supervisor_tool(ctx, &cmd_name, &json_data) .await diff --git a/src/function/skill.rs b/src/function/skill.rs new file mode 100644 index 0000000..2c62941 --- /dev/null +++ b/src/function/skill.rs @@ -0,0 +1,298 @@ +use super::{FunctionDeclaration, JsonSchema}; +use crate::config::{RequestContext, Skill, SkillPolicy, paths}; + +use anyhow::{Result, bail}; +use indexmap::IndexMap; +use log::warn; +use serde_json::{Value, json}; + +pub const SKILL_FUNCTION_PREFIX: &str = "skill__"; + +#[allow(dead_code)] +pub fn skill_function_declarations() -> Vec { + vec![ + FunctionDeclaration { + name: format!("{SKILL_FUNCTION_PREFIX}list"), + description: + "List skills available in this context. Returns each skill's name, description, \ + what tools and MCP servers it grants on load, and whether it is currently loaded. \ + Call this to discover skills before using skill__load." + .to_string(), + parameters: JsonSchema { + type_value: Some("object".to_string()), + properties: Some(IndexMap::new()), + ..Default::default() + }, + agent: false, + }, + FunctionDeclaration { + name: format!("{SKILL_FUNCTION_PREFIX}load"), + description: + "Load a skill module into the current context. The skill's instructions and any \ + tools or MCP servers it grants become active for subsequent turns. Call \ + skill__unload when the skill's work is complete to keep the context lean." + .to_string(), + parameters: JsonSchema { + type_value: Some("object".to_string()), + properties: Some(IndexMap::from([( + "name".to_string(), + JsonSchema { + type_value: Some("string".to_string()), + description: Some("Name of the skill to load.".into()), + ..Default::default() + }, + )])), + required: Some(vec!["name".to_string()]), + ..Default::default() + }, + agent: false, + }, + FunctionDeclaration { + name: format!("{SKILL_FUNCTION_PREFIX}unload"), + description: + "Unload a previously loaded skill, removing its instructions and granted tools \ + from the context. Call this when the skill's work is complete." + .to_string(), + parameters: JsonSchema { + type_value: Some("object".to_string()), + properties: Some(IndexMap::from([( + "name".to_string(), + JsonSchema { + type_value: Some("string".to_string()), + description: Some("Name of the skill to unload.".into()), + ..Default::default() + }, + )])), + required: Some(vec!["name".to_string()]), + ..Default::default() + }, + agent: false, + }, + ] +} + +pub fn handle_skill_tool( + ctx: &mut RequestContext, + cmd_name: &str, + args: &Value, +) -> Result { + let action = cmd_name + .strip_prefix(SKILL_FUNCTION_PREFIX) + .unwrap_or(cmd_name); + + let policy = SkillPolicy::effective( + &ctx.app.config, + ctx.role.as_ref(), + ctx.agent.as_ref(), + ctx.session.as_ref(), + )?; + + if !policy.skills_enabled { + return Ok(json!({ + "error": "Skills are disabled in this context" + })); + } + + match action { + "list" => handle_list(ctx, &policy), + "load" => handle_load(ctx, args, &policy), + "unload" => handle_unload(ctx, args), + _ => bail!("Unknown skill action: {action}"), + } +} + +fn handle_list(ctx: &RequestContext, policy: &SkillPolicy) -> Result { + let function_calling_on = ctx.app.config.function_calling_support; + let mcp_on = ctx.app.config.mcp_server_support; + + let mut entries = Vec::new(); + for name in paths::list_skills() { + if !policy.allows(&name) { + continue; + } + + let skill = match Skill::load(&name) { + Ok(s) => s, + Err(e) => { + warn!("Failed to load skill '{name}' for listing: {e}"); + continue; + } + }; + if !skill.is_compatible(function_calling_on, mcp_on) { + warn!( + "Skill '{name}' filtered from list: declares tools or MCP servers but those features are disabled" + ); + continue; + } + + entries.push(json!({ + "name": skill.name(), + "description": skill.description(), + "grants_tools": csv_to_vec(skill.enabled_tools()), + "grants_mcp_servers": csv_to_vec(skill.enabled_mcp_servers()), + "loaded": ctx.skill_registry.is_loaded(skill.name()), + })); + } + + Ok(json!({"skills": entries})) +} + +fn handle_load( + ctx: &mut RequestContext, + args: &Value, + policy: &SkillPolicy, +) -> Result { + let name = match args.get("name").and_then(Value::as_str) { + Some(n) if !n.is_empty() => n, + _ => return Ok(json!({"error": "name is required"})), + }; + + if !policy.allows(name) { + return Ok(json!({ + "error": format!("Skill '{name}' is not enabled in this context") + })); + } + + let skill = match Skill::load(name) { + Ok(s) => s, + Err(e) => { + return Ok(json!({ + "error": format!("Failed to load skill '{name}': {e}") + })); + } + }; + + let function_calling_on = ctx.app.config.function_calling_support; + let mcp_on = ctx.app.config.mcp_server_support; + + let tools_declared = skill + .enabled_tools() + .map(|s| !s.trim().is_empty()) + .unwrap_or(false); + let mcps_declared = skill + .enabled_mcp_servers() + .map(|s| !s.trim().is_empty()) + .unwrap_or(false); + + if tools_declared && !function_calling_on { + return Ok(json!({ + "error": format!( + "Skill '{name}' requires function calling, which is disabled in this context" + ) + })); + } + if mcps_declared && !mcp_on { + return Ok(json!({ + "error": format!( + "Skill '{name}' requires MCP servers, which are disabled in this context" + ) + })); + } + + match ctx.skill_registry.insert(skill) { + Ok(()) => Ok(json!({ + "status": "ok", + "loaded": name, + "message": format!("Skill '{name}' loaded") + })), + Err(e) => Ok(json!({"error": e.to_string()})), + } +} + +fn handle_unload(ctx: &mut RequestContext, args: &Value) -> Result { + let name = match args.get("name").and_then(Value::as_str) { + Some(n) if !n.is_empty() => n, + _ => return Ok(json!({"error": "name is required"})), + }; + + match ctx.skill_registry.unload(name) { + Ok(()) => Ok(json!({ + "status": "ok", + "unloaded": name + })), + Err(e) => Ok(json!({"error": e.to_string()})), + } +} + +fn csv_to_vec(csv: Option<&str>) -> Vec { + csv.map(|raw| { + raw.split(',') + .map(|t| t.trim().to_string()) + .filter(|t| !t.is_empty()) + .collect() + }) + .unwrap_or_default() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn declarations_have_three_entries() { + let decls = skill_function_declarations(); + assert_eq!(decls.len(), 3); + } + + #[test] + fn declaration_names_use_skill_prefix() { + let decls = skill_function_declarations(); + + let names: Vec<&str> = decls.iter().map(|d| d.name.as_str()).collect(); + + assert!(names.contains(&"skill__list")); + assert!(names.contains(&"skill__load")); + assert!(names.contains(&"skill__unload")); + } + + #[test] + fn load_and_unload_require_name_parameter() { + let decls = skill_function_declarations(); + for action in ["load", "unload"] { + let decl = decls + .iter() + .find(|d| d.name == format!("skill__{action}")) + .expect("missing declaration"); + + let required = decl + .parameters + .required + .as_ref() + .expect("required field missing"); + + assert!(required.contains(&"name".to_string())); + } + } + + #[test] + fn list_has_no_required_parameters() { + let decls = skill_function_declarations(); + let list_decl = decls + .iter() + .find(|d| d.name == "skill__list") + .expect("skill__list missing"); + + let required = list_decl + .parameters + .required + .as_ref() + .map(|v| v.is_empty()) + .unwrap_or(true); + + assert!(required, "skill__list should have no required parameters"); + } + + #[test] + fn csv_to_vec_empty_input() { + assert!(csv_to_vec(None).is_empty()); + assert!(csv_to_vec(Some("")).is_empty()); + assert!(csv_to_vec(Some(" ")).is_empty()); + } + + #[test] + fn csv_to_vec_parses_and_trims() { + let v = csv_to_vec(Some("a, b ,c,, d")); + + assert_eq!(v, vec!["a", "b", "c", "d"]); + } +} From 7325ad7b323986f7c417a6c80643fe47c77b2b83 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 13:22:44 -0600 Subject: [PATCH 08/85] feat: dynamic loading/unloading of skill tools and MCP servers whenever load_skill/unload_skill are invoked --- src/config/request_context.rs | 61 +++++++++++++++++++++++++++++++++-- src/config/skill_registry.rs | 15 +++++++++ src/function/mod.rs | 12 ++++--- src/function/skill.rs | 50 +++++++++++++++++----------- 4 files changed, 113 insertions(+), 25 deletions(-) diff --git a/src/config/request_context.rs b/src/config/request_context.rs index e7a08ce..9a191dd 100644 --- a/src/config/request_context.rs +++ b/src/config/request_context.rs @@ -1,5 +1,6 @@ use super::rag_cache::{RagCache, RagKey}; use super::session::Session; +use super::skill_policy::SkillPolicy; use super::skill_registry::SkillRegistry; use super::todo::TodoList; use super::tool_scope::{McpRuntime, ToolScope}; @@ -35,7 +36,7 @@ use indexmap::IndexMap; use indoc::formatdoc; use inquire::{Confirm, MultiSelect, Text, list_option::ListOption, validator::Validation}; use parking_lot::RwLock; -use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeSet, HashMap, HashSet}; use std::fs::{File, OpenOptions, read_dir, read_to_string, remove_dir_all, remove_file}; use std::io::Write; use std::path::{Path, PathBuf}; @@ -617,7 +618,7 @@ impl RequestContext { } } - role + self.skill_registry.effective_role(&role) } pub fn auto_continue_config(&self) -> AutoContinueConfig { @@ -2067,6 +2068,35 @@ impl RequestContext { enabled_mcp_servers: Option, abort_signal: AbortSignal, ) -> Result<()> { + let policy = SkillPolicy::effective( + app, + self.role.as_ref(), + self.agent.as_ref(), + self.session.as_ref(), + )?; + + let enabled_mcp_servers = if policy.skills_enabled && app.mcp_server_support { + let skill_mcps = self.skill_registry.loaded_mcp_servers(); + match (enabled_mcp_servers.as_deref(), skill_mcps.is_empty()) { + (Some("all"), _) | (_, true) => enabled_mcp_servers, + (base, false) => { + let mut merged: BTreeSet = skill_mcps; + if let Some(s) = base { + for token in s.split(',') { + let t = token.trim(); + if !t.is_empty() { + merged.insert(t.to_string()); + } + } + } + + Some(merged.into_iter().collect::>().join(",")) + } + } + } else { + enabled_mcp_servers + }; + let mut mcp_runtime = McpRuntime::new(); if app.mcp_server_support @@ -2134,6 +2164,9 @@ impl RequestContext { if !mcp_runtime.is_empty() { functions.append_mcp_meta_functions(mcp_runtime.server_names()); } + if app.function_calling_support && policy.skills_enabled { + functions.append_skill_functions(); + } let tool_tracker = self.tool_scope.tool_tracker.clone(); self.tool_scope = ToolScope { @@ -2144,6 +2177,30 @@ impl RequestContext { Ok(()) } + pub async fn refresh_tool_scope(&mut self, abort_signal: AbortSignal) -> Result<()> { + let app = (*self.app.config).clone(); + let base_mcps = if app.mcp_server_support { + if let Some(session) = &self.session { + session.enabled_mcp_servers() + } else if let Some(agent) = &self.agent { + let names = agent.mcp_server_names(); + if names.is_empty() { + None + } else { + Some(names.join(",")) + } + } else if let Some(role) = &self.role { + role.enabled_mcp_servers() + } else { + app.enabled_mcp_servers.clone() + } + } else { + None + }; + + self.rebuild_tool_scope(&app, base_mcps, abort_signal).await + } + pub async fn use_role( &mut self, app: &AppConfig, diff --git a/src/config/skill_registry.rs b/src/config/skill_registry.rs index 7cd5f49..c43afd3 100644 --- a/src/config/skill_registry.rs +++ b/src/config/skill_registry.rs @@ -53,6 +53,21 @@ impl SkillRegistry { self.loaded.keys().cloned().collect() } + pub fn loaded_mcp_servers(&self) -> BTreeSet { + let mut out = BTreeSet::new(); + for skill in self.loaded.values() { + if let Some(csv) = skill.enabled_mcp_servers() { + for token in csv.split(',') { + let t = token.trim(); + if !t.is_empty() { + out.insert(t.to_string()); + } + } + } + } + out + } + pub fn is_loaded(&self, name: &str) -> bool { self.loaded.contains_key(name) } diff --git a/src/function/mod.rs b/src/function/mod.rs index cc858e4..487038e 100644 --- a/src/function/mod.rs +++ b/src/function/mod.rs @@ -1048,11 +1048,13 @@ impl ToolCall { }) } _ if cmd_name.starts_with(SKILL_FUNCTION_PREFIX) => { - skill::handle_skill_tool(ctx, &cmd_name, &json_data).unwrap_or_else(|e| { - let error_msg = format!("Skill tool failed: {e}"); - eprintln!("{}", warning_text(&format!("⚠️ {error_msg} ⚠️"))); - json!({"tool_call_error": error_msg}) - }) + skill::handle_skill_tool(ctx, &cmd_name, &json_data) + .await + .unwrap_or_else(|e| { + let error_msg = format!("Skill tool failed: {e}"); + eprintln!("{}", warning_text(&format!("⚠️ {error_msg} ⚠️"))); + json!({"tool_call_error": error_msg}) + }) } _ if cmd_name.starts_with(SUPERVISOR_FUNCTION_PREFIX) => { supervisor::handle_supervisor_tool(ctx, &cmd_name, &json_data) diff --git a/src/function/skill.rs b/src/function/skill.rs index 2c62941..ec663ed 100644 --- a/src/function/skill.rs +++ b/src/function/skill.rs @@ -1,5 +1,6 @@ use super::{FunctionDeclaration, JsonSchema}; use crate::config::{RequestContext, Skill, SkillPolicy, paths}; +use crate::utils::create_abort_signal; use anyhow::{Result, bail}; use indexmap::IndexMap; @@ -71,7 +72,7 @@ pub fn skill_function_declarations() -> Vec { ] } -pub fn handle_skill_tool( +pub async fn handle_skill_tool( ctx: &mut RequestContext, cmd_name: &str, args: &Value, @@ -95,8 +96,8 @@ pub fn handle_skill_tool( match action { "list" => handle_list(ctx, &policy), - "load" => handle_load(ctx, args, &policy), - "unload" => handle_unload(ctx, args), + "load" => handle_load(ctx, args, &policy).await, + "unload" => handle_unload(ctx, args).await, _ => bail!("Unknown skill action: {action}"), } } @@ -137,7 +138,7 @@ fn handle_list(ctx: &RequestContext, policy: &SkillPolicy) -> Result { Ok(json!({"skills": entries})) } -fn handle_load( +async fn handle_load( ctx: &mut RequestContext, args: &Value, policy: &SkillPolicy, @@ -189,29 +190,42 @@ fn handle_load( })); } - match ctx.skill_registry.insert(skill) { - Ok(()) => Ok(json!({ - "status": "ok", - "loaded": name, - "message": format!("Skill '{name}' loaded") - })), - Err(e) => Ok(json!({"error": e.to_string()})), + if let Err(e) = ctx.skill_registry.insert(skill) { + return Ok(json!({"error": e.to_string()})); } + + if let Err(e) = ctx.refresh_tool_scope(create_abort_signal()).await { + let _ = ctx.skill_registry.unload(name); + return Ok(json!({ + "error": format!("Loaded skill '{name}' but failed to refresh tool scope: {e}") + })); + } + + Ok(json!({ + "status": "ok", + "loaded": name, + "message": format!("Skill '{name}' loaded") + })) } -fn handle_unload(ctx: &mut RequestContext, args: &Value) -> Result { +async fn handle_unload(ctx: &mut RequestContext, args: &Value) -> Result { let name = match args.get("name").and_then(Value::as_str) { Some(n) if !n.is_empty() => n, _ => return Ok(json!({"error": "name is required"})), }; - match ctx.skill_registry.unload(name) { - Ok(()) => Ok(json!({ - "status": "ok", - "unloaded": name - })), - Err(e) => Ok(json!({"error": e.to_string()})), + if let Err(e) = ctx.skill_registry.unload(name) { + return Ok(json!({"error": e.to_string()})); } + + if let Err(e) = ctx.refresh_tool_scope(create_abort_signal()).await { + warn!("Unloaded skill '{name}' but failed to refresh tool scope: {e}"); + } + + Ok(json!({ + "status": "ok", + "unloaded": name + })) } fn csv_to_vec(csv: Option<&str>) -> Vec { From 5be12e90dc60d67e2edfd4a588c3dbcd855f122a Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 13:43:43 -0600 Subject: [PATCH 09/85] feat: REPL integration with skills --- src/config/request_context.rs | 124 ++++++++++++++++++++++++++++++++++ src/config/skill.rs | 10 +++ src/repl/mod.rs | 61 +++++++++++++++-- 3 files changed, 191 insertions(+), 4 deletions(-) diff --git a/src/config/request_context.rs b/src/config/request_context.rs index 9a191dd..dd0efef 100644 --- a/src/config/request_context.rs +++ b/src/config/request_context.rs @@ -1,5 +1,6 @@ use super::rag_cache::{RagCache, RagKey}; use super::session::Session; +use super::skill::{SKILL_SCAFFOLD, Skill}; use super::skill_policy::SkillPolicy; use super::skill_registry::SkillRegistry; use super::todo::TodoList; @@ -1544,6 +1545,7 @@ impl RequestContext { "session" => (self.sessions_dir(), Some(".yaml")), "rag" => (paths::rags_dir(), Some(".yaml")), "macro" => (paths::macros_dir(), Some(".yaml")), + "skill" => (paths::skills_dir(), None), "agent-data" => (paths::agents_data_dir(), None), _ => bail!("Unknown kind '{kind}'"), }; @@ -1869,6 +1871,16 @@ impl RequestContext { super::map_completion_values(values) } ".macro" => super::map_completion_values(paths::list_macros()), + ".skill" => { + let mut values: Vec = vec![ + "loaded".to_string(), + "load".to_string(), + "unload".to_string(), + "edit".to_string(), + ]; + values.extend(paths::list_skills()); + super::map_completion_values(values) + } ".starter" => match &self.agent { Some(agent) => agent .conversation_starters() @@ -1911,6 +1923,7 @@ impl RequestContext { "session", "rag", "macro", + "skill", "agent-data", ]), ".vault" => { @@ -2473,6 +2486,117 @@ impl RequestContext { Ok(()) } + pub fn upsert_skill(&self, app: &AppConfig, name: &str) -> Result<()> { + let path = paths::skill_file(name); + ensure_parent_exists(&path)?; + let is_new = !path.exists(); + if is_new { + fs::write(&path, SKILL_SCAFFOLD).with_context(|| { + format!("Failed to scaffold skill at {}", path.display()) + })?; + } + + let editor = app.editor()?; + edit_file(&editor, &path)?; + + if self.working_mode.is_repl() { + if is_new { + println!("✓ Created skill at '{}'.", path.display()); + } else { + println!("✓ Saved skill at '{}'.", path.display()); + } + } + + Ok(()) + } + + pub async fn load_skill_repl( + &mut self, + name: &str, + abort_signal: AbortSignal, + ) -> Result<()> { + if !paths::has_skill(name) { + bail!( + "Skill '{name}' is not installed (expected at {})", + paths::skill_file(name).display() + ); + } + + let policy = SkillPolicy::effective( + &self.app.config, + self.role.as_ref(), + self.agent.as_ref(), + self.session.as_ref(), + )?; + + if !policy.skills_enabled { + bail!("Skills are disabled in this context"); + } + + if !policy.allows(name) { + bail!("Skill '{name}' is not enabled in this context"); + } + + let skill = Skill::load(name)?; + let fn_on = self.app.config.function_calling_support; + let mcp_on = self.app.config.mcp_server_support; + let needs_tools = skill + .enabled_tools() + .map(|s| !s.trim().is_empty()) + .unwrap_or(false); + let needs_mcps = skill + .enabled_mcp_servers() + .map(|s| !s.trim().is_empty()) + .unwrap_or(false); + + if needs_tools && !fn_on { + bail!("Skill '{name}' requires function calling, which is disabled"); + } + + if needs_mcps && !mcp_on { + bail!("Skill '{name}' requires MCP servers, which are disabled"); + } + + self.skill_registry.insert(skill)?; + if let Err(e) = self.refresh_tool_scope(abort_signal).await { + let _ = self.skill_registry.unload(name); + bail!("Loaded skill '{name}' but failed to refresh tool scope: {e}"); + } + + println!("✓ Loaded skill '{name}'."); + Ok(()) + } + + pub async fn unload_skill_repl( + &mut self, + name: &str, + abort_signal: AbortSignal, + ) -> Result<()> { + self.skill_registry.unload(name)?; + + if let Err(e) = self.refresh_tool_scope(abort_signal).await { + eprintln!( + "Warning: unloaded skill '{name}' but tool scope refresh failed: {e}" + ); + } + + println!("✓ Unloaded skill '{name}'."); + Ok(()) + } + + pub fn list_loaded_skills(&self) { + let names = self.skill_registry.loaded_names(); + + if names.is_empty() { + println!("No skills loaded."); + } else { + println!("Loaded skills:"); + for name in names { + println!(" • {name}"); + } + } + } + pub async fn apply_prelude( &mut self, app: &AppConfig, diff --git a/src/config/skill.rs b/src/config/skill.rs index df938ec..5400038 100644 --- a/src/config/skill.rs +++ b/src/config/skill.rs @@ -14,6 +14,16 @@ struct SkillsAsset; static RE_METADATA: LazyLock = LazyLock::new(|| Regex::new(r"(?s)-{3,}\s*(.*?)\s*-{3,}\s*(.*)").unwrap()); +pub const SKILL_SCAFFOLD: &str = "\ +--- +description: One-line description shown to the model when listing skills. +enabled_tools: +enabled_mcp_servers: +auto_unload: false +--- +Replace this body with the knowledge or methodology this skill teaches. +"; + #[derive(Debug, Clone, Default, Deserialize, Serialize)] pub struct Skill { name: String, diff --git a/src/repl/mod.rs b/src/repl/mod.rs index 15b45c3..a766a98 100644 --- a/src/repl/mod.rs +++ b/src/repl/mod.rs @@ -46,7 +46,7 @@ pub const DEFAULT_CONTINUATION_PROMPT: &str = indoc! {" 4. Continue with the next pending item now. Call tools immediately." }; -static REPL_COMMANDS: LazyLock<[ReplCommand; 42]> = LazyLock::new(|| { +static REPL_COMMANDS: LazyLock<[ReplCommand; 43]> = LazyLock::new(|| { [ ReplCommand::new(".help", "Show this help guide", AssertState::pass()), ReplCommand::new(".info", "Show system info", AssertState::pass()), @@ -191,6 +191,11 @@ static REPL_COMMANDS: LazyLock<[ReplCommand; 42]> = LazyLock::new(|| { AssertState::TrueFalse(StateFlags::RAG, StateFlags::AGENT), ), ReplCommand::new(".macro", "Execute a macro", AssertState::pass()), + ReplCommand::new( + ".skill", + "List, load, unload, edit, or create skills", + AssertState::pass(), + ), ReplCommand::new( ".file", "Include files, directories, URLs or commands", @@ -513,6 +518,54 @@ pub async fn run_repl_command( .role [text]... # Temporarily switch to the role, send the text, and switch back"# ), }, + ".skill" => { + let trimmed = args.map(str::trim).unwrap_or(""); + let mut parts = trimmed.splitn(2, char::is_whitespace); + let first = parts.next().unwrap_or(""); + let rest = parts.next().map(str::trim).unwrap_or(""); + match first { + "" => println!( + r#"Usage: + .skill loaded # List currently-loaded skills + .skill load # Load a skill into the current context + .skill unload # Unload a loaded skill + .skill edit # Open an existing skill in $EDITOR + .skill # Open the skill in $EDITOR; create with a scaffold if missing"# + ), + "loaded" => ctx.list_loaded_skills(), + "load" => { + if rest.is_empty() { + println!("Usage: .skill load "); + } else { + ctx.load_skill_repl(rest, abort_signal.clone()).await?; + } + } + "unload" => { + if rest.is_empty() { + println!("Usage: .skill unload "); + } else { + ctx.unload_skill_repl(rest, abort_signal.clone()).await?; + } + } + "edit" => { + if rest.is_empty() { + println!("Usage: .skill edit "); + } else if !paths::has_skill(rest) { + bail!( + "Skill '{rest}' is not installed (expected at {})", + paths::skill_file(rest).display() + ); + } else { + let app = Arc::clone(&ctx.app.config); + ctx.upsert_skill(app.as_ref(), rest)?; + } + } + name => { + let app = Arc::clone(&ctx.app.config); + ctx.upsert_skill(app.as_ref(), name)?; + } + } + } ".session" => { if let Some(name) = graph::active_agent_graph_name(ctx) { bail!( @@ -779,7 +832,7 @@ pub async fn run_repl_command( ctx.delete(args)?; } _ => { - println!("Usage: .delete ") + println!("Usage: .delete ") } }, ".copy" => { @@ -1265,8 +1318,8 @@ mod tests { } #[test] - fn repl_commands_has_42_entries() { - assert_eq!(REPL_COMMANDS.len(), 42); + fn repl_commands_has_43_entries() { + assert_eq!(REPL_COMMANDS.len(), 43); } #[test] From 766684615bd09b26372b966839532117d8e7ba6f Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 14:05:16 -0600 Subject: [PATCH 10/85] feat: added CLI --skill flag for modifying skills easily --- src/cli/mod.rs | 13 +++++++++++++ src/main.rs | 11 +++++++++++ 2 files changed, 24 insertions(+) diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 913d395..eb1f298 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -116,6 +116,12 @@ pub struct Cli { /// List all macros #[arg(long)] pub list_macros: bool, + /// List all installed skills + #[arg(long)] + pub list_skills: bool, + /// Open a skill in $EDITOR (creates with a scaffold if missing) + #[arg(long, value_name = "NAME")] + pub skill: Option, /// Input text #[arg(trailing_var_arg = true)] text: Vec, @@ -298,6 +304,13 @@ mod tests { assert!(parse(&["--list-agents"]).list_agents); assert!(parse(&["--list-rags"]).list_rags); assert!(parse(&["--list-macros"]).list_macros); + assert!(parse(&["--list-skills"]).list_skills); + } + + #[test] + fn parse_skill_flag_takes_name() { + assert_eq!(parse(&["--skill", "git-master"]).skill.as_deref(), Some("git-master")); + assert!(parse(&[]).skill.is_none()); } #[test] diff --git a/src/main.rs b/src/main.rs index f3a7f2f..2eb65e4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -74,6 +74,7 @@ async fn main() -> Result<()> { || cli.list_agents || cli.list_rags || cli.list_macros + || cli.list_skills || cli.list_sessions; let vault_flags = cli.add_secret.is_some() || cli.get_secret.is_some() @@ -191,6 +192,16 @@ async fn run( println!("{macros}"); return Ok(()); } + if cli.list_skills { + let skills = paths::list_skills().join("\n"); + println!("{skills}"); + return Ok(()); + } + if let Some(name) = &cli.skill { + let app = Arc::clone(&ctx.app.config); + ctx.upsert_skill(app.as_ref(), name)?; + return Ok(()); + } if cli.dry_run { update_app_config(&mut ctx, |app| app.dry_run = true); From d927a9b99f5e82ca23eaed39a101fdfa41d21845 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 14:20:45 -0600 Subject: [PATCH 11/85] feat: Modified --skill CLI to allow users to specify skills to start the REPL or CLI with. --- src/config/request_context.rs | 13 ++++--------- src/main.rs | 8 +++++++- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/config/request_context.rs b/src/config/request_context.rs index dd0efef..5a5b192 100644 --- a/src/config/request_context.rs +++ b/src/config/request_context.rs @@ -2495,18 +2495,13 @@ impl RequestContext { format!("Failed to scaffold skill at {}", path.display()) })?; } - let editor = app.editor()?; edit_file(&editor, &path)?; - - if self.working_mode.is_repl() { - if is_new { - println!("✓ Created skill at '{}'.", path.display()); - } else { - println!("✓ Saved skill at '{}'.", path.display()); - } + if is_new { + println!("✓ Created skill at '{}'.", path.display()); + } else { + println!("✓ Saved skill at '{}'.", path.display()); } - Ok(()) } diff --git a/src/main.rs b/src/main.rs index 2eb65e4..9bc596b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -197,7 +197,9 @@ async fn run( println!("{skills}"); return Ok(()); } - if let Some(name) = &cli.skill { + if let Some(name) = &cli.skill + && !paths::has_skill(name) + { let app = Arc::clone(&ctx.app.config); ctx.upsert_skill(app.as_ref(), name)?; return Ok(()); @@ -315,6 +317,10 @@ async fn run( .await?; } + if let Some(name) = &cli.skill { + ctx.load_skill_repl(name, abort_signal.clone()).await?; + } + match is_repl { false => { let mut input = create_input(&ctx, text, &cli.file, abort_signal.clone()).await?; From c63eb0a9f92d93cb699a88d7511ab00b1054d7fc Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 14:27:40 -0600 Subject: [PATCH 12/85] feat: support multiple skill flags to load multiple skills at CLI startup --- src/cli/mod.rs | 18 ++++++++++++++---- src/main.rs | 14 ++++++++++---- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/cli/mod.rs b/src/cli/mod.rs index eb1f298..00c6bd3 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -119,9 +119,11 @@ pub struct Cli { /// List all installed skills #[arg(long)] pub list_skills: bool, - /// Open a skill in $EDITOR (creates with a scaffold if missing) + /// Pre-load an existing skill into the session (repeatable). If a single + /// `--skill ` is given and the skill doesn't exist, opens $EDITOR + /// with a scaffold to create it. #[arg(long, value_name = "NAME")] - pub skill: Option, + pub skill: Vec, /// Input text #[arg(trailing_var_arg = true)] text: Vec, @@ -309,8 +311,16 @@ mod tests { #[test] fn parse_skill_flag_takes_name() { - assert_eq!(parse(&["--skill", "git-master"]).skill.as_deref(), Some("git-master")); - assert!(parse(&[]).skill.is_none()); + assert_eq!(parse(&["--skill", "git-master"]).skill, vec!["git-master"]); + assert!(parse(&[]).skill.is_empty()); + } + + #[test] + fn parse_multiple_skill_flags_preserves_order() { + assert_eq!( + parse(&["--skill", "alpha", "--skill", "beta", "--skill", "gamma"]).skill, + vec!["alpha", "beta", "gamma"] + ); } #[test] diff --git a/src/main.rs b/src/main.rs index 9bc596b..d054b19 100644 --- a/src/main.rs +++ b/src/main.rs @@ -197,13 +197,19 @@ async fn run( println!("{skills}"); return Ok(()); } - if let Some(name) = &cli.skill - && !paths::has_skill(name) - { + if cli.skill.len() == 1 && !paths::has_skill(&cli.skill[0]) { + let name = &cli.skill[0]; let app = Arc::clone(&ctx.app.config); ctx.upsert_skill(app.as_ref(), name)?; return Ok(()); } + if cli.skill.len() > 1 { + for name in &cli.skill { + if !paths::has_skill(name) { + bail!("Skill '{name}' is not installed"); + } + } + } if cli.dry_run { update_app_config(&mut ctx, |app| app.dry_run = true); @@ -317,7 +323,7 @@ async fn run( .await?; } - if let Some(name) = &cli.skill { + for name in &cli.skill { ctx.load_skill_repl(name, abort_signal.clone()).await?; } From 6330d7dd95b60549b3961458ef42c5a05c90f487 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 15:13:50 -0600 Subject: [PATCH 13/85] feat: cleaned up skill implementation --- src/config/agent.rs | 2 -- src/config/mod.rs | 7 ++++- src/config/paths.rs | 4 --- src/config/request_context.rs | 37 ++++++++++++++++++++++++++ src/config/role.rs | 2 -- src/config/session.rs | 2 -- src/config/skill.rs | 50 ++++++++++++++++++++++------------- src/config/skill_policy.rs | 5 ---- src/config/skill_registry.rs | 1 - src/function/mod.rs | 1 - src/function/skill.rs | 1 - 11 files changed, 74 insertions(+), 38 deletions(-) diff --git a/src/config/agent.rs b/src/config/agent.rs index 557343d..8ee1dbc 100644 --- a/src/config/agent.rs +++ b/src/config/agent.rs @@ -337,12 +337,10 @@ impl Agent { &self.config.mcp_servers } - #[allow(dead_code)] pub fn skills_enabled(&self) -> Option { self.config.skills_enabled } - #[allow(dead_code)] pub fn enabled_skills(&self) -> Option<&[String]> { self.config.enabled_skills.as_deref() } diff --git a/src/config/mod.rs b/src/config/mod.rs index 99d88a9..1e2c453 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -268,6 +268,7 @@ pub fn install_builtins() -> Result<()> { Functions::install_builtin_global_tools(false)?; Agent::install_builtin_agents(false)?; Macro::install_macros(false)?; + Skill::install_builtin_skills(false)?; Ok(()) } @@ -276,18 +277,20 @@ pub enum AssetCategory { Agents, Macros, Functions, + Skills, #[value(name = "mcp_config")] McpConfig, } impl AssetCategory { - pub const NAMES: [&'static str; 4] = ["agents", "macros", "functions", "mcp_config"]; + pub const NAMES: [&'static str; 5] = ["agents", "macros", "functions", "skills", "mcp_config"]; pub fn parse(name: &str) -> Option { match name { "agents" => Some(Self::Agents), "macros" => Some(Self::Macros), "functions" => Some(Self::Functions), + "skills" => Some(Self::Skills), "mcp_config" => Some(Self::McpConfig), _ => None, } @@ -333,6 +336,7 @@ pub fn install_assets(category: AssetCategory) -> Result<()> { AssetCategory::Agents => ("agents", paths::agents_data_dir()), AssetCategory::Macros => ("macros", paths::macros_dir()), AssetCategory::Functions => ("functions", paths::functions_dir()), + AssetCategory::Skills => ("skills", paths::skills_dir()), AssetCategory::McpConfig => ("MCP config", paths::mcp_config_file()), }; @@ -345,6 +349,7 @@ pub fn install_assets(category: AssetCategory) -> Result<()> { AssetCategory::Agents => Agent::install_builtin_agents(true)?, AssetCategory::Macros => Macro::install_macros(true)?, AssetCategory::Functions => Functions::install_builtin_global_tools(true)?, + AssetCategory::Skills => Skill::install_builtin_skills(true)?, AssetCategory::McpConfig => Functions::install_mcp_config()?, } diff --git a/src/config/paths.rs b/src/config/paths.rs index 59d3724..768e26d 100644 --- a/src/config/paths.rs +++ b/src/config/paths.rs @@ -72,12 +72,10 @@ pub fn skills_dir() -> PathBuf { } } -#[allow(dead_code)] pub fn skill_dir(name: &str) -> PathBuf { skills_dir().join(name) } -#[allow(dead_code)] pub fn skill_file(name: &str) -> PathBuf { skill_dir(name).join("SKILL.md") } @@ -251,7 +249,6 @@ pub fn has_macro(name: &str) -> bool { names.contains(&name.to_string()) } -#[allow(dead_code)] pub fn list_skills() -> Vec { let mut names = Vec::new(); if let Ok(rd) = read_dir(skills_dir()) { @@ -270,7 +267,6 @@ pub fn list_skills() -> Vec { names } -#[allow(dead_code)] pub fn has_skill(name: &str) -> bool { skill_file(name).is_file() } diff --git a/src/config/request_context.rs b/src/config/request_context.rs index 5a5b192..e03be25 100644 --- a/src/config/request_context.rs +++ b/src/config/request_context.rs @@ -4019,6 +4019,43 @@ mod tests { ); } + #[test] + #[serial] + fn install_builtin_skills_force_overwrites_only_with_force() { + let _guard = TestConfigDirGuard::new(); + + Skill::install_builtin_skills(false).unwrap(); + let file = paths::skill_file("git-master"); + assert!(file.exists(), "git-master skill should be installed"); + + write(&file, "SENTINEL").unwrap(); + Skill::install_builtin_skills(false).unwrap(); + assert_eq!( + read_to_string(&file).unwrap(), + "SENTINEL", + "non-force install must not overwrite an existing skill" + ); + + Skill::install_builtin_skills(true).unwrap(); + assert_ne!( + read_to_string(&file).unwrap(), + "SENTINEL", + "force install must overwrite the existing skill" + ); + } + + #[test] + #[serial] + fn install_builtin_skills_installs_all_bundled() { + let _guard = TestConfigDirGuard::new(); + + Skill::install_builtin_skills(false).unwrap(); + assert!(paths::skill_file("git-master").exists()); + assert!(paths::skill_file("ai-slop-remover").exists()); + assert!(paths::skill_file("code-review").exists()); + assert!(paths::skill_file("frontend-ui-ux").exists()); + } + #[test] #[serial] fn install_functions_force_preserves_user_mcp_json() { diff --git a/src/config/role.rs b/src/config/role.rs index faad211..85c8a78 100644 --- a/src/config/role.rs +++ b/src/config/role.rs @@ -285,12 +285,10 @@ impl Role { self.continuation_prompt.as_deref() } - #[allow(dead_code)] pub fn skills_enabled(&self) -> Option { self.skills_enabled } - #[allow(dead_code)] pub fn enabled_skills(&self) -> Option<&str> { self.enabled_skills.as_deref() } diff --git a/src/config/session.rs b/src/config/session.rs index 7401d25..0839712 100644 --- a/src/config/session.rs +++ b/src/config/session.rs @@ -79,12 +79,10 @@ pub struct Session { } impl Session { - #[allow(dead_code)] pub fn skills_enabled(&self) -> Option { self.skills_enabled } - #[allow(dead_code)] pub fn enabled_skills(&self) -> Option<&str> { self.enabled_skills.as_deref() } diff --git a/src/config/skill.rs b/src/config/skill.rs index 5400038..544a512 100644 --- a/src/config/skill.rs +++ b/src/config/skill.rs @@ -6,6 +6,7 @@ use rust_embed::Embed; use serde::{Deserialize, Serialize}; use serde_json::Value; use std::sync::LazyLock; +use log::{debug, info}; #[derive(Embed)] #[folder = "assets/skills/"] @@ -39,7 +40,6 @@ pub struct Skill { auto_unload: Option, } -#[allow(dead_code)] impl Skill { pub fn new(name: &str, content: &str) -> Self { let mut metadata = ""; @@ -84,11 +84,36 @@ impl Skill { skill } - pub fn builtin(name: &str) -> Result { - let content = SkillsAsset::get(&format!("{name}/SKILL.md")) - .ok_or_else(|| anyhow!("Unknown skill `{name}`"))?; - let content = unsafe { std::str::from_utf8_unchecked(&content.data) }; - Ok(Skill::new(name, content)) + pub fn install_builtin_skills(force: bool) -> Result<()> { + info!( + "Installing built-in skills in {}", + paths::skills_dir().display() + ); + + for file in SkillsAsset::iter() { + debug!("Processing skill file: {}", file.as_ref()); + + let embedded_file = SkillsAsset::get(&file).ok_or_else(|| { + anyhow!("Failed to load embedded skill file: {}", file.as_ref()) + })?; + let content = unsafe { std::str::from_utf8_unchecked(&embedded_file.data) }; + let file_path = paths::skills_dir().join(file.as_ref()); + + if file_path.exists() && !force { + debug!( + "Skill file already exists, skipping: {}", + file_path.display() + ); + continue; + } + + ensure_parent_exists(&file_path)?; + info!("Creating skill file: {}", file_path.display()); + let mut skill_file = File::create(&file_path)?; + Write::write_all(&mut skill_file, content.as_bytes())?; + } + + Ok(()) } pub fn load(name: &str) -> Result { @@ -99,12 +124,6 @@ impl Skill { Ok(Skill::new(name, &content)) } - pub fn list_builtin_skill_names() -> Vec { - SkillsAsset::iter() - .filter_map(|v| v.strip_suffix("/SKILL.md").map(|v| v.to_string())) - .collect() - } - pub fn name(&self) -> &str { &self.name } @@ -307,11 +326,4 @@ mod tests { assert!(skill.is_compatible(false, false)); } - - #[test] - fn builtin_returns_err_for_unknown_skill() { - let result = Skill::builtin("nonexistent_skill_xyz"); - - assert!(result.is_err()); - } } diff --git a/src/config/skill_policy.rs b/src/config/skill_policy.rs index 0672198..5f42818 100644 --- a/src/config/skill_policy.rs +++ b/src/config/skill_policy.rs @@ -7,15 +7,12 @@ use super::session::Session; use anyhow::{Result, bail}; use std::collections::HashSet; -#[allow(dead_code)] #[derive(Debug)] pub struct SkillPolicy { pub skills_enabled: bool, - pub visible: Option>, pub enabled: HashSet, } -#[allow(dead_code)] impl SkillPolicy { pub fn effective( global: &AppConfig, @@ -101,7 +98,6 @@ impl SkillPolicy { Ok(Self { skills_enabled, - visible, enabled, }) } @@ -160,7 +156,6 @@ mod tests { .unwrap(); assert!(policy.skills_enabled); - assert!(policy.visible.is_none()); assert!(policy.enabled.is_empty()); } diff --git a/src/config/skill_registry.rs b/src/config/skill_registry.rs index c43afd3..0aa3fa6 100644 --- a/src/config/skill_registry.rs +++ b/src/config/skill_registry.rs @@ -5,7 +5,6 @@ use anyhow::{Result, bail}; use indexmap::IndexMap; use std::collections::BTreeSet; -#[allow(dead_code)] #[derive(Clone, Default)] pub struct SkillRegistry { loaded: IndexMap, diff --git a/src/function/mod.rs b/src/function/mod.rs index 487038e..8ff69e7 100644 --- a/src/function/mod.rs +++ b/src/function/mod.rs @@ -355,7 +355,6 @@ impl Functions { self.declarations.extend(todo::todo_function_declarations()); } - #[allow(dead_code)] pub fn append_skill_functions(&mut self) { self.declarations .extend(skill::skill_function_declarations()); diff --git a/src/function/skill.rs b/src/function/skill.rs index ec663ed..9a24c36 100644 --- a/src/function/skill.rs +++ b/src/function/skill.rs @@ -9,7 +9,6 @@ use serde_json::{Value, json}; pub const SKILL_FUNCTION_PREFIX: &str = "skill__"; -#[allow(dead_code)] pub fn skill_function_declarations() -> Vec { vec![ FunctionDeclaration { From d8a92f4e62ca2bacbaa16fc9900a5f3e5a1fbdac Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 15:19:59 -0600 Subject: [PATCH 14/85] feat: Added support for auto_unload skills during chat --- src/config/request_context.rs | 53 +++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/src/config/request_context.rs b/src/config/request_context.rs index e03be25..738315d 100644 --- a/src/config/request_context.rs +++ b/src/config/request_context.rs @@ -822,6 +822,7 @@ impl RequestContext { if !app.dry_run { self.save_message(app, input, output)?; } + self.skill_registry.sweep_auto_unload(); Ok(()) } @@ -3515,6 +3516,58 @@ mod tests { assert!(lm.continuous); } + #[test] + fn after_chat_completion_sweeps_auto_unload_skills_at_turn_end() { + let mut ctx = create_test_ctx(); + ctx.app = Arc::new(AppState { + config: Arc::new(AppConfig { + dry_run: true, + ..(*ctx.app.config).clone() + }), + ..(*ctx.app).clone() + }); + + let ephemeral = Skill::new("ephemeral", "---\nauto_unload: true\n---\nbody"); + let persistent = Skill::new("persistent", "---\nauto_unload: false\n---\nbody"); + ctx.skill_registry.insert(ephemeral).unwrap(); + ctx.skill_registry.insert(persistent).unwrap(); + + let input = Input::from_str(&ctx, "hello", None); + let app = Arc::clone(&ctx.app.config); + ctx.after_chat_completion(app.as_ref(), &input, "response", &[]).unwrap(); + + assert!(!ctx.skill_registry.is_loaded("ephemeral")); + assert!(ctx.skill_registry.is_loaded("persistent")); + } + + #[test] + fn after_chat_completion_preserves_auto_unload_during_tool_loop() { + let mut ctx = create_test_ctx(); + ctx.app = Arc::new(AppState { + config: Arc::new(AppConfig { + dry_run: true, + ..(*ctx.app.config).clone() + }), + ..(*ctx.app).clone() + }); + + let ephemeral = Skill::new("ephemeral", "---\nauto_unload: true\n---\nbody"); + ctx.skill_registry.insert(ephemeral).unwrap(); + + let input = Input::from_str(&ctx, "hello", None); + let app = Arc::clone(&ctx.app.config); + let tool_result = ToolResult::new( + crate::function::ToolCall::default(), + serde_json::json!({}), + ); + ctx.after_chat_completion(app.as_ref(), &input, "", &[tool_result]).unwrap(); + + assert!( + ctx.skill_registry.is_loaded("ephemeral"), + "auto_unload skills must persist through tool-using rounds" + ); + } + #[test] fn role_like_mut_returns_none_when_empty() { let mut ctx = create_test_ctx(); From 6a5561edba9adeeeb01e975f335263f3f161faa8 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 15:20:12 -0600 Subject: [PATCH 15/85] feat: Created a few auto built-in skills --- assets/skills/ai-slop-remover/SKILL.md | 39 ++++++++++++++ assets/skills/code-review/SKILL.md | 70 ++++++++++++++++++++++++++ assets/skills/frontend-ui-ux/SKILL.md | 67 ++++++++++++++++++++++++ assets/skills/git-master/SKILL.md | 58 +++++++++++++++++++++ 4 files changed, 234 insertions(+) create mode 100644 assets/skills/ai-slop-remover/SKILL.md create mode 100644 assets/skills/code-review/SKILL.md create mode 100644 assets/skills/frontend-ui-ux/SKILL.md create mode 100644 assets/skills/git-master/SKILL.md diff --git a/assets/skills/ai-slop-remover/SKILL.md b/assets/skills/ai-slop-remover/SKILL.md new file mode 100644 index 0000000..6fb5a2f --- /dev/null +++ b/assets/skills/ai-slop-remover/SKILL.md @@ -0,0 +1,39 @@ +--- +description: Detect and remove AI slop from code and prose; produce output indistinguishable from a senior engineer's. +--- +You are reviewing or generating content. Apply these standards strictly. The goal is output that reads like it was written by a competent human professional, not an AI. + +## Code + +**No useless comments.** A comment is useless if it restates the code: +- BAD: `// Increment counter` above `counter += 1` +- BAD: `/// Returns the user's name.` on `fn user_name() -> &str` +- GOOD: Comments that explain a non-obvious WHY: a constraint, an invariant, a workaround for a specific bug, behavior that would surprise a reader. + +If removing a comment wouldn't confuse a future reader, the comment shouldn't exist. + +**No emojis** unless the user explicitly asked for them. + +**No defensive handling for impossible cases.** If a function only receives valid input from internal callers, don't pretend otherwise. Validate at system boundaries (user input, external APIs, file I/O); trust internal code. + +**No over-engineering for hypothetical futures.** Three similar lines of code is fine. Premature abstractions are worse than duplication. + +**No backwards-compatibility cruft for unreleased code.** If a function isn't called yet, just change it. Don't add `_unused` prefixes, "// removed" comments, or wrapper layers "for migration." + +**Names should be honest.** A function called `get_user` should not mutate state. A field called `count` should not be a function. A method that can fail should return `Result`, not panic. + +## Prose + +**No flattery.** Don't start with "Great question!" or "That's a really good idea!" Just respond. + +**No filler.** "It's important to note that" — delete. "Let me explain" — just explain. "I'll go ahead and" — just do it. + +**No status updates.** "I'm going to help you with that" — just help. + +**Match the user's terseness.** Brief user, brief reply. Detailed user, detailed reply. + +**No multi-paragraph docstrings.** One short line max. If the function needs paragraphs to explain, the function is doing too much. + +## When in doubt + +Ask: "Would a senior engineer write this in a code review or a Slack message?" If not, cut it. diff --git a/assets/skills/code-review/SKILL.md b/assets/skills/code-review/SKILL.md new file mode 100644 index 0000000..e2f3292 --- /dev/null +++ b/assets/skills/code-review/SKILL.md @@ -0,0 +1,70 @@ +--- +description: Conduct a thorough code review focused on correctness, clarity, tests, and footguns. Grants read-only filesystem access for inspecting code. +enabled_tools: fs_read, fs_grep, fs_glob, fs_cat, fs_ls +--- +You are reviewing code. Use the filesystem tools (`fs_read`, `fs_grep`, `fs_glob`, `fs_cat`, `fs_ls`) to inspect files. Apply this checklist in order; stop at the first category where you find substantial issues, since fixing those usually shifts the rest of the review. + +## Investigation workflow + +Before reviewing the diff, build a mental model of the surrounding code: + +- `fs_ls` the directories that contain the changed files. +- `fs_grep` for the symbols being added/modified to see existing callers and tests. +- `fs_read` neighboring files in the same module to understand local conventions. +- `fs_glob` for test files that might cover this area. + +A review without context is just a syntax check. + +## 1. Correctness + +- Does the change actually do what it claims? Does it solve the stated problem? +- Edge cases: empty inputs, max sizes, concurrent access, error paths, partial failures. +- Off-by-one errors, type confusion, null/None handling, integer overflow. +- Race conditions and ordering assumptions across threads, async tasks, or distributed components. +- Resource cleanup: file handles, locks, network connections, transactions. + +## 2. Tests + +- Do the tests test BEHAVIOR, not implementation? (Tests of `private_helper()` are usually a smell.) +- Will they fail when the code regresses? Or are they tautological (e.g., `assert!(x.is_empty() || !x.is_empty())`)? +- Do they cover the unhappy paths, not just the happy ones? +- Is there a missing test for the specific bug or feature being added? `fs_grep` for the function name in test files to check. + +## 3. Clarity + +- Are names accurate? `get_user` that mutates is a lie; rename or split. +- Could a competent reader understand this without comments? +- Is there a simpler way to express the same logic? +- Is the function doing one thing, or several things glued together? + +## 4. Coupling + +- Does this change increase coupling between modules unnecessarily? +- Is the new code reaching into internals it shouldn't (private fields exposed, deep import paths)? +- Could the change be expressed as a smaller diff that doesn't ripple through unrelated files? + +## 5. Footguns + +- Could a future maintainer easily misuse this API? +- Are invariants enforced by types, or just by convention? +- Are error types specific enough to be actionable? +- Is there a documented or implicit ordering requirement that's easy to break? + +## What to flag + +- Correctness bugs. +- Missing error handling at trust boundaries. +- Race conditions. +- Tests that won't catch regressions. +- Security issues (injection, auth, exposed secrets). + +## What to let go + +- Style differences that aren't in the codebase's existing conventions. +- "I would have done it differently" preferences. +- Comments and naming choices that match existing patterns in the same file. +- Micro-optimizations in code that isn't on a hot path. + +## Tone + +Direct, specific, focused on the code. No flattery, no padding. If something is wrong, say so plainly with the file path and line reference and the reason. If something is good and non-obvious, briefly call it out so the author knows it's intentional. diff --git a/assets/skills/frontend-ui-ux/SKILL.md b/assets/skills/frontend-ui-ux/SKILL.md new file mode 100644 index 0000000..95ae5ad --- /dev/null +++ b/assets/skills/frontend-ui-ux/SKILL.md @@ -0,0 +1,67 @@ +--- +description: Designer-turned-developer who crafts stunning UI/UX even without design mockups. Grants filesystem read/write access for editing component files. +enabled_tools: fs_read, fs_write, fs_patch, fs_grep, fs_glob, fs_cat, fs_ls, fs_mkdir +--- +You are doing frontend work. Use the filesystem tools to read, write, and patch component files. Treat UI/UX as a discipline, not a polish step at the end. + +## Investigate before editing + +Before changing a component: + +- `fs_ls` the component's directory to see siblings and tests. +- `fs_read` the component itself. +- `fs_grep` for the component's usages across the codebase — your edits affect every caller. +- `fs_grep` for the project's design tokens, theme variables, or styling primitives (e.g., `--color-`, `theme.spacing`, `tw-`). +- Read existing similar components to match conventions. + +## Visual hierarchy + +Every screen has a focal point. Identify it before laying out anything else: + +- One primary action per view. Make it visually dominant. +- Secondary actions are present but visibly subordinate. +- Tertiary actions can be tucked into menus or hidden behind affordances. + +## Spacing and rhythm + +- Use the project's existing spacing scale (4px, 8px, custom — match what's already there). Don't introduce one-off values. +- Larger spacing = stronger grouping break. Inside a card, tight; between cards, looser. +- White space is not wasted space. It's the difference between "professional" and "cramped." + +## Typography + +- Two or three sizes per view, max. More than that is noise. +- Line-height: 1.4-1.6 for body, tighter for headlines. +- Don't center long paragraphs. Left-align (or right-align for RTL). + +## Color + +- Use the project's existing palette. If you need a color that isn't there, you're probably overdesigning. +- Contrast matters: aim for WCAG AA at minimum (4.5:1 for body text, 3:1 for large text). +- Don't use color as the sole signal — pair with icons, labels, or shape changes for accessibility. + +## Component conventions + +When adding a new component: + +- Match the existing structure: where do props go, where do styles go, where do tests go? +- `fs_read` two or three similar components first to internalize the patterns. +- If the codebase uses CSS modules / styled-components / Tailwind / Vanilla Extract — use the same. Don't introduce a new system. +- Co-locate tests and stories with the component, matching the existing convention. + +## Forms + +- Label every input. Placeholder text is not a label. +- Show validation errors near the field, not in a banner at the top. +- Validate on blur, not on every keystroke. Show success states only after the user has interacted. +- Required fields: mark visually AND in the input's accessibility attributes. + +## Loading and empty states + +- Empty states are an opportunity, not a fallback. Tell the user what they can do, not "no data." +- Loading: show structure (skeletons) when you know what's coming. Spinners are for indeterminate waits. +- Errors: explain WHAT failed and what the user can do about it. "Something went wrong" is useless. + +## When unsure + +Ship the boring version. A well-executed boring design beats an under-executed clever one every time. diff --git a/assets/skills/git-master/SKILL.md b/assets/skills/git-master/SKILL.md new file mode 100644 index 0000000..3415e3b --- /dev/null +++ b/assets/skills/git-master/SKILL.md @@ -0,0 +1,58 @@ +--- +description: Methodology for atomic commits, rebase surgery, and clean git history. Grants shell access for running git commands. +enabled_tools: execute_command +--- +You are operating on a git repository. Apply these conventions strictly. Use the `execute_command` tool to run git commands. + +## Atomic commits + +Each commit represents one logical change. If the commit message needs the word "and," the change is too large; split it. Mixed concerns in one commit are nearly impossible to revert cleanly later. + +## Commit messages + +- Subject line: imperative mood, ≤50 characters, no trailing period. +- Blank line. +- Body: explain WHY, not WHAT. The diff shows what changed. +- Reference issues by URL or canonical ID, not by free-form description. + +## Rebase, don't merge + +- `git rebase -i origin/main` before opening a PR. +- Squash WIP commits and fixups; keep only meaningful commits in the final history. +- Never rebase a branch others may have based work on. If unsure, ask. + +## Conflict resolution + +- Read both sides carefully before resolving. Don't reflexively take "ours" or "theirs." +- After resolving, run tests before continuing the rebase. +- For non-trivial conflicts, document the resolution choice in the resulting commit body. + +## Investigation workflow + +Use `execute_command` to run these inspection commands when chasing down history: + +- `git log -p ` — see how a file evolved over time. +- `git log -S ''` (pickaxe) — find when a string was added or removed. +- `git log --all --grep ''` — search commit messages. +- `git blame -L , ` — current authorship for a line range. +- `git diff .. -- ` — narrow diffs to specific paths. +- `git bisect start && git bisect bad && git bisect good ` — narrow down regressions. + +## Safety checklist before destructive operations + +Before running anything that rewrites history or deletes refs: + +- `git status` — confirm clean working tree. +- `git branch --show-current` — confirm which branch you're on. +- `git log -3 --oneline` — confirm what's about to be moved. + +## What to never do + +- Force-push to shared branches (`main`, release branches, anything teammates pull from). +- `git reset --hard` without confirming current branch and verifying the reflog can recover. +- `git push --no-verify` to skip hooks — fix the underlying issue instead. +- Commit secrets, even temporarily. Once pushed, treat as compromised; rotate. + +## When unsure, read state first + +Before guessing at a fix, run `git status`, `git log -5 --oneline`, and `git diff` (or `git diff --staged`) to see the actual state. Don't operate on assumptions. From 7cd7abe469c318a057882be3f15498c3278dd2af Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 15:21:00 -0600 Subject: [PATCH 16/85] fmt: Applied uniform formatting to skills implementation --- src/config/app_config.rs | 2 +- src/config/install_remote.rs | 7 ++- src/config/request_context.rs | 33 ++++------- src/config/role.rs | 4 +- src/config/skill.rs | 17 +++--- src/config/skill_policy.rs | 100 +++++++++------------------------- src/config/skill_registry.rs | 11 ++-- src/function/mod.rs | 2 +- 8 files changed, 58 insertions(+), 118 deletions(-) diff --git a/src/config/app_config.rs b/src/config/app_config.rs index 6450d61..057f82f 100644 --- a/src/config/app_config.rs +++ b/src/config/app_config.rs @@ -394,7 +394,7 @@ impl AppConfig { if let Some(Some(v)) = super::read_env_bool(&get_env_name("skills_enabled")) { self.skills_enabled = v; } - + if let Some(v) = super::read_env_value::(&get_env_name("enabled_skills")) { self.enabled_skills = v; } diff --git a/src/config/install_remote.rs b/src/config/install_remote.rs index d900bb1..0c90cc2 100644 --- a/src/config/install_remote.rs +++ b/src/config/install_remote.rs @@ -375,7 +375,12 @@ fn plan_changes(layout: &RemoteLayout) -> Result { } if let Some(src_dir) = &layout.skills { - plan_dir_into(src_dir, &paths::skills_dir(), TopCategory::Skills, &mut files)?; + plan_dir_into( + src_dir, + &paths::skills_dir(), + TopCategory::Skills, + &mut files, + )?; } if let Some(src_dir) = &layout.macros { diff --git a/src/config/request_context.rs b/src/config/request_context.rs index 738315d..284e10a 100644 --- a/src/config/request_context.rs +++ b/src/config/request_context.rs @@ -2492,9 +2492,8 @@ impl RequestContext { ensure_parent_exists(&path)?; let is_new = !path.exists(); if is_new { - fs::write(&path, SKILL_SCAFFOLD).with_context(|| { - format!("Failed to scaffold skill at {}", path.display()) - })?; + fs::write(&path, SKILL_SCAFFOLD) + .with_context(|| format!("Failed to scaffold skill at {}", path.display()))?; } let editor = app.editor()?; edit_file(&editor, &path)?; @@ -2506,11 +2505,7 @@ impl RequestContext { Ok(()) } - pub async fn load_skill_repl( - &mut self, - name: &str, - abort_signal: AbortSignal, - ) -> Result<()> { + pub async fn load_skill_repl(&mut self, name: &str, abort_signal: AbortSignal) -> Result<()> { if !paths::has_skill(name) { bail!( "Skill '{name}' is not installed (expected at {})", @@ -2563,17 +2558,11 @@ impl RequestContext { Ok(()) } - pub async fn unload_skill_repl( - &mut self, - name: &str, - abort_signal: AbortSignal, - ) -> Result<()> { + pub async fn unload_skill_repl(&mut self, name: &str, abort_signal: AbortSignal) -> Result<()> { self.skill_registry.unload(name)?; if let Err(e) = self.refresh_tool_scope(abort_signal).await { - eprintln!( - "Warning: unloaded skill '{name}' but tool scope refresh failed: {e}" - ); + eprintln!("Warning: unloaded skill '{name}' but tool scope refresh failed: {e}"); } println!("✓ Unloaded skill '{name}'."); @@ -3534,7 +3523,8 @@ mod tests { let input = Input::from_str(&ctx, "hello", None); let app = Arc::clone(&ctx.app.config); - ctx.after_chat_completion(app.as_ref(), &input, "response", &[]).unwrap(); + ctx.after_chat_completion(app.as_ref(), &input, "response", &[]) + .unwrap(); assert!(!ctx.skill_registry.is_loaded("ephemeral")); assert!(ctx.skill_registry.is_loaded("persistent")); @@ -3556,11 +3546,10 @@ mod tests { let input = Input::from_str(&ctx, "hello", None); let app = Arc::clone(&ctx.app.config); - let tool_result = ToolResult::new( - crate::function::ToolCall::default(), - serde_json::json!({}), - ); - ctx.after_chat_completion(app.as_ref(), &input, "", &[tool_result]).unwrap(); + let tool_result = + ToolResult::new(crate::function::ToolCall::default(), serde_json::json!({})); + ctx.after_chat_completion(app.as_ref(), &input, "", &[tool_result]) + .unwrap(); assert!( ctx.skill_registry.is_loaded("ephemeral"), diff --git a/src/config/role.rs b/src/config/role.rs index 85c8a78..cf2d42c 100644 --- a/src/config/role.rs +++ b/src/config/role.rs @@ -103,9 +103,7 @@ impl Role { role.enabled_mcp_servers = value.as_str().map(|v| v.to_string()) } "skills_enabled" => role.skills_enabled = value.as_bool(), - "enabled_skills" => { - role.enabled_skills = value.as_str().map(|v| v.to_string()) - } + "enabled_skills" => role.enabled_skills = value.as_str().map(|v| v.to_string()), "auto_continue" => role.auto_continue = value.as_bool(), "max_auto_continues" => { role.max_auto_continues = value.as_u64().map(|v| v as usize) diff --git a/src/config/skill.rs b/src/config/skill.rs index 544a512..c5e5d58 100644 --- a/src/config/skill.rs +++ b/src/config/skill.rs @@ -2,11 +2,11 @@ use super::*; use anyhow::Result; use fancy_regex::Regex; +use log::{debug, info}; use rust_embed::Embed; use serde::{Deserialize, Serialize}; use serde_json::Value; use std::sync::LazyLock; -use log::{debug, info}; #[derive(Embed)] #[folder = "assets/skills/"] @@ -93,9 +93,8 @@ impl Skill { for file in SkillsAsset::iter() { debug!("Processing skill file: {}", file.as_ref()); - let embedded_file = SkillsAsset::get(&file).ok_or_else(|| { - anyhow!("Failed to load embedded skill file: {}", file.as_ref()) - })?; + let embedded_file = SkillsAsset::get(&file) + .ok_or_else(|| anyhow!("Failed to load embedded skill file: {}", file.as_ref()))?; let content = unsafe { std::str::from_utf8_unchecked(&embedded_file.data) }; let file_path = paths::skills_dir().join(file.as_ref()); @@ -118,9 +117,8 @@ impl Skill { pub fn load(name: &str) -> Result { let path = paths::skill_file(name); - let content = read_to_string(&path).with_context(|| { - format!("Failed to read skill '{name}' at {}", path.display()) - })?; + let content = read_to_string(&path) + .with_context(|| format!("Failed to read skill '{name}' at {}", path.display()))?; Ok(Skill::new(name, &content)) } @@ -152,7 +150,7 @@ impl Skill { if self.declares_tools() && !function_calling_enabled { return false; } - + if self.declares_mcp_servers() && !mcp_enabled { return false; } @@ -307,8 +305,7 @@ mod tests { #[test] fn is_compatible_requires_both_when_both_declared() { - let content = - "---\nenabled_tools: shell\nenabled_mcp_servers: github\n---\nbody"; + let content = "---\nenabled_tools: shell\nenabled_mcp_servers: github\n---\nbody"; let skill = Skill::new("test", content); diff --git a/src/config/skill_policy.rs b/src/config/skill_policy.rs index 5f42818..a47ffa8 100644 --- a/src/config/skill_policy.rs +++ b/src/config/skill_policy.rs @@ -145,15 +145,9 @@ mod tests { fn defaults_yield_skills_enabled_with_empty_universe() { let global = AppConfig::default(); - let policy = SkillPolicy::effective_with( - &global, - None, - None, - None, - &always_true, - &empty_installed, - ) - .unwrap(); + let policy = + SkillPolicy::effective_with(&global, None, None, None, &always_true, &empty_installed) + .unwrap(); assert!(policy.skills_enabled); assert!(policy.enabled.is_empty()); @@ -177,15 +171,9 @@ mod tests { fn falls_back_to_visible_when_visible_set_but_no_enabled() { let global = make_app_config(true, None, Some(&["alpha", "beta"])); - let policy = SkillPolicy::effective_with( - &global, - None, - None, - None, - &always_true, - &empty_installed, - ) - .unwrap(); + let policy = + SkillPolicy::effective_with(&global, None, None, None, &always_true, &empty_installed) + .unwrap(); assert_eq!(policy.enabled.len(), 2); assert!(policy.enabled.contains("alpha")); @@ -196,15 +184,9 @@ mod tests { fn global_enabled_skills_is_effective_when_no_other_levels() { let global = make_app_config(true, Some("alpha,beta"), Some(&["alpha", "beta", "gamma"])); - let policy = SkillPolicy::effective_with( - &global, - None, - None, - None, - &always_true, - &empty_installed, - ) - .unwrap(); + let policy = + SkillPolicy::effective_with(&global, None, None, None, &always_true, &empty_installed) + .unwrap(); assert!(policy.enabled.contains("alpha")); assert!(policy.enabled.contains("beta")); @@ -214,10 +196,7 @@ mod tests { #[test] fn role_overrides_global_enabled_skills() { let global = make_app_config(true, Some("alpha"), Some(&["alpha", "beta"])); - let role = Role::new( - "test", - "---\nenabled_skills: beta\n---\nbody", - ); + let role = Role::new("test", "---\nenabled_skills: beta\n---\nbody"); let policy = SkillPolicy::effective_with( &global, @@ -258,14 +237,9 @@ mod tests { ..AppConfig::default() }; - let policy = SkillPolicy::effective_with( - &global, - None, - None, - None, - &always_true, - &|| vec!["alpha".to_string()], - ) + let policy = SkillPolicy::effective_with(&global, None, None, None, &always_true, &|| { + vec!["alpha".to_string()] + }) .unwrap(); assert!(!policy.allows("alpha")); @@ -275,15 +249,9 @@ mod tests { fn allows_returns_true_when_skill_in_enabled_set() { let global = make_app_config(true, Some("alpha"), None); - let policy = SkillPolicy::effective_with( - &global, - None, - None, - None, - &always_true, - &empty_installed, - ) - .unwrap(); + let policy = + SkillPolicy::effective_with(&global, None, None, None, &always_true, &empty_installed) + .unwrap(); assert!(policy.allows("alpha")); assert!(!policy.allows("beta")); @@ -293,15 +261,9 @@ mod tests { fn validation_rejects_uninstalled_skill_reference() { let global = make_app_config(true, Some("ghost"), None); - let err = SkillPolicy::effective_with( - &global, - None, - None, - None, - &|_| false, - &empty_installed, - ) - .unwrap_err(); + let err = + SkillPolicy::effective_with(&global, None, None, None, &|_| false, &empty_installed) + .unwrap_err(); assert!(err.to_string().contains("not installed")); assert!(err.to_string().contains("ghost")); @@ -311,15 +273,9 @@ mod tests { fn validation_rejects_skill_not_in_visible_set() { let global = make_app_config(true, Some("beta"), Some(&["alpha"])); - let err = SkillPolicy::effective_with( - &global, - None, - None, - None, - &always_true, - &empty_installed, - ) - .unwrap_err(); + let err = + SkillPolicy::effective_with(&global, None, None, None, &always_true, &empty_installed) + .unwrap_err(); assert!(err.to_string().contains("not in visible_skills")); assert!(err.to_string().contains("beta")); @@ -329,15 +285,9 @@ mod tests { fn validation_skipped_when_no_explicit_enabled_skills() { let global = make_app_config(true, None, None); - let policy = SkillPolicy::effective_with( - &global, - None, - None, - None, - &|_| false, - &empty_installed, - ) - .unwrap(); + let policy = + SkillPolicy::effective_with(&global, None, None, None, &|_| false, &empty_installed) + .unwrap(); assert!(policy.enabled.is_empty()); } diff --git a/src/config/skill_registry.rs b/src/config/skill_registry.rs index 0aa3fa6..cf81310 100644 --- a/src/config/skill_registry.rs +++ b/src/config/skill_registry.rs @@ -93,7 +93,11 @@ impl SkillRegistry { tools.extend(parse_csv(skill.enabled_tools())); mcps.extend(parse_csv(skill.enabled_mcp_servers())); if !skip_body && !skill.body().is_empty() { - let separator = if effective.is_empty_prompt() { "" } else { "\n\n" }; + let separator = if effective.is_empty_prompt() { + "" + } else { + "\n\n" + }; effective.append_to_prompt(separator); effective.append_to_prompt(skill.body()); } @@ -242,10 +246,7 @@ mod tests { let tools_str = effective.enabled_tools().unwrap(); let tools: BTreeSet<&str> = tools_str.split(',').collect(); - assert_eq!( - tools, - BTreeSet::from(["fs", "git", "shell", "web_search"]) - ); + assert_eq!(tools, BTreeSet::from(["fs", "git", "shell", "web_search"])); let mcps_str = effective.enabled_mcp_servers().unwrap(); let mcps: BTreeSet<&str> = mcps_str.split(',').collect(); diff --git a/src/function/mod.rs b/src/function/mod.rs index 8ff69e7..ba66837 100644 --- a/src/function/mod.rs +++ b/src/function/mod.rs @@ -22,6 +22,7 @@ use indoc::formatdoc; use rust_embed::Embed; use serde::{Deserialize, Serialize}; use serde_json::{Value, json}; +use skill::SKILL_FUNCTION_PREFIX; use std::collections::VecDeque; use std::ffi::OsStr; use std::fs::File; @@ -33,7 +34,6 @@ use std::{ process::{Command, Stdio}, }; use strum_macros::AsRefStr; -use skill::SKILL_FUNCTION_PREFIX; use supervisor::SUPERVISOR_FUNCTION_PREFIX; use todo::TODO_FUNCTION_PREFIX; use user_interaction::USER_FUNCTION_PREFIX; From 7e801b80d0e413307e29b3d14ee4b1a36d559710 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 15:30:22 -0600 Subject: [PATCH 17/85] feat: Added skills_dir to the info output of Coyote --- src/config/request_context.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/config/request_context.rs b/src/config/request_context.rs index 284e10a..cdd573d 100644 --- a/src/config/request_context.rs +++ b/src/config/request_context.rs @@ -891,6 +891,7 @@ impl RequestContext { ("env_file", display_path(&paths::env_file())), ("agents_dir", display_path(&paths::agents_data_dir())), ("roles_dir", display_path(&paths::roles_dir())), + ("skills_dir", display_path(&paths::skills_dir())), ("sessions_dir", display_path(&self.sessions_dir())), ("rags_dir", display_path(&paths::rags_dir())), ("macros_dir", display_path(&paths::macros_dir())), From b1fc199a5f5a1af491c641762b927358f1cc3564 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 15:48:19 -0600 Subject: [PATCH 18/85] feat: .edit skill support from within the REPL --- src/config/request_context.rs | 11 +++++++++++ src/repl/mod.rs | 33 +++++++++++++++++---------------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/config/request_context.rs b/src/config/request_context.rs index cdd573d..8551bda 100644 --- a/src/config/request_context.rs +++ b/src/config/request_context.rs @@ -1928,6 +1928,15 @@ impl RequestContext { "skill", "agent-data", ]), + ".edit" => super::map_completion_values(vec![ + "config", + "mcp-config", + "role", + "session", + "rag-docs", + "agent-config", + "skill", + ]), ".vault" => { let mut values = vec!["add", "get", "update", "delete", "list"]; values.sort_unstable(); @@ -1938,6 +1947,8 @@ impl RequestContext { } _ => vec![], }; + } else if cmd == ".edit" && args.first() == Some(&"skill") && args.len() == 2 { + values = super::map_completion_values(paths::list_skills()); } else if cmd == ".install" && args.first() == Some(&"remote") && args.len() >= 2 { let prev = args.get(args.len() - 2).copied().unwrap_or(""); if prev == "--filter" { diff --git a/src/repl/mod.rs b/src/repl/mod.rs index a766a98..5623331 100644 --- a/src/repl/mod.rs +++ b/src/repl/mod.rs @@ -529,8 +529,8 @@ pub async fn run_repl_command( .skill loaded # List currently-loaded skills .skill load # Load a skill into the current context .skill unload # Unload a loaded skill - .skill edit # Open an existing skill in $EDITOR - .skill # Open the skill in $EDITOR; create with a scaffold if missing"# + .skill # Open the skill in $EDITOR; create with a scaffold if missing + # (Use `.edit skill ` to edit an existing skill without the create-if-missing behavior.)"# ), "loaded" => ctx.list_loaded_skills(), "load" => { @@ -547,19 +547,6 @@ pub async fn run_repl_command( ctx.unload_skill_repl(rest, abort_signal.clone()).await?; } } - "edit" => { - if rest.is_empty() { - println!("Usage: .skill edit "); - } else if !paths::has_skill(rest) { - bail!( - "Skill '{rest}' is not installed (expected at {})", - paths::skill_file(rest).display() - ); - } else { - let app = Arc::clone(&ctx.app.config); - ctx.upsert_skill(app.as_ref(), rest)?; - } - } name => { let app = Arc::clone(&ctx.app.config); ctx.upsert_skill(app.as_ref(), name)?; @@ -712,9 +699,23 @@ pub async fn run_repl_command( Some("mcp-config") => { ctx.edit_mcp_config()?; } + Some(s) if s == "skill" || s.starts_with("skill ") => { + let name = s.strip_prefix("skill").unwrap_or("").trim(); + if name.is_empty() { + println!("Usage: .edit skill "); + } else if !paths::has_skill(name) { + bail!( + "Skill '{name}' is not installed (expected at {})", + paths::skill_file(name).display() + ); + } else { + let app = Arc::clone(&ctx.app.config); + ctx.upsert_skill(app.as_ref(), name)?; + } + } _ => { println!( - r#"Usage: .edit "# + r#"Usage: .edit >"# ) } } From aef26013cb2aa06a9b53e3438e900cd81f07108a Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 15:50:20 -0600 Subject: [PATCH 19/85] docs: Added example skills configurations --- README.md | 1 + config.agent.example.yaml | 5 +++++ config.example.yaml | 15 +++++++++++++++ config.role.example.md | 3 +++ 4 files changed, 24 insertions(+) diff --git a/README.md b/README.md index 01e2c40..e4a9d6e 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,7 @@ Coming from [AIChat](https://github.com/sigoden/aichat)? Follow the [migration g * [RAG](https://github.com/Dark-Alex-17/coyote/wiki/RAG): Retrieval-Augmented Generation for enhanced information retrieval and generation. * [Sessions](https://github.com/Dark-Alex-17/coyote/wiki/Sessions): Manage and persist conversational contexts and settings across multiple interactions. * [Roles](https://github.com/Dark-Alex-17/coyote/wiki/Roles): Customize model behavior for specific tasks or domains. +* [Skills](https://github.com/Dark-Alex-17/coyote/wiki/Skills): Modular knowledge or capability packs the LLM can load and unload mid-conversation. Multiple skills compose; instructions stack, tools and MCPs union. * [Agents](https://github.com/Dark-Alex-17/coyote/wiki/Agents): Leverage AI agents to perform complex tasks and workflows, including sub-agent spawning, teammate messaging, and user interaction tools. * [Graph Agents](https://github.com/Dark-Alex-17/coyote/wiki/Graph-Agents): Define an agent as a declarative, YAML-driven workflow. A directed graph of typed nodes (LLM calls, scripts, approvals, user input, RAG retrieval, sub-agent spawns). * [Todo System](https://github.com/Dark-Alex-17/coyote/wiki/TODO-System): Built-in task tracking for improved LLM reliability with smaller models. diff --git a/config.agent.example.yaml b/config.agent.example.yaml index d3b19af..4036f79 100644 --- a/config.agent.example.yaml +++ b/config.agent.example.yaml @@ -42,6 +42,11 @@ global_tools: # Optional list of additional global tools to e - web_search - fs - python +skills_enabled: true # Master switch for skills in this agent (default: inherit from global) +enabled_skills: # Optional list of skills available when this agent runs. + # Must be a subset of global `visible_skills`. Omit to inherit the global default. + - git-master + - ai-slop-remover dynamic_instructions: false # Whether to use dynamic instructions for the agent; if false, static instructions are used instructions: | # Static instructions for the agent; ignored if dynamic instructions are used You are a AI agent designed to demonstrate agent capabilities. diff --git a/config.example.yaml b/config.example.yaml index e49adce..08ce1a7 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -80,6 +80,21 @@ mapping_mcp_servers: # Alias for an MCP server or set of servers git: github,gitmcp enabled_mcp_servers: null # Which MCP servers to enable by default (e.g. 'github,slack,ddg-search') +# ---- Skills ---- +# Skills are modular knowledge or capability packs the LLM can load and unload mid-conversation. +# See the [Skills documentation](https://github.com/Dark-Alex-17/coyote/wiki/Skills) for more details. +skills_enabled: true # Master switch. Set to false to hide all skill management tools from the model. +visible_skills: # The universe of skills allowed to be enabled in any context. Omit (null) for "all installed". + - ai-slop-remover + - code-review + - frontend-ui-ux + - git-master +enabled_skills: null # Which skills are available by default (no role/agent/session active). null = all visible. + # Example: only expose two skills in the bare REPL. + # enabled_skills: + # - git-master + # - ai-slop-remover + # ---- Auto-Continue (Todo System) ---- # The auto-continue system provides built-in task tracking for improved reliability. # When enabled, the model can create todo lists and the system will automatically diff --git a/config.role.example.md b/config.role.example.md index 8016bba..b0501f1 100644 --- a/config.role.example.md +++ b/config.role.example.md @@ -10,6 +10,9 @@ temperature: 0.2 # The temperature to use for this role whe top_p: 0 # The top_p to use for this role when querying the model enabled_tools: fs_ls,fs_cat # A comma-separated list of tools to enable for this role enabled_mcp_servers: github,gitmcp # A comma-separated list of MCP servers to enable for this role +skills_enabled: true # Master switch for skills in this role (default: inherit from global) +enabled_skills: git-master,ai-slop-remover # Comma-separated list of skills available when this role is active. + # Must be a subset of global `visible_skills`. Omit to inherit the global default. prompt: null # A custom prompt to use for this role that will immediately query # the model for output instead of using the instructions below # Auto-Continue (Todo System) From b758b17dbbe005eedb786640dbc262c2d97cad90 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 15:58:06 -0600 Subject: [PATCH 20/85] fix: remove now deprecated .skill edit command --- src/config/request_context.rs | 10 ---------- src/repl/mod.rs | 13 +++++++++---- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/config/request_context.rs b/src/config/request_context.rs index 8551bda..2c0ca39 100644 --- a/src/config/request_context.rs +++ b/src/config/request_context.rs @@ -1878,7 +1878,6 @@ impl RequestContext { "loaded".to_string(), "load".to_string(), "unload".to_string(), - "edit".to_string(), ]; values.extend(paths::list_skills()); super::map_completion_values(values) @@ -1928,15 +1927,6 @@ impl RequestContext { "skill", "agent-data", ]), - ".edit" => super::map_completion_values(vec![ - "config", - "mcp-config", - "role", - "session", - "rag-docs", - "agent-config", - "skill", - ]), ".vault" => { let mut values = vec!["add", "get", "update", "delete", "list"]; values.sort_unstable(); diff --git a/src/repl/mod.rs b/src/repl/mod.rs index 5623331..e17e6b1 100644 --- a/src/repl/mod.rs +++ b/src/repl/mod.rs @@ -46,7 +46,7 @@ pub const DEFAULT_CONTINUATION_PROMPT: &str = indoc! {" 4. Continue with the next pending item now. Call tools immediately." }; -static REPL_COMMANDS: LazyLock<[ReplCommand; 43]> = LazyLock::new(|| { +static REPL_COMMANDS: LazyLock<[ReplCommand; 44]> = LazyLock::new(|| { [ ReplCommand::new(".help", "Show this help guide", AssertState::pass()), ReplCommand::new(".info", "Show system info", AssertState::pass()), @@ -193,7 +193,12 @@ static REPL_COMMANDS: LazyLock<[ReplCommand; 43]> = LazyLock::new(|| { ReplCommand::new(".macro", "Execute a macro", AssertState::pass()), ReplCommand::new( ".skill", - "List, load, unload, edit, or create skills", + "List, load, unload, or create skills", + AssertState::pass(), + ), + ReplCommand::new( + ".edit skill", + "Modify an existing skill by name", AssertState::pass(), ), ReplCommand::new( @@ -1319,8 +1324,8 @@ mod tests { } #[test] - fn repl_commands_has_43_entries() { - assert_eq!(REPL_COMMANDS.len(), 43); + fn repl_commands_has_44_entries() { + assert_eq!(REPL_COMMANDS.len(), 44); } #[test] From 985ae11fcfad28873f695218fef278c2bec1afa5 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 16:04:22 -0600 Subject: [PATCH 21/85] feat: removed potentially confusing tab completions for .skill --- src/config/request_context.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/config/request_context.rs b/src/config/request_context.rs index 2c0ca39..8ae9dcc 100644 --- a/src/config/request_context.rs +++ b/src/config/request_context.rs @@ -1874,13 +1874,11 @@ impl RequestContext { } ".macro" => super::map_completion_values(paths::list_macros()), ".skill" => { - let mut values: Vec = vec![ + super::map_completion_values(vec![ "loaded".to_string(), "load".to_string(), "unload".to_string(), - ]; - values.extend(paths::list_skills()); - super::map_completion_values(values) + ]) } ".starter" => match &self.agent { Some(agent) => agent From dc8e831f2716968ebe1aa97e8312cf977b749595 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 16:11:23 -0600 Subject: [PATCH 22/85] fix: forgot to bootstrap skills on REPL startup --- src/config/request_context.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/config/request_context.rs b/src/config/request_context.rs index 8ae9dcc..38bf0bc 100644 --- a/src/config/request_context.rs +++ b/src/config/request_context.rs @@ -132,6 +132,13 @@ impl RequestContext { functions.append_user_interaction_functions(); } + if app.config.function_calling_support { + let policy = SkillPolicy::effective(&app.config, None, None, None)?; + if policy.skills_enabled { + functions.append_skill_functions(); + } + } + let mut mcp_runtime = McpRuntime::default(); if let Some(registry) = &app.mcp_registry { mcp_runtime.sync_from_registry(registry); From 8ff9d84a852882f9d7cac777ec8eb58f5986088e Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 16:37:17 -0600 Subject: [PATCH 23/85] fix: skill loading on agents --- src/config/agent.rs | 7 +++++++ src/config/request_context.rs | 12 +++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/config/agent.rs b/src/config/agent.rs index 8ee1dbc..b80ce98 100644 --- a/src/config/agent.rs +++ b/src/config/agent.rs @@ -207,6 +207,13 @@ impl Agent { functions.append_teammate_functions(); functions.append_user_interaction_functions(); + if app.function_calling_support + && app.skills_enabled + && !matches!(agent_config.skills_enabled, Some(false)) + { + functions.append_skill_functions(); + } + agent_config.replace_tools_placeholder(&functions); Ok(Self { diff --git a/src/config/request_context.rs b/src/config/request_context.rs index 38bf0bc..2cd1729 100644 --- a/src/config/request_context.rs +++ b/src/config/request_context.rs @@ -15,7 +15,7 @@ use super::{MessageContentToolCalls, prompts}; use crate::client::{Model, ModelType, list_models}; use crate::function::{ FunctionDeclaration, Functions, ToolCallTracker, ToolResult, - user_interaction::USER_FUNCTION_PREFIX, + skill::SKILL_FUNCTION_PREFIX, user_interaction::USER_FUNCTION_PREFIX, }; use crate::mcp::{ MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX, MCP_INVOKE_META_FUNCTION_NAME_PREFIX, @@ -1145,7 +1145,9 @@ impl RequestContext { .declarations() .iter() .filter(|v| { - v.name.starts_with(USER_FUNCTION_PREFIX) && !existing.contains(&v.name) + (v.name.starts_with(USER_FUNCTION_PREFIX) + || v.name.starts_with(SKILL_FUNCTION_PREFIX)) + && !existing.contains(&v.name) }) .cloned() .collect(); @@ -1942,8 +1944,12 @@ impl RequestContext { } _ => vec![], }; - } else if cmd == ".edit" && args.first() == Some(&"skill") && args.len() == 2 { + } else if (cmd == ".edit" && args.first() == Some(&"skill") && args.len() == 2) + || (cmd == ".skill" && args.first() == Some(&"load") && args.len() == 2) + { values = super::map_completion_values(paths::list_skills()); + } else if cmd == ".skill" && args.first() == Some(&"unload") && args.len() == 2 { + values = super::map_completion_values(self.skill_registry.loaded_names()); } else if cmd == ".install" && args.first() == Some(&"remote") && args.len() >= 2 { let prev = args.get(args.len() - 2).copied().unwrap_or(""); if prev == "--filter" { From 1440e23748012a193dbf93f71c9181454c0c1b32 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 16:45:34 -0600 Subject: [PATCH 24/85] style: removed now deprecated SkillRegistry::new and skillRegistry::load methods --- src/config/skill_registry.rs | 55 +++++++++--------------------------- 1 file changed, 14 insertions(+), 41 deletions(-) diff --git a/src/config/skill_registry.rs b/src/config/skill_registry.rs index cf81310..235a4fd 100644 --- a/src/config/skill_registry.rs +++ b/src/config/skill_registry.rs @@ -10,24 +10,7 @@ pub struct SkillRegistry { loaded: IndexMap, } -#[allow(dead_code)] impl SkillRegistry { - pub fn new() -> Self { - Self { - loaded: IndexMap::new(), - } - } - - pub fn load(&mut self, name: &str) -> Result<()> { - if self.loaded.contains_key(name) { - bail!("Skill '{name}' is already loaded"); - } - let skill = Skill::load(name)?; - self.loaded.insert(name.to_string(), skill); - - Ok(()) - } - pub fn insert(&mut self, skill: Skill) -> Result<()> { let name = skill.name().to_string(); @@ -155,7 +138,7 @@ mod tests { #[test] fn empty_registry_returns_base_clone() { let base = Role::new("test", "You are a helper"); - let registry = SkillRegistry::new(); + let registry = SkillRegistry::default(); let effective = registry.effective_role(&base); @@ -164,7 +147,7 @@ mod tests { #[test] fn one_skill_appends_body_after_base_with_separator() { - let mut registry = SkillRegistry::new(); + let mut registry = SkillRegistry::default(); registry.insert_for_test(make_skill("git-master", "description: D", "Git knowledge")); let base = Role::new("test", "You are a helper"); @@ -175,7 +158,7 @@ mod tests { #[test] fn two_skills_compose_bodies_in_insertion_order() { - let mut registry = SkillRegistry::new(); + let mut registry = SkillRegistry::default(); registry.insert_for_test(make_skill("a", "", "Alpha body")); registry.insert_for_test(make_skill("b", "", "Beta body")); @@ -187,7 +170,7 @@ mod tests { #[test] fn empty_base_prompt_omits_leading_separator() { - let mut registry = SkillRegistry::new(); + let mut registry = SkillRegistry::default(); registry.insert_for_test(make_skill("a", "", "Alpha")); registry.insert_for_test(make_skill("b", "", "Beta")); @@ -199,7 +182,7 @@ mod tests { #[test] fn embedded_prompt_base_skips_body_composition() { - let mut registry = SkillRegistry::new(); + let mut registry = SkillRegistry::default(); registry.insert_for_test(make_skill( "git-master", "enabled_tools: shell", @@ -216,7 +199,7 @@ mod tests { #[test] fn skills_with_empty_body_do_not_inject_separator() { - let mut registry = SkillRegistry::new(); + let mut registry = SkillRegistry::default(); registry.insert_for_test(make_skill("knowledge", "enabled_tools: fs", "")); let base = Role::new("test", "Base"); @@ -227,7 +210,7 @@ mod tests { #[test] fn tools_and_mcps_are_unioned_and_deduplicated() { - let mut registry = SkillRegistry::new(); + let mut registry = SkillRegistry::default(); registry.insert_for_test(make_skill( "a", "enabled_tools: shell,fs\nenabled_mcp_servers: github", @@ -255,7 +238,7 @@ mod tests { #[test] fn no_skill_tool_contributions_preserves_base_none() { - let mut registry = SkillRegistry::new(); + let mut registry = SkillRegistry::default(); registry.insert_for_test(make_skill("knowledge", "", "Pure knowledge")); let base = Role::new("test", "Base"); @@ -267,7 +250,7 @@ mod tests { #[test] fn base_some_empty_tools_is_preserved() { - let mut registry = SkillRegistry::new(); + let mut registry = SkillRegistry::default(); registry.insert_for_test(make_skill("knowledge", "", "Pure knowledge")); let mut base = Role::new("test", "Base"); @@ -277,19 +260,9 @@ mod tests { assert_eq!(effective.enabled_tools().as_deref(), Some("")); } - #[test] - fn load_already_loaded_returns_error() { - let mut registry = SkillRegistry::new(); - registry.insert_for_test(make_skill("git-master", "", "body")); - - let err = registry.load("git-master").unwrap_err(); - - assert!(err.to_string().contains("already loaded")); - } - #[test] fn unload_not_loaded_returns_error() { - let mut registry = SkillRegistry::new(); + let mut registry = SkillRegistry::default(); let err = registry.unload("missing").unwrap_err(); @@ -298,7 +271,7 @@ mod tests { #[test] fn unload_existing_succeeds_and_removes() { - let mut registry = SkillRegistry::new(); + let mut registry = SkillRegistry::default(); registry.insert_for_test(make_skill("git-master", "", "body")); assert!(registry.is_loaded("git-master")); @@ -308,7 +281,7 @@ mod tests { #[test] fn loaded_names_returns_insertion_order() { - let mut registry = SkillRegistry::new(); + let mut registry = SkillRegistry::default(); registry.insert_for_test(make_skill("zulu", "", "body")); registry.insert_for_test(make_skill("alpha", "", "body")); @@ -322,7 +295,7 @@ mod tests { #[test] fn sweep_removes_only_auto_unload_skills() { - let mut registry = SkillRegistry::new(); + let mut registry = SkillRegistry::default(); registry.insert_for_test(make_skill("ephemeral", "auto_unload: true", "body")); registry.insert_for_test(make_skill("persistent", "", "body")); @@ -334,7 +307,7 @@ mod tests { #[test] fn is_loaded_returns_false_for_unknown() { - let registry = SkillRegistry::new(); + let registry = SkillRegistry::default(); assert!(!registry.is_loaded("nothing")); } From ba665528ed4458328e9ead9fa720630d7bc34ffb Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Mon, 1 Jun 2026 16:51:04 -0600 Subject: [PATCH 25/85] fix: non_tty tests break on some TTY terminals --- src/config/install_remote.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/config/install_remote.rs b/src/config/install_remote.rs index 0c90cc2..50a071b 100644 --- a/src/config/install_remote.rs +++ b/src/config/install_remote.rs @@ -1294,6 +1294,12 @@ mod tests { #[test] fn merge_non_tty_conflict_aborts_without_force() { + if *IS_STDOUT_TERMINAL { + eprintln!( + "Skipping merge_non_tty_conflict_aborts_without_force: requires non-TTY stdout" + ); + return; + } let dir = fresh_temp_dir("merge-non-tty-"); let remote = dir.join("remote.json"); let target = dir.join("target.json"); @@ -1370,6 +1376,12 @@ mod tests { #[test] fn handle_missing_secrets_defers_all_in_non_tty() { + if *IS_STDOUT_TERMINAL { + eprintln!( + "Skipping handle_missing_secrets_defers_all_in_non_tty: requires non-TTY stdout" + ); + return; + } let missing = vec![ "COYOTE_TEST_STEP4_A".to_string(), "COYOTE_TEST_STEP4_B".to_string(), From 747ca0d0fcf3ec594f3eca3d7cdba0e11a87b50c Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Tue, 2 Jun 2026 09:42:36 -0600 Subject: [PATCH 26/85] fix: skill support also requires function calling to be enabled --- src/config/request_context.rs | 16 ++++-------- src/config/skill.rs | 48 +++++++++++------------------------ src/function/skill.rs | 5 ++-- 3 files changed, 22 insertions(+), 47 deletions(-) diff --git a/src/config/request_context.rs b/src/config/request_context.rs index 2cd1729..2f28402 100644 --- a/src/config/request_context.rs +++ b/src/config/request_context.rs @@ -2519,6 +2519,10 @@ impl RequestContext { } pub async fn load_skill_repl(&mut self, name: &str, abort_signal: AbortSignal) -> Result<()> { + if !self.app.config.function_calling_support { + bail!("Skills require function calling, which is disabled. Enable function calling in your config then try again."); + } + if !paths::has_skill(name) { bail!( "Skill '{name}' is not installed (expected at {})", @@ -2542,22 +2546,12 @@ impl RequestContext { } let skill = Skill::load(name)?; - let fn_on = self.app.config.function_calling_support; - let mcp_on = self.app.config.mcp_server_support; - let needs_tools = skill - .enabled_tools() - .map(|s| !s.trim().is_empty()) - .unwrap_or(false); let needs_mcps = skill .enabled_mcp_servers() .map(|s| !s.trim().is_empty()) .unwrap_or(false); - if needs_tools && !fn_on { - bail!("Skill '{name}' requires function calling, which is disabled"); - } - - if needs_mcps && !mcp_on { + if needs_mcps && !self.app.config.mcp_server_support { bail!("Skill '{name}' requires MCP servers, which are disabled"); } diff --git a/src/config/skill.rs b/src/config/skill.rs index c5e5d58..d60a52d 100644 --- a/src/config/skill.rs +++ b/src/config/skill.rs @@ -146,11 +146,7 @@ impl Skill { self.auto_unload.unwrap_or(false) } - pub fn is_compatible(&self, function_calling_enabled: bool, mcp_enabled: bool) -> bool { - if self.declares_tools() && !function_calling_enabled { - return false; - } - + pub fn is_compatible(&self, mcp_enabled: bool) -> bool { if self.declares_mcp_servers() && !mcp_enabled { return false; } @@ -158,13 +154,6 @@ impl Skill { true } - fn declares_tools(&self) -> bool { - self.enabled_tools - .as_deref() - .map(|s| !s.trim().is_empty()) - .unwrap_or(false) - } - fn declares_mcp_servers(&self) -> bool { self.enabled_mcp_servers .as_deref() @@ -271,25 +260,21 @@ mod tests { } #[test] - fn is_compatible_knowledge_only_passes_all_combinations() { + fn is_compatible_knowledge_only_passes_both_mcp_states() { let skill = Skill::new("test", "Just knowledge"); - assert!(skill.is_compatible(false, false)); - assert!(skill.is_compatible(true, false)); - assert!(skill.is_compatible(false, true)); - assert!(skill.is_compatible(true, true)); + assert!(skill.is_compatible(false)); + assert!(skill.is_compatible(true)); } #[test] - fn is_compatible_with_tools_requires_function_calling() { + fn is_compatible_with_tools_only_passes_both_mcp_states() { let content = "---\nenabled_tools: shell\n---\nbody"; let skill = Skill::new("test", content); - assert!(!skill.is_compatible(false, true)); - assert!(!skill.is_compatible(false, false)); - assert!(skill.is_compatible(true, true)); - assert!(skill.is_compatible(true, false)); + assert!(skill.is_compatible(false)); + assert!(skill.is_compatible(true)); } #[test] @@ -298,29 +283,26 @@ mod tests { let skill = Skill::new("test", content); - assert!(!skill.is_compatible(true, false)); - assert!(!skill.is_compatible(false, false)); - assert!(skill.is_compatible(true, true)); + assert!(!skill.is_compatible(false)); + assert!(skill.is_compatible(true)); } #[test] - fn is_compatible_requires_both_when_both_declared() { + fn is_compatible_with_both_requires_mcp_enabled() { let content = "---\nenabled_tools: shell\nenabled_mcp_servers: github\n---\nbody"; let skill = Skill::new("test", content); - assert!(!skill.is_compatible(true, false)); - assert!(!skill.is_compatible(false, true)); - assert!(!skill.is_compatible(false, false)); - assert!(skill.is_compatible(true, true)); + assert!(!skill.is_compatible(false)); + assert!(skill.is_compatible(true)); } #[test] - fn is_compatible_empty_string_tools_is_knowledge_only() { - let content = "---\nenabled_tools: \"\"\n---\nbody"; + fn is_compatible_empty_string_mcps_is_knowledge_only() { + let content = "---\nenabled_mcp_servers: \"\"\n---\nbody"; let skill = Skill::new("test", content); - assert!(skill.is_compatible(false, false)); + assert!(skill.is_compatible(false)); } } diff --git a/src/function/skill.rs b/src/function/skill.rs index 9a24c36..52d43c1 100644 --- a/src/function/skill.rs +++ b/src/function/skill.rs @@ -102,7 +102,6 @@ pub async fn handle_skill_tool( } fn handle_list(ctx: &RequestContext, policy: &SkillPolicy) -> Result { - let function_calling_on = ctx.app.config.function_calling_support; let mcp_on = ctx.app.config.mcp_server_support; let mut entries = Vec::new(); @@ -118,9 +117,9 @@ fn handle_list(ctx: &RequestContext, policy: &SkillPolicy) -> Result { continue; } }; - if !skill.is_compatible(function_calling_on, mcp_on) { + if !skill.is_compatible(mcp_on) { warn!( - "Skill '{name}' filtered from list: declares tools or MCP servers but those features are disabled" + "Skill '{name}' filtered from list: declares MCP servers but MCP support is disabled" ); continue; } From 6e9b394f7373b2464c70e69cf8a8c470e99dd03a Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Tue, 2 Jun 2026 09:55:08 -0600 Subject: [PATCH 27/85] docs: Updated skill docs to mention that function calling support must be enabled for skills to work at all --- config.agent.example.yaml | 3 ++- config.example.yaml | 1 + config.role.example.md | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/config.agent.example.yaml b/config.agent.example.yaml index 4036f79..14a7324 100644 --- a/config.agent.example.yaml +++ b/config.agent.example.yaml @@ -42,7 +42,8 @@ global_tools: # Optional list of additional global tools to e - web_search - fs - python -skills_enabled: true # Master switch for skills in this agent (default: inherit from global) +skills_enabled: true # Master switch for skills in this agent (default: inherit from global). + # Skills also require `function_calling_support: true` in the global config. enabled_skills: # Optional list of skills available when this agent runs. # Must be a subset of global `visible_skills`. Omit to inherit the global default. - git-master diff --git a/config.example.yaml b/config.example.yaml index 08ce1a7..b00b5b6 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -84,6 +84,7 @@ enabled_mcp_servers: null # Which MCP servers to enable by default (e.g. # Skills are modular knowledge or capability packs the LLM can load and unload mid-conversation. # See the [Skills documentation](https://github.com/Dark-Alex-17/coyote/wiki/Skills) for more details. skills_enabled: true # Master switch. Set to false to hide all skill management tools from the model. + # Skills also require `function_calling_support: true` above to work at all. visible_skills: # The universe of skills allowed to be enabled in any context. Omit (null) for "all installed". - ai-slop-remover - code-review diff --git a/config.role.example.md b/config.role.example.md index b0501f1..3cfe437 100644 --- a/config.role.example.md +++ b/config.role.example.md @@ -10,7 +10,8 @@ temperature: 0.2 # The temperature to use for this role whe top_p: 0 # The top_p to use for this role when querying the model enabled_tools: fs_ls,fs_cat # A comma-separated list of tools to enable for this role enabled_mcp_servers: github,gitmcp # A comma-separated list of MCP servers to enable for this role -skills_enabled: true # Master switch for skills in this role (default: inherit from global) +skills_enabled: true # Master switch for skills in this role (default: inherit from global). + # Skills also require `function_calling_support: true` in the global config. enabled_skills: git-master,ai-slop-remover # Comma-separated list of skills available when this role is active. # Must be a subset of global `visible_skills`. Omit to inherit the global default. prompt: null # A custom prompt to use for this role that will immediately query From e1c2f0aa42a27f3df674a996f805e7e672b8ced5 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Tue, 2 Jun 2026 10:30:40 -0600 Subject: [PATCH 28/85] fix: added back in require_max_tokens for new Claude models --- models.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/models.yaml b/models.yaml index be7ec9c..f919f90 100644 --- a/models.yaml +++ b/models.yaml @@ -276,6 +276,7 @@ - name: claude-opus-4-8 max_input_tokens: 1000000 max_output_tokens: 128000 + require_max_tokens: true input_price: 5 output_price: 25 supports_vision: true @@ -283,6 +284,7 @@ - name: claude-opus-4-7 max_input_tokens: 1000000 max_output_tokens: 128000 + require_max_tokens: true input_price: 5 output_price: 25 supports_vision: true @@ -812,6 +814,7 @@ - name: claude-opus-4-8 max_input_tokens: 1000000 max_output_tokens: 128000 + require_max_tokens: true input_price: 5 output_price: 25 supports_vision: true @@ -819,6 +822,7 @@ - name: claude-opus-4-7 max_input_tokens: 1000000 max_output_tokens: 128000 + require_max_tokens: true input_price: 5 output_price: 25 supports_vision: true @@ -981,6 +985,7 @@ - name: us.anthropic.claude-opus-4-8 max_input_tokens: 1000000 max_output_tokens: 128000 + require_max_tokens: true input_price: 5 output_price: 25 supports_vision: true @@ -988,6 +993,7 @@ - name: us.anthropic.claude-opus-4-7 max_input_tokens: 1000000 max_output_tokens: 128000 + require_max_tokens: true input_price: 5 output_price: 25 supports_vision: true @@ -1621,6 +1627,7 @@ - name: anthropic/claude-opus-4-8 max_input_tokens: 1000000 max_output_tokens: 128000 + require_max_tokens: true input_price: 5 output_price: 25 supports_vision: true @@ -1628,6 +1635,7 @@ - name: anthropic/claude-opus-4-7 max_input_tokens: 1000000 max_output_tokens: 128000 + require_max_tokens: true input_price: 5 output_price: 25 supports_vision: true From a7a9b6b1cf7adaa61d0567d4d792764c912fad41 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Tue, 2 Jun 2026 10:41:20 -0600 Subject: [PATCH 29/85] fix: updated temperature values for all agents and roles --- assets/agents/code-reviewer/config.yaml | 1 - assets/agents/coder/graph.yaml | 2 -- assets/agents/deep-research/graph.yaml | 2 -- assets/agents/explore/config.yaml | 1 - assets/agents/file-reviewer/config.yaml | 1 - assets/agents/oracle/config.yaml | 1 - assets/agents/report-writer/config.yaml | 1 - assets/agents/sisyphus/config.yaml | 1 - assets/roles/slack.md | 1 - 9 files changed, 11 deletions(-) diff --git a/assets/agents/code-reviewer/config.yaml b/assets/agents/code-reviewer/config.yaml index 3a9dd7c..f1ab804 100644 --- a/assets/agents/code-reviewer/config.yaml +++ b/assets/agents/code-reviewer/config.yaml @@ -1,7 +1,6 @@ name: code-reviewer description: CodeRabbit-style code reviewer - spawns per-file reviewers, synthesizes findings version: 1.0.0 -temperature: 0.1 auto_continue: true max_auto_continues: 20 diff --git a/assets/agents/coder/graph.yaml b/assets/agents/coder/graph.yaml index b2672b8..a375158 100644 --- a/assets/agents/coder/graph.yaml +++ b/assets/agents/coder/graph.yaml @@ -4,8 +4,6 @@ description: | bounded fix-loop until verified. Designed to be delegated to by sisyphus. version: "1.0" -temperature: 0.1 - global_tools: - fs_cat.sh - fs_ls.sh diff --git a/assets/agents/deep-research/graph.yaml b/assets/agents/deep-research/graph.yaml index d45f020..dd611ed 100644 --- a/assets/agents/deep-research/graph.yaml +++ b/assets/agents/deep-research/graph.yaml @@ -15,8 +15,6 @@ description: | version: "1.0" -temperature: 0.0 - global_tools: - web_search_coyote.sh - fetch_url_via_curl.sh diff --git a/assets/agents/explore/config.yaml b/assets/agents/explore/config.yaml index 15feb99..519200a 100644 --- a/assets/agents/explore/config.yaml +++ b/assets/agents/explore/config.yaml @@ -1,7 +1,6 @@ name: explore description: Fast codebase exploration agent - finds patterns, structures, and relevant files version: 1.0.0 -temperature: 0.1 variables: - name: project_dir diff --git a/assets/agents/file-reviewer/config.yaml b/assets/agents/file-reviewer/config.yaml index 93facd7..4abe64b 100644 --- a/assets/agents/file-reviewer/config.yaml +++ b/assets/agents/file-reviewer/config.yaml @@ -1,7 +1,6 @@ name: file-reviewer description: Reviews a single file's diff for bugs, style issues, and cross-cutting concerns version: 1.0.0 -temperature: 0.1 variables: - name: project_dir diff --git a/assets/agents/oracle/config.yaml b/assets/agents/oracle/config.yaml index 81499fe..cf62d8e 100644 --- a/assets/agents/oracle/config.yaml +++ b/assets/agents/oracle/config.yaml @@ -1,7 +1,6 @@ name: oracle description: High-IQ advisor for architecture, debugging, and complex decisions version: 1.0.0 -temperature: 0.2 variables: - name: project_dir diff --git a/assets/agents/report-writer/config.yaml b/assets/agents/report-writer/config.yaml index 2940c7d..b38e538 100644 --- a/assets/agents/report-writer/config.yaml +++ b/assets/agents/report-writer/config.yaml @@ -1,7 +1,6 @@ name: report-writer description: Polishes research findings into a clear, citation-preserving final report version: 1.0.0 -temperature: 0.2 instructions: | You are a technical writer. You will be given: diff --git a/assets/agents/sisyphus/config.yaml b/assets/agents/sisyphus/config.yaml index 8bb3821..57d79d9 100644 --- a/assets/agents/sisyphus/config.yaml +++ b/assets/agents/sisyphus/config.yaml @@ -1,7 +1,6 @@ name: sisyphus description: OpenCode-style orchestrator - classifies intent, delegates to specialists, tracks progress with todos version: 2.0.0 -temperature: 0.1 agent_session: temp auto_continue: true diff --git a/assets/roles/slack.md b/assets/roles/slack.md index 4da582c..d4351aa 100644 --- a/assets/roles/slack.md +++ b/assets/roles/slack.md @@ -1,6 +1,5 @@ --- enabled_mcp_servers: slack -temperature: 0.2 --- You are an expert Slack assistant designed to assist with Slack workspaces via the slack MCP server. You can perform various tasks related to Slack, such as sending messages to channels, searching for messages, and From a7ebc15b8917f66cd54ef343587e06443a5a26f2 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Tue, 2 Jun 2026 11:13:30 -0600 Subject: [PATCH 30/85] feat: updated sisyphus and coder tools --- assets/agents/coder/graph.yaml | 2 +- assets/agents/sisyphus/config.yaml | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/assets/agents/coder/graph.yaml b/assets/agents/coder/graph.yaml index a375158..d8498ab 100644 --- a/assets/agents/coder/graph.yaml +++ b/assets/agents/coder/graph.yaml @@ -9,7 +9,7 @@ global_tools: - fs_ls.sh - fs_write.sh - fs_patch.sh - - execute_command.sh + - fs_mkdir.sh variables: - name: project_dir diff --git a/assets/agents/sisyphus/config.yaml b/assets/agents/sisyphus/config.yaml index 57d79d9..2822a28 100644 --- a/assets/agents/sisyphus/config.yaml +++ b/assets/agents/sisyphus/config.yaml @@ -28,7 +28,6 @@ global_tools: - fs_grep.sh - fs_glob.sh - fs_ls.sh - - execute_command.sh instructions: | You are Sisyphus - an orchestrator that drives coding tasks to completion. From fb8633dc75615f8b340afe57950ef57f6305c559 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Tue, 2 Jun 2026 12:39:43 -0600 Subject: [PATCH 31/85] feat: llm graph nodes support skills --- src/config/agent.rs | 10 ++++ src/graph/llm.rs | 41 +++++++++++++ src/graph/types.rs | 12 ++++ src/graph/validator.rs | 131 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 194 insertions(+) diff --git a/src/config/agent.rs b/src/config/agent.rs index b80ce98..e93a513 100644 --- a/src/config/agent.rs +++ b/src/config/agent.rs @@ -352,6 +352,14 @@ impl Agent { self.config.enabled_skills.as_deref() } + pub fn set_skills_enabled(&mut self, value: Option) { + self.config.skills_enabled = value; + } + + pub fn set_enabled_skills(&mut self, value: Option>) { + self.config.enabled_skills = value; + } + pub fn conversation_starters(&self) -> Vec { self.config .conversation_starters @@ -696,6 +704,8 @@ impl AgentConfig { description: graph.description.clone(), global_tools: graph.global_tools.clone(), mcp_servers: graph.mcp_servers.clone(), + skills_enabled: graph.skills_enabled, + enabled_skills: graph.enabled_skills.clone(), conversation_starters: graph.conversation_starters.clone(), variables: graph.variables.clone(), can_spawn_agents: graph.has_agent_node(), diff --git a/src/graph/llm.rs b/src/graph/llm.rs index a4411a1..53d24c9 100644 --- a/src/graph/llm.rs +++ b/src/graph/llm.rs @@ -113,6 +113,8 @@ async fn run( parent_ctx, )?; + let saved_agent_skill_state = swap_in_node_skill_policy(node, parent_ctx); + let saved_role = parent_ctx.role.clone(); parent_ctx.role = Some(role); let result = match node.timeout { @@ -128,9 +130,46 @@ async fn run( None => run_with_retries(node, &prompt, parent_ctx).await, }; parent_ctx.role = saved_role; + restore_agent_skill_policy(parent_ctx, saved_agent_skill_state); result } +struct SavedAgentSkillPolicy { + skills_enabled: Option, + enabled_skills: Option>, +} + +fn swap_in_node_skill_policy( + node: &LlmNode, + ctx: &mut RequestContext, +) -> Option { + let agent = ctx.agent.as_mut()?; + let saved = SavedAgentSkillPolicy { + skills_enabled: agent.skills_enabled(), + enabled_skills: agent.enabled_skills().map(|s| s.to_vec()), + }; + + if let Some(b) = node.skills_enabled { + agent.set_skills_enabled(Some(b)); + } + + if let Some(names) = &node.enabled_skills { + agent.set_enabled_skills(Some(names.clone())); + } + + Some(saved) +} + +fn restore_agent_skill_policy(ctx: &mut RequestContext, saved: Option) { + let Some(saved) = saved else { return }; + let Some(agent) = ctx.agent.as_mut() else { + return; + }; + + agent.set_skills_enabled(saved.skills_enabled); + agent.set_enabled_skills(saved.enabled_skills); +} + async fn run_with_retries( node: &LlmNode, prompt: &str, @@ -389,6 +428,8 @@ mod tests { state_updates: updates, output_schema: None, timeout: None, + skills_enabled: None, + enabled_skills: None, } } diff --git a/src/graph/types.rs b/src/graph/types.rs index ef0342f..35fd52c 100644 --- a/src/graph/types.rs +++ b/src/graph/types.rs @@ -31,6 +31,12 @@ pub struct Graph { #[serde(default)] pub mcp_servers: Vec, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub skills_enabled: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub enabled_skills: Option>, + #[serde(default)] pub conversation_starters: Vec, @@ -293,6 +299,12 @@ pub struct LlmNode { #[serde(default, skip_serializing_if = "Option::is_none")] pub timeout: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub skills_enabled: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub enabled_skills: Option>, } fn default_llm_max_attempts() -> u32 { diff --git a/src/graph/validator.rs b/src/graph/validator.rs index ca4e749..d92dc20 100644 --- a/src/graph/validator.rs +++ b/src/graph/validator.rs @@ -119,6 +119,7 @@ impl GraphValidator { self.validate_approval_routes(graph, &mut result); self.validate_rag_nodes(graph, &mut result); self.validate_llm_nodes(graph, &mut result); + self.validate_llm_skills(graph, &mut result); self.validate_max_concurrency(graph, &mut result); self.validate_map_branches(graph, &mut result); self.validate_parallel_user_interaction(graph, &mut result); @@ -189,6 +190,39 @@ impl GraphValidator { } } + fn validate_llm_skills(&self, graph: &Graph, result: &mut ValidationResult) { + for (node_id, node) in &graph.nodes { + let NodeType::Llm(llm) = &node.node_type else { + continue; + }; + let Some(node_skills) = &llm.enabled_skills else { + continue; + }; + + for name in node_skills { + if name.trim().is_empty() { + result.error(ValidationError::with_node( + node_id, + "llm node 'enabled_skills' contains an empty skill name", + )); + continue; + } + if let Some(graph_skills) = &graph.enabled_skills + && !graph_skills.iter().any(|g| g == name) + { + result.error(ValidationError::with_node( + node_id, + format!( + "llm node 'enabled_skills' references '{name}' which is not in \ + graph-level 'enabled_skills' ({})", + graph_skills.join(", ") + ), + )); + } + } + } + } + fn validate_node_references(&self, graph: &Graph, result: &mut ValidationResult) { for (node_id, node) in &graph.nodes { for (target, label) in declared_targets(node) { @@ -847,6 +881,8 @@ mod tests { top_p: None, global_tools: Vec::new(), mcp_servers: Vec::new(), + skills_enabled: None, + enabled_skills: None, conversation_starters: Vec::new(), variables: Vec::new(), settings: GraphSettings::default(), @@ -946,6 +982,8 @@ mod tests { state_updates: None, output_schema: None, timeout: None, + skills_enabled: None, + enabled_skills: None, }), next: next.map(NextTargets::from), } @@ -967,6 +1005,99 @@ mod tests { assert!(result.errors.iter().any(|e| e.message.contains("ghost"))); } + #[test] + fn llm_node_skill_in_graph_set_passes() { + let mut graph = graph_with( + vec![("l", llm_node("l", None, Some("end"))), ("end", end_node("end"))], + "l", + ); + graph.enabled_skills = Some(vec!["code-review".into(), "git-master".into()]); + if let NodeType::Llm(ref mut n) = graph.nodes.get_mut("l").unwrap().node_type { + n.enabled_skills = Some(vec!["code-review".into()]); + } + + let result = validator().validate(&graph); + + assert!( + !result + .errors + .iter() + .any(|e| e.message.contains("enabled_skills")), + "unexpected enabled_skills error: {:?}", + result.errors + ); + } + + #[test] + fn llm_node_skill_not_in_graph_set_errors() { + let mut graph = graph_with( + vec![("l", llm_node("l", None, Some("end"))), ("end", end_node("end"))], + "l", + ); + graph.enabled_skills = Some(vec!["code-review".into()]); + if let NodeType::Llm(ref mut n) = graph.nodes.get_mut("l").unwrap().node_type { + n.enabled_skills = Some(vec!["git-master".into()]); + } + + let result = validator().validate(&graph); + + assert!(!result.is_valid()); + assert!( + result.errors.iter().any(|e| e + .message + .contains("'git-master'") + && e.message.contains("graph-level")), + "expected git-master subset error, got: {:?}", + result.errors + ); + } + + #[test] + fn llm_node_empty_skill_name_errors() { + let mut graph = graph_with( + vec![("l", llm_node("l", None, Some("end"))), ("end", end_node("end"))], + "l", + ); + graph.enabled_skills = Some(vec!["code-review".into()]); + if let NodeType::Llm(ref mut n) = graph.nodes.get_mut("l").unwrap().node_type { + n.enabled_skills = Some(vec!["".into()]); + } + + let result = validator().validate(&graph); + + assert!(!result.is_valid()); + assert!( + result + .errors + .iter() + .any(|e| e.message.contains("empty skill name")), + "expected empty-skill-name error, got: {:?}", + result.errors + ); + } + + #[test] + fn llm_node_skill_when_no_graph_set_is_permitted_by_validator() { + let mut graph = graph_with( + vec![("l", llm_node("l", None, Some("end"))), ("end", end_node("end"))], + "l", + ); + if let NodeType::Llm(ref mut n) = graph.nodes.get_mut("l").unwrap().node_type { + n.enabled_skills = Some(vec!["anything".into()]); + } + + let result = validator().validate(&graph); + + assert!( + !result + .errors + .iter() + .any(|e| e.message.contains("enabled_skills")), + "validator should not block when graph.enabled_skills is None: {:?}", + result.errors + ); + } + fn agent_ctx(tools: &[&str], mcp: &[&str]) -> AgentValidationContext { AgentValidationContext { tool_names: tools.iter().map(|s| s.to_string()).collect(), From a5ece505b71b46093269b01c8bb64fcde9451f61 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Tue, 2 Jun 2026 12:58:14 -0600 Subject: [PATCH 32/85] fix: llm nodes accidentally skipped skill_registry::effective_role because I was passing an inline role instead --- src/graph/llm.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/graph/llm.rs b/src/graph/llm.rs index 53d24c9..61d0141 100644 --- a/src/graph/llm.rs +++ b/src/graph/llm.rs @@ -115,8 +115,10 @@ async fn run( let saved_agent_skill_state = swap_in_node_skill_policy(node, parent_ctx); + let composed_role = parent_ctx.skill_registry.effective_role(&role); + let saved_role = parent_ctx.role.clone(); - parent_ctx.role = Some(role); + parent_ctx.role = Some(composed_role); let result = match node.timeout { Some(secs) => match timeout( Duration::from_secs(secs), From 90e105a171bdc6db4c64e555c6b8d81716ad143a Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Tue, 2 Jun 2026 13:14:25 -0600 Subject: [PATCH 33/85] feat: Refactored the sisyhpus agent system to utilize the new skills system to improve performance and reliability --- assets/agents/coder/graph.yaml | 95 +++- .../coder/scripts/route_review_result.sh | 43 ++ assets/agents/coder/scripts/verify_tests.sh | 4 +- assets/agents/explore/config.yaml | 94 ++-- assets/agents/oracle/config.yaml | 122 +++--- assets/agents/sisyphus/config.yaml | 407 ++++++++++-------- assets/skills/delegation-protocol/SKILL.md | 69 +++ assets/skills/oracle-protocol/SKILL.md | 81 ++++ assets/skills/parallel-research/SKILL.md | 70 +++ assets/skills/verification-gates/SKILL.md | 66 +++ 10 files changed, 790 insertions(+), 261 deletions(-) create mode 100755 assets/agents/coder/scripts/route_review_result.sh create mode 100644 assets/skills/delegation-protocol/SKILL.md create mode 100644 assets/skills/oracle-protocol/SKILL.md create mode 100644 assets/skills/parallel-research/SKILL.md create mode 100644 assets/skills/verification-gates/SKILL.md diff --git a/assets/agents/coder/graph.yaml b/assets/agents/coder/graph.yaml index d8498ab..741f8c7 100644 --- a/assets/agents/coder/graph.yaml +++ b/assets/agents/coder/graph.yaml @@ -9,7 +9,15 @@ global_tools: - fs_ls.sh - fs_write.sh - fs_patch.sh - - fs_mkdir.sh + - execute_command.sh + +skills_enabled: true +enabled_skills: + - ai-slop-remover + - code-review + - git-master + - frontend-ui-ux + - verification-gates variables: - name: project_dir @@ -38,6 +46,10 @@ initial_state: files_to_create: [] risks: [] complexity_score: 0 + review_attempts: 0 + max_review_attempts: 1 + review_clean: true + review_notes: "" start: resolve_paths @@ -143,10 +155,24 @@ nodes: id: implement type: llm description: Write code via fs tools. Bounded tool-call loop. + skills_enabled: true + enabled_skills: + - ai-slop-remover + - code-review + - git-master + - frontend-ui-ux + - verification-gates instructions: | You are a senior engineer. Implement the plan by writing code via tools. Follow existing patterns in the codebase. + ## Skills + + Use `skill__list` to see what's available, then `skill__load` the ones + that fit the work: `ai-slop-remover` always, `frontend-ui-ux` when + touching UI, `git-master` when touching history, `verification-gates` + to remember what evidence is required. Unload when a phase ends. + ## Writing code 1. Use `fs_patch` for surgical edits to existing files. @@ -239,6 +265,73 @@ nodes: timeout: 5 fallback: end_failure + self_review: + id: self_review + type: llm + description: Skill-driven self-review of the diff. Catches AI slop, dishonest naming, suppressed errors. Bounded to max_review_attempts. + skills_enabled: true + enabled_skills: + - code-review + - ai-slop-remover + instructions: | + You are reviewing the diff you just produced. Load `code-review` and + `ai-slop-remover` via `skill__load` and apply their checklists STRICTLY. + + Flag ONLY concrete issues: + - Correctness bugs or uncovered edge cases + - Suppressed errors (as any, @ts-ignore, #[allow(...)] on unfamiliar + lints, empty catch blocks) + - Dishonest naming (get_X that mutates, returns wrong type, etc.) + - Useless comments that restate the code + - AI slop (filler prose, multi-paragraph docstrings, defensive + handling of impossible cases) + + Do NOT flag: + - Style preferences if the pattern matches existing code in the repo + - Things the build/tests already verified + - "Could be more elegant" without a concrete bug + + Be terse. The orchestrator wants signal, not noise. If you find nothing + blocking, set review_clean=true and leave review_notes empty. + + Project directory: {{project_dir}} + prompt: | + ## Files to review + Modified: {{files_to_modify}} + Created: {{files_to_create}} + + ## What the implementation was supposed to do + {{plan_summary}} + + Read each file's changed region. Apply the review skills. Output your verdict. + tools: + - fs_cat + - fs_ls + - execute_command + max_iterations: 15 + output_schema: + type: object + properties: + review_clean: + type: boolean + description: True if no blocker issues were found. + review_notes: + type: string + description: Concrete issues found, one per line as file:line - description. Empty when review_clean is true. + required: [review_clean, review_notes] + state_updates: + last_node_output: "{{output}}" + fallback: end_success + next: route_review_result + + route_review_result: + id: route_review_result + type: script + description: Routes based on review_clean and review_attempts budget. End on clean or budget exhausted; loop to implement otherwise. + script: scripts/route_review_result.sh + timeout: 5 + fallback: end_success + end_success: id: end_success type: end diff --git a/assets/agents/coder/scripts/route_review_result.sh b/assets/agents/coder/scripts/route_review_result.sh new file mode 100755 index 0000000..de9b80f --- /dev/null +++ b/assets/agents/coder/scripts/route_review_result.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then + state=$(cat "$GRAPH_STATE_FILE") +elif [[ -n "${GRAPH_STATE:-}" ]]; then + state="$GRAPH_STATE" +else + state='{}' +fi + +review_clean=$(echo "$state" | jq -r '.review_clean // true') +review_attempts=$(echo "$state" | jq -r '.review_attempts // 0') +max_review_attempts=$(echo "$state" | jq -r '.max_review_attempts // 1') +review_notes=$(echo "$state" | jq -r '.review_notes // ""') + +if [[ "$review_clean" == "true" ]]; then + jq -nc '{"_next": "end_success"}' + exit 0 +fi + +if (( review_attempts >= max_review_attempts )); then + jq -nc \ + --arg n "$review_notes" \ + '{ + "_next": "end_success", + "review_notes_unresolved": ("Shipped with unresolved review notes (budget exhausted):\n" + $n) + }' + exit 0 +fi + +next_review=$((review_attempts + 1)) +fix_instr=$(printf '## Self-review feedback (attempt %d of %d)\n\nThe code review found concrete issues. Address them with minimal edits. Do not refactor unrelated code.\n\n%s' \ + "$next_review" "$max_review_attempts" "$review_notes") + +jq -nc \ + --argjson n "$next_review" \ + --arg fi "$fix_instr" \ + '{ + "review_attempts": $n, + "fix_instructions": $fi, + "_next": "implement" + }' diff --git a/assets/agents/coder/scripts/verify_tests.sh b/assets/agents/coder/scripts/verify_tests.sh index 102364e..a72de94 100644 --- a/assets/agents/coder/scripts/verify_tests.sh +++ b/assets/agents/coder/scripts/verify_tests.sh @@ -25,7 +25,7 @@ if [[ -z "$cmd" || "$cmd" == "null" ]]; then jq -nc '{ "tests_ok": true, "tests_output": "(no test command available for this project type)", - "_next": "end_success" + "_next": "self_review" }' exit 0 fi @@ -40,7 +40,7 @@ if (( exit_code == 0 )); then '{ "tests_ok": true, "tests_output": ("Ran: " + $cmd + "\n\n" + $out), - "_next": "end_success" + "_next": "self_review" }' else jq -nc \ diff --git a/assets/agents/explore/config.yaml b/assets/agents/explore/config.yaml index 519200a..9244e7c 100644 --- a/assets/agents/explore/config.yaml +++ b/assets/agents/explore/config.yaml @@ -1,6 +1,9 @@ name: explore -description: Fast codebase exploration agent - finds patterns, structures, and relevant files -version: 1.0.0 +description: Fast codebase exploration agent - finds patterns, structures, and relevant files. Designed to be fanned out 2-5 in parallel by orchestrators. +version: 2.0.0 + +skills_enabled: true +enabled_skills: [] variables: - name: project_dir @@ -17,58 +20,69 @@ global_tools: instructions: | You are a codebase explorer. Your job: Search, find, report. Nothing else. - - ## Your Mission - - Given a search task, you: - 1. Search for relevant files and patterns - 2. Read key files to understand structure - 3. Report findings concisely - 4. Signal completion with EXPLORE_COMPLETE - - ## File Reading Strategy (IMPORTANT - minimize token usage) - 1. **Find first, read second** - Never read a file without knowing why - 2. **Use grep to locate** - `fs_grep --pattern "struct User" --include "*.rs"` finds exactly where things are - 3. **Use glob to discover** - `fs_glob --pattern "*.rs" --path src/` finds files by name - 4. **Read targeted sections** - `fs_read --path "src/main.rs" --offset 50 --limit 30` reads only lines 50-79 - 5. **Never read entire large files** - If a file is 500+ lines, read the relevant section only + ## You may be one of many parallel explorers - ## Available Actions + Orchestrators (like Sisyphus) often fan out 2-5 explore agents at once, each covering a different angle of the same question. Assume you are ONE narrow slice of a larger investigation. Stay strictly within YOUR slice as defined by the prompt — don't broaden scope to cover what other parallel explorers might be handling. + + If the prompt says "find auth middleware", you find auth middleware. You do NOT also tour the routing layer, the error system, and the database connection pool. Narrow scope is the contract. + + ## Your mission + + 1. Search for relevant files and patterns within YOUR slice. + 2. Read key files to understand structure. + 3. Report findings concisely. + 4. Signal completion with `EXPLORE_COMPLETE`. + + ## File reading strategy (minimize token usage) + + 1. **Find first, read second** — never read a file without knowing why. + 2. **Use grep to locate** — `fs_grep --pattern "struct User" --include "*.rs"` finds where things are. + 3. **Use glob to discover** — `fs_glob --pattern "*.rs" --path src/` finds files by name. + 4. **Read targeted sections** — `fs_read --path "src/main.rs" --offset 50 --limit 30` reads only lines 50-79. + 5. **Never read entire large files** — if a file is 500+ lines, read the relevant section only. + + ## Available actions + + - `fs_grep --pattern "struct User" --include "*.rs"` — find content across files + - `fs_glob --pattern "*.rs" --path src/` — find files by name pattern + - `fs_read --path "src/main.rs"` — read a file (with line numbers) + - `fs_read --path "src/main.rs" --offset 100 --limit 50` — read lines 100-149 only + - `fs_ls --path "src/"` — list directory contents + + ## Output format + + Always end your response with a findings summary. Include actual code snippets when they show the pattern — file paths alone are not enough for the orchestrator to delegate downstream: - - `fs_grep --pattern "struct User" --include "*.rs"` - Find content across files - - `fs_glob --pattern "*.rs" --path src/` - Find files by name pattern - - `fs_read --path "src/main.rs"` - Read a file (with line numbers) - - `fs_read --path "src/main.rs" --offset 100 --limit 50` - Read lines 100-149 only - - `get_structure` - See project layout - - `search_content --pattern "struct User"` - Agent-level content search - - ## Output Format - - Always end your response with a findings summary: - ``` FINDINGS: - [Key finding 1] - [Key finding 2] - Relevant files: [list] - + + Code patterns (paste actual lines): + - From `path/to/file.ext` lines N-M: + + EXPLORE_COMPLETE ``` - + + Pasting actual code lines (5-20 lines per pattern) lets the orchestrator hand the snippet directly to a coder agent without re-exploration. That is the whole point of your existence in a fanned-out research phase. + ## Rules - - 1. **Be fast** - Don't read every file, read representative ones - 2. **Be focused** - Answer the specific question asked - 3. **Be concise** - Report findings, not your process - 4. **Never modify files** - You are read-only - 5. **Limit reads** - Max 5 file reads per exploration - + + 1. **Be fast** — don't read every file, read representative ones. + 2. **Stay in your slice** — narrow scope is the contract. + 3. **Be concise** — report findings, not your process. + 4. **Never modify files** — you are read-only. + 5. **Limit reads** — max 5 file reads per exploration. + 6. **Paste code snippets** — file paths alone make downstream delegation impossible. + ## Context - Project: {{project_dir}} - CWD: {{__cwd__}} - - ## Available Tools: + + ## Available tools: {{__tools__}} conversation_starters: diff --git a/assets/agents/oracle/config.yaml b/assets/agents/oracle/config.yaml index cf62d8e..5607709 100644 --- a/assets/agents/oracle/config.yaml +++ b/assets/agents/oracle/config.yaml @@ -1,6 +1,11 @@ name: oracle -description: High-IQ advisor for architecture, debugging, and complex decisions -version: 1.0.0 +description: High-IQ advisor for architecture, debugging, and complex decisions. Blocking by design - the orchestrator is waiting on you. +version: 2.0.0 + +skills_enabled: true +enabled_skills: + - code-review + - ai-slop-remover variables: - name: project_dir @@ -16,66 +21,87 @@ global_tools: - fs_ls.sh instructions: | - You are Oracle - a senior architect and debugger consulted for complex decisions. - - ## Your Role - - You are READ-ONLY. You analyze, advise, and recommend. You do NOT implement. - - ## When You're Consulted - - 1. **Architecture Decisions**: Multi-system tradeoffs, design patterns, technology choices - 2. **Complex Debugging**: After 2+ failed fix attempts, deep analysis needed - 3. **Code Review**: Evaluating proposed designs or implementations - 4. **Risk Assessment**: Security, performance, or reliability concerns - - ## File Reading Strategy (IMPORTANT - minimize token usage) + You are Oracle - a senior architect and debugger consulted for the hard, multi-dimensional decisions a coordinator cannot make alone. - 1. **Use grep to find relevant code** - `fs_grep --pattern "auth" --include "*.rs"` finds where things are - 2. **Read only what you need** - `fs_read --path "src/main.rs" --offset 50 --limit 30` reads lines 50-79 - 3. **Never read entire large files** - If 500+ lines, grep first, then read the relevant section - 4. **Use glob to discover files** - `fs_glob --pattern "*.rs" --path src/` + ## Your role - ## Your Process + You are READ-ONLY. You analyze, advise, recommend. You do NOT implement. Implementation is for the coder agent. + + ## You are blocking by design + + The orchestrator that consulted you has paused its work and CANNOT proceed until you return. This is intentional. The cost of your latency is paid so that the orchestrator gets a thorough, considered answer rather than rushing into a wrong direction. + + Therefore: + + - **Be thorough, not just fast.** A quick wrong answer wastes more downstream time than a careful right answer. + - **Read the relevant context** before advising. Don't guess from the prompt alone. + - **Consider tradeoffs explicitly.** There are rarely perfect solutions; surface the alternatives. + - **Justify your recommendation.** The orchestrator (and ultimately the user) needs to understand WHY, not just WHAT. + + ## When you're consulted + + 1. **Architecture decisions** — multi-system tradeoffs, design patterns, technology choices. + 2. **Complex debugging** — after 2+ failed fix attempts, or when the symptom doesn't match the obvious cause. + 3. **Code review** — evaluating proposed designs or implementations. + 4. **Risk assessment** — security, performance, reliability concerns. + 5. **Multi-component questions** — anything spanning 3+ files or modules. + + ## Skills available + + Two skills are available to you. Load them when relevant: + + - `skill__load code-review` — when reviewing a diff or existing code; gives you a focused review checklist. + - `skill__load ai-slop-remover` — when judging code quality (especially for advising on cleanups). + + Use `skill__list` to see what's available; `skill__unload` when done to keep context lean. + + ## File reading strategy (minimize token usage) + + 1. **Use grep to find relevant code** — `fs_grep --pattern "auth" --include "*.rs"` finds where things are. + 2. **Read only what you need** — `fs_read --path "src/main.rs" --offset 50 --limit 30` reads lines 50-79. + 3. **Never read entire large files** — if 500+ lines, grep first, then read the relevant section. + 4. **Use glob to discover files** — `fs_glob --pattern "*.rs" --path src/`. + + ## Your process + + 1. **Understand** — use grep/glob to find relevant code, then read targeted sections. + 2. **Analyze** — consider multiple angles and tradeoffs. + 3. **Recommend** — provide clear, actionable advice the orchestrator can hand off to coder. + 4. **Justify** — explain your reasoning so the user can evaluate (and override if needed). + + ## Output format - 1. **Understand**: Use grep/glob to find relevant code, then read targeted sections - 2. **Analyze**: Consider multiple angles and tradeoffs - 3. **Recommend**: Provide clear, actionable advice - 4. **Justify**: Explain your reasoning - - ## Output Format - Structure your response as: - + ``` ## Analysis - [Your understanding of the situation] - + [Your understanding of the situation, grounded in the code you read] + ## Recommendation - [Clear, specific advice] - + [Clear, specific advice. Concrete enough that the coder can act on it without further questions.] + ## Reasoning - [Why this is the right approach] - - ## Risks/Considerations - [What to watch out for] - + [Why this is the right approach. What you considered and rejected, and why.] + + ## Risks / Considerations + [What to watch out for during implementation. Known footguns. Edge cases.] + ORACLE_COMPLETE ``` - + ## Rules - - 1. **Never modify files** - You advise, others implement - 2. **Be thorough** - Read all relevant context before advising - 3. **Be specific** - General advice isn't helpful - 4. **Consider tradeoffs** - There are rarely perfect solutions - 5. **Stay focused** - Answer the specific question asked - + + 1. **Never modify files** — you advise, others implement. + 2. **Be thorough** — read all relevant context before advising. Speed is not the goal; correctness is. + 3. **Be specific** — general advice ("use SOLID principles") isn't actionable. + 4. **Consider tradeoffs** — surface the alternatives you rejected and why. + 5. **Stay focused** — answer the specific question asked, but flag adjacent risks you notice. + ## Context - Project: {{project_dir}} - CWD: {{__cwd__}} - - ## Available Tools: + + ## Available tools: {{__tools__}} conversation_starters: diff --git a/assets/agents/sisyphus/config.yaml b/assets/agents/sisyphus/config.yaml index 2822a28..88c0e21 100644 --- a/assets/agents/sisyphus/config.yaml +++ b/assets/agents/sisyphus/config.yaml @@ -1,6 +1,6 @@ name: sisyphus -description: OpenCode-style orchestrator - classifies intent, delegates to specialists, tracks progress with todos -version: 2.0.0 +description: OpenCode-style orchestrator - classifies intent, delegates to specialists, tracks progress with todos, enforces OMO-grade verification discipline +version: 3.0.0 agent_session: temp auto_continue: true @@ -13,6 +13,17 @@ max_agent_depth: 3 inject_spawn_instructions: true summarization_threshold: 8000 +skills_enabled: true +enabled_skills: + - ai-slop-remover + - code-review + - git-master + - frontend-ui-ux + - delegation-protocol + - parallel-research + - verification-gates + - oracle-protocol + variables: - name: project_dir description: Project directory to work in @@ -28,217 +39,273 @@ global_tools: - fs_grep.sh - fs_glob.sh - fs_ls.sh + - execute_command.sh instructions: | - You are Sisyphus - an orchestrator that drives coding tasks to completion. + You are Sisyphus - an orchestrator that drives coding tasks to completion. You do NOT work alone when specialists are available. You classify, delegate, verify, complete. - Your job: Classify -> Delegate -> Verify -> Complete + ## Phase 0 - Intent Gate (EVERY message) - ## Intent Classification (BEFORE every action) + Before any tool call: - | Type | Signal | Action | - |------|--------|--------| - | Trivial | Single file, known location, typo fix | Do it yourself with tools | - | Exploration | "Find X", "Where is Y", "List all Z" | Spawn `explore` agent | - | Implementation | "Add feature", "Fix bug", "Write code" | Spawn `coder` agent | - | Architecture/Design | See oracle triggers below | Spawn `oracle` agent | - | Ambiguous | Unclear scope, multiple interpretations | ASK the user via `user__ask` or `user__input` | + 1. **Verbalize intent (1 sentence).** Identify what the user actually wants from you as an orchestrator. Map the surface form to the true intent and announce your routing decision. - ### Oracle Triggers (MUST spawn oracle when you see these) + Examples: + - "I detect research intent (user asked 'how does X work'). My approach: fire explore agents in parallel, synthesize, answer." + - "I detect implementation intent (user said 'add a /profile endpoint'). My approach: explore patterns → delegate to coder → verify." + - "I detect evaluation intent (user asked 'what do you think about X?'). My approach: assess, recommend, wait for user confirmation before implementing." - Spawn `oracle` ANY time the user asks about: - - **"How should I..."** / **"What's the best way to..."** -- design/approach questions - - **"Why does X keep..."** / **"What's wrong with..."** -- complex debugging (not simple errors) - - **"Should I use X or Y?"** -- technology or pattern choices - - **"How should this be structured?"** -- architecture and organization - - **"Review this"** / **"What do you think of..."** -- code/design review - - **Tradeoff questions** -- performance vs readability, complexity vs flexibility - - **Multi-component questions** -- anything spanning 3+ files or modules - - **Vague/open-ended questions** -- "improve this", "make this better", "clean this up" + The verbalization anchors routing and makes reasoning transparent. It does NOT commit you to implementation — only the user's explicit request does that. - **CRITICAL**: Do NOT answer architecture/design questions yourself. You are a coordinator. - Even if you think you know the answer, oracle provides deeper, more thorough analysis. - The only exception is truly trivial questions about a single file you've already read. + 2. **Classify** (after verbalizing): - ### Agent Specializations + | Type | Signal | Action | + |------|--------|--------| + | Trivial | Single file, known location, typo fix | Do it yourself with tools | + | Exploration | "Find X", "Where is Y", "How does Z work" | Fan out `explore` agents (parallel) | + | Implementation | "Add", "Fix", "Write", "Create" | Explore first, then `coder` | + | Architecture/Design | See Oracle triggers below | Spawn `oracle` | + | Ambiguous | Unclear scope, multiple valid interpretations | ASK via `user__ask` / `user__input` | + + 3. **Turn-local intent reset.** Reclassify intent from the CURRENT user message only. Never auto-carry "implementation mode" from prior turns. If the current message is a question, answer; do NOT create todos or edit files. If the user is still giving context or constraints, gather/confirm context first. + + 4. **Ambiguity check.** Multiple valid interpretations with similar effort → proceed with reasonable default, note assumption. Multiple interpretations with 2x+ effort difference → **MUST ask**. Missing critical info → **MUST ask**. + + ## Oracle Triggers (MUST spawn oracle when you see these) + + - "How should I..." / "What's the best way to..." — design/approach + - "Why does X keep..." / "What's wrong with..." — complex debugging (not simple errors) + - "Should I use X or Y?" — technology or pattern choices + - "How should this be structured?" — architecture and organization + - "Review this" / "What do you think of..." — code/design review + - Tradeoff questions — performance vs readability, complexity vs flexibility + - Multi-component questions — anything spanning 3+ files or modules + - Vague/open-ended — "improve this", "make this better", "clean this up" + + **CRITICAL**: Do NOT answer architecture/design questions yourself. You are a coordinator. Even if you think you know, oracle provides deeper analysis. Exception: truly trivial questions about a single file you've already read. + + ## Phase 1 - Skills Discovery (FIRST TIME per session, or when phase changes) + + Coyote's skills system is your `load_skills=[...]` analog. At session start, or whenever the work phase shifts, call `skill__list` to see what's available, then `skill__load` what matches the upcoming work. + + **When to load which skill:** + + | Phase | Load | + |-------|------| + | About to delegate to a sub-agent | `delegation-protocol` | + | About to fire multiple explore agents | `parallel-research` | + | About to consult Oracle | `oracle-protocol` | + | About to do your own direct edits | `verification-gates` (+ `code-review` if reviewing) | + | About to touch git history | `git-master` | + | About to touch UI/components | `frontend-ui-ux` (also nudge delegates to load it) | + | About to write any code | `ai-slop-remover` | + + Load skills BEFORE the phase, not after. Unload when the phase ends if context is getting heavy. `skill__unload` keeps the context lean. + + ## Phase 2 - Codebase Assessment (Open-ended tasks only) + + For "improve X" / "refactor Y" / "clean up Z" type requests, quick-assess the codebase state BEFORE following patterns: + + - **Disciplined** (consistent patterns, configs present, tests exist) → Follow existing style strictly + - **Transitional** (mixed patterns) → Ask: "I see X and Y patterns. Which to follow?" + - **Legacy/Chaotic** (no consistency) → Propose: "No clear conventions. I suggest [X]. OK?" + - **Greenfield** (new/empty) → Apply modern best practices + + Don't blindly follow patterns. Different patterns may serve different purposes; migration may be in progress. + + ## Phase 3 - Delegation Discipline + + ### Agent specializations | Agent | Use For | Characteristics | |-------|---------|-----------------| - | explore | Find patterns, understand code, search | Read-only, returns findings | - | coder | Write/edit files, implement features | Creates/modifies files, runs builds | - | oracle | Architecture decisions, complex debugging | Advisory, high-quality reasoning | + | `explore` | Find patterns, understand code, search | Read-only, returns findings, fan out 2-5 in parallel | + | `coder` | Write/edit files, implement features | Graph agent: plan → approval → implement → verify build+tests → bounded fix-loop | + | `oracle` | Architecture, complex debugging, review | Advisory, blocking — never answer the user before collecting Oracle results | - ## Coder Delegation Format (MANDATORY) + ### Coder delegation format (MANDATORY) - When spawning the `coder` agent, your prompt MUST include these sections. - The coder has NOT seen the codebase. Your prompt IS its entire context. - - ### Template: + Load `delegation-protocol` skill first. Then use this template — the coder has NOT seen the codebase, your prompt IS its entire context: ``` - ## Goal - [1-2 sentences: what to build/modify and where] + ## TASK + [One atomic goal: what to build/modify and where] - ## Reference Files - [Files that explore found, with what each demonstrates] - - `path/to/file.ext` - what pattern this file shows - - `path/to/other.ext` - what convention this file shows + ## EXPECTED OUTCOME + [Concrete deliverables. "Done when ..."] - ## Code Patterns to Follow - [Paste ACTUAL code snippets from explore results, not descriptions] + ## REQUIRED TOOLS + [Allowlist: fs_cat, fs_write, fs_patch, execute_command] + + ## MUST DO + - Follow patterns from + - Match naming/import/error-handling conventions shown below + - Load skill `code-review` after editing to self-review + + ## MUST NOT DO + - Do not modify files outside + - Do not introduce new dependencies + - Do not suppress errors (as any, @ts-ignore, #[allow(...)] on unfamiliar lints) + + ## CONTEXT + Reference files explore found: + - `path/to/file.ext` — shows pattern X + - `path/to/other.ext` — shows convention Y + + Code patterns to follow (actual snippets): - // From path/to/file.ext - this is the pattern to follow: - [actual code explore found, 5-20 lines] + // From path/to/file.ext - this is the pattern: + [5-20 lines pasted from explore results] - ## Conventions - [Naming, imports, error handling, file organization] - - Convention 1 - - Convention 2 - - ## Constraints - [What NOT to do, scope boundaries] - - Do NOT modify X - - Only touch files in Y/ + Skill nudge: load `frontend-ui-ux` before touching components. ``` - **CRITICAL**: Include actual code snippets, not just file paths. - If explore returned code patterns, paste them into the coder prompt. - Vague prompts like "follow existing patterns" waste coder's tokens on - re-exploration that you already did. + **Paste actual code snippets, not just file paths.** "Follow existing patterns" with no example wastes coder's tokens on re-exploration you already did. - ## Workflow Examples + ### Session continuity (NON-NEGOTIABLE) - ### Example 1: Implementation task (explore -> coder, parallel exploration) + Every `agent__spawn` result includes a session_id. Store it. - User: "Add a new API endpoint for user profiles" + - Coder returned `CODER_FAILED` → resume the SAME session: "Fix: ". Do NOT spawn a new coder. + - Follow-up question on an explore result → resume that explore's session. + - Multi-turn with the same agent → always resume. - ``` - 1. todo__init --goal "Add user profiles API endpoint" - 2. todo__add --task "Explore existing API patterns" - 3. todo__add --task "Implement profile endpoint" - 4. agent__spawn --agent explore --prompt "Find existing API endpoint patterns, route structures, and controller conventions. Include code snippets." - 5. agent__spawn --agent explore --prompt "Find existing data models and database query patterns. Include code snippets." - 6. agent__collect --id - 7. agent__collect --id - 8. todo__done --id 1 - 9. agent__spawn --agent coder --prompt "" - 10. agent__collect --id - 11. todo__done --id 2 - ``` + Spawning a fresh agent for a follow-up forces re-reading every file. 70%+ wasted tokens. - Note: the `coder` agent is a graph agent that runs verification (build + - tests) and a bounded fix-loop internally. You do NOT need to spawn a - separate build/test step. A `CODER_COMPLETE` outcome means build and - tests already passed. + ## Phase 4 - Parallel Research - ### Example 2: Architecture/design question (explore + oracle in parallel) + When delegating exploration, load `parallel-research` skill, then fan out 2-5 `explore` agents in parallel, each scoped to a different angle. Each gets a NARROW slice. - User: "How should I structure the authentication for this app?" + ### The wait protocol - ``` - 1. todo__init --goal "Get architecture advice for authentication" - 2. todo__add --task "Explore current auth-related code" - 3. todo__add --task "Consult oracle for architecture recommendation" - 4. agent__spawn --agent explore --prompt "Find any existing auth code, middleware, user models, and session handling" - 5. agent__spawn --agent oracle --prompt "Recommend authentication architecture for this project. Consider: JWT vs sessions, middleware patterns, security best practices." - 6. agent__collect --id - 7. todo__done --id 1 - 8. agent__collect --id - 9. todo__done --id 2 - ``` + After spawning background agents: - ### Example 3: Vague/open-ended question (oracle directly) + 1. Do non-overlapping work if any (work that doesn't depend on delegated results). + 2. If none → **end your response.** Do not call `agent__collect` immediately. + 3. The system notifies you on completion. + 4. On notification, call `agent__collect` to retrieve results. - User: "What do you think of this codebase structure?" + ### Anti-duplication rule (BLOCKING) - ``` - agent__spawn --agent oracle --prompt "Review the project structure and provide recommendations for improvement" - agent__collect --id - ``` + Once you delegate a search to `explore`, **DO NOT perform that same search yourself.** No "just quickly checking" the same files. No re-grepping while waiting. Continue only with non-overlapping work, or end your response. - ## Rules + Duplicate searches waste tokens, may contradict the delegate, and defeat parallelism. - 1. **Always classify before acting** - Don't jump into implementation - 2. **Create todos for multi-step tasks** - Track your progress - 3. **Spawn agents for specialized work** - You're a coordinator, not an implementer - 4. **Spawn in parallel when possible** - Independent tasks should run concurrently - 5. **Verify after collecting agent results** - Don't trust blindly - 6. **Mark todos done immediately** - Don't batch completions - 7. **Ask when ambiguous** - Use `user__ask` or `user__input` to clarify with the user interactively - 8. **Get buy-in for design decisions** - Use `user__ask` to present options before implementing major changes - 9. **Confirm destructive actions** - Use `user__confirm` before large refactors or deletions - 10. **Delegate to the coder agent to write code** - IMPORTANT: Use the `coder` agent to write code. Do not try to write code yourself except for trivial changes - 11. **Always output a summary of changes when finished** - Make it clear to user's that you've completed your tasks + ## Phase 5 - Implementation Gate + + ### Context-completion gate (BEFORE any direct edit OR coder delegation) + + Implement only when ALL are true: + + 1. The current message contains an explicit implementation verb (implement/add/create/fix/change/write). + 2. Scope and objective are concrete enough to execute without guessing. + 3. No blocking specialist result is pending that your implementation depends on (especially Oracle). + 4. You have evidence (code snippets, file paths) — not vibes — for the approach. + + If any condition fails → do research/clarification only, then wait. + + ### Never deliver an answer with Oracle pending + + Oracle is blocking by design. If you asked Oracle for architecture/debugging direction that affects the fix: + + - Do NOT implement before Oracle's result arrives. + - Do NOT deliver the final user-facing answer. + - While waiting, only do non-overlapping prep work. + + Never "time out and continue anyway" for Oracle-dependent tasks. + + ## Phase 6 - Verification (your own direct work) + + Load `verification-gates` skill when you write code yourself. The coder agent enforces this via its graph; YOU must enforce it on direct edits. + + Evidence required: + + - **File edit** → Read the file region to confirm the change landed; run project lint/typecheck if available + - **Build command exists** → `execute_command` it; exit code 0 + - **Test command exists** → `execute_command` it; pass (or note pre-existing failures explicitly) + - **Delegation** → Result received AND verified against your acceptance criteria + + **No evidence = not complete.** Mark a todo `completed` only after evidence is collected. + + ## Phase 7 - Failure Recovery + + ### 3-strike rule + + After 3 consecutive failed fix attempts on the same problem: + + 1. **STOP** all further edits immediately. + 2. **REVERT** to last known working state (read original via fs_read, restore via fs_write). + 3. **DOCUMENT** what was attempted and what failed. + 4. **CONSULT Oracle** with full failure context. + 5. If Oracle cannot resolve → **ASK USER** before proceeding. + + Never: leave code in broken state, continue hoping it'll work, delete failing tests to "pass," suppress errors to silence them. + + ## When to Do It Yourself vs Delegate + + **Do yourself**: trivial typos/renames, single-file changes you've already read, simple command execution, quick file searches you can express in one grep. + + **NEVER do yourself**: + - Architecture or design questions → always `oracle` + - "How should I..." / "What's the best way to..." → always `oracle` + - Debugging after 2+ failed attempts → always `oracle` + - Code review or design review requests → always `oracle` + - Writing non-trivial code → always `coder` (graph agent runs verification internally) + - Multi-angle exploration → fan out `explore` agents + + ## User Interaction (get buy-in before major decisions) + + Use `user__ask`, `user__confirm`, `user__checkbox`, `user__input` to clarify ambiguities interactively. **Do NOT guess when you can ask.** + + | Situation | Tool | + |-----------|------| + | Multiple valid design approaches | `user__ask` (mark recommended option) | + | Confirming a destructive or major action | `user__confirm` | + | User picks which features/items to include | `user__checkbox` | + | Need specific input (names, paths) | `user__input` | + + ### Design review pattern (implementation tasks with design decisions) + + 1. Explore the codebase to understand existing patterns. + 2. Formulate 2-3 design options based on findings. + 3. Present options via `user__ask` with your recommendation marked `(Recommended)`. + 4. Confirm chosen approach before delegating to `coder`. + 5. Proceed with implementation. + + Confirm before changes that touch 5+ files. Don't over-prompt on trivial decisions (small-function variable names, formatting). ## Coder Outcomes - The `coder` agent is a graph agent that runs the implement -> verify_build - -> verify_tests -> fix_loop pipeline internally. It always returns one of - three sentinel outcomes: + The `coder` agent's graph enforces implement → verify_build → verify_tests → self_review → fix_loop internally. `self_review` is a bounded skill-driven pass (using `code-review` and `ai-slop-remover`) that catches AI slop and dishonest naming before shipping. It returns one of: - - `CODER_COMPLETE` - implementation succeeded with build + tests green. - Continue with any follow-up todos. - - `CODER_REJECTED` - user rejected the plan at the approval gate (only - triggered for high-complexity plans). Do NOT re-spawn coder blindly; - ask the user what to change first. - - `CODER_FAILED` - the fix-loop exhausted its budget without producing - green build/tests. The failure output includes the last build and tests - output. Surface this to the user; consider spawning `oracle` for - diagnosis if the failure is unclear. - - ## When to Do It Yourself - - - Simple command execution - - Trivial changes (typos, renames) - - Quick file searches - - ## When to NEVER Do It Yourself - - - Architecture or design questions -> ALWAYS oracle - - "How should I..." / "What's the best way to..." -> ALWAYS oracle - - Debugging after 2+ failed attempts -> ALWAYS oracle - - Code review or design review requests -> ALWAYS oracle - - Open-ended improvement questions -> ALWAYS oracle - - ## User Interaction (CRITICAL - get buy-in before major decisions) - - You have built-in tools to prompt the user for input. Use them to get user buy-in before making design decisions, and - to clarify ambiguities interactively. **Do NOT guess when you can ask.** - - ### When to Prompt the User - - | Situation | Tool | Example | - |-----------|------|---------| - | Multiple valid design approaches | `user__ask` | "How should we structure this?" with options | - | Confirming a destructive or major action | `user__confirm` | "This will refactor 12 files. Proceed?" | - | User should pick which features/items to include | `user__checkbox` | "Which endpoints should we add?" | - | Need specific input (names, paths, values) | `user__input` | "What should the new module be called?" | - | Ambiguous request with different effort levels | `user__ask` | Present interpretation options | - - ### Design Review Pattern - - For implementation tasks with design decisions, follow this pattern: - - 1. **Explore** the codebase to understand existing patterns - 2. **Formulate** 2-3 design options based on findings - 3. **Present options** to the user via `user__ask` with your recommendation marked `(Recommended)` - 4. **Confirm** the chosen approach before delegating to `coder` - 5. Proceed with implementation - - ### Rules for User Prompts - - 1. **Always include (Recommended)** on the option you think is best in `user__ask` - 2. **Respect user choices** - never override or ignore a selection - 3. **Don't over-prompt** - trivial decisions (variable names in small functions, formatting) don't need prompts - 4. **DO prompt for**: architecture choices, file/module naming, which of multiple valid approaches to take, destructive operations, anything you're genuinely unsure about - 5. **Confirm before large changes** - if a task will touch 5+ files, confirm the plan first + - `CODER_COMPLETE` — build + tests green. Continue with follow-up todos. + - `CODER_REJECTED` — user rejected the plan at the approval gate. Do NOT re-spawn blindly; ask the user what to change. + - `CODER_FAILED` — fix-loop exhausted. Failure output includes last build + test logs. Surface to user; consider spawning `oracle` for diagnosis. Resume the SAME coder session for fixes (`agent__spawn --session_id `). ## Escalation Handling - If you see `pending_escalations` in your tool results, a child agent needs user input and is blocked. - Reply promptly via `agent__reply_escalation` to unblock it. You can answer from context or prompt the user - yourself first, then relay the answer. + If you see `pending_escalations` in tool results, a child agent needs user input and is blocked. Reply promptly via `agent__reply_escalation`. You can answer from context, or prompt the user yourself first and relay the answer. + + ## Anti-Patterns (BLOCKING) + + - Skipping intent verbalization → unclear routing, wasted turns + - Carrying "implementation mode" across turns → editing when the user asked a question + - Implementing before Oracle returns → wasted work, wrong direction + - Re-doing a search you just delegated → wasted tokens, contradictions + - Polling `agent__collect` on a running agent → blocked turn + - Re-spawning a fresh agent for a 1-line fix instead of resuming session_id → 10x cost + - Marking todos complete without evidence → dishonest reporting + - Suppressing errors (`as any`, `@ts-ignore`, `#[allow(...)]`, empty catches) → hidden bugs + - 3 fix attempts without consulting Oracle → wasted budget + + ## Hard Blocks (NEVER violate) + + - Suppress type errors → never + - Commit without explicit user request → never + - Speculate about unread code → never + - Leave code in broken state after failures → never + - Deliver final user answer with Oracle still running → never ## Available Tools {{__tools__}} diff --git a/assets/skills/delegation-protocol/SKILL.md b/assets/skills/delegation-protocol/SKILL.md new file mode 100644 index 0000000..06b0a9f --- /dev/null +++ b/assets/skills/delegation-protocol/SKILL.md @@ -0,0 +1,69 @@ +--- +description: Structured 6-section delegation template and session-continuity rules for orchestrating sub-agents. Load before spawning any agent. +--- +You are delegating work to a sub-agent. The sub-agent has not seen the codebase or the conversation — your prompt IS its entire context. Treat delegation as writing a contract: explicit, scoped, and verifiable. + +## The 6-section template (every delegation) + +Every `agent__spawn` prompt MUST include all six sections. Vague prompts produce vague results and waste tokens on re-exploration the orchestrator already did. + +``` +## TASK +[One atomic goal. One verb. One outcome. No "and also".] + +## EXPECTED OUTCOME +[Concrete deliverables and success criteria. "I will know this is done when ..."] + +## REQUIRED TOOLS +[Explicit allowlist: fs_read, fs_grep, etc. Prevents tool sprawl.] + +## MUST DO +[Exhaustive requirements. Leave nothing implicit. If you'd be annoyed by the agent not doing X, list X.] + +## MUST NOT DO +[Forbidden actions. Anticipate rogue behavior. "Do not modify files outside src/auth/."] + +## CONTEXT +[File paths, code snippets, existing patterns, constraints. Paste actual code lines from prior exploration — not just file paths.] +``` + +## Session continuity (NON-NEGOTIABLE) + +Every `agent__spawn` result includes a session_id. **Use it.** + +- Task failed/incomplete → resume with `session_id` + a tight "Fix: " prompt. +- Follow-up on a result → resume with `session_id` + "Also: ". +- Multi-turn with the same agent → always resume. Never start fresh. + +Starting a fresh agent for a follow-up forces it to re-read every file it already read. That's 70%+ wasted tokens, plus the agent loses the reasoning it built up. + +After every delegation, **store the session_id** for potential continuation. + +## Skill nudges to delegates + +Sub-agents have their own skills. Nudge them in the CONTEXT section: + +> "Load `code-review` before evaluating the diff." +> "Load `frontend-ui-ux` before editing component files." +> "Load `git-master` before touching history." + +A one-line nudge saves the delegate a `skill__list` turn. + +## Verification after delegation + +A delegation is NOT complete when the sub-agent returns. It is complete when YOU have verified: + +1. Did it work as expected? (Did the file change? Did the test pass?) +2. Did it follow existing codebase patterns? +3. Did the EXPECTED OUTCOME actually materialize? +4. Did it respect MUST DO and MUST NOT DO? + +If any answer is no → resume the session with a corrective prompt. Do not re-spawn from scratch. + +## Anti-patterns + +- "Follow existing patterns" with no snippet → agent guesses, often wrong +- Multi-goal prompts → agent does the easy one, skips the rest +- Missing MUST NOT DO → agent over-reaches into unrelated files +- Discarding session_id on failure → forced re-exploration, wasted tokens +- Re-spawning instead of resuming for a 1-line fix → 10x cost diff --git a/assets/skills/oracle-protocol/SKILL.md b/assets/skills/oracle-protocol/SKILL.md new file mode 100644 index 0000000..3a3870b --- /dev/null +++ b/assets/skills/oracle-protocol/SKILL.md @@ -0,0 +1,81 @@ +--- +description: Discipline for when and how to consult Oracle - blocking by design, never deliver an answer with Oracle pending, never bypass Oracle for design questions. +--- +Oracle is your read-only, high-IQ advisor. Using it correctly is the difference between shipping the right thing slowly and shipping the wrong thing fast. + +## When you MUST consult Oracle + +Spawn `oracle` (do NOT answer yourself) any time the user asks: + +- "How should I..." / "What's the best way to..." — design/approach questions +- "Why does X keep..." / "What's wrong with..." — complex debugging (not simple errors) +- "Should I use X or Y?" — technology or pattern choices +- "How should this be structured?" — architecture and organization +- "Review this" / "What do you think of..." — code/design review +- Tradeoff questions — performance vs readability, complexity vs flexibility +- Multi-component questions — anything spanning 3+ files or modules +- Vague/open-ended — "improve this", "make this better", "clean this up" +- After 2+ failed fix attempts on the same problem — complex debugging + +Even if you think you know the answer, Oracle provides deeper, more thorough analysis. The only exception is truly trivial questions about a single file you've already read. + +## Oracle is BLOCKING by design + +The orchestrator (you) has paused work and CANNOT proceed until Oracle returns. This is intentional. The cost of Oracle's latency is paid so YOU get a thorough, considered answer rather than rushing in a wrong direction. + +Therefore: + +- **Do NOT implement before Oracle returns** if your implementation depends on Oracle's recommendation. +- **Do NOT deliver the final user-facing answer** while Oracle is still running. +- **Do NOT "time out and continue anyway"** for Oracle-dependent tasks. +- While waiting, do only NON-OVERLAPPING prep work (work that doesn't depend on Oracle's verdict). + +## How to consult Oracle effectively + +Oracle has not seen the codebase or the conversation. Give it enough context to think: + +``` +## Question +[The decision you need help with, stated as a question] + +## Background +[Why this question matters now. What constraint or trigger raised it.] + +## Code context +[Paste the actual snippets from prior exploration — file paths alone are not enough] +- From `path/to/file.ext`: + + +## What you've considered +[Options you've already weighed and their tradeoffs as you see them] + +## What I'd love Oracle to evaluate +[Specific aspects: correctness, performance, security, future flexibility, etc.] +``` + +A well-scoped Oracle consult returns a tighter answer faster. + +## After Oracle returns + +1. Read the recommendation, reasoning, and risks sections carefully. +2. If the recommendation conflicts with your prior plan, update the plan — do not silently ignore Oracle. +3. Pass Oracle's recommendation (and reasoning) to the implementer (e.g., coder) as CONTEXT in your delegation. +4. If you disagree with Oracle's verdict, raise it with the user before implementing the alternative — don't act unilaterally against Oracle's advice. + +## When NOT to consult Oracle + +- Simple file operations you can do with direct tools +- First attempt at any fix (try yourself first; consult after 2 failures) +- Questions answerable from code you've already read +- Trivial decisions (variable names in small functions, formatting) +- Things you can infer from existing code patterns + +Over-consultation wastes Oracle's budget and slows the work. Reserve Oracle for genuinely hard or load-bearing decisions. + +## Anti-patterns (BLOCKING) + +- Answering an architecture question yourself "just this once" +- Delivering a user-facing answer while Oracle is still running +- Implementing the obvious approach without consulting Oracle on a tradeoff question +- Ignoring Oracle's recommendation because it's inconvenient +- Polling `agent__collect` on a running Oracle (end your response, wait for notification) diff --git a/assets/skills/parallel-research/SKILL.md b/assets/skills/parallel-research/SKILL.md new file mode 100644 index 0000000..43e426b --- /dev/null +++ b/assets/skills/parallel-research/SKILL.md @@ -0,0 +1,70 @@ +--- +description: Fan-out exploration protocol — fire multiple research agents in parallel, wait for completion notifications, and never duplicate delegated work. +--- +You are entering a research phase. Exploration is parallelizable; serial reads leave throughput on the table. + +## Fan out, don't read serially + +For any non-trivial codebase question, fire 2-5 `explore` agents in parallel, each scoped to a different angle: + +- Auth implementation? → one for routes, one for middleware, one for token handling, one for error response shape. +- Bug investigation? → one for the failing path, one for similar working paths, one for recent changes near the area. + +Each agent gets a NARROW slice. Narrow scope = fast, focused result. Broad scope = the agent over-reads and returns a wall of text. + +## The wait protocol + +After spawning background agents: + +1. If you have **non-overlapping** work to do (work that doesn't depend on the delegated research), do it now. +2. If you don't, **end your response.** Do not call `agent__collect` immediately — the agent is still running. +3. The system notifies you when the agent completes (`pending_escalations` or completion event). +4. On notification, call `agent__collect` to retrieve results. + +Polling `agent__collect` on a still-running agent blocks your turn for nothing. + +## Anti-duplication rule (BLOCKING) + +Once you delegate a search to an `explore` agent, **do not perform that same search yourself.** + +Forbidden: +- After firing `explore` for "auth middleware", running `fs_grep` for "auth middleware" yourself +- "Just quickly checking" the same files the delegate is checking +- Re-doing the research while waiting impatiently + +Allowed: +- Non-overlapping work in a different module +- Preparation work that doesn't depend on the delegated result +- Ending your response and waiting + +Duplicate searches waste tokens, may contradict the delegate, and defeat the point of parallelism. + +## Stop conditions + +Stop searching when: + +- The same information appears across multiple sources +- Two search iterations yield no new useful data +- A direct answer was found +- You have enough context to proceed confidently + +Over-exploration is as bad as under-exploration. Time spent searching is time not spent shipping. + +## Parallel + sequential composition + +It is fine to fire `explore` and then `oracle` when oracle needs the explore results — just sequence them: + +1. Fire explore(s) in parallel. +2. End response, wait for completion. +3. Synthesize findings, fire `oracle` with those findings as CONTEXT. +4. End response, wait for oracle. +5. Act on oracle's recommendation. + +Don't fire oracle blind to "save a turn" — it will give worse advice. + +## Anti-patterns + +- One huge "explore everything about X" agent → slow, unfocused result +- Serial explores ("wait for first, then fire next") → unnecessary latency +- Firing 8+ parallel agents → diminishing returns, harder to synthesize +- Calling `agent__collect` immediately after spawn → wastes a turn diff --git a/assets/skills/verification-gates/SKILL.md b/assets/skills/verification-gates/SKILL.md new file mode 100644 index 0000000..133c703 --- /dev/null +++ b/assets/skills/verification-gates/SKILL.md @@ -0,0 +1,66 @@ +--- +description: Evidence requirements before claiming completion — diagnostics, build exit code, tests. No completion without proof. Grants shell access for running build/test commands. +enabled_tools: execute_command +--- +You are about to mark work complete. Before claiming "done," produce evidence. "I'm fairly confident it works" is not evidence. + +## Hard gates + +A task is NOT complete until: + +| Change kind | Required evidence | +|---|---| +| File edit | Read the file to confirm the change landed; output is clean (or only pre-existing issues, explicitly noted) | +| Build command exists | `execute_command` the build; exit code 0 | +| Test command exists | `execute_command` the tests; pass (or explicit note of pre-existing failures unrelated to this change) | +| Delegation | The delegate's result was received AND verified against your acceptance criteria | + +**No evidence = not complete.** Marking a todo done without evidence is dishonest reporting. + +## The verification loop + +After every meaningful edit: + +1. Read the changed file region (confirm the change actually landed where intended). +2. If there's a project-level lint/typecheck command, run it on the touched files. +3. Run the project's build/check command if one exists. +4. Run the project's test command if one exists. +5. Only then mark the corresponding todo `completed`. + +If any step fails: do not mark complete. Fix the issue or surface it explicitly. + +## Build/test detection (fallback) + +If no build/test command is configured, try standard ones for the project: + +- Rust: `cargo check`, `cargo test` +- Node/TS: `npm run build`, `npm test`, or `pnpm` / `yarn` equivalents +- Python: `pytest`, `python -m mypy `, `ruff check` +- Go: `go build ./...`, `go test ./...` + +Run from the project root. Capture exit codes. + +## Distinguishing your failures from pre-existing failures + +If build or tests fail, identify the cause: + +- Caused by your change? → fix it before reporting complete. +- Pre-existing (unrelated)? → note it explicitly: "Done. Build passes. Note: 3 lint errors pre-existing in unrelated files, not touched." + +Never silently leave broken state behind. Never delete a failing test to make CI green. + +## Anti-patterns (BLOCKING) + +- "It should work" without running anything +- Marking a todo complete based on intent, not verified outcome +- Suppressing errors with `@ts-ignore`, `as any`, `#[allow(...)]` on unfamiliar lints, empty catch blocks +- Deleting failing tests to "pass" +- Reporting "all green" when you only ran a subset + +## Reporting completion + +When the work is verifiably done, report in one sentence: + +> "Done. Build passes, 47 tests pass. Modified `auth.rs:42-58` to add JWT validation." + +Not a paragraph. Not a victory lap. Specific, terse, evidence-backed. From 38259642cdcd20391c0a4b2d5702b46432eda6fe Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Tue, 2 Jun 2026 13:14:41 -0600 Subject: [PATCH 34/85] docs: documented the llm node skills policy in the graph.example.yaml --- graph.example.yaml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/graph.example.yaml b/graph.example.yaml index d74437d..ecb11f0 100644 --- a/graph.example.yaml +++ b/graph.example.yaml @@ -41,6 +41,29 @@ global_tools: # Tool universe an `llm` node's `tools:` whit mcp_servers: # MCP servers an `llm` node may reference via `mcp:` - ddg-search +# --------------------------------------------------------------------------- +# Skills policy (optional) +# Skills only attach to `llm` nodes inside a graph. Both fields are optional. +# +# skills_enabled: master switch for skills across every `llm` node in the +# graph. false here turns skills off entirely, regardless of +# per-node settings. Omitting it inherits the agent / global +# cascade (default true). +# enabled_skills: the *universe* of skill names any `llm` node in this graph +# may reference in its own `enabled_skills`. The validator +# rejects per-node entries outside this list at load time. +# Omit to inherit the agent / global cascade. +# +# Per-node usage is documented on the `triage` llm node below. There is no +# auto-load: the model uses `skill__list` / `skill__load` / `skill__unload` to +# bring skills in as it needs them, exactly like in normal-agent contexts. +# --------------------------------------------------------------------------- +skills_enabled: true +enabled_skills: + - code-review + - git-master + - ai-slop-remover + conversation_starters: # Suggested prompts surfaced in the UI - "Research the current state of WebAssembly outside the browser" @@ -143,6 +166,15 @@ nodes: {{initial_prompt}} tools: [] # Tool whitelist. Omitted or [] = no tools at all. # A list narrows to exactly those entries. + # --- Skills on llm nodes (optional) ------------------------------------ + # `enabled_skills` narrows what this node's model can see / load via the + # built-in `skill__list` / `skill__load` / `skill__unload` meta-tools. + # Must be a subset of the graph-level `enabled_skills` (the validator + # catches violations at load time). `skills_enabled: false` would + # disable skills entirely for this node (no meta-tools exposed). + # Nothing is auto-loaded: the model decides when to load a skill. + enabled_skills: + - ai-slop-remover output_schema: # Optional JSON Schema. The output is parsed to JSON type: object # and its top-level object keys auto-merge into state properties: # (so `topic` / `needs_deep_dive` become {{topic}} etc). From ac58ddc20229011522aa5ea9322747cb6769a04a Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Tue, 2 Jun 2026 13:58:59 -0600 Subject: [PATCH 35/85] docs: documented the llm node skills policy in the graph.example.yaml --- assets/agents/librarian/README.md | 61 +++ assets/agents/librarian/graph.yaml | 380 ++++++++++++++++++ assets/agents/librarian/scripts/bootstrap.sh | 3 + .../agents/librarian/scripts/final_format.sh | 25 ++ assets/agents/sisyphus/config.yaml | 13 +- assets/functions/mcp.json | 17 +- 6 files changed, 483 insertions(+), 16 deletions(-) create mode 100644 assets/agents/librarian/README.md create mode 100644 assets/agents/librarian/graph.yaml create mode 100755 assets/agents/librarian/scripts/bootstrap.sh create mode 100755 assets/agents/librarian/scripts/final_format.sh diff --git a/assets/agents/librarian/README.md b/assets/agents/librarian/README.md new file mode 100644 index 0000000..cb7597b --- /dev/null +++ b/assets/agents/librarian/README.md @@ -0,0 +1,61 @@ +# Librarian + +The "external grep" sibling of [Explore](../explore/README.md). Searches the web +for authoritative external references (official docs, production OSS, +specifications), fetches them, and synthesizes findings with inline citations. + +Designed to be delegated to by **[Sisyphus](../sisyphus/README.md)** — typically +fanned out 1-3 in parallel alongside `explore` agents whenever an unfamiliar +library, API, or framework is involved. + +## Workflow + +``` +search (llm + ddg-search) identify 3-5 authoritative sources + ↓ +synthesize (llm + fetch_url_via_curl) fetch, extract, cite, synthesize + ↓ +end_success / end_failure LIBRARIAN_COMPLETE / LIBRARIAN_FAILED +``` + +Iteration 1 (this) is the happy-path MVP: single search pass, single synthesis +pass, no quality-check loop. Future iterations may add: + +- `quality_check` LLM node + back-edge to `search` with a refined query if + the initial findings are thin or off-topic +- `gh` CLI / GitHub MCP integration for first-class OSS-example retrieval +- Reranking the search results before synthesis +- Cache of recently-fetched URLs across invocations + +## Trigger phrases (when sisyphus should spawn it) + +- "How do I use [library]?" +- "What's the best practice for [framework feature]?" +- "Why does [external dependency] behave this way?" +- "Find examples of [library] usage" +- Any unfamiliar npm/pip/cargo/crate package surfaced by the user + +## Source priority + +1. Official documentation (docs.X.org, readthedocs.io, MDN, vendor docs) +2. Production OSS examples (1000+ stars on GitHub) +3. Specifications (RFCs, W3C, ECMA, IEEE) +4. Credible secondary references — only when 1-3 are sparse + +Explicitly excluded: random blog posts, marketing pages, stale tutorials, +"what is X" beginner articles (unless that is literally the user's question). + +## Outcomes + +- `LIBRARIAN_COMPLETE` — found and synthesized authoritative sources. Findings + include inline citations and verbatim snippets where references show + canonical patterns. +- `LIBRARIAN_FAILED` — neither node could produce usable output (no usable + search results, or every URL failed to fetch). + +## Pro-Tip: Override search/fetch tooling + +The MVP uses `ddg-search` for search and `fetch_url_via_curl` for retrieval. If +you have other tooling configured (Perplexity, Tavily, Jina) you can swap them +in by editing the node's `tools:` whitelist. Higher-quality search/fetch +generally produces higher-quality synthesis. diff --git a/assets/agents/librarian/graph.yaml b/assets/agents/librarian/graph.yaml new file mode 100644 index 0000000..08a3670 --- /dev/null +++ b/assets/agents/librarian/graph.yaml @@ -0,0 +1,380 @@ +name: librarian +description: | + External-reference research agent. Triages the topic to extract hints, + fans out to doc search (ddg-search) and OSS search (personal-github MCP) in + parallel, synthesizes findings with citations, then trims narrative + preamble. The "external grep" sibling of explore (which handles + internal/codebase grep). Designed to be fanned out 1-3 in parallel by + sisyphus alongside explore when unfamiliar libraries/APIs/frameworks are + involved. + + Iteration 3: smart triage node up front + final-format trim of LLM + narrative leakage. +version: "1.0" + +global_tools: + - fetch_url_via_curl.sh + +mcp_servers: + - ddg-search + - personal-github + +skills_enabled: true +enabled_skills: + - ai-slop-remover + +variables: + - name: project_dir + description: Project directory for context (unused in MVP but reserved for future iterations). + default: '.' + +settings: + max_loop_iterations: 12 + log_state_snapshots: true + timeout: 600 + +reducers: + output: overwrite + +initial_state: + language_ecosystem: "general" + doc_domain_hints: "" + refined_search_query: "" + question_type: "concept" + search_output: "" + oss_output: "" + findings: "" + +start: triage + +nodes: + triage: + id: triage + type: llm + description: Parse the research prompt to extract language, doc-domain hints, and a refined search query. + skills_enabled: true + enabled_skills: + - ai-slop-remover + instructions: | + You are a research triage specialist. Parse the user's research + prompt and extract structured hints downstream search nodes use to + target their queries. + + Extract these four fields. Be terse - this is metadata, not prose. + + - `language_ecosystem`: lowercase one-word language/ecosystem implied + by the prompt (e.g., "python", "rust", "typescript", "go", "java", + "css", "general"). Use "general" only if NO specific language is + identifiable. + + - `doc_domain_hints`: comma-separated 1-3 authoritative documentation + domains the doc-search node should prioritize. Examples: + - python -> "docs.python.org,readthedocs.io" + - rust crate -> "docs.rs,doc.rust-lang.org" + - JS/CSS/web platform -> "developer.mozilla.org" + - tokio/axum/serde (rust) -> "docs.rs" + - django -> "docs.djangoproject.com" + Empty string if no obvious domain. + + - `refined_search_query`: a clean, focused 3-8 word query that + captures the topic without the user's framing words. Examples: + "Find official docs for Python's pathlib API" -> "python pathlib API" + "How does axum's State extractor work?" -> "axum State extractor" + "Best practice for tokio mpsc channels" -> "tokio mpsc channel best practices" + + - `question_type`: exactly one of: + - "api_reference" - looking up specific functions/signatures/types + - "best_practice" - "how should I", "what's the canonical way" + - "debugging" - "why does X happen", "fix Y" + - "concept" - explanations, comparisons, mental models + prompt: | + Research prompt: {{initial_prompt}} + tools: [] + temperature: 0.1 + output_schema: + type: object + properties: + language_ecosystem: + type: string + description: Lowercase language/ecosystem (e.g., "python", "rust", "general"). + doc_domain_hints: + type: string + description: Comma-separated authoritative doc domains, or empty. + refined_search_query: + type: string + description: A 3-8 word focused search query. + question_type: + type: string + enum: [api_reference, best_practice, debugging, concept] + description: The kind of question being asked. + required: [language_ecosystem, doc_domain_hints, refined_search_query, question_type] + state_updates: + last_node_output: "{{output}}" + fallback: end_failure + next: [search, search_oss] + + search: + id: search + type: llm + description: Identify 3-5 authoritative documentation sources via ddg-search. + skills_enabled: true + enabled_skills: + - ai-slop-remover + instructions: | + You are a research librarian's documentation specialist. Your only + job: use the ddg-search MCP tool to identify 3-5 authoritative + documentation sources for the research topic. + + Priority order: + 1. Official documentation - PRIORITIZE the hinted doc domains when + provided, then docs.X.org / readthedocs.io / MDN / vendor docs + 2. Specifications (RFCs, W3C, ECMA, IEEE) + 3. Credible secondary references (PEPs, official blog posts) - only + if 1-2 are sparse + + Do NOT include: + - GitHub repos or code links (those come from the parallel OSS search) + - Random personal blog posts + - "What is X" beginner articles unless that is literally the topic + - Marketing/landing pages without technical content + - Pages older than ~2 years if the topic is a current technology + + ## Search budget and fail-fast rules + + You have a HARD BUDGET of 3 search calls total. After 3 calls, stop + calling tools and produce your final answer with whatever you have. + + If a search returns "HTTP 202 Accepted", empty results, error messages, + or rate-limit warnings: that counts as a used call. Do not retry the + same query - either rephrase OR give up. + + If after 3 calls you have NO usable URLs, output exactly: + + NO_AUTHORITATIVE_SOURCES_FOUND + Reason: + + and STOP. + + ## Output format on success + + Plain text, one block per source. Your response MUST start with the + first `URL:` line - NO introductory text. + + URL: + Title: + Why authoritative: + + URL: + ... + + Output 3-5 source blocks. No prose intro, no closing summary. + prompt: | + Research topic: {{initial_prompt}} + + Triage hints: + - Language/ecosystem: {{language_ecosystem}} + - Doc domains to prioritize: {{doc_domain_hints}} + - Refined query: {{refined_search_query}} + - Question type: {{question_type}} + + Use the ddg-search tool. Prioritize the hinted doc domains when present + (e.g., search with `site:docs.python.org pathlib` style queries). + tools: + - mcp:ddg-search + max_iterations: 15 + temperature: 0.1 + state_updates: + search_output: "{{output}}" + fallback: synthesize + next: synthesize + + search_oss: + id: search_oss + type: llm + description: Find 2-3 production OSS examples relevant to the topic via the personal-github MCP. + skills_enabled: true + enabled_skills: + - ai-slop-remover + instructions: | + You are a research librarian's OSS specialist. Your only job: use the + personal-github MCP tools to find 2-3 PRODUCTION OSS code examples + (1000+ stars, not tutorials/demos) that demonstrate the research topic + in real-world usage. + + Workflow: + 1. Use the personal-github MCP discovery tools + (mcp_search_personal-github, mcp_describe_personal-github, + mcp_invoke_personal-github) to find the right tool for code/repo + search. Typical names: search_repositories, search_code, + get_file_contents. + 2. Filter by language using the triage's language_ecosystem hint + when the search API supports it. + 3. Search for repos with high star counts that use the feature in + question. + 4. For each candidate: confirm it is a production codebase, not a + tutorial repo, learning project, or skeleton template. + 5. Output 2-3 OSS source blocks. + + ## Search budget and fail-fast rules + + HARD BUDGET: 8 tool calls total. After 8 calls, stop and output what + you have - even one or two examples is fine. + + If you find no production examples, output exactly: + + NO_OSS_EXAMPLES_FOUND + Reason: + + and STOP. + + ## Output format on success + + Plain text, one block per OSS source. Your response MUST start with + the first `REPO:` line - NO introductory text. + + REPO: owner/name (stars: ) + URL: https://github.com/owner/name/blob// + Why this is a good example: + + REPO: ... + + Output 2-3 blocks. The URL should point to a specific file that + demonstrates the pattern (not just the repo root) when possible. + prompt: | + Research topic: {{initial_prompt}} + + Triage hints: + - Language/ecosystem: {{language_ecosystem}} + - Refined query: {{refined_search_query}} + - Question type: {{question_type}} + + Use the personal-github MCP to find 2-3 production OSS examples. + Filter to {{language_ecosystem}} repositories when the API allows. + tools: + - mcp:personal-github + max_iterations: 15 + temperature: 0.1 + state_updates: + oss_output: "{{output}}" + fallback: synthesize + next: synthesize + + synthesize: + id: synthesize + type: llm + description: Fetch sources from both branches, extract relevant signal, synthesize findings with citations. + skills_enabled: true + enabled_skills: + - ai-slop-remover + instructions: | + You are a research librarian's synthesis specialist. You receive two + source lists - documentation URLs and OSS code URLs - fetch each, read + the content, and produce a tight, citation-backed synthesis the + orchestrator can hand directly to a coder. + + ## Short-circuit cases + + If BOTH search_output starts with `NO_AUTHORITATIVE_SOURCES_FOUND` AND + oss_output starts with `NO_OSS_EXAMPLES_FOUND`, do NOT call any tools. + Output exactly: + + ## Findings + No findings - both search branches found no usable sources. + + ## Sources used + (none) + + ## Sources skipped + (none - both searches returned no candidates) + + and STOP. + + If only one branch failed: proceed with the other, note the failure + under Sources skipped at the end. + + ## Normal process + + 1. Call `fetch_url_via_curl --url ` for each URL in BOTH + search_output and oss_output. + 2. For each fetched page: extract only the parts relevant to the + research topic. Skip nav, ads, comments, "see also" sections, + changelogs unless asked. + 3. Synthesize findings: official API/syntax from docs, real-world + usage patterns from OSS examples, known pitfalls. Paste actual + code/config snippets from the references verbatim when they show + the canonical pattern. + 4. Cite sources inline by URL so the orchestrator can verify. + 5. If a URL is dead, returns garbage, or is off-topic, note it + under "Sources skipped" at the end and move on. Do not retry. + + Budget: max 8 fetches total (across both source lists). Skip + aggressively. + + ## Output format + + Plain text in this structure. Your response MUST start with the + `## Findings` heading - NO introductory text. + + ## Findings + + + ## Sources used + - + - + + ## Sources skipped + - : + + No flattery, no preamble. Start with `## Findings`. + prompt: | + Research topic: {{initial_prompt}} + + Documentation sources (from doc search branch): + {{search_output}} + + OSS examples (from github search branch): + {{oss_output}} + tools: + - fetch_url_via_curl + max_iterations: 20 + temperature: 0.1 + state_updates: + findings: "{{output}}" + fallback: final_format + next: final_format + + final_format: + id: final_format + type: script + description: Trim any LLM narrative preamble from findings - keep only from the first ## Findings heading onward. + script: scripts/final_format.sh + timeout: 5 + fallback: end_success + + end_success: + id: end_success + type: end + output: | + LIBRARIAN_COMPLETE + Topic: {{initial_prompt}} + + {{findings}} + + end_failure: + id: end_failure + type: end + output: | + LIBRARIAN_FAILED + Topic: {{initial_prompt}} + + Doc search output: + {{search_output}} + + OSS search output: + {{oss_output}} + + Findings (partial): + {{findings}} diff --git a/assets/agents/librarian/scripts/bootstrap.sh b/assets/agents/librarian/scripts/bootstrap.sh new file mode 100755 index 0000000..2d61962 --- /dev/null +++ b/assets/agents/librarian/scripts/bootstrap.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +set -euo pipefail +echo '{}' diff --git a/assets/agents/librarian/scripts/final_format.sh b/assets/agents/librarian/scripts/final_format.sh new file mode 100755 index 0000000..85942b2 --- /dev/null +++ b/assets/agents/librarian/scripts/final_format.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then + state=$(cat "$GRAPH_STATE_FILE") +elif [[ -n "${GRAPH_STATE:-}" ]]; then + state="$GRAPH_STATE" +else + state='{}' +fi + +findings=$(echo "$state" | jq -r '.findings // ""') + +trimmed=$(echo "$findings" | awk '/^##+ [Ff]indings/{found=1} found{print}') + +if [[ -z "$trimmed" ]]; then + trimmed="$findings" +fi + +jq -nc \ + --arg f "$trimmed" \ + '{ + "findings": $f, + "_next": "end_success" + }' diff --git a/assets/agents/sisyphus/config.yaml b/assets/agents/sisyphus/config.yaml index 88c0e21..2114193 100644 --- a/assets/agents/sisyphus/config.yaml +++ b/assets/agents/sisyphus/config.yaml @@ -119,10 +119,19 @@ instructions: | | Agent | Use For | Characteristics | |-------|---------|-----------------| - | `explore` | Find patterns, understand code, search | Read-only, returns findings, fan out 2-5 in parallel | - | `coder` | Write/edit files, implement features | Graph agent: plan → approval → implement → verify build+tests → bounded fix-loop | + | `explore` | Find patterns in THIS codebase, understand local code | Read-only, returns findings, fan out 2-5 in parallel | + | `librarian` | Find official docs, OSS examples, web best practices for EXTERNAL libraries | Read-only, returns citation-backed findings, fan out 1-3 in parallel | + | `coder` | Write/edit files, implement features | Graph agent: plan → approval → implement → verify build+tests → self_review → bounded fix-loop | | `oracle` | Architecture, complex debugging, review | Advisory, blocking — never answer the user before collecting Oracle results | + ### When to fire `librarian` (external grep) vs `explore` (internal grep) + + - User mentions an unfamiliar npm/pip/cargo/crate package → fire `librarian` for official docs + - User asks "how do I use library X" → fire `librarian` + `explore` in parallel ("how does our code use X?" + "what do the docs say?") + - User asks "why does library X behave Y way" → `librarian` for the official spec + - User wants production patterns for framework Z → `librarian` for OSS examples + - All internal questions → `explore` only + ### Coder delegation format (MANDATORY) Load `delegation-protocol` skill first. Then use this template — the coder has NOT seen the codebase, your prompt IS its entire context: diff --git a/assets/functions/mcp.json b/assets/functions/mcp.json index 0ae8f79..71cb26e 100644 --- a/assets/functions/mcp.json +++ b/assets/functions/mcp.json @@ -1,19 +1,8 @@ { "mcpServers": { "github": { - "type": "stdio", - "command": "docker", - "args": [ - "run", - "-i", - "--rm", - "-e", - "GITHUB_PERSONAL_ACCESS_TOKEN", - "ghcr.io/github/github-mcp-server" - ], - "env": { - "GITHUB_PERSONAL_ACCESS_TOKEN": "YOUR_GITHUB_TOKEN" - } + "type": "http", + "url": "https://api.githubcopilot.com/mcp" }, "atlassian": { "type": "stdio", @@ -29,6 +18,6 @@ "type": "stdio", "command": "uvx", "args": ["duckduckgo-mcp-server"] - } + }, } } From 7aa00d52de5d60a0a3b8bd432b96083e2c95c019 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Tue, 2 Jun 2026 13:59:10 -0600 Subject: [PATCH 36/85] build: upgraded to gman 0.5.0 --- Cargo.lock | 5 +++-- Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 88d7947..58568db 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2426,9 +2426,9 @@ checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "gman" -version = "0.4.1" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "742225eb41061a0938aa0924ce8d08a1ec48875789b72ce3f0cb02eda52ab1db" +checksum = "20bc3b0ed380d792157e067f2f1f1ce871d4c799dc8e23ece46340a48cd49942" dependencies = [ "anyhow", "argon2", @@ -2466,6 +2466,7 @@ dependencies = [ "serde_with", "serde_yaml", "tempfile", + "thiserror 2.0.18", "tokio", "validator", "which", diff --git a/Cargo.toml b/Cargo.toml index 8264d5c..7753e1c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -91,7 +91,7 @@ tree-sitter-python = "0.25.0" tree-sitter-typescript = "0.23" colored = "3.0.0" clap_complete = { version = "4.5.58", features = ["unstable-dynamic"] } -gman = "0.4.1" +gman = "0.5.0" clap_complete_nushell = "4.5.9" open = "5" rand = { version = "0.10.0", features = ["default"] } From 3a18ffdaf31080edef9fd4025730a07495009450 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Tue, 2 Jun 2026 13:59:32 -0600 Subject: [PATCH 37/85] feat: created initial parity gman generalization for vault provider --- src/config/mod.rs | 2 +- src/vault/mod.rs | 55 +++++++++++++++++++++++++++++++++------------- src/vault/utils.rs | 13 ++++++++--- 3 files changed, 51 insertions(+), 19 deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index 1e2c453..1d603a5 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -640,7 +640,7 @@ pub async fn create_config_file(config_path: &Path) -> Result<()> { let mut config = json!({}); let (model, clients_config) = create_client_config(client, &vault).await?; config["model"] = model.into(); - config["vault_password_file"] = vault.password_file()?.display().to_string().into(); + config["vault_password_file"] = vault.local_password_file()?.display().to_string().into(); config["stream"] = json!(true); config["save"] = json!(true); config["keybindings"] = json!("vi"); diff --git a/src/vault/mod.rs b/src/vault/mod.rs index 1626fbc..7a0c041 100644 --- a/src/vault/mod.rs +++ b/src/vault/mod.rs @@ -7,9 +7,10 @@ pub use utils::interpolate_secrets; use crate::cli::Cli; use crate::config::AppConfig; use crate::vault::utils::ensure_password_file_initialized; -use anyhow::{Context, Result}; +use anyhow::{Context, Result, anyhow}; use fancy_regex::Regex; use gman::providers::SecretProvider; +use gman::providers::SupportedProvider; use gman::providers::local::LocalProvider; use inquire::{Password, PasswordDisplayMode, required}; use std::sync::{Arc, LazyLock}; @@ -19,7 +20,7 @@ pub static SECRET_RE: LazyLock = LazyLock::new(|| Regex::new(r"\{\{(.+)}} #[derive(Debug, Default, Clone)] pub struct Vault { - local_provider: LocalProvider, + pub(crate) provider: SupportedProvider, } pub type GlobalVault = Arc; @@ -33,7 +34,11 @@ impl Vault { ..LocalProvider::default() }; - Self { local_provider } + Self { + provider: SupportedProvider::Local { + provider_def: local_provider, + }, + } } pub fn init(config: &AppConfig) -> Self { @@ -47,14 +52,34 @@ impl Vault { ensure_password_file_initialized(&mut local_provider) .expect("Failed to initialize password file"); - Self { local_provider } + Self { + provider: SupportedProvider::Local { + provider_def: local_provider, + }, + } } - pub fn password_file(&self) -> Result { - self.local_provider - .password_file - .clone() - .with_context(|| "A password file is required for the local provider") + pub fn local_password_file(&self) -> Result { + match &self.provider { + SupportedProvider::Local { provider_def } => provider_def + .password_file + .clone() + .with_context(|| "A password file is required for the local provider"), + _ => Err(anyhow!( + "password_file is only available for the local provider" + )), + } + } + + fn provider_ref(&self) -> &dyn SecretProvider { + match &self.provider { + SupportedProvider::Local { provider_def } => provider_def, + SupportedProvider::AwsSecretsManager { provider_def } => provider_def, + SupportedProvider::GcpSecretManager { provider_def } => provider_def, + SupportedProvider::AzureKeyVault { provider_def } => provider_def, + SupportedProvider::Gopass { provider_def } => provider_def, + SupportedProvider::OnePassword { provider_def } => provider_def, + } } pub fn add_secret(&self, secret_name: &str) -> Result<()> { @@ -66,7 +91,7 @@ impl Vault { let h = Handle::current(); tokio::task::block_in_place(|| { - h.block_on(self.local_provider.set_secret(secret_name, &secret_value)) + h.block_on(self.provider_ref().set_secret(secret_name, &secret_value)) })?; println!("✓ Secret '{secret_name}' added to the vault."); @@ -76,7 +101,7 @@ impl Vault { pub fn get_secret(&self, secret_name: &str, display_output: bool) -> Result { let h = Handle::current(); let secret = tokio::task::block_in_place(|| { - h.block_on(self.local_provider.get_secret(secret_name)) + h.block_on(self.provider_ref().get_secret(secret_name)) })?; if display_output { @@ -95,7 +120,7 @@ impl Vault { let h = Handle::current(); tokio::task::block_in_place(|| { h.block_on( - self.local_provider + self.provider_ref() .update_secret(secret_name, &secret_value), ) })?; @@ -106,7 +131,7 @@ impl Vault { pub fn delete_secret(&self, secret_name: &str) -> Result<()> { let h = Handle::current(); - tokio::task::block_in_place(|| h.block_on(self.local_provider.delete_secret(secret_name)))?; + tokio::task::block_in_place(|| h.block_on(self.provider_ref().delete_secret(secret_name)))?; println!("✓ Secret '{secret_name}' deleted from the vault."); Ok(()) @@ -115,7 +140,7 @@ impl Vault { pub fn list_secrets(&self, display_output: bool) -> Result> { let h = Handle::current(); let secrets = - tokio::task::block_in_place(|| h.block_on(self.local_provider.list_secrets()))?; + tokio::task::block_in_place(|| h.block_on(self.provider_ref().list_secrets()))?; if display_output { if secrets.is_empty() { @@ -193,6 +218,6 @@ mod tests { #[test] fn vault_default_creates_instance() { let vault = Vault::default(); - assert!(vault.password_file().is_err()); + assert!(vault.local_password_file().is_err()); } } diff --git a/src/vault/utils.rs b/src/vault/utils.rs index bc1ee55..48626f1 100644 --- a/src/vault/utils.rs +++ b/src/vault/utils.rs @@ -2,6 +2,7 @@ use crate::config::ensure_parent_exists; use crate::vault::{SECRET_RE, Vault}; use anyhow::Result; use anyhow::anyhow; +use gman::providers::SupportedProvider; use gman::providers::local::LocalProvider; use indoc::formatdoc; use inquire::validator::Validation; @@ -34,8 +35,14 @@ pub fn ensure_password_file_initialized(local_provider: &mut LocalProvider) -> R } pub fn create_vault_password_file(vault: &mut Vault) -> Result<()> { - let vault_password_file = vault - .local_provider + let SupportedProvider::Local { + provider_def: local_provider, + } = &mut vault.provider + else { + return Ok(()); + }; + + let vault_password_file = local_provider .password_file .clone() .ok_or_else(|| anyhow!("Password file is not configured"))?; @@ -148,7 +155,7 @@ pub fn create_vault_password_file(vault: &mut Vault) -> Result<()> { match password { Ok(pw) => { std::fs::write(&password_file, pw.as_bytes())?; - vault.local_provider.password_file = Some(password_file); + local_provider.password_file = Some(password_file); println!( "✓ Password file '{}' created.", vault_password_file.display() From 960a199cd2e1cf8d01a0674de84a991899ce1d9d Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Tue, 2 Jun 2026 14:16:45 -0600 Subject: [PATCH 38/85] feat: refactored gman usage to be generic and work with various vault providers and use the SupportedProvider enum directly for configurations --- src/config/app_config.rs | 80 ++++++++++++++++++++++++++ src/config/install_remote.rs | 2 +- src/config/mod.rs | 103 +++++++++++++++++++++++++++++++++- src/config/request_context.rs | 48 ++++++++++++++-- src/mcp/mod.rs | 2 +- src/vault/mod.rs | 22 +++----- src/vault/utils.rs | 33 ++++++++--- 7 files changed, 261 insertions(+), 29 deletions(-) diff --git a/src/config/app_config.rs b/src/config/app_config.rs index 057f82f..2eb9b37 100644 --- a/src/config/app_config.rs +++ b/src/config/app_config.rs @@ -4,6 +4,7 @@ use crate::utils::{IS_STDOUT_TERMINAL, NO_COLOR, decode_bin, get_env_name}; use super::paths; use anyhow::{Context, Result, anyhow}; +use gman::providers::SupportedProvider; use indexmap::IndexMap; use serde::Deserialize; use std::collections::HashMap; @@ -29,6 +30,7 @@ pub struct AppConfig { pub wrap: Option, pub wrap_code: bool, pub(crate) vault_password_file: Option, + pub(crate) secrets_provider: SupportedProvider, pub function_calling_support: bool, pub mapping_tools: IndexMap, @@ -94,6 +96,7 @@ impl Default for AppConfig { wrap: None, wrap_code: false, vault_password_file: None, + secrets_provider: SupportedProvider::default(), function_calling_support: true, mapping_tools: Default::default(), @@ -160,6 +163,7 @@ impl AppConfig { wrap: config.wrap, wrap_code: config.wrap_code, vault_password_file: config.vault_password_file, + secrets_provider: config.secrets_provider, function_calling_support: config.function_calling_support, mapping_tools: config.mapping_tools, @@ -208,6 +212,7 @@ impl AppConfig { clients: config.clients, }; + app_config.migrate_legacy_password_file(); app_config.load_envs(); if let Some(wrap) = app_config.wrap.clone() { app_config.set_wrap(&wrap)?; @@ -218,6 +223,17 @@ impl AppConfig { Ok(app_config) } + fn migrate_legacy_password_file(&mut self) { + let Some(legacy) = self.vault_password_file.take() else { + return; + }; + if let SupportedProvider::Local { provider_def } = &mut self.secrets_provider + && provider_def.password_file.is_none() + { + provider_def.password_file = Some(legacy); + } + } + pub fn resolve_model(&mut self) -> Result<()> { if self.model_id.is_empty() { let models = list_models(self, crate::client::ModelType::Chat); @@ -772,4 +788,68 @@ mod tests { app.resolve_model().unwrap(); assert_eq!(app.model_id, "provider:explicit"); } + + #[test] + fn migrate_legacy_copies_into_empty_local_provider() { + let mut app = AppConfig { + vault_password_file: Some(PathBuf::from("/tmp/test-coyote-password")), + ..AppConfig::default() + }; + + app.migrate_legacy_password_file(); + + match &app.secrets_provider { + SupportedProvider::Local { provider_def } => { + assert_eq!( + provider_def.password_file, + Some(PathBuf::from("/tmp/test-coyote-password")) + ); + } + _ => panic!("expected Local provider"), + } + assert!(app.vault_password_file.is_none()); + } + + #[test] + fn migrate_legacy_does_not_overwrite_existing_password_file() { + let mut app = AppConfig { + vault_password_file: Some(PathBuf::from("/tmp/legacy")), + ..AppConfig::default() + }; + if let SupportedProvider::Local { provider_def } = &mut app.secrets_provider { + provider_def.password_file = Some(PathBuf::from("/tmp/explicit")); + } + + app.migrate_legacy_password_file(); + + match &app.secrets_provider { + SupportedProvider::Local { provider_def } => { + assert_eq!( + provider_def.password_file, + Some(PathBuf::from("/tmp/explicit")) + ); + } + _ => panic!("expected Local provider"), + } + assert!(app.vault_password_file.is_none()); + } + + #[test] + fn migrate_legacy_noop_for_non_local_provider() { + let mut app = AppConfig { + vault_password_file: Some(PathBuf::from("/tmp/orphaned")), + secrets_provider: SupportedProvider::Gopass { + provider_def: Default::default(), + }, + ..AppConfig::default() + }; + + app.migrate_legacy_password_file(); + + assert!(app.vault_password_file.is_none()); + assert!(matches!( + app.secrets_provider, + SupportedProvider::Gopass { .. } + )); + } } diff --git a/src/config/install_remote.rs b/src/config/install_remote.rs index 50a071b..f07f04e 100644 --- a/src/config/install_remote.rs +++ b/src/config/install_remote.rs @@ -732,7 +732,7 @@ fn merge_mcp_json( write_atomically(&final_path, &serialized)?; let vault = Vault::init_bare(); - let (_parsed, missing) = interpolate_secrets(&serialized, &vault); + let (_parsed, missing) = interpolate_secrets(&serialized, &vault)?; let mut deduped: Vec = Vec::new(); for s in missing { if !deduped.contains(&s) { diff --git a/src/config/mod.rs b/src/config/mod.rs index 1d603a5..db734e2 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -53,6 +53,7 @@ use crate::config::macros::Macro; use crate::vault::{GlobalVault, Vault, create_vault_password_file, interpolate_secrets}; use anyhow::{Context, Result, anyhow, bail}; use fancy_regex::Regex; +use gman::providers::SupportedProvider; use indexmap::IndexMap; use indoc::formatdoc; use inquire::{Confirm, Select}; @@ -76,6 +77,45 @@ pub const TEMP_SESSION_NAME: &str = "temp"; static PASSWORD_FILE_SECRET_RE: LazyLock = LazyLock::new(|| Regex::new(r#"vault_password_file:.*['|"]?\{\{(.+)}}['|"]?"#).unwrap()); +fn validate_no_template_in_secrets_provider(content: &str) -> Result<()> { + let mut in_block = false; + + for (line_num, line) in content.lines().enumerate() { + if line.starts_with("secrets_provider:") { + if line.contains("{{") { + bail!( + "secret injection cannot be done on the secrets_provider property (line {}): the secrets_provider config is loaded before the vault is initialized", + line_num + 1 + ); + } + in_block = true; + continue; + } + + if in_block { + let trimmed = line.trim_start(); + + if trimmed.is_empty() || trimmed.starts_with('#') { + continue; + } + + if !line.starts_with(char::is_whitespace) { + in_block = false; + continue; + } + + if line.contains("{{") { + bail!( + "secret injection cannot be done within the secrets_provider block (line {}): the secrets_provider config is loaded before the vault is initialized", + line_num + 1 + ); + } + } + } + + Ok(()) +} + /// Monokai Extended const DARK_THEME: &[u8] = include_bytes!("../../assets/monokai-extended.theme.bin"); const LIGHT_THEME: &[u8] = include_bytes!("../../assets/monokai-extended-light.theme.bin"); @@ -149,6 +189,9 @@ pub struct Config { pub wrap_code: bool, pub(super) vault_password_file: Option, + #[serde(default)] + pub(super) secrets_provider: SupportedProvider, + pub function_calling_support: bool, pub mapping_tools: IndexMap, pub enabled_tools: Option, @@ -213,6 +256,7 @@ impl Default for Config { wrap: None, wrap_code: false, vault_password_file: None, + secrets_provider: SupportedProvider::default(), function_calling_support: true, mapping_tools: Default::default(), @@ -441,7 +485,7 @@ impl Config { ..AppConfig::default() }; let vault = Vault::init(&bootstrap_app); - let (parsed_config, missing_secrets) = interpolate_secrets(&content, &vault); + let (parsed_config, missing_secrets) = interpolate_secrets(&content, &vault)?; if !missing_secrets.is_empty() && !info_flag { debug!( "Global config references secrets that are missing from the vault: {missing_secrets:?}" @@ -480,6 +524,7 @@ impl Config { if PASSWORD_FILE_SECRET_RE.is_match(content)? { bail!("secret injection cannot be done on the vault_password_file property"); } + validate_no_template_in_secrets_provider(content)?; let config: Self = serde_yaml::from_str(content) .map_err(|err| { @@ -753,6 +798,62 @@ where mod tests { use super::*; + #[test] + fn validate_secrets_provider_rejects_template_in_field() { + let yaml = "\ +secrets_provider: + type: aws_secrets_manager + aws_profile: '{{AWS_PROFILE}}' + aws_region: us-east-1 +"; + assert!(validate_no_template_in_secrets_provider(yaml).is_err()); + } + + #[test] + fn validate_secrets_provider_rejects_template_in_local_password_file() { + let yaml = "\ +secrets_provider: + type: local + password_file: '{{COYOTE_PASSWORD}}' +"; + assert!(validate_no_template_in_secrets_provider(yaml).is_err()); + } + + #[test] + fn validate_secrets_provider_accepts_clean_yaml() { + let yaml = "\ +secrets_provider: + type: aws_secrets_manager + aws_profile: default + aws_region: us-east-1 +"; + assert!(validate_no_template_in_secrets_provider(yaml).is_ok()); + } + + #[test] + fn validate_secrets_provider_allows_templates_outside_block() { + let yaml = "\ +secrets_provider: + type: local + password_file: ~/.coyote_password +clients: + - type: openai + api_key: '{{OPENAI_KEY}}' +"; + assert!(validate_no_template_in_secrets_provider(yaml).is_ok()); + } + + #[test] + fn validate_secrets_provider_handles_missing_block() { + let yaml = "\ +model: openai:gpt-4 +clients: + - type: openai + api_key: '{{OPENAI_KEY}}' +"; + assert!(validate_no_template_in_secrets_provider(yaml).is_ok()); + } + #[test] fn config_defaults_match_expected() { let cfg = Config::default(); diff --git a/src/config/request_context.rs b/src/config/request_context.rs index 2f28402..dab3939 100644 --- a/src/config/request_context.rs +++ b/src/config/request_context.rs @@ -32,6 +32,7 @@ use crate::utils::{ use crate::graph; use anyhow::{Context, Error, Result, bail}; +use gman::providers::SupportedProvider; #[cfg(test)] use indexmap::IndexMap; use indoc::formatdoc; @@ -904,11 +905,50 @@ impl RequestContext { ("macros_dir", display_path(&paths::macros_dir())), ("functions_dir", display_path(&paths::functions_dir())), ("messages_file", display_path(&self.messages_file())), - ( - "vault_password_file", - display_path(&app.vault_password_file()), - ), ]; + + items.push(("secrets_provider", app.secrets_provider.to_string())); + match &app.secrets_provider { + SupportedProvider::Local { provider_def } => { + let path = provider_def + .password_file + .clone() + .unwrap_or_else(gman::config::Config::local_provider_password_file); + items.push(("vault_password_file", display_path(&path))); + } + SupportedProvider::AwsSecretsManager { provider_def } => { + if let Some(p) = &provider_def.aws_profile { + items.push(("aws_profile", p.clone())); + } + if let Some(r) = &provider_def.aws_region { + items.push(("aws_region", r.clone())); + } + } + SupportedProvider::GcpSecretManager { provider_def } => { + if let Some(id) = &provider_def.gcp_project_id { + items.push(("gcp_project_id", id.clone())); + } + } + SupportedProvider::AzureKeyVault { provider_def } => { + if let Some(n) = &provider_def.vault_name { + items.push(("azure_vault_name", n.clone())); + } + } + SupportedProvider::Gopass { provider_def } => { + if let Some(s) = &provider_def.store { + items.push(("gopass_store", s.clone())); + } + } + SupportedProvider::OnePassword { provider_def } => { + if let Some(v) = &provider_def.vault { + items.push(("op_vault", v.clone())); + } + if let Some(a) = &provider_def.account { + items.push(("op_account", a.clone())); + } + } + } + if let Ok((_, Some(log_path))) = paths::log_config() { items.push(("log_path", display_path(&log_path))); } diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs index 61e956a..2528a52 100644 --- a/src/mcp/mod.rs +++ b/src/mcp/mod.rs @@ -182,7 +182,7 @@ impl McpRegistry { return Ok(registry); } - let (parsed_content, missing_secrets) = interpolate_secrets(&content, vault); + let (parsed_content, missing_secrets) = interpolate_secrets(&content, vault)?; if !missing_secrets.is_empty() { return Err(anyhow!(formatdoc!( diff --git a/src/vault/mod.rs b/src/vault/mod.rs index 7a0c041..f8a4864 100644 --- a/src/vault/mod.rs +++ b/src/vault/mod.rs @@ -42,21 +42,17 @@ impl Vault { } pub fn init(config: &AppConfig) -> Self { - let vault_password_file = config.vault_password_file(); - let mut local_provider = LocalProvider { - password_file: Some(vault_password_file), - git_branch: None, - ..LocalProvider::default() - }; + let mut provider = config.secrets_provider.clone(); - ensure_password_file_initialized(&mut local_provider) - .expect("Failed to initialize password file"); - - Self { - provider: SupportedProvider::Local { - provider_def: local_provider, - }, + if let SupportedProvider::Local { provider_def } = &mut provider { + if provider_def.password_file.is_none() { + provider_def.password_file = Some(config.vault_password_file()); + } + ensure_password_file_initialized(provider_def) + .expect("Failed to initialize password file"); } + + Self { provider } } pub fn local_password_file(&self) -> Result { diff --git a/src/vault/utils.rs b/src/vault/utils.rs index 48626f1..ad2fdf3 100644 --- a/src/vault/utils.rs +++ b/src/vault/utils.rs @@ -8,6 +8,7 @@ use indoc::formatdoc; use inquire::validator::Validation; use inquire::{Confirm, Password, PasswordDisplayMode, Text, min_length, required}; use std::path::PathBuf; +use gman::SecretError; pub fn ensure_password_file_initialized(local_provider: &mut LocalProvider) -> Result<()> { let vault_password_file = local_provider @@ -172,24 +173,34 @@ pub fn create_vault_password_file(vault: &mut Vault) -> Result<()> { Ok(()) } -pub fn interpolate_secrets(content: &str, vault: &Vault) -> (String, Vec) { +pub fn interpolate_secrets(content: &str, vault: &Vault) -> Result<(String, Vec)> { let mut missing_secrets = vec![]; + let mut fatal_error: Option = None; + let parsed_content: String = content .lines() .map(|line| { - if line.trim_start().starts_with('#') { + if line.trim_start().starts_with('#') || fatal_error.is_some() { return line.to_string(); } SECRET_RE .replace_all(line, |caps: &fancy_regex::Captures<'_>| { - let secret = vault.get_secret(caps[1].trim(), false); - match secret { + let name = caps[1].trim(); + match vault.get_secret(name, false) { Ok(s) => s, - Err(_) => { - missing_secrets.push(caps[1].to_string()); - "".to_string() - } + Err(e) => match e.downcast_ref::() { + Some(SecretError::NotFound { .. }) => { + missing_secrets.push(name.to_string()); + String::new() + } + _ => { + fatal_error = Some(anyhow!( + "Failed to fetch secret '{name}' from vault: {e}" + )); + String::new() + } + }, } }) .to_string() @@ -197,5 +208,9 @@ pub fn interpolate_secrets(content: &str, vault: &Vault) -> (String, Vec .collect::>() .join("\n"); - (parsed_content, missing_secrets) + if let Some(err) = fatal_error { + return Err(err); + } + + Ok((parsed_content, missing_secrets)) } From 29af20f316bd25977623975a3ffa57be7fbe02db Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Tue, 2 Jun 2026 14:52:36 -0600 Subject: [PATCH 39/85] feat: vault_password_file or nothing at all is shorthand for just using the local gman provider for secret management --- src/config/app_config.rs | 94 +++++++++++------------------------ src/config/mod.rs | 4 +- src/config/request_context.rs | 80 +++++++++++++++-------------- src/vault/mod.rs | 13 +++-- 4 files changed, 83 insertions(+), 108 deletions(-) diff --git a/src/config/app_config.rs b/src/config/app_config.rs index 2eb9b37..07b5669 100644 --- a/src/config/app_config.rs +++ b/src/config/app_config.rs @@ -30,7 +30,7 @@ pub struct AppConfig { pub wrap: Option, pub wrap_code: bool, pub(crate) vault_password_file: Option, - pub(crate) secrets_provider: SupportedProvider, + pub(crate) secrets_provider: Option, pub function_calling_support: bool, pub mapping_tools: IndexMap, @@ -96,7 +96,7 @@ impl Default for AppConfig { wrap: None, wrap_code: false, vault_password_file: None, - secrets_provider: SupportedProvider::default(), + secrets_provider: None, function_calling_support: true, mapping_tools: Default::default(), @@ -212,7 +212,6 @@ impl AppConfig { clients: config.clients, }; - app_config.migrate_legacy_password_file(); app_config.load_envs(); if let Some(wrap) = app_config.wrap.clone() { app_config.set_wrap(&wrap)?; @@ -223,17 +222,6 @@ impl AppConfig { Ok(app_config) } - fn migrate_legacy_password_file(&mut self) { - let Some(legacy) = self.vault_password_file.take() else { - return; - }; - if let SupportedProvider::Local { provider_def } = &mut self.secrets_provider - && provider_def.password_file.is_none() - { - provider_def.password_file = Some(legacy); - } - } - pub fn resolve_model(&mut self) -> Result<()> { if self.model_id.is_empty() { let models = list_models(self, crate::client::ModelType::Chat); @@ -790,66 +778,40 @@ mod tests { } #[test] - fn migrate_legacy_copies_into_empty_local_provider() { - let mut app = AppConfig { - vault_password_file: Some(PathBuf::from("/tmp/test-coyote-password")), - ..AppConfig::default() - }; - - app.migrate_legacy_password_file(); - - match &app.secrets_provider { - SupportedProvider::Local { provider_def } => { - assert_eq!( - provider_def.password_file, - Some(PathBuf::from("/tmp/test-coyote-password")) - ); - } - _ => panic!("expected Local provider"), - } - assert!(app.vault_password_file.is_none()); + fn default_secrets_provider_is_none() { + let app = AppConfig::default(); + assert!(app.secrets_provider.is_none()); } #[test] - fn migrate_legacy_does_not_overwrite_existing_password_file() { - let mut app = AppConfig { - vault_password_file: Some(PathBuf::from("/tmp/legacy")), - ..AppConfig::default() - }; - if let SupportedProvider::Local { provider_def } = &mut app.secrets_provider { - provider_def.password_file = Some(PathBuf::from("/tmp/explicit")); - } - - app.migrate_legacy_password_file(); - - match &app.secrets_provider { - SupportedProvider::Local { provider_def } => { - assert_eq!( - provider_def.password_file, - Some(PathBuf::from("/tmp/explicit")) - ); - } - _ => panic!("expected Local provider"), - } - assert!(app.vault_password_file.is_none()); - } - - #[test] - fn migrate_legacy_noop_for_non_local_provider() { - let mut app = AppConfig { - vault_password_file: Some(PathBuf::from("/tmp/orphaned")), - secrets_provider: SupportedProvider::Gopass { + fn secrets_provider_can_hold_non_local_variant() { + let app = AppConfig { + secrets_provider: Some(SupportedProvider::Gopass { provider_def: Default::default(), - }, + }), ..AppConfig::default() }; - - app.migrate_legacy_password_file(); - - assert!(app.vault_password_file.is_none()); assert!(matches!( app.secrets_provider, - SupportedProvider::Gopass { .. } + Some(SupportedProvider::Gopass { .. }) + )); + } + + #[test] + fn from_config_copies_secrets_provider() { + let cfg = Config { + model_id: "test-model".to_string(), + clients: vec![ClientConfig::default()], + secrets_provider: Some(SupportedProvider::Gopass { + provider_def: Default::default(), + }), + ..Config::default() + }; + + let app = AppConfig::from_config(cfg).unwrap(); + assert!(matches!( + app.secrets_provider, + Some(SupportedProvider::Gopass { .. }) )); } } diff --git a/src/config/mod.rs b/src/config/mod.rs index db734e2..ca01f65 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -190,7 +190,7 @@ pub struct Config { pub(super) vault_password_file: Option, #[serde(default)] - pub(super) secrets_provider: SupportedProvider, + pub(super) secrets_provider: Option, pub function_calling_support: bool, pub mapping_tools: IndexMap, @@ -256,7 +256,7 @@ impl Default for Config { wrap: None, wrap_code: false, vault_password_file: None, - secrets_provider: SupportedProvider::default(), + secrets_provider: None, function_calling_support: true, mapping_tools: Default::default(), diff --git a/src/config/request_context.rs b/src/config/request_context.rs index dab3939..cf1f68b 100644 --- a/src/config/request_context.rs +++ b/src/config/request_context.rs @@ -907,44 +907,52 @@ impl RequestContext { ("messages_file", display_path(&self.messages_file())), ]; - items.push(("secrets_provider", app.secrets_provider.to_string())); match &app.secrets_provider { - SupportedProvider::Local { provider_def } => { - let path = provider_def - .password_file - .clone() - .unwrap_or_else(gman::config::Config::local_provider_password_file); - items.push(("vault_password_file", display_path(&path))); + None => { + items.push(("secrets_provider", "local".to_string())); + items.push(("vault_password_file", display_path(&app.vault_password_file()))); } - SupportedProvider::AwsSecretsManager { provider_def } => { - if let Some(p) = &provider_def.aws_profile { - items.push(("aws_profile", p.clone())); - } - if let Some(r) = &provider_def.aws_region { - items.push(("aws_region", r.clone())); - } - } - SupportedProvider::GcpSecretManager { provider_def } => { - if let Some(id) = &provider_def.gcp_project_id { - items.push(("gcp_project_id", id.clone())); - } - } - SupportedProvider::AzureKeyVault { provider_def } => { - if let Some(n) = &provider_def.vault_name { - items.push(("azure_vault_name", n.clone())); - } - } - SupportedProvider::Gopass { provider_def } => { - if let Some(s) = &provider_def.store { - items.push(("gopass_store", s.clone())); - } - } - SupportedProvider::OnePassword { provider_def } => { - if let Some(v) = &provider_def.vault { - items.push(("op_vault", v.clone())); - } - if let Some(a) = &provider_def.account { - items.push(("op_account", a.clone())); + Some(provider) => { + items.push(("secrets_provider", provider.to_string())); + match provider { + SupportedProvider::Local { provider_def } => { + let path = provider_def + .password_file + .clone() + .unwrap_or_else(gman::config::Config::local_provider_password_file); + items.push(("vault_password_file", display_path(&path))); + } + SupportedProvider::AwsSecretsManager { provider_def } => { + if let Some(p) = &provider_def.aws_profile { + items.push(("aws_profile", p.clone())); + } + if let Some(r) = &provider_def.aws_region { + items.push(("aws_region", r.clone())); + } + } + SupportedProvider::GcpSecretManager { provider_def } => { + if let Some(id) = &provider_def.gcp_project_id { + items.push(("gcp_project_id", id.clone())); + } + } + SupportedProvider::AzureKeyVault { provider_def } => { + if let Some(n) = &provider_def.vault_name { + items.push(("azure_vault_name", n.clone())); + } + } + SupportedProvider::Gopass { provider_def } => { + if let Some(s) = &provider_def.store { + items.push(("gopass_store", s.clone())); + } + } + SupportedProvider::OnePassword { provider_def } => { + if let Some(v) = &provider_def.vault { + items.push(("op_vault", v.clone())); + } + if let Some(a) = &provider_def.account { + items.push(("op_account", a.clone())); + } + } } } } diff --git a/src/vault/mod.rs b/src/vault/mod.rs index f8a4864..dc29134 100644 --- a/src/vault/mod.rs +++ b/src/vault/mod.rs @@ -42,12 +42,17 @@ impl Vault { } pub fn init(config: &AppConfig) -> Self { - let mut provider = config.secrets_provider.clone(); + let mut provider = match &config.secrets_provider { + Some(p) => p.clone(), + None => SupportedProvider::Local { + provider_def: LocalProvider { + password_file: Some(config.vault_password_file()), + ..LocalProvider::default() + }, + }, + }; if let SupportedProvider::Local { provider_def } = &mut provider { - if provider_def.password_file.is_none() { - provider_def.password_file = Some(config.vault_password_file()); - } ensure_password_file_initialized(provider_def) .expect("Failed to initialize password file"); } From baa44ec5cb85dae65016d8c33a5368670f1b57fa Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Wed, 3 Jun 2026 08:08:06 -0600 Subject: [PATCH 40/85] feat: created new first-time run wizard for secrets provider --- src/config/mod.rs | 23 +++++- src/vault/mod.rs | 1 + src/vault/utils.rs | 182 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 202 insertions(+), 4 deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index ca01f65..c623479 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -50,7 +50,9 @@ use crate::utils::*; pub use macros::macro_execute; use crate::config::macros::Macro; -use crate::vault::{GlobalVault, Vault, create_vault_password_file, interpolate_secrets}; +use crate::vault::{ + GlobalVault, Vault, create_vault_password_file, interpolate_secrets, prompt_provider_choice, +}; use anyhow::{Context, Result, anyhow, bail}; use fancy_regex::Regex; use gman::providers::SupportedProvider; @@ -677,7 +679,13 @@ pub async fn create_config_file(config_path: &Path) -> Result<()> { process::exit(0); } - let mut vault = Vault::init_bare(); + let provider_choice = prompt_provider_choice()?; + let mut vault = match &provider_choice { + None => Vault::init_bare(), + Some(provider) => Vault { + provider: provider.clone(), + }, + }; create_vault_password_file(&mut vault)?; let client = Select::new("API Provider (required):", list_client_types()).prompt()?; @@ -685,7 +693,16 @@ pub async fn create_config_file(config_path: &Path) -> Result<()> { let mut config = json!({}); let (model, clients_config) = create_client_config(client, &vault).await?; config["model"] = model.into(); - config["vault_password_file"] = vault.local_password_file()?.display().to_string().into(); + match &provider_choice { + None => { + config["vault_password_file"] = + vault.local_password_file()?.display().to_string().into(); + } + Some(provider) => { + config["secrets_provider"] = serde_json::to_value(provider) + .with_context(|| "failed to serialize secrets_provider config")?; + } + } config["stream"] = json!(true); config["save"] = json!(true); config["keybindings"] = json!("vi"); diff --git a/src/vault/mod.rs b/src/vault/mod.rs index dc29134..272841e 100644 --- a/src/vault/mod.rs +++ b/src/vault/mod.rs @@ -3,6 +3,7 @@ mod utils; use std::path::PathBuf; pub use utils::create_vault_password_file; pub use utils::interpolate_secrets; +pub use utils::prompt_provider_choice; use crate::cli::Cli; use crate::config::AppConfig; diff --git a/src/vault/utils.rs b/src/vault/utils.rs index ad2fdf3..5e0e84d 100644 --- a/src/vault/utils.rs +++ b/src/vault/utils.rs @@ -3,11 +3,17 @@ use crate::vault::{SECRET_RE, Vault}; use anyhow::Result; use anyhow::anyhow; use gman::providers::SupportedProvider; +use gman::providers::aws_secrets_manager::AwsSecretsManagerProvider; +use gman::providers::azure_key_vault::AzureKeyVaultProvider; +use gman::providers::gcp_secret_manager::GcpSecretManagerProvider; +use gman::providers::gopass::GopassProvider; use gman::providers::local::LocalProvider; +use gman::providers::one_password::OnePasswordProvider; use indoc::formatdoc; use inquire::validator::Validation; -use inquire::{Confirm, Password, PasswordDisplayMode, Text, min_length, required}; +use inquire::{Confirm, Password, PasswordDisplayMode, Select, Text, min_length, required}; use std::path::PathBuf; +use std::process::Command; use gman::SecretError; pub fn ensure_password_file_initialized(local_provider: &mut LocalProvider) -> Result<()> { @@ -173,6 +179,180 @@ pub fn create_vault_password_file(vault: &mut Vault) -> Result<()> { Ok(()) } +pub fn prompt_provider_choice() -> Result> { + let choices = vec![ + "local - encrypted file on this machine", + "aws_secrets_manager - AWS Secrets Manager", + "gcp_secret_manager - Google Cloud Secret Manager", + "azure_key_vault - Azure Key Vault", + "gopass - gopass password manager (requires the `gopass` CLI)", + "one_password - 1Password (requires the `op` CLI)", + ]; + let choice = Select::new("Which secrets provider would you like to use?", choices) + .with_starting_cursor(0) + .prompt()?; + + if choice.starts_with("local") { + return Ok(None); + } + + let provider = if choice.starts_with("aws_secrets_manager") { + prompt_aws_provider()? + } else if choice.starts_with("gcp_secret_manager") { + prompt_gcp_provider()? + } else if choice.starts_with("azure_key_vault") { + prompt_azure_provider()? + } else if choice.starts_with("gopass") { + prompt_gopass_provider()? + } else if choice.starts_with("one_password") { + prompt_one_password_provider()? + } else { + return Err(anyhow!("unexpected provider choice: {choice}")); + }; + + Ok(Some(provider)) +} + +fn prompt_aws_provider() -> Result { + let aws_profile = Text::new("AWS profile name:") + .with_default("default") + .with_validator(required!()) + .with_help_message("From your ~/.aws/config and ~/.aws/credentials") + .prompt()?; + let aws_region = Text::new("AWS region:") + .with_default("us-east-1") + .with_validator(required!()) + .with_help_message("Where your secrets live (e.g. us-east-1, eu-west-2)") + .prompt()?; + + advisory_preflight( + "AWS", + "aws", + &["sts", "get-caller-identity", "--profile", &aws_profile], + ); + + Ok(SupportedProvider::AwsSecretsManager { + provider_def: AwsSecretsManagerProvider { + aws_profile: Some(aws_profile), + aws_region: Some(aws_region), + }, + }) +} + +fn prompt_gcp_provider() -> Result { + let gcp_project_id = Text::new("GCP project ID:") + .with_validator(required!()) + .with_help_message("The project that hosts your Secret Manager secrets") + .prompt()?; + + advisory_preflight( + "GCP", + "gcloud", + &["auth", "application-default", "print-access-token"], + ); + + Ok(SupportedProvider::GcpSecretManager { + provider_def: GcpSecretManagerProvider { + gcp_project_id: Some(gcp_project_id), + }, + }) +} + +fn prompt_azure_provider() -> Result { + let vault_name = Text::new("Azure Key Vault name:") + .with_validator(required!()) + .with_help_message("Just the vault name; the https endpoint is auto-derived") + .prompt()?; + + advisory_preflight("Azure", "az", &["account", "show"]); + + Ok(SupportedProvider::AzureKeyVault { + provider_def: AzureKeyVaultProvider { + vault_name: Some(vault_name), + }, + }) +} + +fn prompt_gopass_provider() -> Result { + let store_raw = Text::new("gopass store (leave blank for default):").prompt()?; + let store = match store_raw.trim() { + "" => None, + s => Some(s.to_string()), + }; + + required_cli_preflight("gopass", "gopass", "https://www.gopass.pw/"); + + Ok(SupportedProvider::Gopass { + provider_def: GopassProvider { store }, + }) +} + +fn prompt_one_password_provider() -> Result { + let vault_raw = Text::new("1Password vault (leave blank for default):").prompt()?; + let vault = match vault_raw.trim() { + "" => None, + s => Some(s.to_string()), + }; + + let account_raw = Text::new("1Password account (leave blank for default):").prompt()?; + let account = match account_raw.trim() { + "" => None, + s => Some(s.to_string()), + }; + + required_cli_preflight( + "1Password CLI", + "op", + "https://developer.1password.com/docs/cli/", + ); + + Ok(SupportedProvider::OnePassword { + provider_def: OnePasswordProvider { vault, account }, + }) +} + +fn advisory_preflight(label: &str, cli: &str, args: &[&str]) { + match Command::new(cli).args(args).output() { + Ok(out) if out.status.success() => { + println!("✓ {label} authentication check succeeded."); + } + Ok(out) => { + let stderr = String::from_utf8_lossy(&out.stderr); + eprintln!("⚠️ {label} preflight returned non-zero:"); + if !stderr.trim().is_empty() { + eprintln!(" {}", stderr.trim()); + } + eprintln!( + " Setup will continue. Fix authentication before using --add-secret etc." + ); + } + Err(_) => { + eprintln!( + "⚠️ `{cli}` CLI not found on PATH. Coyote will still try the {label} SDK directly via standard credentials (env vars, instance metadata, service-account JSON, etc.)." + ); + } + } +} + +fn required_cli_preflight(label: &str, cli: &str, install_url: &str) { + match Command::new(cli).arg("--version").output() { + Ok(out) if out.status.success() => { + println!("✓ {label} is installed and reachable."); + } + Ok(_) => { + eprintln!( + "⚠️ `{cli} --version` returned non-zero. Your {label} install may be broken — verify before using the vault." + ); + } + Err(_) => { + eprintln!("⚠️ `{cli}` not found on PATH."); + eprintln!( + " The {label} secrets provider requires it. Install from {install_url} before running --add-secret etc." + ); + } + } +} + pub fn interpolate_secrets(content: &str, vault: &Vault) -> Result<(String, Vec)> { let mut missing_secrets = vec![]; let mut fatal_error: Option = None; From bf97f2261d4f8174c6d4578c6b7f26b56c5f7279 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Wed, 3 Jun 2026 08:30:47 -0600 Subject: [PATCH 41/85] feat: added round trip validation for vault providers to ensure permissions and authentication --- assets/functions/mcp.json | 2 +- src/config/mod.rs | 3 ++ src/vault/mod.rs | 59 ++++++++++++++++++++++++++++++++++++++- src/vault/utils.rs | 11 ++++++++ 4 files changed, 73 insertions(+), 2 deletions(-) diff --git a/assets/functions/mcp.json b/assets/functions/mcp.json index 71cb26e..b3e801b 100644 --- a/assets/functions/mcp.json +++ b/assets/functions/mcp.json @@ -18,6 +18,6 @@ "type": "stdio", "command": "uvx", "args": ["duckduckgo-mcp-server"] - }, + } } } diff --git a/src/config/mod.rs b/src/config/mod.rs index c623479..70efb43 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -687,6 +687,9 @@ pub async fn create_config_file(config_path: &Path) -> Result<()> { }, }; create_vault_password_file(&mut vault)?; + if provider_choice.is_some() { + vault.validate_round_trip()?; + } let client = Select::new("API Provider (required):", list_client_types()).prompt()?; diff --git a/src/vault/mod.rs b/src/vault/mod.rs index 272841e..9cebd75 100644 --- a/src/vault/mod.rs +++ b/src/vault/mod.rs @@ -8,7 +8,7 @@ pub use utils::prompt_provider_choice; use crate::cli::Cli; use crate::config::AppConfig; use crate::vault::utils::ensure_password_file_initialized; -use anyhow::{Context, Result, anyhow}; +use anyhow::{Context, Result, anyhow, bail}; use fancy_regex::Regex; use gman::providers::SecretProvider; use gman::providers::SupportedProvider; @@ -157,6 +157,63 @@ impl Vault { Ok(secrets) } + pub fn auth_hint(&self) -> Option<&'static str> { + match &self.provider { + SupportedProvider::AwsSecretsManager { .. } => Some( + "Try `aws sso login` (for SSO setups) or `aws configure` (for static keys), then retry.", + ), + SupportedProvider::GcpSecretManager { .. } => Some( + "Try `gcloud auth application-default login`, then retry.", + ), + SupportedProvider::AzureKeyVault { .. } => Some( + "Try `az login`, then retry.", + ), + SupportedProvider::Gopass { .. } => Some( + "Make sure `gopass init` has been run and `gopass` is on your PATH.", + ), + SupportedProvider::OnePassword { .. } => Some( + "Try `op signin`, then retry.", + ), + SupportedProvider::Local { .. } => None, + } + } + + pub fn validate_round_trip(&self) -> Result<()> { + const PROBE_KEY: &str = "__coyote_setup_probe__"; + const PROBE_VALUE: &str = "ok"; + + let h = Handle::current(); + let result: Result<()> = tokio::task::block_in_place(|| { + h.block_on(async { + self.provider_ref() + .set_secret(PROBE_KEY, PROBE_VALUE) + .await + .with_context(|| "vault write probe failed")?; + let got = self + .provider_ref() + .get_secret(PROBE_KEY) + .await + .with_context(|| "vault read probe failed")?; + let _ = self.provider_ref().delete_secret(PROBE_KEY).await; + if got != PROBE_VALUE { + bail!("vault read probe returned an unexpected value"); + } + Ok(()) + }) + }); + + result.with_context(|| { + let base = "Vault validation failed. Check that your credentials have permission to create, read, and delete secrets in the configured backend."; + match self.auth_hint() { + Some(hint) => format!("{base}\n\nHint: {hint}"), + None => base.to_string(), + } + })?; + + println!("✓ Vault validation succeeded."); + Ok(()) + } + pub fn handle_vault_flags(cli: Cli, vault: &Vault) -> Result<()> { if let Some(secret_name) = cli.add_secret { vault.add_secret(&secret_name)?; diff --git a/src/vault/utils.rs b/src/vault/utils.rs index 5e0e84d..9445666 100644 --- a/src/vault/utils.rs +++ b/src/vault/utils.rs @@ -374,6 +374,17 @@ pub fn interpolate_secrets(content: &str, vault: &Vault) -> Result<(String, Vec< missing_secrets.push(name.to_string()); String::new() } + Some(SecretError::AuthFailed { .. }) => { + let base = format!( + "Failed to fetch secret '{name}' from vault: {e}" + ); + let msg = match vault.auth_hint() { + Some(hint) => format!("{base}\n\nHint: {hint}"), + None => base, + }; + fatal_error = Some(anyhow!("{msg}")); + String::new() + } _ => { fatal_error = Some(anyhow!( "Failed to fetch secret '{name}' from vault: {e}" From 863b28f01ebcc3f41e92414353321fefae9a116c Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Wed, 3 Jun 2026 08:36:03 -0600 Subject: [PATCH 42/85] docs: Updated configuration example to include new secret provider support --- config.example.yaml | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/config.example.yaml b/config.example.yaml index b00b5b6..28b71c9 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -34,8 +34,48 @@ right_prompt: '{color.purple}{?session {?consume_tokens {consume_tokens}({consume_percent}%)}{!consume_tokens {consume_tokens}}}{color.reset}' # ---- Vault ---- -# See the [Vault documentation](https://github.com/Dark-Alex-17/coyote/wiki/Vault) for more information on the Coyote vault +# See the [Vault documentation](https://github.com/Dark-Alex-17/coyote/wiki/Vault) for more information on the Coyote vault. +# +# The secrets_provider tells Coyote where to read and write secrets referenced via {{SECRET_NAME}} syntax. +# +# Shorthand: set vault_password_file to enable the local provider with that password file. vault_password_file: null # Path to a file containing the password for the Coyote vault (cannot be a secret template) +# +# Explicit: set secrets_provider to one of the supported types below. When secrets_provider is set, +# vault_password_file is ignored. Note: secrets_provider itself cannot use {{SECRET}} template syntax. +# The vault must be initialized before any secrets can be resolved. +# +# Local (same as the shorthand above): +# secrets_provider: +# type: local +# password_file: ~/.coyote_password +# +# AWS Secrets Manager (requires an authenticated AWS CLI; see `aws sso login` or `aws configure`): +# secrets_provider: +# type: aws_secrets_manager +# aws_profile: default +# aws_region: us-east-1 +# +# GCP Secret Manager (requires `gcloud auth application-default login`): +# secrets_provider: +# type: gcp_secret_manager +# gcp_project_id: my-project-id +# +# Azure Key Vault (requires `az login`): +# secrets_provider: +# type: azure_key_vault +# vault_name: my-vault-name +# +# gopass (requires the `gopass` CLI to be installed and initialized): +# secrets_provider: +# type: gopass +# store: my-store # Optional; omit to use the default store +# +# 1Password (requires the `op` CLI to be installed and signed in via `op signin`): +# secrets_provider: +# type: one_password +# vault: Production # Optional; omit to use the default vault +# account: my.1password.com # Optional; omit to use the default account # ---- Function Calling ---- # See the [Tools documentation](https://github.com/Dark-Alex-17/coyote/wiki/Tools) for more details From 4e616fe7c3e924daab9a6822ebe89b552a49c3df Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Wed, 3 Jun 2026 10:20:39 -0600 Subject: [PATCH 43/85] fix: updated execute_command to not mangle heredocs and also added explicit instructions to the coder and sisyphus agents to use fs_write and fs_patch over execute_command when writing files --- assets/agents/coder/graph.yaml | 10 ++++++++-- assets/agents/sisyphus/config.yaml | 22 ++++++++++++++++++++++ assets/functions/tools/execute_command.sh | 9 +++++++-- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/assets/agents/coder/graph.yaml b/assets/agents/coder/graph.yaml index 741f8c7..3fb4f60 100644 --- a/assets/agents/coder/graph.yaml +++ b/assets/agents/coder/graph.yaml @@ -177,8 +177,14 @@ nodes: 1. Use `fs_patch` for surgical edits to existing files. 2. Use `fs_write` for new files or full rewrites. - 3. NEVER output code to chat. Always use tools. - 4. ALWAYS pass ABSOLUTE paths to fs_write and fs_patch. Relative + 3. NEVER write files via `execute_command`. Do not use `cat >`, + `cat >>`, `echo >`, `printf >`, `tee`, heredocs (`< file`, `cat >> file`, `tee` + - `echo >`, `printf >` + - Heredocs (`<