diff --git a/assets/agents/explore/config.yaml b/assets/agents/explore/config.yaml index 813125e..ce29519 100644 --- a/assets/agents/explore/config.yaml +++ b/assets/agents/explore/config.yaml @@ -1,6 +1,6 @@ name: explore description: Fast codebase exploration agent - finds patterns, structures, and relevant files. Designed to be fanned out 2-5 in parallel by orchestrators. -version: 3.0.0 +version: 3.1.0 skills_enabled: true enabled_skills: @@ -19,6 +19,7 @@ global_tools: - fs_grep.sh - fs_glob.sh - fs_ls.sh + - ast_grep.sh instructions: | You are a codebase explorer. Your job: Search, find, report. Nothing else. @@ -49,6 +50,8 @@ instructions: | 4. **Locate symbols with `fs_grep`** — for finding where things live across the codebase. `fs_grep --pattern "fn handle_request" --include "*.rs"` is faster than reading files. + 4b. **Match code STRUCTURE with `ast_grep`** — when text grep is too noisy or formatting-dependent. It matches syntax trees: `ast_grep --pattern '$X.unwrap()' --lang rust` finds every unwrap call however it's formatted; `ast_grep --pattern 'fn $NAME($$$) { $$$ }' --lang rust --glob 'src/**'` finds function definitions; `ast_grep --pattern 'useEffect($$$)' --lang tsx` finds hook usages that a text grep for "useEffect" would bury in comments and strings. Meta-variables: `$NAME` = one AST node, `$$$` = zero or more. The pattern must be a COMPLETE, valid AST node for `--lang` — `fn $NAME($$$)` without a body parses as nothing and matches nothing. Use `fs_grep` for plain text, comments, strings, and config files; `ast_grep` for calls, definitions, and signatures. If ast-grep isn't installed the tool says so — fall back to fs_grep. + 5. **Read targeted sections with `fs_read --offset/--limit`** — `fs_read --path "src/main.rs" --offset 50 --limit 30` reads lines 50-79 only. `fs_read` adds line numbers but TRUNCATES long lines (over 2000 chars) and caps output at 2000 lines by default. 6. 
**Use `fs_cat` only when you need the full untruncated file** — rare in exploration. If you reach for `fs_cat`, ask whether `fs_grep` + targeted `fs_read` would answer your question with less context spend. @@ -59,6 +62,7 @@ instructions: | - `fs_grep --pattern "struct User" --include "*.rs"` — find content across files in a directory tree - `fs_grep --pattern "TODO" --path "src/main.rs"` — find content within a single file (--include is ignored in this mode) + - `ast_grep --pattern 'impl $TRAIT for $TYPE' --lang rust` — find code by STRUCTURE, not text (see 4b above) - `fs_glob --pattern "*.rs" --path src/` — find files by name pattern - `fs_read --path "src/main.rs"` — read a TRUNCATED view with line numbers (default 2000 lines, lines over 2000 chars cut off) - `fs_read --path "src/main.rs" --offset 100 --limit 50` — read lines 100-149 only (line numbers; truncation rules still apply) diff --git a/assets/functions/tools/ast_grep.sh b/assets/functions/tools/ast_grep.sh new file mode 100755 index 0000000..f80f0cf --- /dev/null +++ b/assets/functions/tools/ast_grep.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash +set -e + +# @describe Structural code search using AST patterns (ast-grep). Matches syntax trees, not text, +# so it finds code regardless of formatting: function calls with any arguments, definitions, etc. +# Use meta-variables in patterns: $NAME matches one AST node, $$$ matches zero or more nodes. +# Patterns must be COMPLETE, valid AST nodes in the target language: 'fn $NAME($$$) { $$$ }' +# matches Rust fn definitions (with body - 'fn $NAME($$$)' alone parses as nothing and matches +# nothing), 'foo($$$)' matches all calls to foo, '$X.unwrap()' matches all unwrap calls. +# Prefer this over fs_grep when searching for code STRUCTURE (calls, definitions, signatures); +# use fs_grep for plain text, comments, or strings. + +# @option --pattern! 
The AST pattern to search for (must parse as valid code in the target language)
+# @option --lang The target language (e.g. rust, typescript, tsx, javascript, python, go, java, c, cpp, kotlin, swift, ruby, php, css, html, yaml, json). Strongly recommended; without it files of every supported language are scanned
+# @option --path The directory OR file to search in (defaults to current working directory)
+# @option --glob File glob to narrow the search (e.g. "src/**/*.rs", "!**/tests/**")
+
+# @env LLM_OUTPUT=/dev/stdout The output path
+
+MAX_RESULTS=100         # cap on emitted lines
+MAX_OUTPUT_BYTES=32768  # cap on emitted bytes, applied after the line cap
+
+resolve_binary() {
+    # Echo the ast-grep executable found on PATH (`sg` only if it identifies as ast-grep); return 1 if none.
+    if command -v ast-grep &>/dev/null; then
+        echo "ast-grep"
+        return 0
+    fi
+    if command -v sg &>/dev/null && sg --version 2>/dev/null | grep -qi 'ast-grep'; then
+        echo "sg"
+        return 0
+    fi
+    return 1
+}
+
+main() {
+    # Append the capped matches, or a diagnostic, to $LLM_OUTPUT; returns 1 when --path does not exist.
+    # shellcheck disable=SC2154
+    local pattern="$argc_pattern"
+    local lang="${argc_lang:-}"
+    local search_path="${argc_path:-.}"
+    local glob="${argc_glob:-}"
+
+    local bin
+    if ! bin=$(resolve_binary); then
+        printf 'ast-grep is not installed. Fall back to fs_grep for this search.\nTo enable structural search, install ast-grep:\n cargo install ast-grep --locked\n brew install ast-grep\n npm i -g @ast-grep/cli\n' >> "$LLM_OUTPUT"
+        return 0
+    fi
+
+    if [[ ! -e "$search_path" ]]; then
+        echo "Error: path not found: $search_path" >> "$LLM_OUTPUT"
+        return 1
+    fi
+
+    # --pattern=VALUE keeps a pattern that begins with '-' (e.g. '-$X') from being parsed as a flag.
+    local args=(run --pattern="$pattern" --color never --heading never)
+    [[ -n "$lang" ]] && args+=(--lang "$lang")
+    [[ -n "$glob" ]] && args+=(--globs "$glob")
+    args+=("$search_path")
+
+    local output exit_code=0
+    output=$("$bin" "${args[@]}" 2>&1) || exit_code=$?
+    # Check for failure before the empty-output case so an error is never reported as "no matches".
+    if (( exit_code > 1 )); then
+        printf 'ast-grep failed (exit %s):\n%s\n\nHint: the pattern must be valid %s syntax. Meta-variables: $NAME (one node), $$$ (zero or more).\n' "$exit_code" "$output" "${lang:-source}" >> "$LLM_OUTPUT"
+        return 0
+    fi
+    if [[ -z "$output" ]]; then
+        echo "No structural matches found for: $pattern" >> "$LLM_OUTPUT"
+        return 0
+    fi
+
+    local total shown
+    total=$(wc -l <<< "$output")
+    shown=$(head -n "$MAX_RESULTS" <<< "$output" | head -c "$MAX_OUTPUT_BYTES")
+    echo "$shown" >> "$LLM_OUTPUT"
+    # Either cap may have clipped the listing; report the number of lines actually shown.
+    if [[ "$shown" != "$output" ]]; then
+        printf '\n(Showing %s of %s matching lines. Narrow with --glob, --lang, or a more specific pattern.)\n' "$(wc -l <<< "$shown")" "$total" >> "$LLM_OUTPUT"
+    fi
+}
diff --git a/src/config/mod.rs b/src/config/mod.rs
index 722776e..4d9eb06 100644
--- a/src/config/mod.rs
+++ b/src/config/mod.rs
@@ -151,7 +151,7 @@ const SBX_VAULT_MIXINS_DIR_NAME: &str = "sbx-vault-mixins";
 const SBX_MIXIN_KITS_DIR_NAME: &str = "sbx-mixin-kits";
 const GIT_DIR_NAME: &str = ".git";
 const GITIGNORE_FILE_NAME: &str = ".gitignore";
-const DEFAULT_VISIBLE_TOOLS: [&str; 18] = [
+const DEFAULT_VISIBLE_TOOLS: [&str; 19] = [
     "execute_command.sh",
     "execute_py_code.py",
     "execute_sql_code.sh",
@@ -165,6 +165,7 @@ const DEFAULT_VISIBLE_TOOLS: [&str; 18] = [
     "fs_read.sh",
     "fs_rm.sh",
     "fs_write.sh",
+    "ast_grep.sh",
     "get_current_time.sh",
     "get_current_weather.sh",
     "search_wikipedia.sh",
diff --git a/src/function/mod.rs b/src/function/mod.rs
index df47e52..4fe1ce5 100644
--- a/src/function/mod.rs
+++ b/src/function/mod.rs
@@ -1691,6 +1691,33 @@ mod tests {
         assert!(f.declarations().is_empty());
     }
 
+    #[test]
+    fn bundled_bash_tools_generate_declarations() {
+        let tools_dir =
+            std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("assets/functions/tools");
+        let mut checked = Vec::new();
+        for entry in std::fs::read_dir(&tools_dir).unwrap() {
+            let path = entry.unwrap().path();
+            if path.extension().and_then(OsStr::to_str) != Some("sh") {
+                continue;
+            }
+            let name = path.file_stem().unwrap().to_string_lossy().to_string();
+            let declarations = Functions::generate_declarations(&path)
+                .unwrap_or_else(|e| panic!("bundled tool '{name}' failed to parse: {e}"));
+            assert!(
+                !declarations.is_empty(),
+                "bundled tool '{name}' produced no function declaration"
+            );
+            checked.push(name);
+        }
+        for expected in ["fs_grep", "ast_grep", "execute_command"] {
+            assert!(
+                checked.iter().any(|n| n == expected),
+                "expected bundled tool '{expected}' to be checked; found {checked:?}"
+            );
+        }
+    }
+
     #[test]
     fn functions_append_todo_adds_declarations() {
         let mut f = Functions::default();