Baseline project
This commit is contained in:
@@ -0,0 +1,88 @@
|
||||
use anyhow::Result;
|
||||
use crossterm::event::{self, Event, KeyCode, KeyModifiers};
|
||||
use std::{
|
||||
sync::{
|
||||
atomic::{AtomicBool, Ordering},
|
||||
Arc,
|
||||
},
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
pub type AbortSignal = Arc<AbortSignalInner>;
|
||||
|
||||
/// Shared flags recording which abort key has been pressed.
pub struct AbortSignalInner {
    // Set when Ctrl-C was pressed.
    ctrlc: AtomicBool,
    // Set when Ctrl-D was pressed.
    ctrld: AtomicBool,
}
|
||||
|
||||
/// Creates a fresh, un-aborted signal handle.
pub fn create_abort_signal() -> AbortSignal {
    AbortSignalInner::new()
}
|
||||
|
||||
impl AbortSignalInner {
|
||||
pub fn new() -> AbortSignal {
|
||||
Arc::new(Self {
|
||||
ctrlc: AtomicBool::new(false),
|
||||
ctrld: AtomicBool::new(false),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn aborted(&self) -> bool {
|
||||
if self.aborted_ctrlc() {
|
||||
return true;
|
||||
}
|
||||
if self.aborted_ctrld() {
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
pub fn aborted_ctrlc(&self) -> bool {
|
||||
self.ctrlc.load(Ordering::SeqCst)
|
||||
}
|
||||
|
||||
pub fn aborted_ctrld(&self) -> bool {
|
||||
self.ctrld.load(Ordering::SeqCst)
|
||||
}
|
||||
|
||||
pub fn reset(&self) {
|
||||
self.ctrlc.store(false, Ordering::SeqCst);
|
||||
self.ctrld.store(false, Ordering::SeqCst);
|
||||
}
|
||||
|
||||
pub fn set_ctrlc(&self) {
|
||||
self.ctrlc.store(true, Ordering::SeqCst);
|
||||
}
|
||||
|
||||
pub fn set_ctrld(&self) {
|
||||
self.ctrld.store(true, Ordering::SeqCst);
|
||||
}
|
||||
}
|
||||
|
||||
/// Polls the signal every 25 ms and returns once an abort has been recorded.
pub async fn wait_abort_signal(abort_signal: &AbortSignal) {
    loop {
        if abort_signal.aborted() {
            break;
        }
        tokio::time::sleep(Duration::from_millis(25)).await;
    }
}
|
||||
|
||||
pub fn poll_abort_signal(abort_signal: &AbortSignal) -> Result<bool> {
|
||||
if event::poll(Duration::from_millis(25))? {
|
||||
if let Event::Key(key) = event::read()? {
|
||||
match key.code {
|
||||
KeyCode::Char('c') if key.modifiers == KeyModifiers::CONTROL => {
|
||||
abort_signal.set_ctrlc();
|
||||
return Ok(true);
|
||||
}
|
||||
KeyCode::Char('d') if key.modifiers == KeyModifiers::CONTROL => {
|
||||
abort_signal.set_ctrld();
|
||||
return Ok(true);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
use anyhow::Context;
|
||||
|
||||
#[cfg(not(any(target_os = "android", target_os = "emscripten")))]
mod internal {
    use arboard::Clipboard;
    use base64::{engine::general_purpose::STANDARD, Engine as _};
    use std::sync::{LazyLock, Mutex};

    // Shared clipboard handle; `None` when no system clipboard could be
    // opened, in which case the OSC52 escape-sequence fallback is used.
    static CLIPBOARD: LazyLock<Mutex<Option<Clipboard>>> =
        LazyLock::new(|| Mutex::new(Clipboard::new().ok()));

    /// Copies `text` to the system clipboard, falling back to OSC52 when no
    /// clipboard handle is available.
    pub fn set_text(text: &str) -> anyhow::Result<()> {
        let mut clipboard = CLIPBOARD.lock().unwrap();
        match clipboard.as_mut() {
            Some(clipboard) => {
                clipboard.set_text(text)?;
                // NOTE(review): the Linux-only sleep presumably gives the
                // clipboard manager time to take ownership of the selection
                // before the process may exit — confirm against arboard docs.
                #[cfg(target_os = "linux")]
                std::thread::sleep(std::time::Duration::from_millis(50));
                Ok(())
            }
            None => set_text_osc52(text),
        }
    }

    /// Attempts to set text to clipboard with OSC52 escape sequence
    /// Works in many modern terminals, including over SSH.
    fn set_text_osc52(text: &str) -> anyhow::Result<()> {
        let encoded = STANDARD.encode(text);
        let seq = format!("\x1b]52;c;{encoded}\x07");
        if let Err(e) = std::io::Write::write_all(&mut std::io::stdout(), seq.as_bytes()) {
            return Err(anyhow::anyhow!("Failed to send OSC52 sequence").context(e));
        }
        if let Err(e) = std::io::Write::flush(&mut std::io::stdout()) {
            return Err(anyhow::anyhow!("Failed to flush OSC52 sequence").context(e));
        }
        Ok(())
    }
}
|
||||
|
||||
// Platforms with no usable clipboard at all (Android, Emscripten).
#[cfg(any(target_os = "android", target_os = "emscripten"))]
mod internal {
    /// Always fails: no clipboard exists on this platform.
    pub fn set_text(_text: &str) -> anyhow::Result<()> {
        Err(anyhow::anyhow!("No clipboard available"))
    }
}
|
||||
|
||||
/// Copies `text` to the clipboard, attaching a user-facing error context.
pub fn set_text(text: &str) -> anyhow::Result<()> {
    internal::set_text(text).context("Failed to copy")
}
|
||||
@@ -0,0 +1,242 @@
|
||||
use super::*;
|
||||
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
env,
|
||||
ffi::OsStr,
|
||||
fs::OpenOptions,
|
||||
io::{self, Write},
|
||||
path::{Path, PathBuf},
|
||||
process::Command,
|
||||
};
|
||||
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use dirs::home_dir;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
pub static SHELL: LazyLock<Shell> = LazyLock::new(detect_shell);
|
||||
|
||||
/// A detected shell and how to hand it a command string.
pub struct Shell {
    // Short shell name, e.g. "bash", "cmd", "powershell".
    pub name: String,
    // Executable used to invoke the shell.
    pub cmd: String,
    // Flag that passes a command string, e.g. "-c" or "/C".
    pub arg: String,
}
|
||||
|
||||
impl Shell {
|
||||
pub fn new(name: &str, cmd: &str, arg: &str) -> Self {
|
||||
Self {
|
||||
name: name.to_string(),
|
||||
cmd: cmd.to_string(),
|
||||
arg: arg.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Detects the user's shell.
///
/// Resolution order: the crate-specific shell env var; on Windows a
/// `PSModulePath` heuristic distinguishing PowerShell 5 from 7; elsewhere
/// `$SHELL`; and finally a platform default (`cmd.exe` / `/bin/sh`).
pub fn detect_shell() -> Shell {
    let cmd = env::var(get_env_name("shell")).ok().or_else(|| {
        if cfg!(windows) {
            // A PSModulePath that starts under C:\Users suggests the process
            // was launched from PowerShell; the `\powershell\7\` component
            // indicates PowerShell 7 (pwsh) rather than Windows PowerShell.
            if let Ok(ps_module_path) = env::var("PSModulePath") {
                let ps_module_path = ps_module_path.to_lowercase();
                if ps_module_path.starts_with(r"c:\users") {
                    return if ps_module_path.contains(r"\powershell\7\") {
                        Some("pwsh.exe".to_string())
                    } else {
                        Some("powershell.exe".to_string())
                    };
                }
            }
            None
        } else {
            env::var("SHELL").ok()
        }
    });
    // Short name from the executable's file stem; `nu` is canonicalized to
    // `nushell`.
    let name = cmd
        .as_ref()
        .and_then(|v| Path::new(v).file_stem().and_then(|v| v.to_str()))
        .map(|v| {
            if v == "nu" {
                "nushell".into()
            } else {
                v.to_lowercase()
            }
        });
    // Fall back to the platform default if either piece is missing.
    let (cmd, name) = match (cmd.as_deref(), name.as_deref()) {
        (Some(cmd), Some(name)) => (cmd, name),
        _ => {
            if cfg!(windows) {
                ("cmd.exe", "cmd")
            } else {
                ("/bin/sh", "sh")
            }
        }
    };
    // Flag used to pass a command string to this shell.
    let shell_arg = match name {
        "powershell" => "-Command",
        "cmd" => "/C",
        _ => "-c",
    };
    Shell::new(name, cmd, shell_arg)
}
|
||||
|
||||
pub fn run_command<T: AsRef<OsStr>>(
|
||||
cmd: &str,
|
||||
args: &[T],
|
||||
envs: Option<HashMap<String, String>>,
|
||||
) -> Result<i32> {
|
||||
let status = Command::new(cmd)
|
||||
.args(args.iter())
|
||||
.envs(envs.unwrap_or_default())
|
||||
.status()?;
|
||||
Ok(status.code().unwrap_or_default())
|
||||
}
|
||||
|
||||
pub fn run_command_with_output<T: AsRef<OsStr>>(
|
||||
cmd: &str,
|
||||
args: &[T],
|
||||
envs: Option<HashMap<String, String>>,
|
||||
) -> Result<(bool, String, String)> {
|
||||
let output = Command::new(cmd)
|
||||
.args(args.iter())
|
||||
.envs(envs.unwrap_or_default())
|
||||
.output()?;
|
||||
let status = output.status;
|
||||
let stdout = std::str::from_utf8(&output.stdout).context("Invalid UTF-8 in stdout")?;
|
||||
let stderr = std::str::from_utf8(&output.stderr).context("Invalid UTF-8 in stderr")?;
|
||||
|
||||
if !status.success() {
|
||||
debug!("Command `{cmd}` exited with non-zero: {status}");
|
||||
}
|
||||
|
||||
if !stdout.is_empty() {
|
||||
debug!("Command `{cmd}` exited with non-zero. stderr: {stderr}");
|
||||
}
|
||||
|
||||
if !stderr.is_empty() {
|
||||
debug!("Command `{cmd}` executed successfully. stdout: {stdout}");
|
||||
}
|
||||
|
||||
Ok((status.success(), stdout.to_string(), stderr.to_string()))
|
||||
}
|
||||
|
||||
/// Runs a document loader command against `path` and returns the converted
/// text.
///
/// In the command template, `$1` is replaced by the input path; if `$2` is
/// present the loader is expected to write its result to that file instead
/// of stdout.
pub fn run_loader_command(path: &str, extension: &str, loader_command: &str) -> Result<String> {
    let cmd_args = shell_words::split(loader_command)
        .with_context(|| anyhow!("Invalid document loader '{extension}': `{loader_command}`"))?;
    let mut use_stdout = true;
    let outpath = temp_file("-output-", "").display().to_string();
    // Substitute the $1 (input) and $2 (output) placeholders.
    let cmd_args: Vec<_> = cmd_args
        .into_iter()
        .map(|mut v| {
            if v.contains("$1") {
                v = v.replace("$1", path);
            }
            if v.contains("$2") {
                // Presence of $2 switches output capture to the temp file.
                use_stdout = false;
                v = v.replace("$2", &outpath);
            }
            v
        })
        .collect();
    let cmd_eval = shell_words::join(&cmd_args);
    debug!("run `{cmd_eval}`");
    let (cmd, args) = cmd_args.split_at(1);
    let cmd = &cmd[0];
    if use_stdout {
        let (success, stdout, stderr) =
            run_command_with_output(cmd, args, None).with_context(|| {
                format!("Unable to run `{cmd_eval}`, Perhaps '{cmd}' is not installed?")
            })?;
        if !success {
            // Prefer the loader's own stderr when it produced any.
            let err = if !stderr.is_empty() {
                stderr
            } else {
                format!("The command `{cmd_eval}` exited with non-zero.")
            };
            bail!("{err}")
        }
        Ok(stdout)
    } else {
        let status = run_command(cmd, args, None).with_context(|| {
            format!("Unable to run `{cmd_eval}`, Perhaps '{cmd}' is not installed?")
        })?;
        if status != 0 {
            bail!("The command `{cmd_eval}` exited with non-zero.")
        }
        let contents = std::fs::read_to_string(&outpath)
            .context("Failed to read file generated by the loader")?;
        Ok(contents)
    }
}
|
||||
|
||||
pub fn edit_file(editor: &str, path: &Path) -> Result<()> {
|
||||
let mut child = Command::new(editor).arg(path).spawn()?;
|
||||
child.wait()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Appends `command` to the history file of `shell`, using that shell's
/// native entry format. Unknown shells are silently skipped.
pub fn append_to_shell_history(shell: &str, command: &str, exit_code: i32) -> io::Result<()> {
    if let Some(history_file) = get_history_file(shell) {
        // History files are line-oriented; collapse multi-line commands.
        let command = command.replace('\n', " ");
        let now = now_timestamp();
        let history_txt = if shell == "fish" {
            // Fish stores YAML-like `- cmd:` / `when:` entries.
            format!("- cmd: {command}\n when: {now}")
        } else if shell == "zsh" {
            // Zsh extended history: `: <timestamp>:<duration>;<command>`.
            // NOTE(review): exit_code is written where zsh expects the
            // duration field — confirm this is intentional.
            format!(": {now}:{exit_code};{command}",)
        } else {
            command
        };
        let mut file = OpenOptions::new()
            .create(true)
            .append(true)
            .open(&history_file)?;
        writeln!(file, "{history_txt}")?;
    }
    Ok(())
}
|
||||
|
||||
/// Returns the history file path for the given shell, or `None` for shells
/// whose history location is not known.
fn get_history_file(shell: &str) -> Option<PathBuf> {
    match shell {
        // bash/sh and zsh honor $HISTFILE, with conventional defaults.
        "bash" | "sh" => env::var("HISTFILE")
            .ok()
            .map(PathBuf::from)
            .or(Some(home_dir()?.join(".bash_history"))),
        "zsh" => env::var("HISTFILE")
            .ok()
            .map(PathBuf::from)
            .or(Some(home_dir()?.join(".zsh_history"))),
        "nushell" => Some(dirs::config_dir()?.join("nushell").join("history.txt")),
        "fish" => Some(
            home_dir()?
                .join(".local")
                .join("share")
                .join("fish")
                .join("fish_history"),
        ),
        // PSReadLine history lives in a platform-dependent data directory.
        "powershell" | "pwsh" => {
            #[cfg(not(windows))]
            {
                Some(
                    home_dir()?
                        .join(".local")
                        .join("share")
                        .join("powershell")
                        .join("PSReadLine")
                        .join("ConsoleHost_history.txt"),
                )
            }
            #[cfg(windows)]
            {
                Some(
                    dirs::data_dir()?
                        .join("Microsoft")
                        .join("Windows")
                        .join("PowerShell")
                        .join("PSReadLine")
                        .join("ConsoleHost_history.txt"),
                )
            }
        }
        "ksh" => Some(home_dir()?.join(".ksh_history")),
        "tcsh" => Some(home_dir()?.join(".history")),
        _ => None,
    }
}
|
||||
@@ -0,0 +1,35 @@
|
||||
use base64::{engine::general_purpose::STANDARD, Engine};
|
||||
use hmac::{Hmac, Mac};
|
||||
use sha2::{Digest, Sha256};
|
||||
|
||||
pub fn sha256(input: &str) -> String {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(input);
|
||||
format!("{:x}", hasher.finalize())
|
||||
}
|
||||
|
||||
/// HMAC-SHA256 of `msg` under `key`; returns the raw 32-byte tag.
pub fn hmac_sha256(key: &[u8], msg: &str) -> Vec<u8> {
    // HMAC accepts keys of any length, so this cannot fail.
    let mut mac = Hmac::<Sha256>::new_from_slice(key).expect("HMAC can take key of any size");
    mac.update(msg.as_bytes());
    mac.finalize().into_bytes().to_vec()
}
|
||||
|
||||
/// Lowercase hex encoding of `bytes`, two digits per byte.
pub fn hex_encode(bytes: &[u8]) -> String {
    use std::fmt::Write;
    let mut out = String::with_capacity(bytes.len() * 2);
    for b in bytes {
        // Writing to a String cannot fail.
        let _ = write!(out, "{b:02x}");
    }
    out
}
|
||||
|
||||
pub fn encode_uri(uri: &str) -> String {
|
||||
uri.split('/')
|
||||
.map(|v| urlencoding::encode(v))
|
||||
.collect::<Vec<_>>()
|
||||
.join("/")
|
||||
}
|
||||
|
||||
/// Encodes `input` with the standard (padded) base64 alphabet.
pub fn base64_encode<T: AsRef<[u8]>>(input: T) -> String {
    STANDARD.encode(input)
}
|
||||
/// Decodes standard (padded) base64 input into raw bytes.
pub fn base64_decode<T: AsRef<[u8]>>(input: T) -> Result<Vec<u8>, base64::DecodeError> {
    STANDARD.decode(input)
}
|
||||
@@ -0,0 +1,18 @@
|
||||
use std::{cell::RefCell, rc::Rc};
|
||||
|
||||
use html_to_markdown::{markdown, TagHandler};
|
||||
|
||||
/// Converts an HTML document to Markdown; on conversion failure the
/// original HTML is returned unchanged.
pub fn html_to_md(html: &str) -> String {
    // Tag handlers covering paragraphs, headings, lists, tables, inline
    // styling, code, and removal of page chrome.
    let mut handlers: Vec<TagHandler> = vec![
        Rc::new(RefCell::new(markdown::ParagraphHandler)),
        Rc::new(RefCell::new(markdown::HeadingHandler)),
        Rc::new(RefCell::new(markdown::ListHandler)),
        Rc::new(RefCell::new(markdown::TableHandler::new())),
        Rc::new(RefCell::new(markdown::StyledTextHandler)),
        Rc::new(RefCell::new(markdown::CodeHandler)),
        Rc::new(RefCell::new(markdown::WebpageChromeRemover)),
    ];

    html_to_markdown::convert_html_to_markdown(html.as_bytes(), &mut handlers)
        .unwrap_or_else(|_| html.to_string())
}
|
||||
@@ -0,0 +1,47 @@
|
||||
use anyhow::Result;
|
||||
use crossterm::event::{self, Event, KeyCode, KeyEvent, KeyModifiers};
|
||||
use crossterm::terminal::{disable_raw_mode, enable_raw_mode};
|
||||
use std::io::{stdout, Write};
|
||||
|
||||
/// Reads a single character from stdin without requiring Enter
|
||||
/// Returns the character if it's one of the valid options, or the default if Enter is pressed
|
||||
pub fn read_single_key(valid_chars: &[char], default: char, prompt: &str) -> Result<char> {
|
||||
print!("{prompt}");
|
||||
stdout().flush()?;
|
||||
|
||||
enable_raw_mode()?;
|
||||
|
||||
let result = loop {
|
||||
if let Ok(Event::Key(KeyEvent {
|
||||
code, modifiers, ..
|
||||
})) = event::read()
|
||||
{
|
||||
match code {
|
||||
KeyCode::Char('c') if modifiers.contains(KeyModifiers::CONTROL) => {
|
||||
break Err(anyhow::anyhow!("Interrupted"));
|
||||
}
|
||||
KeyCode::Char(c) => {
|
||||
if valid_chars.contains(&c) {
|
||||
break Ok(c);
|
||||
}
|
||||
// Invalid character, continue loop
|
||||
}
|
||||
KeyCode::Enter => {
|
||||
break Ok(default);
|
||||
}
|
||||
_ => {
|
||||
// Other keys are ignored, continue loop
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
disable_raw_mode()?;
|
||||
|
||||
// Print the chosen character and newline for clean output
|
||||
if let Ok(chosen) = &result {
|
||||
println!("{chosen}");
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
@@ -0,0 +1,125 @@
|
||||
use super::*;
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use indexmap::IndexMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
pub const EXTENSION_METADATA: &str = "__extension__";
|
||||
|
||||
pub type DocumentMetadata = IndexMap<String, String>;
|
||||
|
||||
/// A document pulled in by one of the loaders.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LoadedDocument {
    // Source path or URL of the document.
    pub path: String,
    // Raw textual contents.
    pub contents: String,
    // Extra key/value info, e.g. the `__extension__` entry.
    #[serde(default)]
    pub metadata: DocumentMetadata,
}
|
||||
|
||||
impl LoadedDocument {
|
||||
pub fn new(path: String, contents: String, metadata: DocumentMetadata) -> Self {
|
||||
Self {
|
||||
path,
|
||||
contents,
|
||||
metadata,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Crawls `path` recursively: a user-configured recursive-URL loader takes
/// precedence, otherwise the built-in crawler is used. Every resulting page
/// is tagged as Markdown.
pub async fn load_recursive_url(
    loaders: &HashMap<String, String>,
    path: &str,
) -> Result<Vec<LoadedDocument>> {
    let extension = RECURSIVE_URL_LOADER;
    let pages: Vec<Page> = match loaders.get(extension) {
        Some(loader_command) => {
            // External loaders must emit a JSON list of pages.
            let contents = run_loader_command(path, extension, loader_command)?;
            serde_json::from_str(&contents).context(r#"The crawler response is invalid. It should follow the JSON format: `[{"path":"...", "text":"..."}]`."#)?
        }
        None => {
            let options = CrawlOptions::preset(path);
            crawl_website(path, options).await?
        }
    };
    let output = pages
        .into_iter()
        .map(|v| {
            let Page { path, text } = v;
            let mut metadata: DocumentMetadata = Default::default();
            // Crawled pages are treated as Markdown downstream.
            metadata.insert(EXTENSION_METADATA.into(), "md".into());
            LoadedDocument::new(path, text, metadata)
        })
        .collect();
    Ok(output)
}
|
||||
|
||||
/// Loads a local file: a loader registered for its extension takes
/// precedence, otherwise the file is read as plain text.
pub async fn load_file(loaders: &HashMap<String, String>, path: &str) -> Result<LoadedDocument> {
    let extension = get_patch_extension(path).unwrap_or_else(|| DEFAULT_EXTENSION.into());
    match loaders.get(&extension) {
        Some(loader_command) => load_with_command(path, &extension, loader_command),
        None => load_plain(path, &extension).await,
    }
}
|
||||
|
||||
/// Fetches `path` and wraps the response body as a document, recording the
/// detected extension in the metadata.
pub async fn load_url(loaders: &HashMap<String, String>, path: &str) -> Result<LoadedDocument> {
    let (contents, extension) = fetch_with_loaders(loaders, path, false).await?;
    let mut metadata: DocumentMetadata = Default::default();
    metadata.insert(EXTENSION_METADATA.into(), extension);
    Ok(LoadedDocument::new(path.into(), contents, metadata))
}
|
||||
|
||||
async fn load_plain(path: &str, extension: &str) -> Result<LoadedDocument> {
|
||||
let contents = tokio::fs::read_to_string(path).await?;
|
||||
let mut metadata: DocumentMetadata = Default::default();
|
||||
metadata.insert(EXTENSION_METADATA.into(), extension.to_string());
|
||||
Ok(LoadedDocument::new(path.into(), contents, metadata))
|
||||
}
|
||||
|
||||
fn load_with_command(path: &str, extension: &str, loader_command: &str) -> Result<LoadedDocument> {
|
||||
let contents = run_loader_command(path, extension, loader_command)?;
|
||||
let mut metadata: DocumentMetadata = Default::default();
|
||||
metadata.insert(EXTENSION_METADATA.into(), DEFAULT_EXTENSION.to_string());
|
||||
Ok(LoadedDocument::new(path.into(), contents, metadata))
|
||||
}
|
||||
|
||||
/// True when `path` begins with a `protocol:` prefix for which a document
/// loader is registered.
pub fn is_loader_protocol(loaders: &HashMap<String, String>, path: &str) -> bool {
    path.split_once(':')
        .map(|(protocol, _)| loaders.contains_key(protocol))
        .unwrap_or(false)
}
|
||||
|
||||
pub fn load_protocol_path(
|
||||
loaders: &HashMap<String, String>,
|
||||
path: &str,
|
||||
) -> Result<Vec<LoadedDocument>> {
|
||||
let (protocol, loader_command, new_path) = path
|
||||
.split_once(':')
|
||||
.and_then(|(protocol, path)| {
|
||||
let loader_command = loaders.get(protocol)?;
|
||||
Some((protocol, loader_command, path))
|
||||
})
|
||||
.ok_or_else(|| anyhow!("No document loader for '{}'", path))?;
|
||||
let contents = run_loader_command(new_path, protocol, loader_command)?;
|
||||
let output = if let Ok(list) = serde_json::from_str::<Vec<LoadedDocument>>(&contents) {
|
||||
list.into_iter()
|
||||
.map(|mut v| {
|
||||
if v.path.starts_with(path) {
|
||||
} else if v.path.starts_with(new_path) {
|
||||
v.path = format!("{}:{}", protocol, v.path);
|
||||
} else {
|
||||
v.path = format!("{}/{}", path, v.path);
|
||||
}
|
||||
v
|
||||
})
|
||||
.collect()
|
||||
} else {
|
||||
vec![LoadedDocument::new(
|
||||
path.into(),
|
||||
contents,
|
||||
Default::default(),
|
||||
)]
|
||||
};
|
||||
Ok(output)
|
||||
}
|
||||
@@ -0,0 +1,63 @@
|
||||
use crate::config::Config;
|
||||
use colored::Colorize;
|
||||
use fancy_regex::Regex;
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader, Seek, SeekFrom};
|
||||
use std::process;
|
||||
|
||||
pub async fn tail_logs(no_color: bool) {
|
||||
let re = Regex::new(r"^(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3})\s+<(?P<opid>[^\s>]+)>\s+\[(?P<level>[A-Z]+)\]\s+(?P<logger>[^:]+):(?P<line>\d+)\s+-\s+(?P<message>.*)$").unwrap();
|
||||
let file_path = Config::log_path();
|
||||
let file = File::open(&file_path).expect("Cannot open file");
|
||||
let mut reader = BufReader::new(file);
|
||||
|
||||
if let Err(e) = reader.seek(SeekFrom::End(0)) {
|
||||
eprintln!("Unable to tail log file: {e:?}");
|
||||
process::exit(1);
|
||||
};
|
||||
|
||||
let mut lines = reader.lines();
|
||||
|
||||
loop {
|
||||
if let Some(Ok(line)) = lines.next() {
|
||||
if no_color {
|
||||
println!("{line}");
|
||||
} else {
|
||||
let colored_line = colorize_log_line(&line, &re);
|
||||
println!("{colored_line}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn colorize_log_line(line: &str, re: &Regex) -> String {
|
||||
if let Some(caps) = re.captures(line).expect("Failed to capture log line") {
|
||||
let level = &caps["level"];
|
||||
let message = &caps["message"];
|
||||
|
||||
let colored_message = match level {
|
||||
"ERROR" => message.red(),
|
||||
"WARN" => message.yellow(),
|
||||
"INFO" => message.green(),
|
||||
"DEBUG" => message.blue(),
|
||||
_ => message.normal(),
|
||||
};
|
||||
|
||||
let timestamp = &caps["timestamp"];
|
||||
let opid = &caps["opid"];
|
||||
let logger = &caps["logger"];
|
||||
let line_number = &caps["line"];
|
||||
|
||||
format!(
|
||||
"{} <{}> [{}] {}:{} - {}",
|
||||
timestamp.white(),
|
||||
opid.cyan(),
|
||||
level.bold(),
|
||||
logger.magenta(),
|
||||
line_number.bold(),
|
||||
colored_message
|
||||
)
|
||||
} else {
|
||||
line.to_string()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,252 @@
|
||||
mod abort_signal;
|
||||
mod clipboard;
|
||||
mod command;
|
||||
mod crypto;
|
||||
mod html_to_md;
|
||||
mod input;
|
||||
mod loader;
|
||||
mod logs;
|
||||
pub mod native;
|
||||
mod path;
|
||||
mod render_prompt;
|
||||
mod request;
|
||||
mod spinner;
|
||||
mod variables;
|
||||
|
||||
pub use self::abort_signal::*;
|
||||
pub use self::clipboard::set_text;
|
||||
pub use self::command::*;
|
||||
pub use self::crypto::*;
|
||||
pub use self::html_to_md::*;
|
||||
pub use self::input::*;
|
||||
pub use self::loader::*;
|
||||
pub use self::logs::*;
|
||||
pub use self::path::*;
|
||||
pub use self::render_prompt::render_prompt;
|
||||
pub use self::request::*;
|
||||
pub use self::spinner::*;
|
||||
pub use self::variables::*;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use fancy_regex::Regex;
|
||||
use fuzzy_matcher::{skim::SkimMatcherV2, FuzzyMatcher};
|
||||
use is_terminal::IsTerminal;
|
||||
use std::borrow::Cow;
|
||||
use std::sync::LazyLock;
|
||||
use std::{env, path::PathBuf, process};
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
// Matches fenced code blocks so their contents can be extracted.
pub static CODE_BLOCK_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(?ms)```\w*(.*)```").unwrap());
// Matches a leading `<think>...</think>` reasoning block.
pub static THINK_TAG_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(?s)^\s*<think>.*?</think>(\s*|$)").unwrap());
// True when stdout is attached to a terminal.
pub static IS_STDOUT_TERMINAL: LazyLock<bool> = LazyLock::new(|| std::io::stdout().is_terminal());
// True when colored output must be suppressed: the NO_COLOR env var is set
// to a truthy value, or stdout is not a terminal.
pub static NO_COLOR: LazyLock<bool> = LazyLock::new(|| {
    env::var("NO_COLOR")
        .ok()
        .and_then(|v| parse_bool(&v))
        .unwrap_or_default()
        || !*IS_STDOUT_TERMINAL
});
|
||||
|
||||
/// Current local time as an RFC 3339 string with seconds precision.
pub fn now() -> String {
    chrono::Local::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, false)
}
|
||||
|
||||
/// Current time as a Unix timestamp in seconds.
pub fn now_timestamp() -> i64 {
    chrono::Local::now().timestamp()
}
|
||||
|
||||
pub fn get_env_name(key: &str) -> String {
|
||||
format!("{}_{key}", env!("CARGO_CRATE_NAME"),).to_ascii_uppercase()
|
||||
}
|
||||
|
||||
/// Turns an arbitrary name into env-var form: `-` becomes `_`, ASCII
/// letters are uppercased.
pub fn normalize_env_name(value: &str) -> String {
    value
        .chars()
        .map(|c| if c == '-' { '_' } else { c.to_ascii_uppercase() })
        .collect()
}
|
||||
|
||||
/// Parses the common truthy/falsy string forms; anything else is `None`.
pub fn parse_bool(value: &str) -> Option<bool> {
    if matches!(value, "1" | "true") {
        Some(true)
    } else if matches!(value, "0" | "false") {
        Some(false)
    } else {
        None
    }
}
|
||||
|
||||
pub fn estimate_token_length(text: &str) -> usize {
|
||||
let words: Vec<&str> = text.unicode_words().collect();
|
||||
let mut output: f32 = 0.0;
|
||||
for word in words {
|
||||
if word.is_ascii() {
|
||||
output += 1.3;
|
||||
} else {
|
||||
let count = word.chars().count();
|
||||
if count == 1 {
|
||||
output += 1.0
|
||||
} else {
|
||||
output += (count as f32) * 0.5;
|
||||
}
|
||||
}
|
||||
}
|
||||
output.ceil() as usize
|
||||
}
|
||||
|
||||
/// Removes a leading `<think>...</think>` block from `text`, borrowing when
/// nothing needs removing.
pub fn strip_think_tag(text: &str) -> Cow<'_, str> {
    THINK_TAG_RE.replace_all(text, "")
}
|
||||
|
||||
/// Returns the trimmed contents of the first fenced code block in `text`,
/// or the whole text when no fenced block is found.
pub fn extract_code_block(text: &str) -> &str {
    CODE_BLOCK_RE
        .captures(text)
        .ok()
        .and_then(|v| v?.get(1).map(|v| v.as_str().trim()))
        .unwrap_or(text)
}
|
||||
|
||||
/// Maps an empty string to `None`, any other string to `Some(owned copy)`.
pub fn convert_option_string(value: &str) -> Option<String> {
    (!value.is_empty()).then(|| value.to_string())
}
|
||||
|
||||
/// Filters `values` by fuzzy-matching `pattern` against `get(&value)`,
/// returning the survivors sorted by descending match score.
pub fn fuzzy_filter<T, F>(values: Vec<T>, get: F, pattern: &str) -> Vec<T>
where
    F: Fn(&T) -> &str,
{
    let matcher = SkimMatcherV2::default();
    // Keep only values that match, remembering each one's score.
    let mut list: Vec<(T, i64)> = values
        .into_iter()
        .filter_map(|v| {
            let score = matcher.fuzzy_match(get(&v), pattern)?;
            Some((v, score))
        })
        .collect();
    // Best matches first.
    list.sort_unstable_by(|a, b| b.1.cmp(&a.1));
    list.into_iter().map(|(v, _)| v).collect()
}
|
||||
|
||||
/// Formats an error plus its full cause chain as a readable multi-line
/// string, numbering the causes when there is more than one.
pub fn pretty_error(err: &anyhow::Error) -> String {
    let mut output = vec![];
    output.push(format!("Error: {err}"));
    // Skip the top-level error itself; the rest are its causes.
    let causes: Vec<_> = err.chain().skip(1).collect();
    let causes_len = causes.len();
    if causes_len > 0 {
        output.push("\nCaused by:".to_string());
        if causes_len == 1 {
            // A single cause gets plain indentation, no numbering.
            output.push(format!(" {}", indent_text(causes[0], 4).trim()));
        } else {
            for (i, cause) in causes.into_iter().enumerate() {
                output.push(format!("{i:5}: {}", indent_text(cause, 7).trim()));
            }
        }
    }
    output.join("\n")
}
|
||||
|
||||
/// Prefixes every line of `s` with `size` spaces.
pub fn indent_text<T: ToString>(s: T, size: usize) -> String {
    let pad = " ".repeat(size);
    let mut lines = Vec::new();
    for line in s.to_string().split('\n') {
        lines.push(format!("{pad}{line}"));
    }
    lines.join("\n")
}
|
||||
|
||||
/// Paints `input` red for error output (no-op when colors are disabled).
pub fn error_text(input: &str) -> String {
    color_text(input, nu_ansi_term::Color::Red)
}
|
||||
|
||||
/// Paints `input` yellow for warnings (no-op when colors are disabled).
pub fn warning_text(input: &str) -> String {
    color_text(input, nu_ansi_term::Color::Yellow)
}
|
||||
|
||||
/// Paints `input` in `color`, unless colors are globally disabled (NO_COLOR
/// or non-terminal stdout).
pub fn color_text(input: &str, color: nu_ansi_term::Color) -> String {
    if *NO_COLOR {
        return input.to_string();
    }
    nu_ansi_term::Style::new()
        .fg(color)
        .paint(input)
        .to_string()
}
|
||||
|
||||
/// Renders `input` dimmed, unless colors are globally disabled.
pub fn dimmed_text(input: &str) -> String {
    if *NO_COLOR {
        return input.to_string();
    }
    nu_ansi_term::Style::new().dimmed().paint(input).to_string()
}
|
||||
|
||||
/// Formats multi-line input for display as a continued prompt entry: every
/// line after the first is prefixed with `.. `.
pub fn multiline_text(input: &str) -> String {
    let mut rendered = Vec::new();
    for (i, line) in input.split('\n').enumerate() {
        if i == 0 {
            rendered.push(line.to_string());
        } else {
            rendered.push(format!(".. {line}"));
        }
    }
    rendered.join("\n")
}
|
||||
|
||||
/// Builds a unique path in the system temp dir, shaped
/// `<crate>-<pid>{prefix}<uuid>{suffix}`. Nothing is created on disk.
pub fn temp_file(prefix: &str, suffix: &str) -> PathBuf {
    env::temp_dir().join(format!(
        "{}-{}{prefix}{}{suffix}",
        env!("CARGO_CRATE_NAME").to_lowercase(),
        process::id(),
        uuid::Uuid::new_v4()
    ))
}
|
||||
|
||||
/// True when `path` is an http(s) URL.
pub fn is_url(path: &str) -> bool {
    ["http://", "https://"]
        .iter()
        .any(|scheme| path.starts_with(scheme))
}
|
||||
|
||||
/// Applies a proxy setting to a reqwest client builder.
///
/// System proxies are always disabled; an explicit proxy is then applied
/// unless `proxy` is empty or `"-"` (meaning "no proxy at all").
pub fn set_proxy(
    mut builder: reqwest::ClientBuilder,
    proxy: &str,
) -> Result<reqwest::ClientBuilder> {
    builder = builder.no_proxy();
    if !proxy.is_empty() && proxy != "-" {
        builder = builder
            .proxy(reqwest::Proxy::all(proxy).with_context(|| format!("Invalid proxy `{proxy}`"))?);
    };
    Ok(builder)
}
|
||||
|
||||
/// Deserializes a bincode-encoded byte slice (legacy config) into `T`.
pub fn decode_bin<T: serde::de::DeserializeOwned>(data: &[u8]) -> Result<T> {
    let (v, _) = bincode::serde::decode_from_slice(data, bincode::config::legacy())?;
    Ok(v)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Relative sub-paths join normally; absolute or parent-escaping
    // sub-paths must be rejected.
    #[test]
    #[cfg(not(target_os = "windows"))]
    fn test_safe_join_path() {
        assert_eq!(
            safe_join_path("/home/user/dir1", "files/file1"),
            Some(PathBuf::from("/home/user/dir1/files/file1"))
        );
        assert!(safe_join_path("/home/user/dir1", "/files/file1").is_none());
        assert!(safe_join_path("/home/user/dir1", "../file1").is_none());
    }

    // Same contract, Windows path separators.
    #[test]
    #[cfg(target_os = "windows")]
    fn test_safe_join_path() {
        assert_eq!(
            safe_join_path("C:\\Users\\user\\dir1", "files/file1"),
            Some(PathBuf::from("C:\\Users\\user\\dir1\\files\\file1"))
        );
        assert!(safe_join_path("C:\\Users\\user\\dir1", "/files/file1").is_none());
        assert!(safe_join_path("C:\\Users\\user\\dir1", "../file1").is_none());
    }
}
|
||||
@@ -0,0 +1,46 @@
|
||||
// Windows-only helpers for locating a usable bash (from Git for Windows).
#[cfg(windows)]
pub mod runtime {
    use std::path::Path;

    /// Locates Git-for-Windows bash: first at the default install path,
    /// then relative to wherever `git` resolves on PATH.
    pub fn bash_path() -> Option<String> {
        // Default Git for Windows install location.
        let bash_path = "C:\\Program Files\\Git\\bin\\bash.exe";
        if exist_path(bash_path) {
            return Some(bash_path.into());
        }
        // Otherwise derive candidates from the resolved `git` executable.
        let git_path = which("git")?;
        let git_parent_path = parent_path(&git_path)?;
        // `<git-root>\bin\bash.exe` (git.exe lives in `<git-root>\cmd`).
        let bash_path = join_path(&parent_path(&git_parent_path)?, &["bin", "bash.exe"]);
        if exist_path(&bash_path) {
            return Some(bash_path);
        }
        // `bash.exe` next to git.exe itself.
        let bash_path = join_path(&git_parent_path, &["bash.exe"]);
        if exist_path(&bash_path) {
            return Some(bash_path);
        }
        None
    }

    // True when `path` exists on disk.
    fn exist_path(path: &str) -> bool {
        Path::new(path).exists()
    }

    /// Resolves `name` on PATH, returning the full path as a String.
    pub fn which(name: &str) -> Option<String> {
        which::which(name)
            .ok()
            .map(|path| path.to_string_lossy().into())
    }

    // Parent directory of `path` as a String, if any.
    fn parent_path(path: &str) -> Option<String> {
        Path::new(path)
            .parent()
            .map(|path| path.to_string_lossy().into())
    }

    // Joins `parts` onto `path` using platform separators.
    fn join_path(path: &str, parts: &[&str]) -> String {
        let mut path = Path::new(path).to_path_buf();
        for part in parts {
            path = path.join(part);
        }
        path.to_string_lossy().into()
    }
}
|
||||
@@ -0,0 +1,356 @@
|
||||
use std::fs;
|
||||
use std::path::{Component, Path, PathBuf};
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use fancy_regex::Regex;
|
||||
use indexmap::IndexSet;
|
||||
use path_absolutize::Absolutize;
|
||||
|
||||
type ParseGlobResult = (String, Option<Vec<String>>, bool, Option<usize>);
|
||||
|
||||
/// Joins `sub_path` onto `base_path`, refusing anything that could escape
/// the base: absolute sub-paths, `..` components, or a result that does not
/// remain under `base_path`.
pub fn safe_join_path<T1: AsRef<Path>, T2: AsRef<Path>>(
    base_path: T1,
    sub_path: T2,
) -> Option<PathBuf> {
    let base = base_path.as_ref();
    let sub = sub_path.as_ref();

    // An absolute sub-path would replace the base entirely.
    if sub.is_absolute() {
        return None;
    }

    let mut joined = base.to_path_buf();
    for component in sub.components() {
        // `..` could climb out of the base directory.
        if matches!(component, Component::ParentDir) {
            return None;
        }
        joined.push(component);
    }

    // Final safety net: the result must still live under the base.
    joined.starts_with(base).then_some(joined)
}
|
||||
|
||||
/// Expands glob patterns in `paths` into concrete file paths, preserving
/// input order and de-duplicating the results.
pub async fn expand_glob_paths<T: AsRef<str>>(
    paths: &[T],
    bail_non_exist: bool,
) -> Result<IndexSet<String>> {
    let mut new_paths = IndexSet::new();
    for path in paths {
        // Split the pattern into base path, suffix filters, recursion mode,
        // and optional depth limit.
        let (path_str, suffixes, current_only, depth) = parse_glob(path.as_ref())?;
        list_files(
            &mut new_paths,
            Path::new(&path_str),
            suffixes.as_ref(),
            current_only,
            bail_non_exist,
            depth,
        )
        .await?;
    }
    Ok(new_paths)
}
|
||||
|
||||
pub fn clear_dir(dir: &Path) -> Result<()> {
|
||||
for entry in fs::read_dir(dir)? {
|
||||
let entry = entry?;
|
||||
let path = entry.path();
|
||||
|
||||
if path.is_dir() {
|
||||
fs::remove_dir_all(&path)?;
|
||||
} else {
|
||||
fs::remove_file(&path)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Lists (sorted) the names of entries in `dir` ending with `ext`, with the
/// extension stripped. An unreadable directory yields an empty list.
pub fn list_file_names<T: AsRef<Path>>(dir: T, ext: &str) -> Vec<String> {
    let Ok(rd) = fs::read_dir(dir.as_ref()) else {
        return vec![];
    };
    let mut names: Vec<String> = rd
        .flatten()
        .filter_map(|entry| {
            entry
                .file_name()
                .to_string_lossy()
                .strip_suffix(ext)
                .map(|name| name.to_string())
        })
        .collect();
    names.sort_unstable();
    names
}
|
||||
|
||||
/// Lower-cased file extension of `path`, if it has one.
pub fn get_patch_extension(path: &str) -> Option<String> {
    let extension = Path::new(path).extension()?;
    Some(extension.to_string_lossy().to_lowercase())
}
|
||||
|
||||
pub fn to_absolute_path(path: &str) -> Result<String> {
|
||||
Ok(Path::new(&path).absolutize()?.display().to_string())
|
||||
}
|
||||
|
||||
pub fn resolve_home_dir(path: &str) -> String {
|
||||
let mut path = path.to_string();
|
||||
if path.starts_with("~/") || path.starts_with("~\\") {
|
||||
if let Some(home_dir) = dirs::home_dir() {
|
||||
path.replace_range(..1, &home_dir.display().to_string());
|
||||
}
|
||||
}
|
||||
path
|
||||
}
|
||||
|
||||
/// Split a glob-style path into `(base_path, suffix filters, current_only, depth)`.
///
/// Recognized patterns (both `/` and `\` separators):
/// - `dir/**/*.ext` / `**/*.ext` — recurse without limit, filter by extension
/// - `dir/**/name.ext`           — recurse, match the full file name
/// - `dir/*/name.ext`            — descend exactly one directory level
/// - `dir/*.ext` / `*.ext`       — current directory only
/// - `dir/**`                    — recurse, no filter
///
/// Extensions may be a `{a,b,c}` alternation. Anything else is returned as a
/// literal path with no filters.
fn parse_glob(path_str: &str) -> Result<ParseGlobResult> {
    let globbed_single_subdir_regex = Regex::new(r"\*/[^/]+\.[^/]+$").expect("invalid regex");
    let globbed_recursive_subdir_regex = Regex::new(r"\*\*/[^/]+\.[^/]+$").expect("invalid regex");
    // Each arm yields (start index of the glob part, offset past the glob
    // marker to the extension text, current_only flag, depth limit).
    let glob_result =
        if let Some(start) = path_str.find("/**/*.").or_else(|| path_str.find(r"\**\*.")) {
            Some((start, 6, false, None))
        } else if let Some(start) = path_str.find("**/*.").or_else(|| path_str.find(r"**\*.")) {
            if start == 0 {
                Some((start, 5, false, None))
            } else {
                None
            }
        } else if let Some(m) = globbed_recursive_subdir_regex.find(path_str)? {
            Some((m.start(), 3, false, None))
        } else if let Some(m) = globbed_single_subdir_regex.find(path_str)? {
            // `dir/*/file.ext`: recurse, but only one level deep.
            Some((m.start(), 2, false, Some(1usize)))
        } else if let Some(start) = path_str.find("/*.").or_else(|| path_str.find(r"\*.")) {
            Some((start, 3, true, None))
        } else if let Some(start) = path_str.find("*.") {
            if start == 0 {
                Some((start, 2, true, None))
            } else {
                None
            }
        } else {
            None
        };
    if let Some((start, offset, current_only, depth)) = glob_result {
        let mut base_path = path_str[..start].to_string();
        if base_path.is_empty() {
            // Glob at the very start of the input: base is the filesystem
            // root or the current directory.
            base_path = if path_str
                .chars()
                .next()
                .map(|v| v == '/')
                .unwrap_or_default()
            {
                "/"
            } else {
                "."
            }
            .into();
        }

        // `{a,b}` alternation after the glob marker, or a single extension.
        let extensions = if let Some(curly_brace_end) = path_str[start..].find('}') {
            let end = start + curly_brace_end;
            let extensions_str = &path_str[start + offset..end + 1];
            if extensions_str.starts_with('{') && extensions_str.ends_with('}') {
                extensions_str[1..extensions_str.len() - 1]
                    .split(',')
                    .map(|s| s.to_string())
                    .collect::<Vec<String>>()
            } else {
                bail!("Invalid path '{path_str}'");
            }
        } else {
            let extensions_str = &path_str[start + offset..];
            vec![extensions_str.to_string()]
        };
        let extensions = if extensions.is_empty() {
            None
        } else {
            Some(extensions)
        };
        Ok((base_path, extensions, current_only, depth))
    } else if path_str.ends_with("/**") || path_str.ends_with(r"\**") {
        // Bare recursive glob with no filter: strip the trailing "/**".
        Ok((
            path_str[0..path_str.len() - 3].to_string(),
            None,
            false,
            None,
        ))
    } else {
        // Not a glob at all: return the path verbatim.
        Ok((path_str.to_string(), None, false, None))
    }
}
|
||||
|
||||
/// Recursively collect files under `entry_path` into `files`.
///
/// `suffixes`, `current_only` and `depth` come from `parse_glob`:
/// `current_only` disables recursion entirely, and `depth` (when set) limits
/// how many directory levels may still be descended.
#[async_recursion::async_recursion]
async fn list_files(
    files: &mut IndexSet<String>,
    entry_path: &Path,
    suffixes: Option<&Vec<String>>,
    current_only: bool,
    bail_non_exist: bool,
    depth: Option<usize>,
) -> Result<()> {
    if !entry_path.exists() {
        if bail_non_exist {
            bail!("Not found '{}'", entry_path.display());
        } else {
            return Ok(());
        }
    }
    if entry_path.is_dir() {
        let mut reader = tokio::fs::read_dir(entry_path).await?;
        while let Some(entry) = reader.next_entry().await? {
            let path = entry.path();
            if path.is_dir() {
                if !current_only {
                    if let Some(remaining_depth) = depth {
                        // Bounded recursion: consume one level per descent.
                        if remaining_depth > 0 {
                            list_files(
                                files,
                                &path,
                                suffixes,
                                current_only,
                                bail_non_exist,
                                Some(remaining_depth - 1),
                            )
                            .await?;
                        }
                    } else {
                        list_files(files, &path, suffixes, current_only, bail_non_exist, None)
                            .await?;
                    }
                }
            } else {
                add_file(files, suffixes, &path);
            }
        }
    } else {
        // Plain file: record it directly (subject to the suffix filter).
        add_file(files, suffixes, entry_path);
    }
    Ok(())
}
|
||||
|
||||
fn add_file(files: &mut IndexSet<String>, suffixes: Option<&Vec<String>>, path: &Path) {
|
||||
if is_valid_extension(suffixes, path) {
|
||||
let path = path.display().to_string();
|
||||
if !files.contains(&path) {
|
||||
files.insert(path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Decide whether `path` passes the `suffixes` filter.
///
/// `suffixes` may hold plain extensions (e.g. `"md"`) or complete file names
/// (e.g. `"test.md"`, produced by globs such as `dir/**/test.md`). A suffix
/// containing a dot is treated as a full file name to match exactly;
/// otherwise the path's extension is compared. With no suffixes (or an empty
/// list) everything matches.
fn is_valid_extension(suffixes: Option<&Vec<String>>, path: &Path) -> bool {
    let Some(suffixes) = suffixes else {
        return true;
    };
    if suffixes.is_empty() {
        return true;
    }
    // A dot in any suffix means the glob named exact files, not extensions.
    // (The previous regex `^.+\.*` matched every non-empty string because
    // `\.*` allows zero dots, so the extension comparison below was
    // unreachable and plain extension filters like ["md"] never matched.)
    if suffixes.iter().any(|suffix| suffix.contains('.')) {
        // Non-UTF-8 file names simply fail the match instead of panicking.
        return path
            .file_name()
            .and_then(|name| name.to_str())
            .map(|name| suffixes.contains(&name.to_string()))
            .unwrap_or(false);
    }
    match path.extension().map(|ext| ext.to_string_lossy().to_string()) {
        Some(extension) => suffixes.contains(&extension),
        None => false,
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Exercises every glob shape recognized by `parse_glob`, including
    // Windows-style separators and `{a,b}` extension alternations.
    // Result tuples are (base_path, suffix filters, current_only, depth).
    #[test]
    fn test_parse_glob() {
        assert_eq!(
            parse_glob("dir").unwrap(),
            ("dir".into(), None, false, None)
        );
        assert_eq!(
            parse_glob("dir/**").unwrap(),
            ("dir".into(), None, false, None)
        );
        assert_eq!(
            parse_glob("dir/file.md").unwrap(),
            ("dir/file.md".into(), None, false, None)
        );
        assert_eq!(
            parse_glob("**/*.md").unwrap(),
            (".".into(), Some(vec!["md".into()]), false, None)
        );
        assert_eq!(
            parse_glob("/**/*.md").unwrap(),
            ("/".into(), Some(vec!["md".into()]), false, None)
        );
        assert_eq!(
            parse_glob("dir/**/*.md").unwrap(),
            ("dir".into(), Some(vec!["md".into()]), false, None)
        );
        assert_eq!(
            parse_glob("dir/**/test.md").unwrap(),
            ("dir/".into(), Some(vec!["test.md".into()]), false, None)
        );
        assert_eq!(
            parse_glob("dir/*/test.md").unwrap(),
            (
                "dir/".into(),
                Some(vec!["test.md".into()]),
                false,
                Some(1usize)
            )
        );
        assert_eq!(
            parse_glob("dir/**/*.{md,txt}").unwrap(),
            (
                "dir".into(),
                Some(vec!["md".into(), "txt".into()]),
                false,
                None
            )
        );
        assert_eq!(
            parse_glob("C:\\dir\\**\\*.{md,txt}").unwrap(),
            (
                "C:\\dir".into(),
                Some(vec!["md".into(), "txt".into()]),
                false,
                None
            )
        );
        assert_eq!(
            parse_glob("*.md").unwrap(),
            (".".into(), Some(vec!["md".into()]), true, None)
        );
        assert_eq!(
            parse_glob("/*.md").unwrap(),
            ("/".into(), Some(vec!["md".into()]), true, None)
        );
        assert_eq!(
            parse_glob("dir/*.md").unwrap(),
            ("dir".into(), Some(vec!["md".into()]), true, None)
        );
        assert_eq!(
            parse_glob("dir/*.{md,txt}").unwrap(),
            (
                "dir".into(),
                Some(vec!["md".into(), "txt".into()]),
                true,
                None
            )
        );
        assert_eq!(
            parse_glob("C:\\dir\\*.{md,txt}").unwrap(),
            (
                "C:\\dir".into(),
                Some(vec!["md".into(), "txt".into()]),
                true,
                None
            )
        );
    }
}
|
||||
@@ -0,0 +1,155 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Render REPL prompt
|
||||
///
|
||||
/// The template comprises plain text and `{...}`.
|
||||
///
|
||||
/// The syntax of `{...}`:
|
||||
/// - `{var}` - When `var` has a value, replace `var` with the value and eval `template`
|
||||
/// - `{?var <template>}` - Eval `template` when `var` is evaluated as true
|
||||
/// - `{!var <template>}` - Eval `template` when `var` is evaluated as false
|
||||
pub fn render_prompt(template: &str, variables: &HashMap<&str, String>) -> String {
|
||||
let exprs = parse_template(template);
|
||||
eval_exprs(&exprs, variables)
|
||||
}
|
||||
|
||||
/// Split `template` into literal text and `{...}` expressions.
///
/// Scans character by character, tracking brace nesting in `balances`; the
/// text inside the outermost braces is accumulated in `current` and handed
/// to `parse_block` when the matching `}` closes it.
fn parse_template(template: &str) -> Vec<Expr> {
    let chars: Vec<char> = template.chars().collect();
    let mut exprs = vec![];
    let mut current = vec![];
    let mut balances = vec![];
    for ch in chars.iter().cloned() {
        if !balances.is_empty() {
            // Currently inside a `{...}` block.
            if ch == '}' {
                balances.pop();
                if balances.is_empty() {
                    // Outermost block closed: parse its accumulated content.
                    if !current.is_empty() {
                        let block = parse_block(&mut current);
                        exprs.push(block)
                    }
                } else {
                    current.push(ch);
                }
            } else if ch == '{' {
                // Nested brace: track it so the outer block stays open.
                balances.push(ch);
                current.push(ch);
            } else {
                current.push(ch);
            }
        } else if ch == '{' {
            // Entering a block: flush any buffered literal text first.
            balances.push(ch);
            add_text(&mut exprs, &mut current);
        } else {
            current.push(ch)
        }
    }
    // Trailing literal text after the last block.
    add_text(&mut exprs, &mut current);
    exprs
}
|
||||
|
||||
fn parse_block(current: &mut Vec<char>) -> Expr {
|
||||
let value: String = current.drain(..).collect();
|
||||
match value.split_once(' ') {
|
||||
Some((name, tail)) => {
|
||||
if let Some(name) = name.strip_prefix('?') {
|
||||
let block_exprs = parse_template(tail);
|
||||
Expr::Block(BlockType::Yes, name.to_string(), block_exprs)
|
||||
} else if let Some(name) = name.strip_prefix('!') {
|
||||
let block_exprs = parse_template(tail);
|
||||
Expr::Block(BlockType::No, name.to_string(), block_exprs)
|
||||
} else {
|
||||
Expr::Text(format!("{{{value}}}"))
|
||||
}
|
||||
}
|
||||
None => Expr::Variable(value),
|
||||
}
|
||||
}
|
||||
|
||||
fn eval_exprs(exprs: &[Expr], variables: &HashMap<&str, String>) -> String {
|
||||
let mut output = String::new();
|
||||
for part in exprs {
|
||||
match part {
|
||||
Expr::Text(text) => output.push_str(text),
|
||||
Expr::Variable(variable) => {
|
||||
let value = variables
|
||||
.get(variable.as_str())
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
output.push_str(&value);
|
||||
}
|
||||
Expr::Block(typ, variable, block_exprs) => {
|
||||
let value = variables
|
||||
.get(variable.as_str())
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
match typ {
|
||||
BlockType::Yes => {
|
||||
if truly(&value) {
|
||||
let block_output = eval_exprs(block_exprs, variables);
|
||||
output.push_str(&block_output)
|
||||
}
|
||||
}
|
||||
BlockType::No => {
|
||||
if !truly(&value) {
|
||||
let block_output = eval_exprs(block_exprs, variables);
|
||||
output.push_str(&block_output)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
output
|
||||
}
|
||||
|
||||
fn add_text(exprs: &mut Vec<Expr>, current: &mut Vec<char>) {
|
||||
if current.is_empty() {
|
||||
return;
|
||||
}
|
||||
let value: String = current.drain(..).collect();
|
||||
exprs.push(Expr::Text(value));
|
||||
}
|
||||
|
||||
/// Truthiness rule for template variables: everything is true except the
/// empty string, `"0"`, and `"false"`.
fn truly(value: &str) -> bool {
    !matches!(value, "" | "0" | "false")
}
|
||||
|
||||
/// A parsed piece of a prompt template.
#[derive(Debug)]
enum Expr {
    /// Literal text emitted as-is.
    Text(String),
    /// `{var}` — substituted with the variable's value.
    Variable(String),
    /// `{?var ...}` / `{!var ...}` — conditionally rendered sub-template.
    Block(BlockType, String, Vec<Expr>),
}
|
||||
|
||||
/// Polarity of a conditional template block.
#[derive(Debug)]
enum BlockType {
    /// `{?var ...}` — render when the variable is truthy.
    Yes,
    /// `{!var ...}` — render when the variable is falsy.
    No,
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Renders `$template` with the given (key, value) pairs and checks the
    // exact output.
    macro_rules! assert_render {
        ($template:expr, [$(($key:literal, $value:literal),)*], $expect:literal) => {
            let data = HashMap::from([
                $(($key, $value.into()),)*
            ]);
            assert_eq!(render_prompt($template, &data), $expect);
        };
    }

    #[test]
    fn test_render() {
        // Conditional blocks combine: role alone, session alone, or both.
        let prompt = "{?session {session}{?role /}}{role}{?session )}{!session >}";
        assert_render!(prompt, [], ">");
        assert_render!(prompt, [("role", "coder"),], "coder>");
        assert_render!(prompt, [("session", "temp"),], "temp)");
        assert_render!(
            prompt,
            [("session", "temp"), ("role", "coder"),],
            "temp/coder)"
        );
    }
}
|
||||
@@ -0,0 +1,464 @@
|
||||
use super::*;
|
||||
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use fancy_regex::Regex;
|
||||
use futures_util::{stream, StreamExt};
|
||||
use http::header::CONTENT_TYPE;
|
||||
use reqwest::Url;
|
||||
use scraper::{Html, Selector};
|
||||
use serde::Deserialize;
|
||||
use serde_json::Value;
|
||||
use std::sync::LazyLock;
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
sync::Arc,
|
||||
time::Duration,
|
||||
};
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tokio::sync::Semaphore;
|
||||
|
||||
/// Loader key for fetching a single URL.
pub const URL_LOADER: &str = "url";
/// Loader key for recursively crawling a site.
pub const RECURSIVE_URL_LOADER: &str = "recursive_url";

/// Pseudo-extension marking media (image/video/audio) URL content.
pub const MEDIA_URL_EXTENSION: &str = "media_url";
/// Fallback extension when none can be determined.
pub const DEFAULT_EXTENSION: &str = "txt";

/// Maximum number of pages fetched concurrently while crawling.
const MAX_CRAWLS: usize = 5;
/// Abort the whole crawl on the first page error instead of logging it.
const BREAK_ON_ERROR: bool = false;
/// User-Agent sent to GitHub and crawled sites.
const USER_AGENT: &str = "curl/8.6.0";
|
||||
|
||||
/// Shared HTTP client (16s timeout); a construction error is stored and
/// surfaced at each use site.
static CLIENT: LazyLock<Result<reqwest::Client>> = LazyLock::new(|| {
    let builder = reqwest::ClientBuilder::new().timeout(Duration::from_secs(16));
    let client = builder.build()?;
    Ok(client)
});

/// Per-site crawl presets, matched in order against the start URL.
static PRESET: LazyLock<Vec<(Regex, CrawlOptions)>> = LazyLock::new(|| {
    vec![
        (
            // GitHub repo tree: skip changelog/license-style files.
            Regex::new(r"github.com/([^/]+)/([^/]+)/tree/([^/]+)").unwrap(),
            CrawlOptions {
                exclude: vec!["changelog".into(), "changes".into(), "license".into()],
                ..Default::default()
            },
        ),
        (
            // GitHub wiki: extract only the wiki body, skip page history.
            Regex::new(r"github.com/([^/]+)/([^/]+)/wiki").unwrap(),
            CrawlOptions {
                exclude: vec!["_history".into()],
                extract: Some("#wiki-body".into()),
                ..Default::default()
            },
        ),
    ]
});

/// Matches a trailing `.ext` file extension.
static EXTENSION_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\.[^.]+$").unwrap());
/// Matches `https://github.com/<owner>/<repo>/tree/<branch>` URLs.
static GITHUB_REPO_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^https://github\.com/([^/]+)/([^/]+)/tree/([^/]+)").unwrap());
|
||||
|
||||
/// Fetch `url` and return the response body as text.
///
/// # Errors
/// Fails when the shared client could not be built or the request/body
/// read fails. Non-2xx statuses are NOT treated as errors here.
pub async fn fetch(url: &str) -> Result<String> {
    // Surface the shared client's construction error, if any.
    let client = match *CLIENT {
        Ok(ref client) => client,
        Err(ref err) => bail!("{err}"),
    };
    let res = client.get(url).send().await?;
    let output = res.text().await?;
    Ok(output)
}
|
||||
|
||||
/// Fetch `path` and return `(contents, extension)`.
///
/// Resolution order:
/// 1. A configured `url` loader handles the URL directly.
/// 2. The response Content-Type determines an extension; media types are
///    inlined as a base64 data URL (only when `allow_media` is set).
/// 3. A loader registered for that extension converts a downloaded temp file.
/// 4. Otherwise the body is returned as text (HTML converted to Markdown).
pub async fn fetch_with_loaders(
    loaders: &HashMap<String, String>,
    path: &str,
    allow_media: bool,
) -> Result<(String, String)> {
    if let Some(loader_command) = loaders.get(URL_LOADER) {
        let contents = run_loader_command(path, URL_LOADER, loader_command)?;
        return Ok((contents, DEFAULT_EXTENSION.into()));
    }
    let client = match *CLIENT {
        Ok(ref client) => client,
        Err(ref err) => bail!("{err}"),
    };
    let mut res = client.get(path).send().await?;
    if !res.status().is_success() {
        bail!("Invalid status: {}", res.status());
    }
    // MIME type without parameters (e.g. "; charset=utf-8"); falls back to a
    // pseudo type derived from the URL's own extension.
    let content_type = res
        .headers()
        .get(CONTENT_TYPE)
        .and_then(|v| v.to_str().ok())
        .map(|v| match v.split_once(';') {
            Some((mime, _)) => mime.trim(),
            None => v,
        })
        .map(|v| v.to_string())
        .unwrap_or_else(|| {
            format!(
                "_/{}",
                get_patch_extension(path).unwrap_or_else(|| DEFAULT_EXTENSION.into())
            )
        });
    let mut is_media = false;
    // Map well-known document MIME types to extensions; otherwise use the
    // MIME subtype, flagging image/video/audio as media.
    let extension = match content_type.as_str() {
        "application/pdf" => "pdf".into(),
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => "docx".into(),
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" => "xlsx".into(),
        "application/vnd.openxmlformats-officedocument.presentationml.presentation" => {
            "pptx".into()
        }
        "application/vnd.oasis.opendocument.text" => "odt".into(),
        "application/vnd.oasis.opendocument.spreadsheet" => "ods".into(),
        "application/vnd.oasis.opendocument.presentation" => "odp".into(),
        "application/rtf" => "rtf".into(),
        "text/javascript" => "js".into(),
        "text/html" => "html".into(),
        _ => content_type
            .rsplit_once('/')
            .map(|(first, last)| {
                if ["image", "video", "audio"].contains(&first) {
                    is_media = true;
                    MEDIA_URL_EXTENSION.into()
                } else {
                    last.to_lowercase()
                }
            })
            .unwrap_or_else(|| DEFAULT_EXTENSION.into()),
    };
    let result = if is_media {
        if !allow_media {
            bail!("Unexpected media type")
        }
        // Inline the media payload as a data URL.
        let image_bytes = res.bytes().await?;
        let image_base64 = base64_encode(&image_bytes);
        let contents = format!("data:{content_type};base64,{image_base64}");
        (contents, extension)
    } else {
        match loaders.get(&extension) {
            Some(loader_command) => {
                // Stream the body to a temp file, then let the loader convert it.
                let save_path = temp_file("-download-", &format!(".{extension}"))
                    .display()
                    .to_string();
                let mut save_file = tokio::fs::File::create(&save_path).await?;
                let mut size = 0;
                while let Some(chunk) = res.chunk().await? {
                    size += chunk.len();
                    save_file.write_all(&chunk).await?;
                }
                let contents = if size == 0 {
                    // Empty body: warn instead of invoking the loader.
                    println!("{}", warning_text(&format!("No content at '{path}'")));
                    String::new()
                } else {
                    run_loader_command(&save_path, &extension, loader_command)?
                };
                (contents, DEFAULT_EXTENSION.into())
            }
            None => {
                let contents = res.text().await?;
                if extension == "html" {
                    (html_to_md(&contents), "md".into())
                } else {
                    (contents, extension)
                }
            }
        }
    };
    Ok(result)
}
|
||||
|
||||
/// List model ids from an OpenAI-compatible `GET <api_base>/models` endpoint,
/// sorted ascending.
///
/// # Errors
/// Fails on transport errors or when the response carries no model ids.
pub async fn fetch_models(api_base: &str, api_key: Option<&str>) -> Result<Vec<String>> {
    let client = match *CLIENT {
        Ok(ref client) => client,
        Err(ref err) => bail!("{err}"),
    };
    let mut builder = client.get(format!("{}/models", api_base.trim_end_matches('/')));
    if let Some(api_key) = api_key {
        builder = builder.bearer_auth(api_key);
    }
    let res_body: Value = builder.send().await?.json().await?;
    // Expected response shape: { "data": [ { "id": "..." }, ... ] }
    let mut result: Vec<String> = res_body
        .get("data")
        .and_then(|v| v.as_array())
        .map(|v| {
            v.iter()
                .filter_map(|v| v.get("id").and_then(|v| v.as_str().map(|v| v.to_string())))
                .collect()
        })
        .unwrap_or_default();
    if result.is_empty() {
        bail!("No valid models")
    }
    result.sort_unstable();
    Ok(result)
}
|
||||
|
||||
/// Options controlling `crawl_website`.
#[derive(Debug, Clone, Default)]
pub struct CrawlOptions {
    // CSS selector to extract from each page; whole page when unset.
    extract: Option<String>,
    // Link names (with or without extension) to skip while crawling.
    exclude: Vec<String>,
    // Suppress progress and error logging.
    no_log: bool,
}
|
||||
|
||||
impl CrawlOptions {
    /// Options for `start_url` taken from the first matching `PRESET` entry,
    /// falling back to the defaults when no preset applies.
    pub fn preset(start_url: &str) -> CrawlOptions {
        for (re, options) in PRESET.iter() {
            if let Ok(true) = re.is_match(start_url) {
                return options.clone();
            }
        }
        CrawlOptions::default()
    }
}
|
||||
|
||||
pub async fn crawl_website(start_url: &str, options: CrawlOptions) -> Result<Vec<Page>> {
|
||||
let start_url = Url::parse(start_url)?;
|
||||
let mut paths = vec![start_url.path().to_string()];
|
||||
let normalized_start_url = normalize_start_url(&start_url);
|
||||
if !options.no_log {
|
||||
println!(
|
||||
"Start crawling url={start_url} exclude={} extract={}",
|
||||
options.exclude.join(","),
|
||||
options.extract.as_deref().unwrap_or_default()
|
||||
);
|
||||
}
|
||||
|
||||
if let Ok(true) = GITHUB_REPO_RE.is_match(start_url.as_str()) {
|
||||
paths = crawl_gh_tree(&start_url, &options.exclude)
|
||||
.await
|
||||
.with_context(|| "Failed to craw github repo".to_string())?;
|
||||
}
|
||||
|
||||
let semaphore = Arc::new(Semaphore::new(MAX_CRAWLS));
|
||||
let mut result_pages = Vec::new();
|
||||
|
||||
let mut index = 0;
|
||||
while index < paths.len() {
|
||||
let batch = paths[index..std::cmp::min(index + MAX_CRAWLS, paths.len())].to_vec();
|
||||
|
||||
let tasks: Vec<_> = batch
|
||||
.iter()
|
||||
.map(|path| {
|
||||
let options = options.clone();
|
||||
let permit = semaphore.clone().acquire_owned(); // acquire a permit for concurrency control
|
||||
let normalized_start_url = normalized_start_url.clone();
|
||||
let path = path.clone();
|
||||
|
||||
async move {
|
||||
let _permit = permit.await?;
|
||||
let url = normalized_start_url
|
||||
.join(&path)
|
||||
.map_err(|_| anyhow!("Invalid crawl page at {}", path))?;
|
||||
let mut page = crawl_page(&normalized_start_url, &path, options)
|
||||
.await
|
||||
.with_context(|| format!("Failed to crawl {}", url.as_str()))?;
|
||||
page.0 = url.as_str().to_string();
|
||||
Ok(page)
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
let results = stream::iter(tasks)
|
||||
.buffer_unordered(MAX_CRAWLS)
|
||||
.collect::<Vec<_>>()
|
||||
.await;
|
||||
|
||||
let mut new_paths = Vec::new();
|
||||
|
||||
for res in results {
|
||||
match res {
|
||||
Ok((path, text, links)) => {
|
||||
if !options.no_log {
|
||||
println!("Crawled {path}");
|
||||
}
|
||||
if !text.is_empty() {
|
||||
result_pages.push(Page { path, text });
|
||||
}
|
||||
for link in links {
|
||||
if !paths.iter().any(|p| match_link(p, &link)) {
|
||||
new_paths.push(link);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
if BREAK_ON_ERROR {
|
||||
return Err(err);
|
||||
} else if !options.no_log {
|
||||
println!("{}", error_text(&pretty_error(&err)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
paths.extend(new_paths);
|
||||
|
||||
index += batch.len();
|
||||
}
|
||||
|
||||
Ok(result_pages)
|
||||
}
|
||||
|
||||
/// A crawled page: its resolved URL (`path`) and the extracted text.
#[derive(Debug, Deserialize)]
pub struct Page {
    pub path: String,
    pub text: String,
}
|
||||
|
||||
async fn crawl_gh_tree(start_url: &Url, exclude: &[String]) -> Result<Vec<String>> {
|
||||
let path_segs: Vec<&str> = start_url.path().split('/').collect();
|
||||
if path_segs.len() < 4 {
|
||||
bail!("Invalid gh tree {}", start_url.as_str());
|
||||
}
|
||||
let client = match *CLIENT {
|
||||
Ok(ref client) => client,
|
||||
Err(ref err) => bail!("{err}"),
|
||||
};
|
||||
let owner = path_segs[1];
|
||||
let repo = path_segs[2];
|
||||
let branch = path_segs[4];
|
||||
let root_path = path_segs[5..].join("/");
|
||||
|
||||
let url = format!("https://api.github.com/repos/{owner}/{repo}/git/ref/heads/{branch}");
|
||||
|
||||
let res_body: Value = client
|
||||
.get(&url)
|
||||
.header("User-Agent", USER_AGENT)
|
||||
.header("Accept", "application/vnd.github+json")
|
||||
.header("X-GitHub-Api-Version", "2022-11-28")
|
||||
.send()
|
||||
.await?
|
||||
.json()
|
||||
.await?;
|
||||
|
||||
let sha = res_body["object"]["sha"]
|
||||
.as_str()
|
||||
.ok_or_else(|| anyhow!("Not found branch or tag"))?;
|
||||
|
||||
let url = format!("https://api.github.com/repos/{owner}/{repo}/git/trees/{sha}?recursive=true");
|
||||
|
||||
let res_body: Value = client
|
||||
.get(&url)
|
||||
.header("User-Agent", USER_AGENT)
|
||||
.header("Accept", "application/vnd.github+json")
|
||||
.header("X-GitHub-Api-Version", "2022-11-28")
|
||||
.send()
|
||||
.await?
|
||||
.json()
|
||||
.await?;
|
||||
let tree = res_body["tree"]
|
||||
.as_array()
|
||||
.ok_or_else(|| anyhow!("Invalid github repo tree"))?;
|
||||
let paths = tree
|
||||
.iter()
|
||||
.flat_map(|v| {
|
||||
let typ = v["type"].as_str()?;
|
||||
let path = v["path"].as_str()?;
|
||||
if typ == "blob"
|
||||
&& (path.ends_with(".md") || path.ends_with(".MD"))
|
||||
&& path.starts_with(&root_path)
|
||||
&& !should_exclude_link(path, exclude)
|
||||
{
|
||||
Some(format!(
|
||||
"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{path}"
|
||||
))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(paths)
|
||||
}
|
||||
|
||||
/// Fetch one page and return `(path, extracted text, same-site links)`.
///
/// GitHub tree crawls (raw file URLs) return the body as-is with no links.
/// Otherwise links under the start URL (minus excluded ones) are collected
/// and the body — optionally narrowed by the `extract` selector — is
/// converted to Markdown.
async fn crawl_page(
    start_url: &Url,
    path: &str,
    options: CrawlOptions,
) -> Result<(String, String, Vec<String>)> {
    let client = match *CLIENT {
        Ok(ref client) => client,
        Err(ref err) => bail!("{err}"),
    };
    let location = start_url.join(path)?;
    let response = client
        .get(location.as_str())
        .header("User-Agent", USER_AGENT)
        .send()
        .await?;
    let body = response.text().await?;

    if let Ok(true) = GITHUB_REPO_RE.is_match(start_url.as_str()) {
        // Raw Markdown from a GitHub tree needs no HTML processing.
        return Ok((path.to_string(), body, vec![]));
    }

    let mut links = HashSet::new();
    let document = Html::parse_document(&body);
    let selector = Selector::parse("a").map_err(|err| anyhow!("Invalid link selector, {}", err))?;

    for element in document.select(&selector) {
        if let Some(href) = element.value().attr("href") {
            // Absolute hrefs parse directly; relative ones resolve against
            // the current page.
            let href = Url::parse(href).ok().or_else(|| location.join(href).ok());
            match href {
                None => continue,
                Some(href) => {
                    // Only follow links below the current location.
                    if href.as_str().starts_with(location.as_str())
                        && !should_exclude_link(href.path(), &options.exclude)
                    {
                        links.insert(href.path().to_string());
                    }
                }
            }
        }
    }

    let text = if let Some(selector) = &options.extract {
        // Narrow to the configured CSS selector before converting.
        let selector = Selector::parse(selector)
            .map_err(|err| anyhow!("Invalid extract selector, {}", err))?;
        document
            .select(&selector)
            .map(|v| html_to_md(&v.html()))
            .collect::<Vec<String>>()
            .join("\n\n")
    } else {
        html_to_md(&body)
    };

    Ok((path.to_string(), text, links.into_iter().collect()))
}
|
||||
|
||||
fn should_exclude_link(link: &str, exclude: &[String]) -> bool {
|
||||
if link.contains("#") {
|
||||
return true;
|
||||
}
|
||||
let parts: Vec<&str> = link.trim_end_matches('/').split('/').collect();
|
||||
let name = parts.last().unwrap_or(&"").to_lowercase();
|
||||
|
||||
for exclude_name in exclude {
|
||||
let cond = match EXTENSION_RE.is_match(exclude_name) {
|
||||
Ok(true) => exclude_name.to_lowercase() == name.to_lowercase(),
|
||||
_ => exclude_name.to_lowercase() == EXTENSION_RE.replace(&name, "").to_lowercase(),
|
||||
};
|
||||
if cond {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Derive the base URL used to resolve crawled links: strip the query and
/// fragment, then truncate the path to its last `/` (keeping the slash) so
/// relative joins resolve within the same "directory".
fn normalize_start_url(start_url: &Url) -> Url {
    let mut base = start_url.clone();
    base.set_query(None);
    base.set_fragment(None);
    if let Some(last_slash) = base.path().rfind('/') {
        let dir_path = base.path()[..last_slash + 1].to_string();
        base.set_path(&dir_path);
    }
    base
}
|
||||
|
||||
/// True when `path` already covers `link`, treating trailing `/index.html`
/// and `/index.htm` as equivalent to their parent directory path.
fn match_link(path: &str, link: &str) -> bool {
    if path == link {
        return true;
    }
    let canonical = link
        .trim_end_matches("/index.html")
        .trim_end_matches("/index.htm");
    path == canonical
}
|
||||
@@ -0,0 +1,217 @@
|
||||
use super::{poll_abort_signal, wait_abort_signal, AbortSignal, IS_STDOUT_TERMINAL};
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use crossterm::{cursor, queue, style, terminal};
|
||||
use std::{
|
||||
future::Future,
|
||||
io::{stdout, Write},
|
||||
time::Duration,
|
||||
};
|
||||
use tokio::{
|
||||
sync::{
|
||||
mpsc::{self, UnboundedReceiver},
|
||||
oneshot,
|
||||
},
|
||||
time::interval,
|
||||
};
|
||||
|
||||
/// Terminal spinner state: the current animation frame index and the
/// message shown next to it (empty while the spinner is hidden).
#[derive(Debug, Default)]
pub struct SpinnerInner {
    index: usize,
    message: String,
}
|
||||
|
||||
impl SpinnerInner {
    // Braille animation frames, advanced one per `step`.
    const DATA: [&'static str; 10] = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];

    /// Draw the next animation frame in place; no-op when stdout is not a
    /// terminal or there is no message to show.
    fn step(&mut self) -> Result<()> {
        if !*IS_STDOUT_TERMINAL || self.message.is_empty() {
            return Ok(());
        }
        let mut writer = stdout();
        let frame = Self::DATA[self.index % Self::DATA.len()];
        // Trailing dots cycle 0..=3 every 5 frames.
        let dots = ".".repeat((self.index / 5) % 4);
        let line = format!("{frame}{}{:<3}", self.message, dots);
        queue!(writer, cursor::MoveToColumn(0), style::Print(line),)?;
        if self.index == 0 {
            // Hide the cursor on the first frame; `clear_message` restores it.
            queue!(writer, cursor::Hide)?;
        }
        writer.flush()?;
        self.index += 1;
        Ok(())
    }

    /// Replace the spinner message, clearing the current line first.
    /// An empty `message` leaves the spinner hidden.
    fn set_message(&mut self, message: String) -> Result<()> {
        self.clear_message()?;
        if !message.is_empty() {
            // Leading space separates the message from the frame glyph.
            self.message = format!(" {message}");
        }
        Ok(())
    }

    /// Erase the spinner line and restore the cursor; no-op when stdout is
    /// not a terminal or nothing is currently shown.
    fn clear_message(&mut self) -> Result<()> {
        if !*IS_STDOUT_TERMINAL || self.message.is_empty() {
            return Ok(());
        }
        self.message.clear();
        let mut writer = stdout();
        queue!(
            writer,
            cursor::MoveToColumn(0),
            terminal::Clear(terminal::ClearType::FromCursorDown),
            cursor::Show
        )?;
        writer.flush()?;
        Ok(())
    }
}
|
||||
|
||||
/// Cloneable handle that controls a spinner task through an event channel.
#[derive(Clone)]
pub struct Spinner(mpsc::UnboundedSender<SpinnerEvent>);
|
||||
|
||||
impl Spinner {
    /// Create a handle plus the receiver a spinner task should drain, with
    /// the initial `message` already queued.
    pub fn create(message: &str) -> (Self, UnboundedReceiver<SpinnerEvent>) {
        let (tx, spinner_rx) = mpsc::unbounded_channel();
        let spinner = Spinner(tx);
        let _ = spinner.set_message(message.to_string());
        (spinner, spinner_rx)
    }

    /// Ask the spinner task to show `message`.
    pub fn set_message(&self, message: String) -> Result<()> {
        self.0.send(SpinnerEvent::SetMessage(message))?;
        // Brief pause gives the spinner task time to repaint.
        // NOTE(review): this blocks the calling thread even from async
        // contexts — confirm that is intentional.
        std::thread::sleep(Duration::from_millis(10));
        Ok(())
    }

    /// Ask the spinner task to clear its output and exit; send errors
    /// (task already gone) are ignored.
    pub fn stop(&self) {
        let _ = self.0.send(SpinnerEvent::Stop);
        std::thread::sleep(Duration::from_millis(10));
    }
}
|
||||
|
||||
/// Control messages understood by spinner tasks.
pub enum SpinnerEvent {
    /// Show a new message (empty hides the spinner).
    SetMessage(String),
    /// Clear the display and terminate the spinner task.
    Stop,
}
|
||||
|
||||
/// Spawn a background task that animates a spinner (one frame every 50 ms)
/// until it receives `SpinnerEvent::Stop`; returns the controlling handle.
pub fn spawn_spinner(message: &str) -> Spinner {
    let (spinner, mut spinner_rx) = Spinner::create(message);
    tokio::spawn(async move {
        let mut spinner = SpinnerInner::default();
        let mut interval = interval(Duration::from_millis(50));
        loop {
            tokio::select! {
                // Control events take priority over animation ticks.
                evt = spinner_rx.recv() => {
                    if let Some(evt) = evt {
                        match evt {
                            SpinnerEvent::SetMessage(message) => {
                                spinner.set_message(message)?;
                            }
                            SpinnerEvent::Stop => {
                                spinner.clear_message()?;
                                break;
                            }
                        }
                    }
                }
                _ = interval.tick() => {
                    // Drawing failures are ignored; the loop keeps running.
                    let _ = spinner.step();
                }
            }
        }
        Ok::<(), anyhow::Error>(())
    });
    spinner
}
|
||||
|
||||
/// Run `task` with a spinner showing `message`, aborting on Ctrl-C or on the
/// abort signal. Convenience wrapper around `abortable_run_with_spinner_rx`.
pub async fn abortable_run_with_spinner<F, T>(
    task: F,
    message: &str,
    abort_signal: AbortSignal,
) -> Result<T>
where
    F: Future<Output = Result<T>>,
{
    // The send half is dropped immediately; the initial message is already
    // queued on `spinner_rx`.
    let (_, spinner_rx) = Spinner::create(message);
    abortable_run_with_spinner_rx(task, spinner_rx, abort_signal).await
}
|
||||
|
||||
/// Drive `task` and a spinner concurrently when stdout is a terminal;
/// otherwise just await the task.
///
/// The task is raced against Ctrl-C and `abort_signal`; whichever finishes
/// first signals `done_tx` so the spinner loop exits and the screen is
/// restored before the result is returned.
pub async fn abortable_run_with_spinner_rx<F, T>(
    task: F,
    spinner_rx: UnboundedReceiver<SpinnerEvent>,
    abort_signal: AbortSignal,
) -> Result<T>
where
    F: Future<Output = Result<T>>,
{
    if *IS_STDOUT_TERMINAL {
        let (done_tx, done_rx) = oneshot::channel();
        let run_task = async {
            tokio::select! {
                ret = task => {
                    let _ = done_tx.send(());
                    ret
                }
                _ = tokio::signal::ctrl_c() => {
                    abort_signal.set_ctrlc();
                    let _ = done_tx.send(());
                    bail!("Aborted!")
                },
                _ = wait_abort_signal(&abort_signal) => {
                    let _ = done_tx.send(());
                    bail!("Aborted.");
                },
            }
        };
        // Run the task and the spinner together; the spinner stops once
        // `done_rx` fires.
        let (task_ret, spinner_ret) = tokio::join!(
            run_task,
            run_abortable_spinner(spinner_rx, done_rx, abort_signal.clone())
        );
        spinner_ret?;
        task_ret
    } else {
        // No terminal: no spinner, no keyboard polling.
        task.await
    }
}
|
||||
|
||||
/// Animate the spinner until the associated task completes or an abort
/// is requested.
///
/// Exits when: `abort_signal` is set, `done_rx` fires (or its sender is
/// dropped), or `poll_abort_signal` reports an abort keypress. The
/// spinner line is always cleared before returning.
async fn run_abortable_spinner(
    mut spinner_rx: UnboundedReceiver<SpinnerEvent>,
    mut done_rx: oneshot::Receiver<()>,
    abort_signal: AbortSignal,
) -> Result<()> {
    let mut spinner = SpinnerInner::default();
    loop {
        if abort_signal.aborted() {
            break;
        }

        // Frame delay; also paces the non-blocking polls below.
        tokio::time::sleep(Duration::from_millis(25)).await;

        // Task finished — or its sender was dropped (Closed); both mean
        // there is nothing left to spin for.
        match done_rx.try_recv() {
            Ok(_) | Err(oneshot::error::TryRecvError::Closed) => {
                break;
            }
            _ => {}
        }

        // Drain at most one pending spinner event per frame. Note that
        // `Stop` clears the message but does NOT break: `done_rx` /
        // `abort_signal` remain the loop's termination conditions.
        match spinner_rx.try_recv() {
            Ok(SpinnerEvent::SetMessage(message)) => {
                spinner.set_message(message)?;
            }
            Ok(SpinnerEvent::Stop) => {
                spinner.clear_message()?;
            }
            // Empty and Disconnected are both benign here.
            Err(_) => {}
        }

        // Reads keyboard events and records Ctrl-C/Ctrl-D on the shared
        // signal; returns true when an abort key was seen.
        if poll_abort_signal(&abort_signal)? {
            break;
        }

        spinner.step()?;
    }

    // Always leave the terminal line clean, whatever ended the loop.
    spinner.clear_message()?;
    Ok(())
}
|
||||
@@ -0,0 +1,32 @@
|
||||
use super::*;
|
||||
use fancy_regex::{Captures, Regex};
|
||||
use std::sync::LazyLock;
|
||||
|
||||
pub static RE_VARIABLE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{\{(\w+)\}\}").unwrap());
|
||||
pub fn interpolate_variables(text: &mut String) {
|
||||
*text = RE_VARIABLE
|
||||
.replace_all(text, |caps: &Captures<'_>| {
|
||||
let key = &caps[1];
|
||||
match key {
|
||||
"__os__" => env::consts::OS.to_string(),
|
||||
"__os_distro__" => {
|
||||
let info = os_info::get();
|
||||
if env::consts::OS == "linux" {
|
||||
format!("{info} (linux)")
|
||||
} else {
|
||||
info.to_string()
|
||||
}
|
||||
}
|
||||
"__os_family__" => env::consts::FAMILY.to_string(),
|
||||
"__arch__" => env::consts::ARCH.to_string(),
|
||||
"__shell__" => SHELL.name.clone(),
|
||||
"__locale__" => sys_locale::get_locale().unwrap_or_default(),
|
||||
"__now__" => now(),
|
||||
"__cwd__" => env::current_dir()
|
||||
.map(|v| v.display().to_string())
|
||||
.unwrap_or_default(),
|
||||
_ => format!("{{{{{key}}}}}"),
|
||||
}
|
||||
})
|
||||
.to_string();
|
||||
}
|
||||
Reference in New Issue
Block a user