feat: add jina tools (#73)

This commit is contained in:
sigoden
2024-07-11 21:43:31 +08:00
committed by GitHub
parent b56159903b
commit 5600b68e04
4 changed files with 55 additions and 16 deletions
+16
View File
@@ -0,0 +1,16 @@
#!/usr/bin/env bash
# pipefail: without it a failed download (curl -f) is hidden by the exit
# status of the last pipeline stage and the tool would report success.
set -eo pipefail
# @describe Extract the content from a given URL.
# @meta require-tools pandoc
# @option --url! The URL to scrape.
main() {
    # Download $argc_url, convert the HTML to GitHub-flavoured Markdown and
    # append the result to the file named by $LLM_OUTPUT.
    #
    # pandoc: span and div tags are dropped from the HTML
    # (https://pandoc.org/MANUAL.html#raw-htmltex).
    # sed: removes Markdown images whose target is an inline SVG data URI.
    # The alt text is matched with [^]]* because sed -E (POSIX ERE) has no
    # lazy quantifier: the former ".*?" was greedy and erased everything from
    # the first "![" on a line up to the SVG image.
    curl -fsSL "$argc_url" | \
        pandoc -f html-native_divs-native_spans -t gfm-raw_html | \
        sed -E 's/!\[[^]]*\]\(data:image\/svg\+xml[^)]+\)//g' \
        >> "$LLM_OUTPUT"
}
# Evaluate the shell code that `argc --argc-eval` prints for this script ($0)
# and the caller's arguments. Presumably that code parses the comment tags
# above, sets the argc_* variables and invokes main -- confirm in argc docs.
eval "$(argc --argc-eval "$0" "$@")"
+17
View File
@@ -0,0 +1,17 @@
#!/usr/bin/env bash
set -e
# @describe Extract the content from a given URL.
# @env JINA_API_KEY The api key
# @option --url! The URL to scrape.
main() {
    # Request $argc_url through the https://r.jina.ai/ endpoint and append the
    # response body to the file named by $LLM_OUTPUT.
    # An Authorization header is sent only when JINA_API_KEY is non-empty.
    #
    # curl_args is local so it does not leak into the global scope, and the
    # ${VAR:-} form keeps the check safe if a caller enables `set -u`.
    local curl_args=()
    if [[ -n "${JINA_API_KEY:-}" ]]; then
        curl_args+=("-H" "Authorization: Bearer $JINA_API_KEY")
    fi
    curl -fsSL "${curl_args[@]}" "https://r.jina.ai/$argc_url" >> "$LLM_OUTPUT"
}
# Evaluate the shell code that `argc --argc-eval` prints for this script ($0)
# and the caller's arguments. Presumably that code parses the comment tags
# above, sets the argc_* variables and invokes main -- confirm in argc docs.
eval "$(argc --argc-eval "$0" "$@")"
-16
View File
@@ -1,16 +0,0 @@
#!/usr/bin/env bash
# pipefail: without it a failed download (curl -f) is hidden by the exit
# status of the last pipeline stage and the tool would report success.
set -eo pipefail
# @describe Get webpage content from a given URL.
# @meta require-tools pandoc
# @option --url! The URL to scrape.
main() {
    # Download $argc_url, convert the HTML to GitHub-flavoured Markdown and
    # append the result to the file named by $LLM_OUTPUT.
    #
    # pandoc: span and div tags are dropped from the HTML
    # (https://pandoc.org/MANUAL.html#raw-htmltex).
    # sed: removes Markdown images whose target is an inline SVG data URI.
    # The alt text is matched with [^]]* because sed -E (POSIX ERE) has no
    # lazy quantifier: the former ".*?" was greedy and erased everything from
    # the first "![" on a line up to the SVG image.
    curl -fsSL "$argc_url" | \
        pandoc -f html-native_divs-native_spans -t gfm-raw_html | \
        sed -E 's/!\[[^]]*\]\(data:image\/svg\+xml[^)]+\)//g' \
        >> "$LLM_OUTPUT"
}
# Evaluate the shell code that `argc --argc-eval` prints for this script ($0)
# and the caller's arguments. Presumably that code parses the comment tags
# above, sets the argc_* variables and invokes main -- confirm in argc docs.
eval "$(argc --argc-eval "$0" "$@")"
+22
View File
@@ -0,0 +1,22 @@
#!/usr/bin/env bash
# pipefail: without it a failed request (curl -f) is hidden by jq exiting 0
# on empty input and the tool would report success.
set -eo pipefail
# @describe Perform a web search using Jina API to get up-to-date information or additional context.
# Use this when you need current information or feel a search could provide a better answer.
# @env JINA_API_KEY The api key
# @env JINA_MAX_RESULTS=5 The max results to return.
# @option --query! The query to search for.
main() {
    # Query https://s.jina.ai/ with the URI-encoded $argc_query and append a
    # JSON array of {title, url, description} objects to the file named by
    # $LLM_OUTPUT, keeping only the leading JINA_MAX_RESULTS entries of .data.
    # An Authorization header is sent only when JINA_API_KEY is non-empty.
    #
    # The limit is handed to jq with --argjson instead of being spliced into
    # the filter text, so the value is treated as data and can never alter the
    # program. It falls back to 5 (the default declared in the tag above) when
    # the variable is empty or unset.
    local max_results="${JINA_MAX_RESULTS:-5}"
    local encoded_query
    local curl_args=("-H" "Accept: application/json")
    if [[ -n "${JINA_API_KEY:-}" ]]; then
        curl_args+=("-H" "Authorization: Bearer $JINA_API_KEY")
    fi
    encoded_query="$(jq -nr --arg q "$argc_query" '$q|@uri')"
    curl -fsSL "${curl_args[@]}" "https://s.jina.ai/$encoded_query" | \
        jq --argjson max "$max_results" \
            '[.data[:$max] | .[] | {title: .title, url: .url, description: .description}]' \
        >> "$LLM_OUTPUT"
}
# Evaluate the shell code that `argc --argc-eval` prints for this script ($0)
# and the caller's arguments. Presumably that code parses the comment tags
# above, sets the argc_* variables and invokes main -- confirm in argc docs.
eval "$(argc --argc-eval "$0" "$@")"