diff --git a/config.example.yaml b/config.example.yaml
index 4808563..edeac0f 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -95,7 +95,7 @@ rag_reranker_model: null # Specifies the reranker model used for sorting
rag_top_k: 5 # Specifies the number of documents to retrieve for answering queries
rag_chunk_size: null # Defines the size of chunks for document processing in characters
rag_chunk_overlap: null # Defines the overlap between chunks
-# Defines the query structure using variables like __CONTEXT__ and __INPUT__ to tailor searches to specific needs
+# Defines the query structure using variables like __CONTEXT__, __SOURCES__, and __INPUT__ to tailor searches to specific needs
rag_template: |
Answer the query based on the context while respecting the rules. (user query, some textual context and rules, all inside xml tags)
@@ -103,6 +103,10 @@ rag_template: |
__CONTEXT__
+  <sources>
+  __SOURCES__
+  </sources>
+
- If you don't know, just say so.
- If you are not sure, ask for clarification.
@@ -110,6 +114,7 @@ rag_template: |
- If the context appears unreadable or of poor quality, tell the user then answer as best as you can.
- If the answer is not in the context but you think you know the answer, explain that to the user then answer with your own knowledge.
- Answer directly and without using xml tags.
+  - When using information from the context, cite the relevant source from the <sources> section.
diff --git a/docs/RAG.md b/docs/RAG.md
index 184f024..ea3fe9e 100644
--- a/docs/RAG.md
+++ b/docs/RAG.md
@@ -265,12 +265,14 @@ When you use RAG in Loki, after Loki performs the lookup for relevant chunks of
will add the retrieved text chunks as context to your query before sending it to the model. The format of this context
is determined by the `rag_template` setting in your global Loki configuration file.
-This template utilizes two placeholders:
+This template utilizes three placeholders:
* `__INPUT__`: The user's actual query
* `__CONTEXT__`: The context retrieved from RAG
+* `__SOURCES__`: A de-duplicated bullet list of the source file paths or URLs that the retrieved context came from
These placeholders are replaced with the corresponding values into the template and make up what's actually passed to
-the model at query-time.
+the model at query-time. The `__SOURCES__` placeholder enables the model to cite which documents its answer is based on,
+which is especially useful when building knowledge-base assistants that need to provide verifiable references.
The default template that Loki uses is the following:
@@ -281,6 +283,10 @@ Answer the query based on the context while respecting the rules. (user query, s
__CONTEXT__
+<sources>
+__SOURCES__
+</sources>
+
- If you don't know, just say so.
- If you are not sure, ask for clarification.
@@ -288,6 +294,7 @@ __CONTEXT__
- If the context appears unreadable or of poor quality, tell the user then answer as best as you can.
- If the answer is not in the context but you think you know the answer, explain that to the user then answer with your own knowledge.
- Answer directly and without using xml tags.
+- When using information from the context, cite the relevant source from the <sources> section.
@@ -296,4 +303,5 @@ __INPUT__
```
You can customize this template by specifying the `rag_template` setting in your global Loki configuration file. Your
-template *must* include both the `__INPUT__` and `__CONTEXT__` placeholders in order for it to be valid.
+template *must* include both the `__INPUT__` and `__CONTEXT__` placeholders in order for it to be valid. The
+`__SOURCES__` placeholder is optional; if omitted, only the per-chunk `[Source: ...]` labels in the context remain.
diff --git a/src/config/mod.rs b/src/config/mod.rs
index fb2d5ae..e1e4b24 100644
--- a/src/config/mod.rs
+++ b/src/config/mod.rs
@@ -96,6 +96,10 @@ const RAG_TEMPLATE: &str = r#"Answer the query based on the context while respec
__CONTEXT__
+
+__SOURCES__
+
+
- If you don't know, just say so.
- If you are not sure, ask for clarification.
@@ -103,6 +107,7 @@ __CONTEXT__
- If the context appears unreadable or of poor quality, tell the user then answer as best as you can.
- If the answer is not in the context but you think you know the answer, explain that to the user then answer with your own knowledge.
- Answer directly and without using xml tags.
+- When using information from the context, cite the relevant source from the section.
@@ -1756,10 +1761,10 @@ impl Config {
abort_signal: AbortSignal,
) -> Result {
let (reranker_model, top_k) = rag.get_config();
- let (embeddings, ids) = rag
+ let (embeddings, sources, ids) = rag
.search(text, top_k, reranker_model.as_deref(), abort_signal)
.await?;
- let text = config.read().rag_template(&embeddings, text);
+ let text = config.read().rag_template(&embeddings, &sources, text);
rag.set_last_sources(&ids);
Ok(text)
}
@@ -1781,7 +1786,7 @@ impl Config {
}
}
- pub fn rag_template(&self, embeddings: &str, text: &str) -> String {
+ pub fn rag_template(&self, embeddings: &str, sources: &str, text: &str) -> String {
if embeddings.is_empty() {
return text.to_string();
}
@@ -1789,6 +1794,7 @@ impl Config {
.as_deref()
.unwrap_or(RAG_TEMPLATE)
.replace("__CONTEXT__", embeddings)
+ .replace("__SOURCES__", sources)
.replace("__INPUT__", text)
}
diff --git a/src/rag/mod.rs b/src/rag/mod.rs
index 8d5f6bf..2f9320d 100644
--- a/src/rag/mod.rs
+++ b/src/rag/mod.rs
@@ -298,16 +298,48 @@ impl Rag {
top_k: usize,
rerank_model: Option<&str>,
abort_signal: AbortSignal,
- ) -> Result<(String, Vec)> {
+ ) -> Result<(String, String, Vec)> {
let ret = abortable_run_with_spinner(
self.hybird_search(text, top_k, rerank_model),
"Searching",
abort_signal,
)
.await;
- let (ids, documents): (Vec<_>, Vec<_>) = ret?.into_iter().unzip();
- let embeddings = documents.join("\n\n");
- Ok((embeddings, ids))
+ let results = ret?;
+ let ids: Vec<_> = results.iter().map(|(id, _)| *id).collect();
+ let embeddings = results
+ .iter()
+ .map(|(id, content)| {
+ let source = self.resolve_source(id);
+ format!("[Source: {source}]\n{content}")
+ })
+ .collect::>()
+ .join("\n\n");
+ let sources = self.format_sources(&ids);
+ Ok((embeddings, sources, ids))
+ }
+
+    /// Returns the path stored in `self.data.files` for `id`'s file index, or "unknown" if absent.
+    fn resolve_source(&self, id: &DocumentId) -> String {
+        let (file_index, _) = id.split();
+        match self.data.files.get(&file_index) {
+            Some(file) => file.path.clone(),
+            None => "unknown".to_string(),
+        }
+    }
+
+    /// Builds a "- path" bullet list of the distinct source paths behind `ids`, in
+    /// first-seen order; ids with no entry in `self.data.files` are left out.
+    fn format_sources(&self, ids: &[DocumentId]) -> String {
+        let seen: IndexSet<String> = ids
+            .iter()
+            .filter_map(|id| self.data.files.get(&id.split().0))
+            .map(|file| file.path.clone())
+            .collect();
+        seen.into_iter()
+            .map(|path| format!("- {path}"))
+            .collect::<Vec<_>>()
+            .join("\n")
     }
pub async fn sync_documents(