feat: Support injecting RAG sources directly into the prompt (not only via the `.sources rag` REPL command) so models can reference the documents that support their responses
This commit is contained in:
+6
-1
@@ -95,7 +95,7 @@ rag_reranker_model: null # Specifies the reranker model used for sorting
|
|||||||
rag_top_k: 5 # Specifies the number of documents to retrieve for answering queries
|
rag_top_k: 5 # Specifies the number of documents to retrieve for answering queries
|
||||||
rag_chunk_size: null # Defines the size of chunks for document processing in characters
|
rag_chunk_size: null # Defines the size of chunks for document processing in characters
|
||||||
rag_chunk_overlap: null # Defines the overlap between chunks
|
rag_chunk_overlap: null # Defines the overlap between chunks
|
||||||
# Defines the query structure using variables like __CONTEXT__ and __INPUT__ to tailor searches to specific needs
|
# Defines the query structure using variables like __CONTEXT__, __SOURCES__, and __INPUT__ to tailor searches to specific needs
|
||||||
rag_template: |
|
rag_template: |
|
||||||
Answer the query based on the context while respecting the rules. (user query, some textual context and rules, all inside xml tags)
|
Answer the query based on the context while respecting the rules. (user query, some textual context and rules, all inside xml tags)
|
||||||
|
|
||||||
@@ -103,6 +103,10 @@ rag_template: |
|
|||||||
__CONTEXT__
|
__CONTEXT__
|
||||||
</context>
|
</context>
|
||||||
|
|
||||||
|
<sources>
|
||||||
|
__SOURCES__
|
||||||
|
</sources>
|
||||||
|
|
||||||
<rules>
|
<rules>
|
||||||
- If you don't know, just say so.
|
- If you don't know, just say so.
|
||||||
- If you are not sure, ask for clarification.
|
- If you are not sure, ask for clarification.
|
||||||
@@ -110,6 +114,7 @@ rag_template: |
|
|||||||
- If the context appears unreadable or of poor quality, tell the user then answer as best as you can.
|
- If the context appears unreadable or of poor quality, tell the user then answer as best as you can.
|
||||||
- If the answer is not in the context but you think you know the answer, explain that to the user then answer with your own knowledge.
|
- If the answer is not in the context but you think you know the answer, explain that to the user then answer with your own knowledge.
|
||||||
- Answer directly and without using xml tags.
|
- Answer directly and without using xml tags.
|
||||||
|
- When using information from the context, cite the relevant source from the <sources> section.
|
||||||
</rules>
|
</rules>
|
||||||
|
|
||||||
<user_query>
|
<user_query>
|
||||||
|
|||||||
+11
-3
@@ -265,12 +265,14 @@ When you use RAG in Loki, after Loki performs the lookup for relevant chunks of
|
|||||||
will add the retrieved text chunks as context to your query before sending it to the model. The format of this context
|
will add the retrieved text chunks as context to your query before sending it to the model. The format of this context
|
||||||
is determined by the `rag_template` setting in your global Loki configuration file.
|
is determined by the `rag_template` setting in your global Loki configuration file.
|
||||||
|
|
||||||
This template utilizes two placeholders:
|
This template utilizes three placeholders:
|
||||||
* `__INPUT__`: The user's actual query
|
* `__INPUT__`: The user's actual query
|
||||||
* `__CONTEXT__`: The context retrieved from RAG
|
* `__CONTEXT__`: The context retrieved from RAG
|
||||||
|
* `__SOURCES__`: A bulleted, de-duplicated list of the source file paths or URLs that the retrieved context came from
|
||||||
|
|
||||||
These placeholders are replaced with the corresponding values into the template and make up what's actually passed to
|
These placeholders are replaced with the corresponding values into the template and make up what's actually passed to
|
||||||
the model at query-time.
|
the model at query-time. The `__SOURCES__` placeholder enables the model to cite which documents its answer is based on,
|
||||||
|
which is especially useful when building knowledge-base assistants that need to provide verifiable references.
|
||||||
|
|
||||||
The default template that Loki uses is the following:
|
The default template that Loki uses is the following:
|
||||||
|
|
||||||
@@ -281,6 +283,10 @@ Answer the query based on the context while respecting the rules. (user query, s
|
|||||||
__CONTEXT__
|
__CONTEXT__
|
||||||
</context>
|
</context>
|
||||||
|
|
||||||
|
<sources>
|
||||||
|
__SOURCES__
|
||||||
|
</sources>
|
||||||
|
|
||||||
<rules>
|
<rules>
|
||||||
- If you don't know, just say so.
|
- If you don't know, just say so.
|
||||||
- If you are not sure, ask for clarification.
|
- If you are not sure, ask for clarification.
|
||||||
@@ -288,6 +294,7 @@ __CONTEXT__
|
|||||||
- If the context appears unreadable or of poor quality, tell the user then answer as best as you can.
|
- If the context appears unreadable or of poor quality, tell the user then answer as best as you can.
|
||||||
- If the answer is not in the context but you think you know the answer, explain that to the user then answer with your own knowledge.
|
- If the answer is not in the context but you think you know the answer, explain that to the user then answer with your own knowledge.
|
||||||
- Answer directly and without using xml tags.
|
- Answer directly and without using xml tags.
|
||||||
|
- When using information from the context, cite the relevant source from the <sources> section.
|
||||||
</rules>
|
</rules>
|
||||||
|
|
||||||
<user_query>
|
<user_query>
|
||||||
@@ -296,4 +303,5 @@ __INPUT__
|
|||||||
```
|
```
|
||||||
|
|
||||||
You can customize this template by specifying the `rag_template` setting in your global Loki configuration file. Your
|
You can customize this template by specifying the `rag_template` setting in your global Loki configuration file. Your
|
||||||
template *must* include both the `__INPUT__` and `__CONTEXT__` placeholders in order for it to be valid.
|
template *must* include both the `__INPUT__` and `__CONTEXT__` placeholders in order for it to be valid. The
|
||||||
|
`__SOURCES__` placeholder is optional. If it is omitted, the separate source list is left out of the prompt; each retrieved chunk inserted for `__CONTEXT__` is still prefixed with its `[Source: ...]` label.
|
||||||
|
|||||||
+9
-3
@@ -96,6 +96,10 @@ const RAG_TEMPLATE: &str = r#"Answer the query based on the context while respec
|
|||||||
__CONTEXT__
|
__CONTEXT__
|
||||||
</context>
|
</context>
|
||||||
|
|
||||||
|
<sources>
|
||||||
|
__SOURCES__
|
||||||
|
</sources>
|
||||||
|
|
||||||
<rules>
|
<rules>
|
||||||
- If you don't know, just say so.
|
- If you don't know, just say so.
|
||||||
- If you are not sure, ask for clarification.
|
- If you are not sure, ask for clarification.
|
||||||
@@ -103,6 +107,7 @@ __CONTEXT__
|
|||||||
- If the context appears unreadable or of poor quality, tell the user then answer as best as you can.
|
- If the context appears unreadable or of poor quality, tell the user then answer as best as you can.
|
||||||
- If the answer is not in the context but you think you know the answer, explain that to the user then answer with your own knowledge.
|
- If the answer is not in the context but you think you know the answer, explain that to the user then answer with your own knowledge.
|
||||||
- Answer directly and without using xml tags.
|
- Answer directly and without using xml tags.
|
||||||
|
- When using information from the context, cite the relevant source from the <sources> section.
|
||||||
</rules>
|
</rules>
|
||||||
|
|
||||||
<user_query>
|
<user_query>
|
||||||
@@ -1756,10 +1761,10 @@ impl Config {
|
|||||||
abort_signal: AbortSignal,
|
abort_signal: AbortSignal,
|
||||||
) -> Result<String> {
|
) -> Result<String> {
|
||||||
let (reranker_model, top_k) = rag.get_config();
|
let (reranker_model, top_k) = rag.get_config();
|
||||||
let (embeddings, ids) = rag
|
let (embeddings, sources, ids) = rag
|
||||||
.search(text, top_k, reranker_model.as_deref(), abort_signal)
|
.search(text, top_k, reranker_model.as_deref(), abort_signal)
|
||||||
.await?;
|
.await?;
|
||||||
let text = config.read().rag_template(&embeddings, text);
|
let text = config.read().rag_template(&embeddings, &sources, text);
|
||||||
rag.set_last_sources(&ids);
|
rag.set_last_sources(&ids);
|
||||||
Ok(text)
|
Ok(text)
|
||||||
}
|
}
|
||||||
@@ -1781,7 +1786,7 @@ impl Config {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn rag_template(&self, embeddings: &str, text: &str) -> String {
|
pub fn rag_template(&self, embeddings: &str, sources: &str, text: &str) -> String {
|
||||||
if embeddings.is_empty() {
|
if embeddings.is_empty() {
|
||||||
return text.to_string();
|
return text.to_string();
|
||||||
}
|
}
|
||||||
@@ -1789,6 +1794,7 @@ impl Config {
|
|||||||
.as_deref()
|
.as_deref()
|
||||||
.unwrap_or(RAG_TEMPLATE)
|
.unwrap_or(RAG_TEMPLATE)
|
||||||
.replace("__CONTEXT__", embeddings)
|
.replace("__CONTEXT__", embeddings)
|
||||||
|
.replace("__SOURCES__", sources)
|
||||||
.replace("__INPUT__", text)
|
.replace("__INPUT__", text)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
+36
-4
@@ -298,16 +298,48 @@ impl Rag {
|
|||||||
top_k: usize,
|
top_k: usize,
|
||||||
rerank_model: Option<&str>,
|
rerank_model: Option<&str>,
|
||||||
abort_signal: AbortSignal,
|
abort_signal: AbortSignal,
|
||||||
) -> Result<(String, Vec<DocumentId>)> {
|
) -> Result<(String, String, Vec<DocumentId>)> {
|
||||||
let ret = abortable_run_with_spinner(
|
let ret = abortable_run_with_spinner(
|
||||||
self.hybird_search(text, top_k, rerank_model),
|
self.hybird_search(text, top_k, rerank_model),
|
||||||
"Searching",
|
"Searching",
|
||||||
abort_signal,
|
abort_signal,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
let (ids, documents): (Vec<_>, Vec<_>) = ret?.into_iter().unzip();
|
let results = ret?;
|
||||||
let embeddings = documents.join("\n\n");
|
let ids: Vec<_> = results.iter().map(|(id, _)| *id).collect();
|
||||||
Ok((embeddings, ids))
|
let embeddings = results
|
||||||
|
.iter()
|
||||||
|
.map(|(id, content)| {
|
||||||
|
let source = self.resolve_source(id);
|
||||||
|
format!("[Source: {source}]\n{content}")
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("\n\n");
|
||||||
|
let sources = self.format_sources(&ids);
|
||||||
|
Ok((embeddings, sources, ids))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn resolve_source(&self, id: &DocumentId) -> String {
|
||||||
|
let (file_index, _) = id.split();
|
||||||
|
self.data
|
||||||
|
.files
|
||||||
|
.get(&file_index)
|
||||||
|
.map(|f| f.path.clone())
|
||||||
|
.unwrap_or_else(|| "unknown".to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn format_sources(&self, ids: &[DocumentId]) -> String {
|
||||||
|
let mut seen = IndexSet::new();
|
||||||
|
for id in ids {
|
||||||
|
let (file_index, _) = id.split();
|
||||||
|
if let Some(file) = self.data.files.get(&file_index) {
|
||||||
|
seen.insert(file.path.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
seen.into_iter()
|
||||||
|
.map(|path| format!("- {path}"))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn sync_documents(
|
pub async fn sync_documents(
|
||||||
|
|||||||
Reference in New Issue
Block a user