这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions frontends/search/src/components/ResultsPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ const ResultsPage = (props: ResultsPageProps) => {
if (!dataset) return;

let sort_by;
let mmr;

if (isSortBySearchType(props.search.debounced.sort_by)) {
props.search.debounced.sort_by.rerank_type != ""
Expand All @@ -262,6 +263,12 @@ const ResultsPage = (props: ResultsPageProps) => {
: (sort_by = undefined);
}

if (!props.search.debounced.mmr.use_mmr) {
mmr = undefined;
} else {
mmr = props.search.debounced.mmr;
}

const query =
props.search.debounced.multiQueries.length > 0
? props.search.debounced.multiQueries
Expand All @@ -280,6 +287,7 @@ const ResultsPage = (props: ResultsPageProps) => {
score_threshold: props.search.debounced.scoreThreshold,
sort_options: {
sort_by: sort_by,
mmr: mmr,
},
slim_chunks: props.search.debounced.slimChunks ?? false,
page_size: props.search.debounced.pageSize ?? 10,
Expand Down
40 changes: 40 additions & 0 deletions frontends/search/src/components/SearchForm.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -1193,6 +1193,46 @@ const SearchForm = (props: {
}}
/>
</div>
<div class="flex items-center justify-between space-x-2 p-1">
<label>Use MMR:</label>
<input
class="h-4 w-4"
type="checkbox"
checked={tempSearchValues().mmr.use_mmr}
onChange={(e) => {
setTempSearchValues((prev) => {
return {
...prev,
mmr: {
...prev.mmr,
use_mmr: e.target.checked,
},
};
});
}}
/>
</div>
<div class="flex items-center justify-between space-x-2 p-1">
<label>MMR Lambda:</label>
<input
class="w-16 rounded border border-neutral-400 p-0.5 text-black"
type="number"
value={tempSearchValues().mmr.mmr_lambda}
onChange={(e) => {
setTempSearchValues((prev) => {
return {
...prev,
mmr: {
...prev.mmr,
mmr_lambda: parseFloat(
e.currentTarget.value,
),
},
};
});
}}
/>
</div>
<div class="px-1 font-bold">Search Refinement</div>
<div class="flex items-center justify-between space-x-2 p-1">
<label>Use Quote Negated Words:</label>
Expand Down
13 changes: 13 additions & 0 deletions frontends/search/src/hooks/useSearch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ export interface SearchOptions {
prioritize_domain_specifc_words: boolean | null;
disableOnWords: string[];
sort_by: SortByField | SortBySearchType;
mmr: {
use_mmr: boolean;
mmr_lambda?: number;
};
pageSize: number;
getTotalPages: boolean;
highlightResults: boolean;
Expand Down Expand Up @@ -98,6 +102,9 @@ const initalState: SearchOptions = {
sort_by: {
field: "",
},
mmr: {
use_mmr: false,
},
pageSize: 10,
getTotalPages: true,
correctTypos: false,
Expand Down Expand Up @@ -146,6 +153,7 @@ const fromStateToParams = (state: SearchOptions): Params => {
oneTypoWordRangeMax: state.oneTypoWordRangeMax?.toString() ?? "6",
twoTypoWordRangeMin: state.twoTypoWordRangeMin.toString(),
twoTypoWordRangeMax: state.twoTypoWordRangeMax?.toString() ?? "",
mmr: JSON.stringify(state.mmr),
prioritize_domain_specifc_words:
state.prioritize_domain_specifc_words?.toString() ?? "",
disableOnWords: state.disableOnWords.join(","),
Expand Down Expand Up @@ -189,6 +197,11 @@ const fromParamsToState = (
initalState.sort_by,
pageSize: parseInt(params.pageSize ?? "10"),
getTotalPages: (params.getTotalPages ?? "true") === "true",
mmr:
(JSON.parse(params.mmr ?? "{}") as {
use_mmr: boolean;
mmr_lambda?: number;
}) ?? initalState.mmr,
correctTypos: (params.correctTypos ?? "false") === "true",
oneTypoWordRangeMin: parseInt(params.oneTypoWordRangeMin ?? "4"),
oneTypoWordRangeMax: parseIntOrNull(params.oneTypoWordRangeMax),
Expand Down
24 changes: 24 additions & 0 deletions server/src/data/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3517,6 +3517,7 @@ impl ApiKeyRequestParams {
new_message_content: payload.new_message_content,
topic_id: payload.topic_id,
user_id: payload.user_id,
sort_options: payload.sort_options,
highlight_options: self.highlight_options.or(payload.highlight_options),
search_type: self.search_type.or(payload.search_type),
use_group_search: payload.use_group_search,
Expand Down Expand Up @@ -6667,6 +6668,17 @@ pub struct SortOptions {
pub use_weights: Option<bool>,
/// Tag weights is a JSON object which can be used to boost the ranking of chunks with certain tags. This is useful for when you want to be able to bias towards chunks with a certain tag on the fly. The keys are the tag names and the values are the weights.
pub tag_weights: Option<HashMap<String, f32>>,
/// Set use_mmr to true to use the Maximal Marginal Relevance algorithm to rerank the results. If not specified, this defaults to false.
pub mmr: Option<MmrOptions>,
}

#[derive(Serialize, Deserialize, Debug, Clone, ToSchema, Default)]
// The derived `Default` yields `use_mmr: false` and `mmr_lambda: None`.
/// MMR Options lets you configure reranking of the chunks in the result set with the Maximal Marginal Relevance (MMR) algorithm.
pub struct MmrOptions {
/// Set use_mmr to true to use the Maximal Marginal Relevance algorithm to rerank the results.
pub use_mmr: bool,
// NOTE(review): in the commonly cited MMR formulation, lambda weights the relevance term, so values
// near 1.0 favor relevance and values near 0.0 favor diversity -- the opposite of the wording below.
// The code that consumes `mmr_lambda` (and applies the 0.5 default) is not visible here; confirm the
// direction and the default against that implementation before relying on this description.
/// Set mmr_lambda to a value between 0.0 and 1.0 to control the tradeoff between relevance and diversity. Closer to 1.0 will give more diverse results, closer to 0.0 will give more relevant results. If not specified, this defaults to 0.5.
pub mmr_lambda: Option<f32>,
}

#[derive(Serialize, Deserialize, Debug, Clone, ToSchema, Default)]
Expand Down Expand Up @@ -6787,6 +6799,9 @@ fn extract_sort_highlight_options(
if let Some(value) = other.remove("tag_weights") {
sort_options.tag_weights = serde_json::from_value(value).ok();
}
if let Some(value) = other.remove("mmr") {
sort_options.mmr = serde_json::from_value(value).ok();
}

// Extract highlight options
if let Some(value) = other.remove("highlight_results") {
Expand Down Expand Up @@ -6815,6 +6830,7 @@ fn extract_sort_highlight_options(
&& sort_options.location_bias.is_none()
&& sort_options.use_weights.is_none()
&& sort_options.tag_weights.is_none()
&& sort_options.mmr.is_none()
{
None
} else {
Expand Down Expand Up @@ -7140,6 +7156,7 @@ impl<'de> Deserialize<'de> for CreateMessageReqPayload {
pub search_type: Option<SearchMethod>,
pub concat_user_messages_query: Option<bool>,
pub search_query: Option<String>,
pub sort_options: Option<SortOptions>,
pub page_size: Option<u64>,
pub filters: Option<ChunkFilter>,
pub score_threshold: Option<f32>,
Expand Down Expand Up @@ -7169,6 +7186,7 @@ impl<'de> Deserialize<'de> for CreateMessageReqPayload {
new_message_content: helper.new_message_content,
topic_id: helper.topic_id,
highlight_options,
sort_options: helper.sort_options,
search_type: helper.search_type,
use_group_search: helper.use_group_search,
concat_user_messages_query: helper.concat_user_messages_query,
Expand All @@ -7195,6 +7213,8 @@ impl<'de> Deserialize<'de> for RegenerateMessageReqPayload {
pub highlight_options: Option<HighlightOptions>,
pub search_type: Option<SearchMethod>,
pub concat_user_messages_query: Option<bool>,
pub sort_options: Option<SortOptions>,

pub search_query: Option<String>,
pub page_size: Option<u64>,
pub filters: Option<ChunkFilter>,
Expand Down Expand Up @@ -7224,6 +7244,7 @@ impl<'de> Deserialize<'de> for RegenerateMessageReqPayload {
Ok(RegenerateMessageReqPayload {
topic_id: helper.topic_id,
highlight_options,
sort_options: helper.sort_options,
search_type: helper.search_type,
concat_user_messages_query: helper.concat_user_messages_query,
search_query: helper.search_query,
Expand Down Expand Up @@ -7251,6 +7272,8 @@ impl<'de> Deserialize<'de> for EditMessageReqPayload {
pub new_message_content: String,
pub highlight_options: Option<HighlightOptions>,
pub search_type: Option<SearchMethod>,
pub sort_options: Option<SortOptions>,

pub use_group_search: Option<bool>,
pub concat_user_messages_query: Option<bool>,
pub search_query: Option<String>,
Expand Down Expand Up @@ -7281,6 +7304,7 @@ impl<'de> Deserialize<'de> for EditMessageReqPayload {
Ok(EditMessageReqPayload {
topic_id: helper.topic_id,
message_sort_order: helper.message_sort_order,
sort_options: helper.sort_options,
new_message_content: helper.new_message_content,
highlight_options,
search_type: helper.search_type,
Expand Down
10 changes: 9 additions & 1 deletion server/src/handlers/message_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::{
data::models::{
self, ChunkMetadata, ChunkMetadataStringTagSet, ChunkMetadataTypes, ContextOptions,
DatasetAndOrgWithSubAndPlan, DatasetConfiguration, HighlightOptions, LLMOptions, Pool,
QdrantChunkMetadata, RedisPool, SearchMethod, SuggestType,
QdrantChunkMetadata, RedisPool, SearchMethod, SortOptions, SuggestType,
},
errors::ServiceError,
get_env,
Expand Down Expand Up @@ -98,6 +98,8 @@ pub struct CreateMessageReqPayload {
pub search_query: Option<String>,
/// Page size is the number of chunks to fetch during RAG. If 0, then no search will be performed. If specified, this will override the N retrievals to include in the dataset configuration. Default is None.
pub page_size: Option<u64>,
/// Sort Options lets you specify different methods to rerank the chunks in the result set. If not specified, this defaults to the score of the chunks.
pub sort_options: Option<SortOptions>,
/// Filters is a JSON object which can be used to filter chunks. This is useful for when you want to filter chunks by arbitrary metadata. Unlike with tag filtering, there is a performance hit for filtering on metadata.
pub filters: Option<ChunkFilter>,
/// Set score_threshold to a float to filter out chunks with a score below the threshold. This threshold applies before weight and bias modifications. If not specified, this defaults to 0.0.
Expand Down Expand Up @@ -349,6 +351,8 @@ pub struct RegenerateMessageReqPayload {
pub search_query: Option<String>,
/// Page size is the number of chunks to fetch during RAG. If 0, then no search will be performed. If specified, this will override the N retrievals to include in the dataset configuration. Default is None.
pub page_size: Option<u64>,
/// Sort Options lets you specify different methods to rerank the chunks in the result set. If not specified, this defaults to the score of the chunks.
pub sort_options: Option<SortOptions>,
/// Filters is a JSON object which can be used to filter chunks. This is useful for when you want to filter chunks by arbitrary metadata. Unlike with tag filtering, there is a performance hit for filtering on metadata.
pub filters: Option<ChunkFilter>,
/// Set score_threshold to a float to filter out chunks with a score below the threshold. This threshold applies before weight and bias modifications. If not specified, this defaults to 0.0.
Expand Down Expand Up @@ -381,6 +385,8 @@ pub struct EditMessageReqPayload {
pub concat_user_messages_query: Option<bool>,
/// Query is the search query. This can be any string. The search_query will be used to create a dense embedding vector and/or sparse vector which will be used to find the result set. If not specified, will default to the last user message or HyDE if HyDE is enabled in the dataset configuration. Default is None.
pub search_query: Option<String>,
/// Sort Options lets you specify different methods to rerank the chunks in the result set. If not specified, this defaults to the score of the chunks.
pub sort_options: Option<SortOptions>,
/// Page size is the number of chunks to fetch during RAG. If 0, then no search will be performed. If specified, this will override the N retrievals to include in the dataset configuration. Default is None.
pub page_size: Option<u64>,
/// Filters is a JSON object which can be used to filter chunks. This is useful for when you want to filter chunks by arbitrary metadata. Unlike with tag filtering, there is a performance hit for filtering on metadata.
Expand All @@ -404,6 +410,7 @@ impl From<EditMessageReqPayload> for CreateMessageReqPayload {
topic_id: data.topic_id,
highlight_options: data.highlight_options,
search_type: data.search_type,
sort_options: data.sort_options,
use_group_search: data.use_group_search,
concat_user_messages_query: data.concat_user_messages_query,
search_query: data.search_query,
Expand All @@ -426,6 +433,7 @@ impl From<RegenerateMessageReqPayload> for CreateMessageReqPayload {
highlight_options: data.highlight_options,
search_type: data.search_type,
use_group_search: data.use_group_search,
sort_options: data.sort_options,
concat_user_messages_query: data.concat_user_messages_query,
search_query: data.search_query,
page_size: data.page_size,
Expand Down
1 change: 1 addition & 0 deletions server/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,7 @@ impl Modify for SecurityAddon {
data::models::OrganizationUsageCount,
data::models::Dataset,
data::models::DatasetAndUsage,
data::models::MmrOptions,
data::models::DatasetUsageCount,
data::models::DatasetDTO,
data::models::DatasetUsageCount,
Expand Down
2 changes: 1 addition & 1 deletion server/src/operators/chunk_operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ pub struct ChunkMetadataWithQdrantId {
pub qdrant_id: uuid::Uuid,
}

pub async fn get_chunk_metadatas_and_collided_chunks_from_point_ids_query(
pub async fn get_chunk_metadatas_from_point_ids_query(
point_ids: Vec<uuid::Uuid>,
pool: web::Data<Pool>,
) -> Result<Vec<ChunkMetadataTypes>, ServiceError> {
Expand Down
2 changes: 2 additions & 0 deletions server/src/operators/message_operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ pub async fn get_rag_chunks_query(
.page_size
.unwrap_or(n_retrievals_to_include.try_into().unwrap_or(8)),
),
sort_options: create_message_req_payload.sort_options,
highlight_options: create_message_req_payload.highlight_options,
filters: create_message_req_payload.filters,
group_size: Some(1),
Expand Down Expand Up @@ -453,6 +454,7 @@ pub async fn get_rag_chunks_query(
search_type: search_type.clone(),
query: QueryTypes::Single(query.clone()),
score_threshold: create_message_req_payload.score_threshold,
sort_options: create_message_req_payload.sort_options,
page_size: Some(
create_message_req_payload
.page_size
Expand Down
Loading