这是indexloc提供的服务,不要输入任何密码
Skip to content

support customize how the file_search tool chunks your data and how m… #35

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package com.theokanning.openai.assistants.assistant;

import lombok.Data;

/**
* Chunking strategy whose {@code type} discriminator is fixed to {@code "auto"}.
* <p>
* The class declares no settings of its own; a pre-built instance is exposed through
* {@link #instance}.
*
* @author LiangTao
* @date 2024-06-05 10:57
**/
@Data
public class AutoChunkingStrategy implements ChunkingStrategy {
/** Pre-built instance, created once when this class is initialized. */
public static final AutoChunkingStrategy instance = new AutoChunkingStrategy();

/** Type discriminator; always {@code "auto"}. Read through the Lombok-generated {@code getType()}. */
private final String type = "auto";

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package com.theokanning.openai.assistants.assistant;

import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;

/**
 * Polymorphic base type for chunking strategies, discriminated by the JSON property
 * {@code type} ({@code "auto"} or {@code "static"}).
 * <p>
 * The discriminator is declared as an {@code EXISTING_PROPERTY}: every implementation already
 * exposes {@code type} through {@link #getType()}, so Jackson must not emit a second,
 * synthetic type id next to it. With the default {@code PROPERTY} inclusion the serialized
 * object contains the {@code "type"} key twice. Deserialization still selects the subtype
 * from the {@code type} property.
 *
 * @author LiangTao
 * @date 2024-06-05 10:56
 **/
@JsonTypeInfo(
        use = JsonTypeInfo.Id.NAME,
        include = JsonTypeInfo.As.EXISTING_PROPERTY,
        property = "type")
@JsonSubTypes({
        @JsonSubTypes.Type(value = AutoChunkingStrategy.class, name = "auto"),
        @JsonSubTypes.Type(value = StaticChunkingStrategy.class, name = "static"),
})
public interface ChunkingStrategy {
    /**
     * Returns the discriminator value that identifies the concrete strategy.
     *
     * @return the strategy type name; must match the name registered for the implementing
     *         class in {@link JsonSubTypes}
     */
    String getType();

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package com.theokanning.openai.assistants.assistant;

/**
* @author LiangTao
* @date 2024年06月05 11:00
**/

Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package com.theokanning.openai.assistants.assistant;

import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Data;

/**
 * Chunking strategy with caller-supplied chunk size and overlap; its {@code type}
 * discriminator is fixed to {@code "static"}.
 *
 * @author LiangTao
 * @date 2024-06-05 10:59
 **/
@Data
public class StaticChunkingStrategy implements ChunkingStrategy {
    /** Type discriminator; always {@code "static"}. */
    private final String type = "static";

    /** Chunking settings, bound to the JSON key {@code static}. */
    @JsonProperty("static")
    private Static aStatic;

    /**
     * Creates a static chunking strategy with the given settings.
     *
     * @param maxChunkSizeTokens maximum number of tokens per chunk; stored as given
     * @param chunkOverlapTokens number of tokens shared between adjacent chunks; stored as given
     * @return a new strategy whose {@code static} settings hold both values
     */
    public static StaticChunkingStrategy of(Integer maxChunkSizeTokens, Integer chunkOverlapTokens) {
        Static settings = new Static();
        settings.setMaxChunkSizeTokens(maxChunkSizeTokens);
        settings.setChunkOverlapTokens(chunkOverlapTokens);

        StaticChunkingStrategy strategy = new StaticChunkingStrategy();
        strategy.setAStatic(settings);
        return strategy;
    }

    /**
     * Returns the chunking settings.
     * <p>
     * Written by hand (Lombok skips accessors that already exist) so the JSON name is pinned
     * on the accessor itself: Jackson's default bean naming derives {@code astatic} from
     * {@code getAStatic}, which does not match the annotated field {@code aStatic}.
     *
     * @return the settings, or {@code null} if none were set
     */
    @JsonProperty("static")
    public Static getAStatic() {
        return aStatic;
    }

    /**
     * Replaces the chunking settings.
     *
     * @param aStatic the new settings; may be {@code null}
     */
    @JsonProperty("static")
    public void setAStatic(Static aStatic) {
        this.aStatic = aStatic;
    }

    /**
     * Settings carried under the {@code static} key of a static chunking strategy.
     */
    @Data
    public static class Static {
        /**
         * The maximum number of tokens in each chunk. The default value is 800. The minimum value is 100 and the maximum value is 4096.
         */
        @JsonProperty("max_chunk_size_tokens")
        private Integer maxChunkSizeTokens;

        /**
         * The number of tokens that overlap between chunks. The default value is 400.
         * <p>
         * Note that the overlap must not exceed half of max_chunk_size_tokens.
         */
        @JsonProperty("chunk_overlap_tokens")
        private Integer chunkOverlapTokens;

    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,8 @@ public class VectorStoreFileRequest {

@JsonProperty("file_id")
String fileId;

@JsonProperty("chunking_strategy")
ChunkingStrategy chunkingStrategy;

}
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
{
"file_id": "file-abc123"
"file_id": "fileid_here",
"chunking_strategy": {
"type": "static",
"static": {
"max_chunk_size_tokens": 800,
"chunk_overlap_tokens": 400
}
}
}