diff --git a/api/src/main/java/com/theokanning/openai/audio/CreateTranscriptionRequest.java b/api/src/main/java/com/theokanning/openai/audio/CreateTranscriptionRequest.java
index 5c50480..33f15ad 100644
--- a/api/src/main/java/com/theokanning/openai/audio/CreateTranscriptionRequest.java
+++ b/api/src/main/java/com/theokanning/openai/audio/CreateTranscriptionRequest.java
@@ -3,6 +3,8 @@
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.*;
+import java.util.List;
+
/**
* A request for OpenAi to create transcription based on an audio file
* All fields except model are optional
@@ -43,4 +45,13 @@ public class CreateTranscriptionRequest {
* The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.
*/
String language;
+
+ /**
+ * The timestamp granularities to populate for this transcription. response_format must be set to verbose_json to use timestamp granularities.
+ * Either or both of these options are supported: word, segment.
+ * Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency.
+ */
+ @JsonProperty("timestamp_granularities")
+ List<String> timestampGranularities;
+
}
diff --git a/api/src/main/java/com/theokanning/openai/audio/TranscriptionResult.java b/api/src/main/java/com/theokanning/openai/audio/TranscriptionResult.java
index 2794a34..561f624 100644
--- a/api/src/main/java/com/theokanning/openai/audio/TranscriptionResult.java
+++ b/api/src/main/java/com/theokanning/openai/audio/TranscriptionResult.java
@@ -41,4 +41,6 @@ public class TranscriptionResult {
*/
List segments;
+ /** Word-level entries of the transcription; typed so Jackson binds each element to {@link Words} instead of a generic map. */
+ List<Words> words;
}
diff --git a/api/src/main/java/com/theokanning/openai/audio/Words.java b/api/src/main/java/com/theokanning/openai/audio/Words.java
new file mode 100644
index 0000000..dbf19ba
--- /dev/null
+++ b/api/src/main/java/com/theokanning/openai/audio/Words.java
@@ -0,0 +1,16 @@
+package com.theokanning.openai.audio;
+
+import lombok.Data;
+
+/**
+ * A single word entry for audio transcription data: the word text plus its {@code start} and {@code end} values.
+ * @author LiangTao (created 2024-05-14 09:56)
+ */
+@Data
+public class Words {
+ private String word;
+ // NOTE(review): start/end are presumably the word's begin/end offsets within the audio, in seconds — TODO confirm
+ private Double start;
+
+ private Double end;
+}
diff --git a/service/src/main/java/com/theokanning/openai/service/OpenAiService.java b/service/src/main/java/com/theokanning/openai/service/OpenAiService.java
index bd1c4c2..5b7814c 100644
--- a/service/src/main/java/com/theokanning/openai/service/OpenAiService.java
+++ b/service/src/main/java/com/theokanning/openai/service/OpenAiService.java
@@ -378,7 +378,11 @@ public TranscriptionResult createTranscription(CreateTranscriptionRequest reques
if (request.getLanguage() != null) {
builder.addFormDataPart("language", request.getLanguage());
}
-
+ if (request.getTimestampGranularities() != null && !request.getTimestampGranularities().isEmpty()) {
+ for (String granularity : request.getTimestampGranularities()) {
+ builder.addFormDataPart("timestamp_granularities[]", granularity);
+ }
+ }
return execute(api.createTranscription(builder.build()));
}
diff --git a/service/src/test/java/com/theokanning/openai/service/AudioTest.java b/service/src/test/java/com/theokanning/openai/service/AudioTest.java
index efb89d7..6371770 100644
--- a/service/src/test/java/com/theokanning/openai/service/AudioTest.java
+++ b/service/src/test/java/com/theokanning/openai/service/AudioTest.java
@@ -7,6 +7,7 @@
import java.io.IOException;
import java.time.Duration;
+import java.util.Arrays;
import static org.junit.jupiter.api.Assertions.*;
@@ -34,6 +35,7 @@ void createTranscriptionVerbose() {
CreateTranscriptionRequest createTranscriptionRequest = CreateTranscriptionRequest.builder()
.model("whisper-1")
.responseFormat("verbose_json")
+ .timestampGranularities(Arrays.asList("word", "segment"))
.build();
TranscriptionResult result = service.createTranscription(createTranscriptionRequest, englishAudioFilePath);
@@ -42,6 +44,7 @@ void createTranscriptionVerbose() {
assertEquals("english", result.getLanguage());
assertTrue(result.getDuration() > 0);
assertEquals(1, result.getSegments().size());
+ assertEquals(2, result.getWords().size());
}
@Test