diff --git a/api/src/main/java/com/theokanning/openai/audio/CreateTranscriptionRequest.java b/api/src/main/java/com/theokanning/openai/audio/CreateTranscriptionRequest.java
index 5c50480..33f15ad 100644
--- a/api/src/main/java/com/theokanning/openai/audio/CreateTranscriptionRequest.java
+++ b/api/src/main/java/com/theokanning/openai/audio/CreateTranscriptionRequest.java
@@ -3,6 +3,8 @@
 import com.fasterxml.jackson.annotation.JsonProperty;
 import lombok.*;
 
+import java.util.List;
+
 /**
  * A request for OpenAi to create transcription based on an audio file
  * All fields except model are optional
@@ -43,4 +45,13 @@ public class CreateTranscriptionRequest {
      * The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.
      */
     String language;
+
+    /**
+     * The timestamp granularities to populate for this transcription. response_format must be set to verbose_json to use timestamp granularities.
+     * Either or both of these options are supported: word, segment.
+     * Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency.
+     */
+    @JsonProperty("timestamp_granularities")
+    List<String> timestampGranularities;
+
 }
diff --git a/api/src/main/java/com/theokanning/openai/audio/TranscriptionResult.java b/api/src/main/java/com/theokanning/openai/audio/TranscriptionResult.java
index 2794a34..561f624 100644
--- a/api/src/main/java/com/theokanning/openai/audio/TranscriptionResult.java
+++ b/api/src/main/java/com/theokanning/openai/audio/TranscriptionResult.java
@@ -41,4 +41,9 @@ public class TranscriptionResult {
      */
     List segments;
 
+    /**
+     * Word-level entries. Populated when the {@code word} timestamp granularity is requested.
+     */
+    List<Words> words;
+
 }
diff --git a/api/src/main/java/com/theokanning/openai/audio/Words.java b/api/src/main/java/com/theokanning/openai/audio/Words.java
new file mode 100644
index 0000000..dbf19ba
--- /dev/null
+++ b/api/src/main/java/com/theokanning/openai/audio/Words.java
@@ -0,0 +1,18 @@
+package com.theokanning.openai.audio;
+
+import lombok.Data;
+
+/**
+ * A single word of a transcription together with its start and end offsets (presumably seconds; confirm against the API).
+ *
+ * @author LiangTao
+ * @date 2024-05-14 09:56
+ **/
+@Data
+public class Words {
+    private String word;
+
+    private Double start;
+
+    private Double end;
+}
diff --git a/service/src/main/java/com/theokanning/openai/service/OpenAiService.java b/service/src/main/java/com/theokanning/openai/service/OpenAiService.java
index bd1c4c2..5b7814c 100644
--- a/service/src/main/java/com/theokanning/openai/service/OpenAiService.java
+++ b/service/src/main/java/com/theokanning/openai/service/OpenAiService.java
@@ -378,7 +378,11 @@ public TranscriptionResult createTranscription(CreateTranscriptionRequest reques
         if (request.getLanguage() != null) {
             builder.addFormDataPart("language", request.getLanguage());
         }
-
+        if (request.getTimestampGranularities() != null && !request.getTimestampGranularities().isEmpty()) {
+            for (String granularity : request.getTimestampGranularities()) {
+                builder.addFormDataPart("timestamp_granularities[]", granularity);
+            }
+        }
         return execute(api.createTranscription(builder.build()));
     }
 
diff --git a/service/src/test/java/com/theokanning/openai/service/AudioTest.java b/service/src/test/java/com/theokanning/openai/service/AudioTest.java
index efb89d7..6371770 100644
--- a/service/src/test/java/com/theokanning/openai/service/AudioTest.java
+++ b/service/src/test/java/com/theokanning/openai/service/AudioTest.java
@@ -7,6 +7,7 @@
 
 import java.io.IOException;
 import java.time.Duration;
+import java.util.Arrays;
 
 import static org.junit.jupiter.api.Assertions.*;
 
@@ -34,6 +35,7 @@ void createTranscriptionVerbose() {
         CreateTranscriptionRequest createTranscriptionRequest = CreateTranscriptionRequest.builder()
                 .model("whisper-1")
                 .responseFormat("verbose_json")
+                .timestampGranularities(Arrays.asList("word", "segment"))
                 .build();
 
         TranscriptionResult result = service.createTranscription(createTranscriptionRequest, englishAudioFilePath);
@@ -42,6 +44,7 @@ void createTranscriptionVerbose() {
         assertEquals("english", result.getLanguage());
         assertTrue(result.getDuration() > 0);
         assertEquals(1, result.getSegments().size());
+        assertEquals(2, result.getWords().size());
     }
 
     @Test