这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions libraries-data-3/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,11 @@
<artifactId>jackson-databind</artifactId>
<version>${jackson.version}</version>
</dependency>
<!-- JVector library (io.github.jbellis:jvector). NOTE(review): 4.0.0-rc.2 is a release-candidate version; consider moving to a final release once one is available. -->
<dependency>
<groupId>io.github.jbellis</groupId>
<artifactId>jvector</artifactId>
<version>4.0.0-rc.2</version>
</dependency>
</dependencies>

<build>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package com.baeldung.jvector;

import java.io.IOException;
import java.nio.file.Path;
import java.util.List;

import io.github.jbellis.jvector.graph.GraphIndexBuilder;
import io.github.jbellis.jvector.graph.ListRandomAccessVectorValues;
import io.github.jbellis.jvector.graph.OnHeapGraphIndex;
import io.github.jbellis.jvector.graph.RandomAccessVectorValues;
import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex;
import io.github.jbellis.jvector.graph.similarity.BuildScoreProvider;
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
import io.github.jbellis.jvector.vector.types.VectorFloat;

/**
 * Builds a JVector graph index over a list of vectors and writes it to disk.
 */
public class VectorSearch {

    // Values passed positionally to the GraphIndexBuilder constructor.
    // NOTE(review): the constant names follow the constructor's parameter names as
    // documented by JVector (M, beamWidth, neighborOverflow, alpha, addHierarchy) —
    // confirm against the library version in use.
    private static final int MAX_DEGREE = 28;
    private static final int BEAM_WIDTH = 100;
    private static final float NEIGHBOR_OVERFLOW = 1.2f;
    private static final float ALPHA = 1.2f;
    private static final boolean ADD_HIERARCHY = true;

    /**
     * Builds an on-heap graph index from {@code baseVectors} using Euclidean similarity
     * and writes it to {@code indexPath}.
     *
     * <p>The dimension of the index is taken from the first vector in the list.
     *
     * @param baseVectors vectors to index; must contain at least one element
     * @param indexPath   file the index is written to
     * @throws IllegalArgumentException if {@code baseVectors} is empty
     * @throws NullPointerException     if {@code baseVectors} is {@code null}
     * @throws IOException              if building or writing the index fails with an I/O error
     */
    public static void persistIndex(List<VectorFloat<?>> baseVectors, Path indexPath) throws IOException {
        // Fail with a clear message instead of an IndexOutOfBoundsException from get(0).
        if (baseVectors.isEmpty()) {
            throw new IllegalArgumentException("baseVectors must contain at least one vector");
        }

        int originalDimension = baseVectors.get(0)
            .length();

        RandomAccessVectorValues vectorValues = new ListRandomAccessVectorValues(baseVectors, originalDimension);

        BuildScoreProvider scoreProvider = BuildScoreProvider.randomAccessScoreProvider(vectorValues,
            VectorSimilarityFunction.EUCLIDEAN);

        // The builder is AutoCloseable; try-with-resources closes it once the index is written.
        try (GraphIndexBuilder builder = new GraphIndexBuilder(scoreProvider, vectorValues.dimension(), MAX_DEGREE,
            BEAM_WIDTH, NEIGHBOR_OVERFLOW, ALPHA, ADD_HIERARCHY)) {
            OnHeapGraphIndex index = builder.build(vectorValues);

            OnDiskGraphIndex.write(index, vectorValues, indexPath);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package com.baeldung.jvector;

import static com.baeldung.jvector.VectorSearch.persistIndex;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
import static org.junit.jupiter.api.Assertions.assertNotNull;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

import io.github.jbellis.jvector.disk.ReaderSupplier;
import io.github.jbellis.jvector.disk.ReaderSupplierFactory;
import io.github.jbellis.jvector.graph.GraphIndex;
import io.github.jbellis.jvector.graph.GraphSearcher;
import io.github.jbellis.jvector.graph.ListRandomAccessVectorValues;
import io.github.jbellis.jvector.graph.SearchResult;
import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex;
import io.github.jbellis.jvector.util.Bits;
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
import io.github.jbellis.jvector.vector.VectorizationProvider;
import io.github.jbellis.jvector.vector.types.VectorFloat;
import io.github.jbellis.jvector.vector.types.VectorTypeSupport;

/**
 * Tests for {@code VectorSearch.persistIndex}: the index is built once from a slice of the
 * GloVe dataset on the test classpath, then loaded from disk and searched.
 */
class VectorSearchTest {

    private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance()
        .getVectorTypeSupport();

    /** Classpath location of the dataset: one word per line followed by its float components. */
    private static final String DATASET_RESOURCE = "jvector/glove.6B.50d.txt";
    /** Number of float components read for each word. */
    private static final int VECTOR_DIMENSION = 50;
    /** Number of dataset lines loaded and indexed for the tests. */
    private static final int DATASET_SIZE = 1000;
    /** Number of nearest neighbours requested from the search. */
    private static final int TOP_K = 10;

    private static Path indexPath;
    private static Map<String, VectorFloat<?>> datasetVectors;

    /**
     * Loads the dataset and persists the index once for all tests.
     */
    @BeforeAll
    static void setup() throws IOException {
        datasetVectors = loadGlove6B50dDataSet(DATASET_SIZE);
        indexPath = Files.createTempFile("sample", ".inline");
        // Best-effort cleanup so repeated test runs do not accumulate temp index files.
        indexPath.toFile()
            .deleteOnExit();
        persistIndex(new ArrayList<>(datasetVectors.values()), indexPath);
    }

    @Test
    void givenLoadedDataset_whenPersistingIndex_thenPersistIndexInDisk() throws IOException {
        try (ReaderSupplier readerSupplier = ReaderSupplierFactory.open(indexPath)) {
            GraphIndex index = OnDiskGraphIndex.load(readerSupplier);
            assertInstanceOf(OnDiskGraphIndex.class, index);
        }
    }

    @Test
    void givenLoadedDataset_whenSearchingSimilarVectors_thenReturnValidSearchResult() throws IOException {
        VectorFloat<?> queryVector = datasetVectors.get("said");
        // Fail here, with a clear cause, if the query word is not part of the loaded slice.
        assertNotNull(queryVector);

        // Same map instance, unmodified since setup(), so the iteration order matches
        // the order in which the vectors were handed to persistIndex.
        List<VectorFloat<?>> vectorsList = new ArrayList<>(datasetVectors.values());

        try (ReaderSupplier readerSupplier = ReaderSupplierFactory.open(indexPath)) {
            GraphIndex index = OnDiskGraphIndex.load(readerSupplier);

            SearchResult result = GraphSearcher.search(queryVector, TOP_K,
                new ListRandomAccessVectorValues(vectorsList, vectorsList.get(0).length()),
                VectorSimilarityFunction.EUCLIDEAN, index, Bits.ALL);

            assertNotNull(result.getNodes());
            assertEquals(TOP_K, result.getNodes().length);
        }
    }

    /**
     * Reads up to {@code limit} lines of the dataset resource into a word-to-vector map.
     *
     * <p>Each line is split on single spaces: the first token is the word and the next
     * {@link #VECTOR_DIMENSION} tokens are parsed as floats.
     *
     * @param limit maximum number of lines to read; the resulting map is asserted to have exactly this size
     * @return map from word to its vector
     * @throws IOException if the resource cannot be read
     */
    private static Map<String, VectorFloat<?>> loadGlove6B50dDataSet(int limit) throws IOException {
        URL datasetResource = VectorSearchTest.class.getClassLoader()
            .getResource(DATASET_RESOURCE);
        assertNotNull(datasetResource);

        Map<String, VectorFloat<?>> vectors = new HashMap<>();

        // Read through the URL stream rather than URL.getFile(): getFile() returns a
        // URL-encoded path that breaks on spaces/special characters, and FileReader
        // would decode with the platform default charset.
        try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(datasetResource.openStream(), StandardCharsets.UTF_8))) {
            String line;
            int count = 0;
            while (count < limit && (line = reader.readLine()) != null) {
                String[] values = line.split(" ");
                String word = values[0];
                VectorFloat<?> vector = VECTOR_TYPE_SUPPORT.createFloatVector(VECTOR_DIMENSION);
                for (int i = 0; i < VECTOR_DIMENSION; i++) {
                    // values[0] is the word, so component i lives at index i + 1.
                    vector.set(i, Float.parseFloat(values[i + 1]));
                }
                vectors.put(word, vector);
                count++;
            }
        }
        // Check against the requested limit rather than a hard-coded size.
        assertEquals(limit, vectors.size());
        return vectors;
    }
}
Loading