Skip to content

Commit 6dc505f

Browse files
authored
Merge pull request #143 from jbellis/cv-dimensions
add getOriginalSize and getCompressedSize to CompressedVectors interface
2 parents b6f5774 + 329d8e3 commit 6dc505f

File tree

6 files changed

+41
-2
lines changed

6 files changed

+41
-2
lines changed

jvector-base/src/main/java/io/github/jbellis/jvector/pq/BQVectors.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,16 @@ public long[] get(int i) {
9898
return compressedVectors[i];
9999
}
100100

101+
@Override
102+
public int getOriginalSize() {
103+
return bq.getOriginalDimension() * Float.BYTES;
104+
}
105+
106+
@Override
107+
public int getCompressedSize() {
108+
return compressedVectors[0].length * Long.BYTES;
109+
}
110+
101111
@Override
102112
public long ramBytesUsed() {
103113
return compressedVectors.length * RamUsageEstimator.sizeOf(compressedVectors[0]);

jvector-base/src/main/java/io/github/jbellis/jvector/pq/BinaryQuantization.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,10 @@ public void write(DataOutput out) throws IOException {
107107
Io.writeFloats(out, globalCentroid);
108108
}
109109

110+
public int getOriginalDimension() {
111+
return globalCentroid.length;
112+
}
113+
110114
public static BinaryQuantization load(RandomAccessReader in) throws IOException {
111115
int length = in.readInt();
112116
var centroid = new float[length];

jvector-base/src/main/java/io/github/jbellis/jvector/pq/CompressedVectors.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,18 @@
2424
import java.io.IOException;
2525

2626
public interface CompressedVectors extends Accountable {
27+
/** write the compressed vectors to the given DataOutput */
2728
void write(DataOutput out) throws IOException;
2829

30+
/**
31+
* @return a ScoreFunction suitable for performing search against the compressed vectors,
32+
* potentially without decompressing them first
33+
*/
2934
NeighborSimilarity.ApproximateScoreFunction approximateScoreFunctionFor(float[] q, VectorSimilarityFunction similarityFunction);
35+
36+
/** @return the original size of the vectors, in bytes, before compression */
37+
int getOriginalSize();
38+
39+
/** @return the compressed size of the vectors, in bytes */
40+
int getCompressedSize();
3041
}

jvector-base/src/main/java/io/github/jbellis/jvector/pq/PQVectors.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,16 @@ float[] reusablePartialMagnitudes() {
125125
return partialMagnitudes.get();
126126
}
127127

128+
@Override
129+
public int getOriginalSize() {
130+
return pq.originalDimension * Float.BYTES;
131+
}
132+
133+
@Override
134+
public int getCompressedSize() {
135+
return pq.codebooks.length;
136+
}
137+
128138
@Override
129139
public long ramBytesUsed() {
130140
long codebooksSize = pq.memorySize();

jvector-base/src/main/java/io/github/jbellis/jvector/pq/ProductQuantization.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ public class ProductQuantization implements VectorCompressor<byte[]> {
4848
static final int MAX_PQ_TRAINING_SET_SIZE = 128000;
4949

5050
final float[][][] codebooks;
51-
final int M;
52-
private final int originalDimension;
51+
private final int M; // codebooks.length, redundantly reproduced for convenience
52+
final int originalDimension;
5353
private final float[] globalCentroid;
5454
final int[][] subvectorSizesAndOffsets;
5555

jvector-tests/src/test/java/io/github/jbellis/jvector/pq/TestCompressedVectors.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ public void testSaveLoadPQ() throws Exception {
4646
// Compress the vectors
4747
var compressed = pq.encodeAll(vectors);
4848
var cv = new PQVectors(pq, compressed);
49+
assertEquals(2 * Float.BYTES, cv.getOriginalSize());
50+
assertEquals(1, cv.getCompressedSize());
4951

5052
// Write compressed vectors
5153
File cvFile = File.createTempFile("pqtest", ".cv");
@@ -68,6 +70,8 @@ public void testSaveLoadBQ() throws Exception {
6870
// Compress the vectors
6971
var compressed = bq.encodeAll(vectors);
7072
var cv = new BQVectors(bq, compressed);
73+
assertEquals(64 * Float.BYTES, cv.getOriginalSize());
74+
assertEquals(8, cv.getCompressedSize());
7175

7276
// Write compressed vectors
7377
File cvFile = File.createTempFile("bqtest", ".cv");

0 commit comments

Comments
 (0)