Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions src/collections/config/types/vectorizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export type Vectorizer =
| 'multi2vec-bind'
| Multi2VecPalmVectorizer
| 'multi2vec-google'
| 'multi2vec-google-gemini'
| 'multi2vec-jinaai'
| 'multi2multivec-jinaai'
| 'multi2multivec-weaviate'
Expand All @@ -46,7 +47,7 @@ export type Vectorizer =
| 'text2vec-openai'
| Text2VecPalmVectorizer
| 'text2vec-google'
| 'text2vec-google-ai-studio'
| 'text2vec-google-gemini'
| 'text2vec-transformers'
| 'text2vec-voyageai'
| 'text2vec-weaviate'
Expand Down Expand Up @@ -220,9 +221,11 @@ export type Multi2VecPalmConfig = Multi2VecGoogleConfig;
*/
export type Multi2VecGoogleConfig = {
/** The project ID of the model in GCP. */
projectId: string;
projectId?: string;
/** The location where the model runs. */
location: string;
location?: string;
/** The base URL for the vectorizer. */
apiEndpoint?: string;
/** The image fields used when vectorizing. */
imageFields?: string[];
/** The text fields used when vectorizing. */
Expand Down Expand Up @@ -251,6 +254,15 @@ export type Multi2VecGoogleConfig = {
};
};

/**
 * The configuration for multi-media vectorization using the Google module with Gemini API settings.
 *
 * This is `Multi2VecGoogleConfig` without the `location`, `projectId` and `apiEndpoint` fields,
 * so those three settings cannot be supplied through this type.
 *
 * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings) for detailed usage.
 */
export type Multi2VecGoogleGeminiConfig = Omit<
Multi2VecGoogleConfig,
'location' | 'projectId' | 'apiEndpoint'
>;

/** The configuration for multi-media-to-multi-vector vectorization using
* the jina-embeddings-v4 model
*
Expand Down Expand Up @@ -572,7 +584,10 @@ export type Text2VecGoogleConfig = {
vectorizeCollectionName?: boolean;
};

export type Text2VecGoogleAiStudioConfig = {
/**
 * Alias kept for backward compatibility; it resolves to exactly the same type as `Text2VecGoogleGeminiConfig`.
 *
 * @deprecated Use {@link Text2VecGoogleGeminiConfig} instead.
 */
export type Text2VecGoogleAiStudioConfig = Text2VecGoogleGeminiConfig;

export type Text2VecGoogleGeminiConfig = {
/** The model ID to use. */
model?: string;
/** The Weaviate property name for the `gecko-002` or `gecko-003` model to use as the title. */
Expand Down Expand Up @@ -658,6 +673,7 @@ export type VectorizerConfig =
| Multi2VecClipConfig
| Multi2VecBindConfig
| Multi2VecGoogleConfig
| Multi2VecGoogleGeminiConfig
| Multi2VecJinaAIConfig
| Multi2MultivecJinaAIConfig
| Multi2MultivecWeaviateConfig
Expand Down Expand Up @@ -693,6 +709,8 @@ export type VectorizerConfigType<V> = V extends 'img2vec-neural'
? Multi2VecBindConfig | undefined
: V extends 'multi2vec-google'
? Multi2VecGoogleConfig
: V extends 'multi2vec-google-gemini'
? Multi2VecGoogleGeminiConfig
: V extends 'multi2vec-jinaai'
? Multi2VecJinaAIConfig | undefined
: V extends 'multi2multivec-jinaai'
Expand Down
24 changes: 20 additions & 4 deletions src/collections/configure/types/vectorizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import {
Multi2VecCohereConfig,
Multi2VecField,
Multi2VecGoogleConfig,
Multi2VecGoogleGeminiConfig,
Multi2VecJinaAIConfig,
Multi2VecNvidiaConfig,
Multi2VecVoyageAIConfig,
Expand All @@ -20,8 +21,8 @@ import {
Text2VecContextionaryConfig,
Text2VecDatabricksConfig,
Text2VecGPT4AllConfig,
Text2VecGoogleAiStudioConfig,
Text2VecGoogleConfig,
Text2VecGoogleGeminiConfig,
Text2VecHuggingFaceConfig,
Text2VecJinaAIConfig,
Text2VecMistralConfig,
Expand Down Expand Up @@ -228,6 +229,16 @@ export type Multi2VecGoogleConfigCreate = Omit<Multi2VecGoogleConfig, Multi2VecO
videoFields?: string[] | Multi2VecField[];
};

/**
 * The creation-time configuration for the `multi2vec-google` vectorizer when it is selected
 * through the `'multi2vec-google-gemini'` key.
 *
 * Built from `Multi2VecGoogleGeminiConfig` minus the `Multi2VecOmissions` keys, with the field
 * lists re-declared so that each entry may be either a plain string or a `Multi2VecField`.
 */
export type Multi2VecGoogleGeminiConfigCreate = Omit<Multi2VecGoogleGeminiConfig, Multi2VecOmissions> & {
/** The image fields to use in vectorization. Can be string or `Multi2VecField` type. If string, weight 0 will be assumed. */
imageFields?: string[] | Multi2VecField[];
/** The text fields to use in vectorization. Can be string or `Multi2VecField` type. If string, weight 0 will be assumed. */
textFields?: string[] | Multi2VecField[];
/** The video fields to use in vectorization. Can be string or `Multi2VecField` type. If string, weight 0 will be assumed. */
videoFields?: string[] | Multi2VecField[];
};

export type Multi2VecVoyageAIConfigCreate = Omit<Multi2VecVoyageAIConfig, Multi2VecOmissions> & {
/** The image fields to use in vectorization. Can be string of `Multi2VecField` type. If string, weight 0 will be assumed. */
imageFields?: string[] | Multi2VecField[];
Expand Down Expand Up @@ -272,7 +283,10 @@ export type Text2VecPalmConfigCreate = Text2VecGoogleConfig;

export type Text2VecGoogleConfigCreate = Text2VecGoogleConfig;

export type Text2VecGoogleAiStudioConfigCreate = Text2VecGoogleAiStudioConfig;
/**
 * Alias kept for backward compatibility; it resolves to `Text2VecGoogleGeminiConfig`.
 *
 * @deprecated Use {@link Text2VecGoogleGeminiConfigCreate} instead.
 */
export type Text2VecGoogleAiStudioConfigCreate = Text2VecGoogleGeminiConfig;

/**
 * The creation-time configuration used for the `'text2vec-google-gemini'` vectorizer key.
 * It is identical to `Text2VecGoogleGeminiConfig`.
 */
export type Text2VecGoogleGeminiConfigCreate = Text2VecGoogleGeminiConfig;

export type Text2VecTransformersConfigCreate = Text2VecTransformersConfig;

Expand Down Expand Up @@ -302,6 +316,8 @@ export type VectorizerConfigCreateType<V> = V extends 'img2vec-neural'
? Multi2VecPalmConfigCreate
: V extends 'multi2vec-google'
? Multi2VecGoogleConfigCreate
: V extends 'multi2vec-google-gemini'
? Multi2VecGoogleGeminiConfigCreate
: V extends 'multi2vec-voyageai'
? Multi2VecVoyageAIConfigCreate | undefined
: V extends 'ref2vec-centroid'
Expand Down Expand Up @@ -338,8 +354,8 @@ export type VectorizerConfigCreateType<V> = V extends 'img2vec-neural'
? Text2VecPalmConfigCreate | undefined
: V extends 'text2vec-google'
? Text2VecGoogleConfigCreate | undefined
: V extends 'text2vec-google-ai-studio'
? Text2VecGoogleAiStudioConfigCreate | undefined
: V extends 'text2vec-google-gemini'
? Text2VecGoogleGeminiConfigCreate | undefined
: V extends 'text2vec-transformers'
? Text2VecTransformersConfigCreate | undefined
: V extends 'text2vec-voyageai'
Expand Down
51 changes: 47 additions & 4 deletions src/collections/configure/unit.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1542,8 +1542,8 @@ describe('Unit testing of the vectorizer factory class', () => {
});
});

it('should create the correct Text2VecGoogleAiStudioConfig type with defaults', () => {
const config = configure.vectors.text2VecGoogleAiStudio();
it('should create the correct Text2VecGoogleGeminiConfig type with defaults', () => {
const config = configure.vectors.text2VecGoogleGemini();
expect(config).toEqual<VectorConfigCreate<never, undefined, 'hnsw', 'text2vec-google'>>({
name: undefined,
vectorIndex: {
Expand All @@ -1559,8 +1559,8 @@ describe('Unit testing of the vectorizer factory class', () => {
});
});

it('should create the correct Text2VecGoogleAiStudioConfig type with all values', () => {
const config = configure.vectors.text2VecGoogleAiStudio({
it('should create the correct Text2VecGoogleGeminiConfig type with all values', () => {
const config = configure.vectors.text2VecGoogleGemini({
name: 'test',
model: 'model-id',
titleProperty: 'title',
Expand All @@ -1582,6 +1582,49 @@ describe('Unit testing of the vectorizer factory class', () => {
});
});

it('should create the correct Multi2VecGoogleGeminiConfig type with defaults', () => {
  // With no options supplied, the only module setting expected is the fixed Gemini API endpoint.
  const expected: VectorConfigCreate<never, undefined, 'hnsw', 'multi2vec-google'> = {
    name: undefined,
    vectorIndex: { name: 'hnsw', config: undefined },
    vectorizer: {
      name: 'multi2vec-google',
      config: { apiEndpoint: 'generativelanguage.googleapis.com' },
    },
  };

  const actual = configure.vectors.multi2VecGoogleGemini();

  expect(actual).toEqual(expected);
});

it('should create the correct Multi2VecGoogleGeminiConfig type', () => {
  // Plain-string field names and `dimensions` are expected to pass through unchanged,
  // alongside the fixed Gemini API endpoint.
  const expected: VectorConfigCreate<never, undefined, 'hnsw', 'multi2vec-google'> = {
    name: undefined,
    vectorIndex: { name: 'hnsw', config: undefined },
    vectorizer: {
      name: 'multi2vec-google',
      config: {
        apiEndpoint: 'generativelanguage.googleapis.com',
        imageFields: ['image'],
        textFields: ['text'],
        videoFields: ['video'],
        dimensions: 768,
      },
    },
  };

  const actual = configure.vectors.multi2VecGoogleGemini({
    imageFields: ['image'],
    textFields: ['text'],
    videoFields: ['video'],
    dimensions: 768,
  });

  expect(actual).toEqual(expected);
});

it('should create the correct Text2VecPalmConfig type using deprecated method with defaults', () => {
const config = configure.vectorizer.text2VecPalm();
expect(config).toEqual<VectorConfigCreate<never, undefined, 'hnsw', 'text2vec-palm'>>({
Expand Down
67 changes: 65 additions & 2 deletions src/collections/configure/vectorizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import {
Multi2VecBindConfig,
Multi2VecClipConfig,
Multi2VecField,
Multi2VecGoogleGeminiConfig,
Multi2VecNvidiaConfig,
Multi2VecPalmConfig,
Multi2VecVoyageAIConfig,
Expand Down Expand Up @@ -978,11 +979,38 @@ export const vectors = (({ text2VecPalm, multi2VecPalm, ...rest }) => ({
*
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings) for detailed usage.
*
* @param {ConfigureTextVectorizerOptions<T, N, I, 'text2vec-google-ai-studio'>} [opts] The configuration for the `text2vec-google` vectorizer.
* @param {ConfigureTextVectorizerOptions<T, N, I, 'text2vec-google-gemini'>} [opts] The configuration for the `text2vec-google` vectorizer.
* @returns {VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-google'>} The configuration object.
*
* @deprecated Use `text2VecGoogleGemini` instead.
*/
text2VecGoogleAiStudio: <T, N extends string | undefined = undefined, I extends VectorIndexType = 'hnsw'>(
opts?: ConfigureTextVectorizerOptions<T, N, I, 'text2vec-google-ai-studio'>
opts?: ConfigureTextVectorizerOptions<T, N, I, 'text2vec-google-gemini'>
): VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-google'> => {
const { name, sourceProperties, quantizer, vectorIndexConfig, ...config } = opts || {};
return makeVectorizer(name, {
quantizer,
sourceProperties,
vectorIndexConfig,
vectorizerConfig: {
name: 'text2vec-google',
config: {
apiEndpoint: 'generativelanguage.googleapis.com',
...config,
},
},
});
},
/**
* Create a `VectorConfigCreate` object with the vectorizer set to `'text2vec-google'` with specific options for Gemini API (formerly AI Studio) deployments.
*
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings) for detailed usage.
*
* @param {ConfigureTextVectorizerOptions<T, N, I, 'text2vec-google-gemini'>} [opts] The configuration for the `text2vec-google` vectorizer.
* @returns {VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-google'>} The configuration object.
*/
text2VecGoogleGemini: <T, N extends string | undefined = undefined, I extends VectorIndexType = 'hnsw'>(
opts?: ConfigureTextVectorizerOptions<T, N, I, 'text2vec-google-gemini'>
): VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-google'> => {
const { name, sourceProperties, quantizer, vectorIndexConfig, ...config } = opts || {};
return makeVectorizer(name, {
Expand All @@ -998,6 +1026,41 @@ export const vectors = (({ text2VecPalm, multi2VecPalm, ...rest }) => ({
},
});
},
/**
 * Create a `VectorConfigCreate` object with the vectorizer set to `'multi2vec-google'`, pinned to the Google Gemini API endpoint.
 *
 * Each entry of `imageFields`, `textFields` and `videoFields` is passed through `mapMulti2VecField`;
 * only the field names are sent as the field lists, and the `weights` object built by
 * `formatMulti2VecFields` is omitted when it has no keys. `apiEndpoint` is always set to
 * `generativelanguage.googleapis.com` and is written after the caller's options so it cannot be overridden.
 *
 * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings) for detailed usage.
 *
 * @param {ConfigureTextVectorizerOptions<T, N, I, 'multi2vec-google-gemini'>} [opts] The configuration for the `multi2vec-google` vectorizer.
 * @returns {VectorConfigCreate<PrimitiveKeys<T>, N, I, 'multi2vec-google'>} The configuration object.
 */
multi2VecGoogleGemini: <T, N extends string | undefined = undefined, I extends VectorIndexType = 'hnsw'>(
  opts?: ConfigureTextVectorizerOptions<T, N, I, 'multi2vec-google-gemini'>
): VectorConfigCreate<PrimitiveKeys<T>, N, I, 'multi2vec-google'> => {
  // `sourceProperties` is part of the accepted options type. Pull it out so it is not spread
  // into the module config below, and forward it to `makeVectorizer` the same way the
  // text2vec Google factories in this object do.
  const { name, sourceProperties, quantizer, vectorIndexConfig, ...config } = opts || {};
  const imageFields = config.imageFields?.map(mapMulti2VecField);
  const textFields = config.textFields?.map(mapMulti2VecField);
  const videoFields = config.videoFields?.map(mapMulti2VecField);
  let weights: Multi2VecGoogleGeminiConfig['weights'] = {};
  weights = formatMulti2VecFields(weights, 'imageFields', imageFields);
  weights = formatMulti2VecFields(weights, 'textFields', textFields);
  weights = formatMulti2VecFields(weights, 'videoFields', videoFields);
  return makeVectorizer(name, {
    quantizer,
    sourceProperties,
    vectorIndexConfig,
    vectorizerConfig: {
      name: 'multi2vec-google',
      config: {
        ...config,
        // Written after the spread so the Gemini endpoint always wins.
        apiEndpoint: 'generativelanguage.googleapis.com',
        imageFields: imageFields?.map((f) => f.name),
        textFields: textFields?.map((f) => f.name),
        videoFields: videoFields?.map((f) => f.name),
        // An empty weights object carries no information; send nothing instead.
        weights: Object.keys(weights).length === 0 ? undefined : weights,
      },
    },
  });
},
text2VecMorph: <T, N extends string | undefined = undefined, I extends VectorIndexType = 'hnsw'>(
opts?: ConfigureTextVectorizerOptions<T, N, I, 'text2vec-morph'>
): VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-morph'> => {
Expand Down