diff --git a/src/collections/config/types/vectorizer.ts b/src/collections/config/types/vectorizer.ts index 34bd0717..ac3ad015 100644 --- a/src/collections/config/types/vectorizer.ts +++ b/src/collections/config/types/vectorizer.ts @@ -25,6 +25,7 @@ export type Vectorizer = | 'multi2vec-bind' | Multi2VecPalmVectorizer | 'multi2vec-google' + | 'multi2vec-google-gemini' | 'multi2vec-jinaai' | 'multi2multivec-jinaai' | 'multi2multivec-weaviate' @@ -46,7 +47,7 @@ export type Vectorizer = | 'text2vec-openai' | Text2VecPalmVectorizer | 'text2vec-google' - | 'text2vec-google-ai-studio' + | 'text2vec-google-gemini' | 'text2vec-transformers' | 'text2vec-voyageai' | 'text2vec-weaviate' @@ -220,9 +221,11 @@ export type Multi2VecPalmConfig = Multi2VecGoogleConfig; */ export type Multi2VecGoogleConfig = { /** The project ID of the model in GCP. */ - projectId: string; + projectId?: string; /** The location where the model runs. */ - location: string; + location?: string; + /** The base URL for the vectorizer. */ + apiEndpoint?: string; /** The image fields used when vectorizing. */ imageFields?: string[]; /** The text fields used when vectorizing. */ @@ -251,6 +254,15 @@ export type Multi2VecGoogleConfig = { }; }; +/** The configuration for multi-media vectorization using the Google module with Gemini API settings. + * + * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings) for detailed usage. + */ +export type Multi2VecGoogleGeminiConfig = Omit< + Multi2VecGoogleConfig, + 'location' | 'projectId' | 'apiEndpoint' +>; + /** The configuration for multi-media-to-multi-vector vectorization using * the jina-embeddings-v4 model * @@ -572,7 +584,10 @@ export type Text2VecGoogleConfig = { vectorizeCollectionName?: boolean; }; -export type Text2VecGoogleAiStudioConfig = { +/** @deprecated Use [Text2VecGoogleGeminiConfig]. */ +export type Text2VecGoogleAiStudioConfig = Text2VecGoogleGeminiConfig; + +export type Text2VecGoogleGeminiConfig = { /** The model ID to use. */ model?: string; /** The Weaviate property name for the `gecko-002` or `gecko-003` model to use as the title. */ @@ -658,6 +673,7 @@ export type VectorizerConfig = | Multi2VecClipConfig | Multi2VecBindConfig | Multi2VecGoogleConfig + | Multi2VecGoogleGeminiConfig | Multi2VecJinaAIConfig | Multi2MultivecJinaAIConfig | Multi2MultivecWeaviateConfig @@ -693,6 +709,8 @@ export type VectorizerConfigType = V extends 'img2vec-neural' ? Multi2VecBindConfig | undefined : V extends 'multi2vec-google' ? Multi2VecGoogleConfig + : V extends 'multi2vec-google-gemini' + ? Multi2VecGoogleGeminiConfig : V extends 'multi2vec-jinaai' ? Multi2VecJinaAIConfig | undefined : V extends 'multi2multivec-jinaai' diff --git a/src/collections/configure/types/vectorizer.ts b/src/collections/configure/types/vectorizer.ts index 9641c814..5178e98d 100644 --- a/src/collections/configure/types/vectorizer.ts +++ b/src/collections/configure/types/vectorizer.ts @@ -9,6 +9,7 @@ import { Multi2VecCohereConfig, Multi2VecField, Multi2VecGoogleConfig, + Multi2VecGoogleGeminiConfig, Multi2VecJinaAIConfig, Multi2VecNvidiaConfig, Multi2VecVoyageAIConfig, @@ -20,8 +21,8 @@ import { Text2VecContextionaryConfig, Text2VecDatabricksConfig, Text2VecGPT4AllConfig, - Text2VecGoogleAiStudioConfig, Text2VecGoogleConfig, + Text2VecGoogleGeminiConfig, Text2VecHuggingFaceConfig, Text2VecJinaAIConfig, Text2VecMistralConfig, @@ -228,6 +229,16 @@ export type Multi2VecGoogleConfigCreate = Omit & { + /** The image fields to use in vectorization. Can be string of `Multi2VecField` type. If string, weight 0 will be assumed. */ + imageFields?: string[] | Multi2VecField[]; + /** The text fields to use in vectorization. Can be string of `Multi2VecField` type. If string, weight 0 will be assumed. */ + textFields?: string[] | Multi2VecField[]; + /** The video fields to use in vectorization. Can be string of `Multi2VecField` type. If string, weight 0 will be assumed. */ + videoFields?: string[] | Multi2VecField[]; +}; + export type Multi2VecVoyageAIConfigCreate = Omit & { /** The image fields to use in vectorization. Can be string of `Multi2VecField` type. If string, weight 0 will be assumed. */ imageFields?: string[] | Multi2VecField[]; @@ -272,7 +283,10 @@ export type Text2VecPalmConfigCreate = Text2VecGoogleConfig; export type Text2VecGoogleConfigCreate = Text2VecGoogleConfig; -export type Text2VecGoogleAiStudioConfigCreate = Text2VecGoogleAiStudioConfig; +/** @deprecated Use [Text2VecGoogleGeminiConfigCreate] */ +export type Text2VecGoogleAiStudioConfigCreate = Text2VecGoogleGeminiConfig; + +export type Text2VecGoogleGeminiConfigCreate = Text2VecGoogleGeminiConfig; export type Text2VecTransformersConfigCreate = Text2VecTransformersConfig; @@ -302,6 +316,8 @@ export type VectorizerConfigCreateType = V extends 'img2vec-neural' ? Multi2VecPalmConfigCreate : V extends 'multi2vec-google' ? Multi2VecGoogleConfigCreate + : V extends 'multi2vec-google-gemini' + ? Multi2VecGoogleGeminiConfigCreate : V extends 'multi2vec-voyageai' ? Multi2VecVoyageAIConfigCreate | undefined : V extends 'ref2vec-centroid' @@ -338,8 +354,8 @@ export type VectorizerConfigCreateType = V extends 'img2vec-neural' ? Text2VecPalmConfigCreate | undefined : V extends 'text2vec-google' ? Text2VecGoogleConfigCreate | undefined - : V extends 'text2vec-google-ai-studio' - ? Text2VecGoogleAiStudioConfigCreate | undefined + : V extends 'text2vec-google-gemini' + ? Text2VecGoogleGeminiConfigCreate | undefined : V extends 'text2vec-transformers' ? Text2VecTransformersConfigCreate | undefined : V extends 'text2vec-voyageai' diff --git a/src/collections/configure/unit.test.ts b/src/collections/configure/unit.test.ts index 48e9459a..99157a6a 100644 --- a/src/collections/configure/unit.test.ts +++ b/src/collections/configure/unit.test.ts @@ -1542,8 +1542,8 @@ describe('Unit testing of the vectorizer factory class', () => { }); }); - it('should create the correct Text2VecGoogleAiStudioConfig type with defaults', () => { - const config = configure.vectors.text2VecGoogleAiStudio(); + it('should create the correct Text2VecGoogleGeminiConfig type with defaults', () => { + const config = configure.vectors.text2VecGoogleGemini(); expect(config).toEqual>({ name: undefined, vectorIndex: { @@ -1559,8 +1559,8 @@ describe('Unit testing of the vectorizer factory class', () => { }); }); - it('should create the correct Text2VecGoogleAiStudioConfig type with all values', () => { - const config = configure.vectors.text2VecGoogleAiStudio({ + it('should create the correct Text2VecGoogleGeminiConfig type with all values', () => { + const config = configure.vectors.text2VecGoogleGemini({ name: 'test', model: 'model-id', titleProperty: 'title', @@ -1582,6 +1582,49 @@ describe('Unit testing of the vectorizer factory class', () => { }); }); + it('should create the correct Multi2VecGoogleGeminiConfig type with defaults', () => { + const config = configure.vectors.multi2VecGoogleGemini(); + expect(config).toEqual>({ + name: undefined, + vectorIndex: { + name: 'hnsw', + config: undefined, + }, + vectorizer: { + name: 'multi2vec-google', + config: { + apiEndpoint: 'generativelanguage.googleapis.com', + }, + }, + }); + }); + + it('should create the correct Multi2VecGoogleGeminiConfig type', () => { + const config = configure.vectors.multi2VecGoogleGemini({ + imageFields: ['image'], + textFields: ['text'], + videoFields: ['video'], + dimensions: 768, + }); + expect(config).toEqual>({ + name: undefined, + vectorIndex: { + name: 'hnsw', + config: undefined, + }, + vectorizer: { + name: 'multi2vec-google', + config: { + apiEndpoint: 'generativelanguage.googleapis.com', + imageFields: ['image'], + textFields: ['text'], + videoFields: ['video'], + dimensions: 768, + }, + }, + }); + }); + it('should create the correct Text2VecPalmConfig type using deprecated method with defaults', () => { const config = configure.vectorizer.text2VecPalm(); expect(config).toEqual>({ diff --git a/src/collections/configure/vectorizer.ts b/src/collections/configure/vectorizer.ts index 3345251a..2851120d 100644 --- a/src/collections/configure/vectorizer.ts +++ b/src/collections/configure/vectorizer.ts @@ -4,6 +4,7 @@ import { Multi2VecBindConfig, Multi2VecClipConfig, Multi2VecField, + Multi2VecGoogleGeminiConfig, Multi2VecNvidiaConfig, Multi2VecPalmConfig, Multi2VecVoyageAIConfig, @@ -978,11 +979,38 @@ export const vectors = (({ text2VecPalm, multi2VecPalm, ...rest }) => ({ * * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings) for detailed usage. * - * @param {ConfigureTextVectorizerOptions} [opts] The configuration for the `text2vec-google` vectorizer. + * @param {ConfigureTextVectorizerOptions} [opts] The configuration for the `text2vec-google` vectorizer. * @returns {VectorConfigCreate, N, I, 'text2vec-google'>} The configuration object. + * + * @deprecated Use [text2VecGoogleGemini] */ text2VecGoogleAiStudio: ( - opts?: ConfigureTextVectorizerOptions + opts?: ConfigureTextVectorizerOptions + ): VectorConfigCreate, N, I, 'text2vec-google'> => { + const { name, sourceProperties, quantizer, vectorIndexConfig, ...config } = opts || {}; + return makeVectorizer(name, { + quantizer, + sourceProperties, + vectorIndexConfig, + vectorizerConfig: { + name: 'text2vec-google', + config: { + apiEndpoint: 'generativelanguage.googleapis.com', + ...config, + }, + }, + }); + }, + /** + * Create a `VectorConfigCreate` object with the vectorizer set to `'text2vec-google'` with specific options for AI studio deployments. + * + * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings) for detailed usage. + * + * @param {ConfigureTextVectorizerOptions} [opts] The configuration for the `text2vec-google` vectorizer. + * @returns {VectorConfigCreate, N, I, 'text2vec-google'>} The configuration object. + */ + text2VecGoogleGemini: ( + opts?: ConfigureTextVectorizerOptions ): VectorConfigCreate, N, I, 'text2vec-google'> => { const { name, sourceProperties, quantizer, vectorIndexConfig, ...config } = opts || {}; return makeVectorizer(name, { @@ -998,6 +1026,41 @@ export const vectors = (({ text2VecPalm, multi2VecPalm, ...rest }) => ({ }, }); }, + /** + * Create a `VectorConfigCreate` object with the vectorizer set to `'multi2vec-google'` with Google Gemini API endpoint. + * + * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings) for detailed usage. + * + * @param {ConfigureTextVectorizerOptions} [opts] The configuration for the `multi2vec-google` vectorizer. + * @returns {VectorConfigCreate, N, I, 'multi2vec-google'>} The configuration object. + */ + multi2VecGoogleGemini: ( + opts?: ConfigureTextVectorizerOptions + ): VectorConfigCreate, N, I, 'multi2vec-google'> => { + const { name, quantizer, vectorIndexConfig, ...config } = opts || {}; + const imageFields = config.imageFields?.map(mapMulti2VecField); + const textFields = config.textFields?.map(mapMulti2VecField); + const videoFields = config.videoFields?.map(mapMulti2VecField); + let weights: Multi2VecGoogleGeminiConfig['weights'] = {}; + weights = formatMulti2VecFields(weights, 'imageFields', imageFields); + weights = formatMulti2VecFields(weights, 'textFields', textFields); + weights = formatMulti2VecFields(weights, 'videoFields', videoFields); + return makeVectorizer(name, { + quantizer, + vectorIndexConfig, + vectorizerConfig: { + name: 'multi2vec-google', + config: { + ...config, + apiEndpoint: 'generativelanguage.googleapis.com', + imageFields: imageFields?.map((f) => f.name), + textFields: textFields?.map((f) => f.name), + videoFields: videoFields?.map((f) => f.name), + weights: Object.keys(weights).length === 0 ? undefined : weights, + }, + }, + }); + }, text2VecMorph: ( opts?: ConfigureTextVectorizerOptions ): VectorConfigCreate, N, I, 'text2vec-morph'> => {