diff --git a/README.md b/README.md index 78230b4..49e1558 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ npm install @langchain/ollama This example demonstrates extracting structured product data from a real e-commerce website using a local headed Playwright browser. For production environments, you can use a Playwright browser in [serverless](#serverless-browser) or [remote](#remote-browser) mode. ```typescript -import { ChatGoogleGenerativeAI } from "@langchain/google-genai"; +import { ChatGoogle } from "@langchain/google"; import { extract, ContentFormat, Browser } from "@lightfeed/extractor"; import { z } from "zod"; @@ -135,7 +135,7 @@ try { // Extract structured product data console.log("Extracting product data using LLM..."); const result = await extract({ - llm: new ChatGoogleGenerativeAI({ + llm: new ChatGoogle({ apiKey: process.env.GOOGLE_API_KEY, model: "gemini-2.5-flash", temperature: 0, @@ -191,11 +191,11 @@ try { You can also extract structured data directly from HTML, Markdown or text string. Pass any [LangChain chat model](https://js.langchain.com/docs/integrations/chat/): ```typescript -import { ChatGoogleGenerativeAI } from "@langchain/google-genai"; +import { ChatGoogle } from "@langchain/google"; import { extract, ContentFormat } from "@lightfeed/extractor"; const result = await extract({ - llm: new ChatGoogleGenerativeAI({ + llm: new ChatGoogle({ apiKey: process.env.GOOGLE_API_KEY, model: "gemini-2.5-flash", temperature: 0, @@ -280,8 +280,8 @@ import { ChatOpenAI } from "@langchain/openai"; const llm = new ChatOpenAI({ modelName: "gpt-4o-mini", apiKey: process.env.OPENAI_API_KEY }); // Google Gemini -import { ChatGoogleGenerativeAI } from "@langchain/google-genai"; -const llm = new ChatGoogleGenerativeAI({ model: "gemini-2.5-flash", apiKey: process.env.GOOGLE_API_KEY }); +import { ChatGoogle } from "@langchain/google"; +const llm = new ChatGoogle({ model: "gemini-2.5-flash", apiKey: process.env.GOOGLE_API_KEY }); // Anthropic import { ChatAnthropic } from "@langchain/anthropic"; @@ -398,7 +398,7 @@ Main function to extract structured data from content. | Option | Type | Description | Default | |--------|------|-------------|---------| -| `llm` | `BaseChatModel` | A [LangChain chat model](https://js.langchain.com/docs/integrations/chat/) instance (ChatOpenAI, ChatGoogleGenerativeAI, ChatAnthropic, etc.) | Required | +| `llm` | `BaseChatModel` | A [LangChain chat model](https://js.langchain.com/docs/integrations/chat/) instance (ChatOpenAI, ChatGoogle, etc.) | Required | | `content` | `string` | HTML, markdown, or plain text content to extract from | Required | | `format` | `ContentFormat` | Content format (HTML, MARKDOWN, or TXT) | Required | | `schema` | `z.ZodTypeAny` | Zod schema defining the structure to extract | Required | diff --git a/package-lock.json b/package-lock.json index b256ca2..929b62d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -6,7 +6,7 @@ "packages": { "": { "name": "@lightfeed/extractor", - "version": "0.2.1", + "version": "0.3.0", "license": "Apache-2.0", "dependencies": { "cheerio": "^1.0.0", @@ -18,9 +18,9 @@ "zod": "^3.24.3" }, "devDependencies": { - "@langchain/core": "^1.1.31", - "@langchain/google-genai": "^2.1.24", - "@langchain/openai": "^1.2.12", + "@langchain/core": "^1.1.36", + "@langchain/google": "^0.1.8", + "@langchain/openai": "^1.3.1", "@types/jest": "^29.5.12", "@types/node": "^22.15.3", "@types/turndown": "^5.0.5", @@ -559,15 +559,6 @@ "@jridgewell/sourcemap-codec": "^1.4.10" } }, - "node_modules/@google/generative-ai": { - "version": "0.24.1", - "resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.24.1.tgz", - "integrity": "sha512-MqO+MLfM6kjxcKoy0p1wRzG3b4ZZXtPI+z2IE26UogS2Cm/XHO+7gGRBh6gcJsOiIVoH93UwKvW4HdgiOZCy9Q==", - "dev": true, - "engines": { - "node": ">=18.0.0" - } - }, "node_modules/@isaacs/cliui": { "version": "8.0.2", "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", @@ -1053,9 +1044,9 @@ } }, "node_modules/@langchain/core": { - "version": "1.1.31", - "resolved": "https://registry.npmjs.org/@langchain/core/-/core-1.1.31.tgz", - "integrity": "sha512-FxsgIUONjKaRpjx59sISgmb0OMCbAetPGyhzjGa2kX0y1f8LZ5xm9VB2db7W9HYWyLvzRWcMA51Uu4OSTJmtZQ==", + "version": "1.1.36", + "resolved": "https://registry.npmjs.org/@langchain/core/-/core-1.1.36.tgz", + "integrity": "sha512-9NWsdzU3uZD13lJwunXK0t6SIwew+UwcbHggW5yUdaiMmzKeNkDpp1lRD6p49N8+D0Vv4qmQBEKB4Ukh2jfnvw==", "dev": true, "license": "MIT", "dependencies": { @@ -1147,39 +1138,55 @@ "url": "https://github.com/sponsors/colinhacks" } }, - "node_modules/@langchain/google-genai": { - "version": "2.1.24", - "resolved": "https://registry.npmjs.org/@langchain/google-genai/-/google-genai-2.1.24.tgz", - "integrity": "sha512-gBuYWIrTiT4S8U3AxaAMl6SKeGKtB10fXc+m0p2BPSvTfCaTbIlycH7IZjZUTD1L92dQMi/SULwCWffq5OIBgQ==", + "node_modules/@langchain/google": { + "version": "0.1.8", + "resolved": "https://registry.npmjs.org/@langchain/google/-/google-0.1.8.tgz", + "integrity": "sha512-MPG7uiDRC0dR3RXGEl7zPXKKWt84bwgWtuW7GVpzwmg6fE+xZId2yUhhcoFIJvqGjay2ER3RG+cf8v9dhg+MtA==", "dev": true, "license": "MIT", "dependencies": { - "@google/generative-ai": "^0.24.0", - "uuid": "^11.1.0" + "eventsource-parser": "^3.0.6", + "google-auth-library": "^10.5.0", + "jose": "^6.1.3", + "uuid": "^10.0.0" }, "engines": { "node": ">=20" }, "peerDependencies": { - "@langchain/core": "^1.1.30" + "@langchain/core": "^1.0.0" + } + }, + "node_modules/@langchain/google/node_modules/uuid": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-10.0.0.tgz", + "integrity": "sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==", + "dev": true, + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" } }, "node_modules/@langchain/openai": { - "version": "1.2.12", - "resolved": "https://registry.npmjs.org/@langchain/openai/-/openai-1.2.12.tgz", - "integrity": "sha512-Im6PPNujrfkZk4vpc9JAjbeERg+RbNtWRe3KSFOP7aNGa/yZ+XD69lxXwbsZGaZkbiUN/hwe9RYeisUfThb5wg==", + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/@langchain/openai/-/openai-1.3.1.tgz", + "integrity": "sha512-6yN3XFRUKUsGREGk4VtCvnMp5NHh2gWujiuWdn/G7cCeHboYrdKLWnwGqopuFOm7Tivv423gtMN1GQ7EJ3kg+g==", "dev": true, "license": "MIT", "dependencies": { "js-tiktoken": "^1.0.12", - "openai": "^6.24.0", + "openai": "^6.27.0", "zod": "^3.25.76 || ^4" }, "engines": { "node": ">=20" }, "peerDependencies": { - "@langchain/core": "^1.1.30" + "@langchain/core": "^1.1.36" } }, "node_modules/@langchain/openai/node_modules/openai": { @@ -1442,6 +1449,16 @@ "node": ">=0.4.0" } }, + "node_modules/agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", + "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, "node_modules/ansi-escapes": { "version": "4.3.2", "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-4.3.2.tgz", @@ -1651,6 +1668,16 @@ } ] }, + "node_modules/bignumber.js": { + "version": "9.3.1", + "resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-9.3.1.tgz", + "integrity": "sha512-Ko0uX15oIUS7wJ3Rb30Fs6SkVbLmPBAKdlm7q9+ak9bbIeFf0MwuBsQV6z7+X768/cHsfg+WlysDWJcmthjsjQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": "*" + } + }, "node_modules/boolbase": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", @@ -1731,6 +1758,13 @@ "node-int64": "^0.4.0" } }, + "node_modules/buffer-equal-constant-time": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz", + "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==", + "dev": true, + "license": "BSD-3-Clause" + }, "node_modules/buffer-from": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", @@ -2093,6 +2127,16 @@ "url": "https://github.com/sponsors/fb55" } }, + "node_modules/data-uri-to-buffer": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz", + "integrity": "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 12" + } + }, "node_modules/debug": { "version": "4.4.0", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.0.tgz", @@ -2242,6 +2286,16 @@ "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", "dev": true }, + "node_modules/ecdsa-sig-formatter": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", + "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "safe-buffer": "^5.0.1" + } + }, "node_modules/ejs": { "version": "3.1.10", "resolved": "https://registry.npmjs.org/ejs/-/ejs-3.1.10.tgz", @@ -2352,6 +2406,16 @@ "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==", "dev": true }, + "node_modules/eventsource-parser": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.6.tgz", + "integrity": "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/execa": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/execa/-/execa-5.1.1.tgz", @@ -2406,6 +2470,13 @@ "node": "^14.15.0 || ^16.10.0 || >=18.0.0" } }, + "node_modules/extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", + "dev": true, + "license": "MIT" + }, "node_modules/fast-json-stable-stringify": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", @@ -2421,6 +2492,30 @@ "bser": "2.1.1" } }, + "node_modules/fetch-blob": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz", + "integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/jimmywarting" + }, + { + "type": "paypal", + "url": "https://paypal.me/jimmywarting" + } + ], + "license": "MIT", + "dependencies": { + "node-domexception": "^1.0.0", + "web-streams-polyfill": "^3.0.3" + }, + "engines": { + "node": "^12.20 || >= 14.13" + } + }, "node_modules/filelist": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/filelist/-/filelist-1.0.4.tgz", @@ -2483,6 +2578,19 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/formdata-polyfill": { + "version": "4.0.10", + "resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz", + "integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==", + "dev": true, + "license": "MIT", + "dependencies": { + "fetch-blob": "^3.1.2" + }, + "engines": { + "node": ">=12.20.0" + } + }, "node_modules/fs.realpath": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", @@ -2512,6 +2620,36 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/gaxios": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/gaxios/-/gaxios-7.1.4.tgz", + "integrity": "sha512-bTIgTsM2bWn3XklZISBTQX7ZSddGW+IO3bMdGaemHZ3tbqExMENHLx6kKZ/KlejgrMtj8q7wBItt51yegqalrA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "extend": "^3.0.2", + "https-proxy-agent": "^7.0.1", + "node-fetch": "^3.3.2" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/gcp-metadata": { + "version": "8.1.2", + "resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-8.1.2.tgz", + "integrity": "sha512-zV/5HKTfCeKWnxG0Dmrw51hEWFGfcF2xiXqcA3+J90WDuP0SvoiSO5ORvcBsifmx/FoIjgQN3oNOGaQ5PhLFkg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "gaxios": "^7.0.0", + "google-logging-utils": "^1.0.0", + "json-bigint": "^1.0.0" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/gensync": { "version": "1.0.0-beta.2", "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", @@ -2580,6 +2718,34 @@ "node": ">=4" } }, + "node_modules/google-auth-library": { + "version": "10.6.2", + "resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-10.6.2.tgz", + "integrity": "sha512-e27Z6EThmVNNvtYASwQxose/G57rkRuaRbQyxM2bvYLLX/GqWZ5chWq2EBoUchJbCc57eC9ArzO5wMsEmWftCw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "base64-js": "^1.3.0", + "ecdsa-sig-formatter": "^1.0.11", + "gaxios": "^7.1.4", + "gcp-metadata": "8.1.2", + "google-logging-utils": "1.1.3", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/google-logging-utils": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-1.1.3.tgz", + "integrity": "sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=14" + } + }, "node_modules/graceful-fs": { "version": "4.2.11", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", @@ -2632,6 +2798,20 @@ "entities": "^4.5.0" } }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, "node_modules/human-signals": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/human-signals/-/human-signals-2.1.0.tgz", @@ -3514,6 +3694,16 @@ "url": "https://github.com/chalk/supports-color?sponsor=1" } }, + "node_modules/jose": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/jose/-/jose-6.2.2.tgz", + "integrity": "sha512-d7kPDd34KO/YnzaDOlikGpOurfF0ByC2sEV4cANCtdqLlTfBlw2p14O/5d/zv40gJPbIQxfES3nSx1/oYNyuZQ==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/panva" + } + }, "node_modules/js-tiktoken": { "version": "1.0.20", "resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.20.tgz", @@ -3541,6 +3731,16 @@ "node": ">=6" } }, + "node_modules/json-bigint": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-bigint/-/json-bigint-1.0.0.tgz", + "integrity": "sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "bignumber.js": "^9.0.0" + } + }, "node_modules/json-parse-even-better-errors": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", @@ -3568,6 +3768,29 @@ "jsonrepair": "bin/cli.js" } }, + "node_modules/jwa": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.1.tgz", + "integrity": "sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==", + "dev": true, + "license": "MIT", + "dependencies": { + "buffer-equal-constant-time": "^1.0.1", + "ecdsa-sig-formatter": "1.0.11", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/jws": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/jws/-/jws-4.0.1.tgz", + "integrity": "sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA==", + "dev": true, + "license": "MIT", + "dependencies": { + "jwa": "^2.0.1", + "safe-buffer": "^5.0.1" + } + }, "node_modules/kleur": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/kleur/-/kleur-3.0.3.tgz", @@ -3719,6 +3942,46 @@ "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", "dev": true }, + "node_modules/node-domexception": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", + "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", + "deprecated": "Use your platform's native DOMException instead", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/jimmywarting" + }, + { + "type": "github", + "url": "https://paypal.me/jimmywarting" + } + ], + "license": "MIT", + "engines": { + "node": ">=10.5.0" + } + }, + "node_modules/node-fetch": { + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.2.tgz", + "integrity": "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==", + "dev": true, + "license": "MIT", + "dependencies": { + "data-uri-to-buffer": "^4.0.0", + "fetch-blob": "^3.1.4", + "formdata-polyfill": "^4.0.10" + }, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/node-fetch" + } + }, "node_modules/node-int64": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/node-int64/-/node-int64-0.4.0.tgz", @@ -4205,6 +4468,27 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, "node_modules/safer-buffer": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", @@ -4808,6 +5092,16 @@ "makeerror": "1.0.12" } }, + "node_modules/web-streams-polyfill": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz", + "integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 8" + } + }, "node_modules/whatwg-encoding": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", @@ -5491,12 +5785,6 @@ } } }, - "@google/generative-ai": { - "version": "0.24.1", - "resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.24.1.tgz", - "integrity": "sha512-MqO+MLfM6kjxcKoy0p1wRzG3b4ZZXtPI+z2IE26UogS2Cm/XHO+7gGRBh6gcJsOiIVoH93UwKvW4HdgiOZCy9Q==", - "dev": true - }, "@isaacs/cliui": { "version": "8.0.2", "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", @@ -5884,9 +6172,9 @@ } }, "@langchain/core": { - "version": "1.1.31", - "resolved": "https://registry.npmjs.org/@langchain/core/-/core-1.1.31.tgz", - "integrity": "sha512-FxsgIUONjKaRpjx59sISgmb0OMCbAetPGyhzjGa2kX0y1f8LZ5xm9VB2db7W9HYWyLvzRWcMA51Uu4OSTJmtZQ==", + "version": "1.1.36", + "resolved": "https://registry.npmjs.org/@langchain/core/-/core-1.1.36.tgz", + "integrity": "sha512-9NWsdzU3uZD13lJwunXK0t6SIwew+UwcbHggW5yUdaiMmzKeNkDpp1lRD6p49N8+D0Vv4qmQBEKB4Ukh2jfnvw==", "dev": true, "requires": { "@cfworker/json-schema": "^4.0.2", @@ -5938,24 +6226,34 @@ } } }, - "@langchain/google-genai": { - "version": "2.1.24", - "resolved": "https://registry.npmjs.org/@langchain/google-genai/-/google-genai-2.1.24.tgz", - "integrity": "sha512-gBuYWIrTiT4S8U3AxaAMl6SKeGKtB10fXc+m0p2BPSvTfCaTbIlycH7IZjZUTD1L92dQMi/SULwCWffq5OIBgQ==", + "@langchain/google": { + "version": "0.1.8", + "resolved": "https://registry.npmjs.org/@langchain/google/-/google-0.1.8.tgz", + "integrity": "sha512-MPG7uiDRC0dR3RXGEl7zPXKKWt84bwgWtuW7GVpzwmg6fE+xZId2yUhhcoFIJvqGjay2ER3RG+cf8v9dhg+MtA==", "dev": true, "requires": { - "@google/generative-ai": "^0.24.0", - "uuid": "^11.1.0" + "eventsource-parser": "^3.0.6", + "google-auth-library": "^10.5.0", + "jose": "^6.1.3", + "uuid": "^10.0.0" + }, + "dependencies": { + "uuid": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-10.0.0.tgz", + "integrity": "sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==", + "dev": true + } } }, "@langchain/openai": { - "version": "1.2.12", - "resolved": "https://registry.npmjs.org/@langchain/openai/-/openai-1.2.12.tgz", - "integrity": "sha512-Im6PPNujrfkZk4vpc9JAjbeERg+RbNtWRe3KSFOP7aNGa/yZ+XD69lxXwbsZGaZkbiUN/hwe9RYeisUfThb5wg==", + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/@langchain/openai/-/openai-1.3.1.tgz", + "integrity": "sha512-6yN3XFRUKUsGREGk4VtCvnMp5NHh2gWujiuWdn/G7cCeHboYrdKLWnwGqopuFOm7Tivv423gtMN1GQ7EJ3kg+g==", "dev": true, "requires": { "js-tiktoken": "^1.0.12", - "openai": "^6.24.0", + "openai": "^6.27.0", "zod": "^3.25.76 || ^4" }, "dependencies": { @@ -6187,6 +6485,12 @@ "acorn": "^8.11.0" } }, + "agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", + "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "dev": true + }, "ansi-escapes": { "version": "4.3.2", "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-4.3.2.tgz", @@ -6336,6 +6640,12 @@ "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", "dev": true }, + "bignumber.js": { + "version": "9.3.1", + "resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-9.3.1.tgz", + "integrity": "sha512-Ko0uX15oIUS7wJ3Rb30Fs6SkVbLmPBAKdlm7q9+ak9bbIeFf0MwuBsQV6z7+X768/cHsfg+WlysDWJcmthjsjQ==", + "dev": true + }, "boolbase": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", @@ -6389,6 +6699,12 @@ "node-int64": "^0.4.0" } }, + "buffer-equal-constant-time": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz", + "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==", + "dev": true + }, "buffer-from": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", @@ -6645,6 +6961,12 @@ "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz", "integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==" }, + "data-uri-to-buffer": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz", + "integrity": "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==", + "dev": true + }, "debug": { "version": "4.4.0", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.0.tgz", @@ -6736,6 +7058,15 @@ "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", "dev": true }, + "ecdsa-sig-formatter": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", + "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==", + "dev": true, + "requires": { + "safe-buffer": "^5.0.1" + } + }, "ejs": { "version": "3.1.10", "resolved": "https://registry.npmjs.org/ejs/-/ejs-3.1.10.tgz", @@ -6810,6 +7141,12 @@ "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==", "dev": true }, + "eventsource-parser": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.6.tgz", + "integrity": "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==", + "dev": true + }, "execa": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/execa/-/execa-5.1.1.tgz", @@ -6854,6 +7191,12 @@ "jest-util": "^29.7.0" } }, + "extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", + "dev": true + }, "fast-json-stable-stringify": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", @@ -6869,6 +7212,16 @@ "bser": "2.1.1" } }, + "fetch-blob": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz", + "integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==", + "dev": true, + "requires": { + "node-domexception": "^1.0.0", + "web-streams-polyfill": "^3.0.3" + } + }, "filelist": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/filelist/-/filelist-1.0.4.tgz", @@ -6918,6 +7271,15 @@ "signal-exit": "^4.0.1" } }, + "formdata-polyfill": { + "version": "4.0.10", + "resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz", + "integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==", + "dev": true, + "requires": { + "fetch-blob": "^3.1.2" + } + }, "fs.realpath": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", @@ -6937,6 +7299,28 @@ "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", "dev": true }, + "gaxios": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/gaxios/-/gaxios-7.1.4.tgz", + "integrity": "sha512-bTIgTsM2bWn3XklZISBTQX7ZSddGW+IO3bMdGaemHZ3tbqExMENHLx6kKZ/KlejgrMtj8q7wBItt51yegqalrA==", + "dev": true, + "requires": { + "extend": "^3.0.2", + "https-proxy-agent": "^7.0.1", + "node-fetch": "^3.3.2" + } + }, + "gcp-metadata": { + "version": "8.1.2", + "resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-8.1.2.tgz", + "integrity": "sha512-zV/5HKTfCeKWnxG0Dmrw51hEWFGfcF2xiXqcA3+J90WDuP0SvoiSO5ORvcBsifmx/FoIjgQN3oNOGaQ5PhLFkg==", + "dev": true, + "requires": { + "gaxios": "^7.0.0", + "google-logging-utils": "^1.0.0", + "json-bigint": "^1.0.0" + } + }, "gensync": { "version": "1.0.0-beta.2", "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", @@ -6981,6 +7365,26 @@ "integrity": "sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==", "dev": true }, + "google-auth-library": { + "version": "10.6.2", + "resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-10.6.2.tgz", + "integrity": "sha512-e27Z6EThmVNNvtYASwQxose/G57rkRuaRbQyxM2bvYLLX/GqWZ5chWq2EBoUchJbCc57eC9ArzO5wMsEmWftCw==", + "dev": true, + "requires": { + "base64-js": "^1.3.0", + "ecdsa-sig-formatter": "^1.0.11", + "gaxios": "^7.1.4", + "gcp-metadata": "8.1.2", + "google-logging-utils": "1.1.3", + "jws": "^4.0.0" + } + }, + "google-logging-utils": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-1.1.3.tgz", + "integrity": "sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA==", + "dev": true + }, "graceful-fs": { "version": "4.2.11", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", @@ -7019,6 +7423,16 @@ "entities": "^4.5.0" } }, + "https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "dev": true, + "requires": { + "agent-base": "^7.1.2", + "debug": "4" + } + }, "human-signals": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/human-signals/-/human-signals-2.1.0.tgz", @@ -7692,6 +8106,12 @@ } } }, + "jose": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/jose/-/jose-6.2.2.tgz", + "integrity": "sha512-d7kPDd34KO/YnzaDOlikGpOurfF0ByC2sEV4cANCtdqLlTfBlw2p14O/5d/zv40gJPbIQxfES3nSx1/oYNyuZQ==", + "dev": true + }, "js-tiktoken": { "version": "1.0.20", "resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.20.tgz", @@ -7713,6 +8133,15 @@ "integrity": "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA==", "dev": true }, + "json-bigint": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-bigint/-/json-bigint-1.0.0.tgz", + "integrity": "sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==", + "dev": true, + "requires": { + "bignumber.js": "^9.0.0" + } + }, "json-parse-even-better-errors": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", @@ -7730,6 +8159,27 @@ "resolved": "https://registry.npmjs.org/jsonrepair/-/jsonrepair-3.12.0.tgz", "integrity": "sha512-SWfjz8SuQ0wZjwsxtSJ3Zy8vvLg6aO/kxcp9TWNPGwJKgTZVfhNEQBMk/vPOpYCDFWRxD6QWuI6IHR1t615f0w==" }, + "jwa": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.1.tgz", + "integrity": "sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==", + "dev": true, + "requires": { + "buffer-equal-constant-time": "^1.0.1", + "ecdsa-sig-formatter": "1.0.11", + "safe-buffer": "^5.0.1" + } + }, + "jws": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/jws/-/jws-4.0.1.tgz", + "integrity": "sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA==", + "dev": true, + "requires": { + "jwa": "^2.0.1", + "safe-buffer": "^5.0.1" + } + }, "kleur": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/kleur/-/kleur-3.0.3.tgz", @@ -7848,6 +8298,23 @@ "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", "dev": true }, + "node-domexception": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", + "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", + "dev": true + }, + "node-fetch": { + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.2.tgz", + "integrity": "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==", + "dev": true, + "requires": { + "data-uri-to-buffer": "^4.0.0", + "fetch-blob": "^3.1.4", + "formdata-polyfill": "^4.0.10" + } + }, "node-int64": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/node-int64/-/node-int64-0.4.0.tgz", @@ -8174,6 +8641,12 @@ "glob": "^10.3.7" } }, + "safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "dev": true + }, "safer-buffer": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", @@ -8573,6 +9046,12 @@ "makeerror": "1.0.12" } }, + "web-streams-polyfill": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz", + "integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==", + "dev": true + }, "whatwg-encoding": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", diff --git a/package.json b/package.json index 7f5c023..d87d4a0 100644 --- a/package.json +++ b/package.json @@ -55,7 +55,7 @@ }, "homepage": "https://github.com/lightfeed/extractor#readme", "peerDependencies": { - "@langchain/core": ">=1.1.31" + "@langchain/core": ">=1.1.36" }, "dependencies": { "cheerio": "^1.0.0", @@ -67,9 +67,9 @@ "zod": "^3.24.3" }, "devDependencies": { - "@langchain/core": "^1.1.31", - "@langchain/google-genai": "^2.1.24", - "@langchain/openai": "^1.2.12", + "@langchain/core": "^1.1.36", + "@langchain/google": "^0.1.8", + "@langchain/openai": "^1.3.1", "@types/jest": "^29.5.12", "@types/node": "^22.15.3", "@types/turndown": "^5.0.5", diff --git a/src/dev/runLocalTest.ts b/src/dev/runLocalTest.ts index 1f51da9..3ec836c 100644 --- a/src/dev/runLocalTest.ts +++ b/src/dev/runLocalTest.ts @@ -3,7 +3,7 @@ import * as path from "path"; import { config } from "dotenv"; import { z } from "zod"; import { ChatOpenAI } from "@langchain/openai"; -import { ChatGoogleGenerativeAI } from "@langchain/google-genai"; +import { ChatGoogle } from "@langchain/google"; import { extract, ContentFormat } from "../index"; // Load environment variables from .env file @@ -13,7 +13,7 @@ type Provider = "gemini" | "openai"; function createLLM(provider: Provider) { if (provider === "gemini") { - return new ChatGoogleGenerativeAI({ + return new ChatGoogle({ apiKey: process.env.GOOGLE_API_KEY, model: "gemini-2.5-flash", temperature: 0, @@ -30,7 +30,7 @@ function createLLM(provider: Provider) { function loadFixture(filename: string): string { return fs.readFileSync( path.resolve(__dirname, "../../tests/fixtures", filename), - "utf8" + "utf8", ); } @@ -68,7 +68,7 @@ const productSchema = z.object({ rating: z.string().optional(), description: z.string().optional(), features: z.array(z.string()).optional(), - }) + }), ), }); @@ -81,7 +81,7 @@ const productSchemaOpenAI = z.object({ rating: z.string().nullable(), description: z.string().nullable(), features: z.array(z.string()).nullable(), - }) + }), ), }); @@ -166,7 +166,11 @@ async function main() { const contentType = args[0] || "all"; const providerArg = args[1]?.toUpperCase(); const provider: Provider | "all" = - providerArg === "OPENAI" ? "openai" : providerArg === "GEMINI" ? "gemini" : "all"; + providerArg === "OPENAI" + ? "openai" + : providerArg === "GEMINI" + ? "gemini" + : "all"; console.log("API Keys available:"); console.log(`- GOOGLE_API_KEY: ${process.env.GOOGLE_API_KEY ? "Yes" : "No"}`); diff --git a/src/dev/testBrowserExtraction.ts b/src/dev/testBrowserExtraction.ts index bcc2aca..15944cc 100644 --- a/src/dev/testBrowserExtraction.ts +++ b/src/dev/testBrowserExtraction.ts @@ -1,4 +1,4 @@ -import { ChatGoogleGenerativeAI } from "@langchain/google-genai"; +import { ChatGoogle } from "@langchain/google"; import { extract, ContentFormat, Browser } from "../index"; import { z } from "zod"; import * as path from "path"; @@ -22,7 +22,7 @@ const productCatalogSchema = z.object({ reviewCount: z.number().optional().describe("Number of reviews"), productUrl: z.string().url().describe("Link to product detail page"), imageUrl: z.string().url().optional().describe("Product image URL"), - }) + }), ) .describe("List of bread and bakery products"), }); @@ -66,7 +66,7 @@ async function testProductCatalogExtraction() { console.log("\n🧠 Extracting product data using LLM..."); const result = await extract({ - llm: new ChatGoogleGenerativeAI({ + llm: new ChatGoogle({ apiKey: process.env.GOOGLE_API_KEY, model: "gemini-2.5-flash", temperature: 0, diff --git a/src/types.ts b/src/types.ts index 3642a0b..522faf0 100644 --- a/src/types.ts +++ b/src/types.ts @@ -121,7 +121,7 @@ export interface ExtractorOptions { /** * A LangChain chat model instance to use for extraction. - * Accepts any LangChain chat model (ChatOpenAI, ChatAnthropic, ChatGoogleGenerativeAI, etc.). + * Accepts any LangChain chat model (ChatOpenAI, ChatGoogle, etc.). * * @example * ```typescript diff --git a/tests/integration/browser-extraction.test.ts b/tests/integration/browser-extraction.test.ts index f62125f..da93e69 100644 --- a/tests/integration/browser-extraction.test.ts +++ b/tests/integration/browser-extraction.test.ts @@ -1,9 +1,9 @@ -import { ChatGoogleGenerativeAI } from "@langchain/google-genai"; +import { ChatGoogle } from "@langchain/google"; import { extract, ContentFormat, Browser } from "../../src/index"; import { z } from "zod"; function createGeminiLLM() { - return new ChatGoogleGenerativeAI({ + return new ChatGoogle({ apiKey: process.env.GOOGLE_API_KEY, model: "gemini-2.5-flash", temperature: 0, @@ -76,7 +76,7 @@ describe("Browser + Extraction Integration Tests", () => { const unreachableUrl = "https://this-domain-does-not-exist-12345.com"; await expect( - page.goto(unreachableUrl, { timeout: 5000 }) + page.goto(unreachableUrl, { timeout: 5000 }), ).rejects.toThrow(); await browser.close(); diff --git a/tests/integration/extract.test.ts b/tests/integration/extract.test.ts index 26d3054..c2b3d4a 100644 --- a/tests/integration/extract.test.ts +++ b/tests/integration/extract.test.ts @@ -2,16 +2,12 @@ import * as fs from "fs"; import * as path from "path"; import { z } from "zod"; import { ChatOpenAI } from "@langchain/openai"; -import { ChatGoogleGenerativeAI } from "@langchain/google-genai"; -import { - extract, - ContentFormat, - ExtractorResult, -} from "../../src"; +import { ChatGoogle } from "@langchain/google"; +import { extract, ContentFormat, ExtractorResult } from "../../src"; import { htmlToMarkdown } from "../../src/converters"; function createGeminiLLM() { - return new ChatGoogleGenerativeAI({ + return new ChatGoogle({ apiKey: process.env.GOOGLE_API_KEY, model: "gemini-2.5-flash", temperature: 0, @@ -29,7 +25,7 @@ function createOpenAILLM(modelName = "gpt-4o-mini") { // Read the sample HTML files const blogPostHtml = fs.readFileSync( path.resolve(__dirname, "../fixtures/blog-post.html"), - "utf8" + "utf8", ); // Define schemas that will be reused const blogSchema = z.object({ @@ -78,10 +74,10 @@ function verifyBlogPostExtraction(result: ExtractorResult): void { expect(result.data.links).toBeDefined(); expect(Array.isArray(result.data.links)).toBe(true); expect(result.data.links).toContain( - "https://example.com/blog/javascript-tutorials" + "https://example.com/blog/javascript-tutorials", ); expect(result.data.links).toContain( - "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/async_function" + "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/async_function", ); // Verify that usage statistics are returned @@ -119,7 +115,7 @@ describe("Extract Integration Tests", () => { const productListHtml = fs.readFileSync( path.resolve(__dirname, "../fixtures/product-list.html"), - "utf8" + "utf8", ); const productSchema = z.object({ @@ -132,7 +128,7 @@ describe("Extract Integration Tests", () => { features: z.array(z.string()).optional(), imageUrl: z.string().url().optional(), productUrl: z.string().url().optional(), - }) + }), ), }); @@ -147,7 +143,7 @@ describe("Extract Integration Tests", () => { features: z.array(z.string()).nullable(), imageUrl: z.string().url().nullable(), productUrl: z.string().url().nullable(), - }) + }), ), }); @@ -212,7 +208,7 @@ describe("Extract Integration Tests", () => { for (const product of result.data.products) { // Find matching product in ground truth by name const groundTruthProduct = groundTruthProductList.find( - (p) => p.name === product.name + (p) => p.name === product.name, ); // Ensure the product exists in ground truth @@ -287,7 +283,7 @@ describe("Extract Integration Tests", () => { expect.objectContaining({ product: expect.stringMatching(/^Apple(?:, Price: N\/A)?$/), price: null, - }) + }), ); }); @@ -303,7 +299,7 @@ describe("Extract Integration Tests", () => { .array(z.string()) .optional() .describe( - "Tags appear after the date. Do not include the # symbol." + "Tags appear after the date. Do not include the # symbol.", ), summary: z.string(), // For this test, adding an additional content field seems to cause the Google Gemini model @@ -365,13 +361,13 @@ describe("Extract Integration Tests", () => { // Verify the extracted data expect(result.data.title).toBe("Meeting Links"); expect(result.data.links).toContain( - "https://example.com/meetings/q4-planning-(2023)" + "https://example.com/meetings/q4-planning-(2023)", ); expect(result.data.links).toContain( - "https://example.com/budget/review-[2024]" + "https://example.com/budget/review-[2024]", ); expect(result.data.links).toContain( - "https://example.com/products/launch-(may-2024)" + "https://example.com/products/launch-(may-2024)", ); }); }); @@ -478,7 +474,7 @@ describe("Extract Integration Tests", () => { // Read the sample HTML file with images const articleWithImages = fs.readFileSync( path.resolve(__dirname, "../fixtures/article-with-images.html"), - "utf8" + "utf8", ); // Define a schema that includes image extraction @@ -497,11 +493,11 @@ const articleSchema = z.object({ url: z.string().url(), alt: z.string().optional(), caption: z.string().optional(), - }) + }), ) .optional() .describe( - "Extract all images from the article with their URLs and alt text" + "Extract all images from the article with their URLs and alt text", ), }); @@ -521,11 +517,11 @@ const articleSchemaOpenAI = z.object({ url: z.string().url(), alt: z.string().nullable(), caption: z.string().nullable(), - }) + }), ) .nullable() .describe( - "Extract all images from the article with their URLs and alt text" + "Extract all images from the article with their URLs and alt text", ), }); @@ -534,7 +530,7 @@ function verifyImageExtraction(result: ExtractorResult): void { // Check the data is extracted correctly expect(result.data).toBeDefined(); expect(result.data.title).toBe( - "Modern Web Development with React and Node.js" + "Modern Web Development with React and Node.js", ); expect(result.data.author).toBe("Jane Smith"); expect(result.data.date).toBe("March 20, 2023"); @@ -549,21 +545,21 @@ function verifyImageExtraction(result: ExtractorResult): void { // Check for the main architecture image const architectureImage = result.data.images.find((img: any) => - img.url.includes("react-node-architecture.png") + img.url.includes("react-node-architecture.png"), ); expect(architectureImage).toBeDefined(); expect(architectureImage.alt).toBe("React and Node.js Architecture"); // Check for the event loop image const eventLoopImage = result.data.images.find((img: any) => - img.url.includes("nodejs-event-loop.jpg") + img.url.includes("nodejs-event-loop.jpg"), ); expect(eventLoopImage).toBeDefined(); expect(eventLoopImage.alt).toBe("Node.js Event Loop"); // Check for the webpack image const webpackImage = result.data.images.find((img: any) => - img.url.includes("webpack-logo.png") + img.url.includes("webpack-logo.png"), ); expect(webpackImage).toBeDefined(); expect(webpackImage.alt).toBe("Webpack Logo"); @@ -585,15 +581,15 @@ describe("Image Extraction Integration Tests", () => { // With includeImages: true, markdown should contain image references expect(markdownWithImages).toContain( - "![React and Node.js Architecture](https://example.com/images/react-node-architecture.png)" + "![React and Node.js Architecture](https://example.com/images/react-node-architecture.png)", ); expect(markdownWithImages).toContain( - "![Node.js Event Loop](https://example.com/images/nodejs-event-loop.jpg)" + "![Node.js Event Loop](https://example.com/images/nodejs-event-loop.jpg)", ); // Without includeImages, markdown should not contain image references expect(markdownWithoutImages).not.toContain( - "![React and Node.js Architecture]" + "![React and Node.js Architecture]", ); expect(markdownWithoutImages).not.toContain("![Node.js Event Loop]"); });