diff --git a/.env.example b/.env.example index e11a70c69..04613d4c3 100644 --- a/.env.example +++ b/.env.example @@ -77,3 +77,12 @@ GOOGLE_CSE_ID= # Miscellaneous options # Skip loading Chroma. OVERWRITE_CHROMA=true + +# Enable SentenceEmbedding model served via BentoML +# For local embedding service, use: +# docker run --rm -p 3001:3001 ghcr.io/bentoml/sentence-embedding-bento:latest --port 3001 +# Then set the following env var: +# BENTOML_EMBEDDING_ENDPOINT=http://localhost:3001 +# Instructions for customizing your embedding model server: https://github.com/bentoml/sentence-embedding-bento +BENTOML_EMBEDDING_ENDPOINT= + diff --git a/README.md b/README.md index 21cafd017..eaec9da7a 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ __Demo settings: Web, GPT4, ElevenLabs with voice clone, Chroma, Google Speech t - ✅**Web**: [React JS](https://react.dev/), [Vanilla JS](http://vanilla-js.com/), [WebSockets](https://developer.mozilla.org/en-US/docs/Web/API/WebSockets_API) - ✅**Mobile**: [Swift](https://developer.apple.com/swift/), [WebSockets](https://developer.mozilla.org/en-US/docs/Web/API/WebSockets_API) -- ✅**Backend**: [FastAPI](https://fastapi.tiangolo.com/), [SQLite](https://www.sqlite.org/index.html), [Docker](https://www.docker.com/) +- ✅**Backend**: [FastAPI](https://fastapi.tiangolo.com/), [SQLite](https://www.sqlite.org/index.html), [Docker](https://www.docker.com/), [BentoML](https://bentoml.com/) - ✅**Data Ingestion**: [LlamaIndex](https://www.llamaindex.ai/), [Chroma](https://www.trychroma.com/) - ✅**LLM Orchestration**: [LangChain](https://langchain.com/), [Chroma](https://www.trychroma.com/) - ✅**LLM**: [OpenAI GPT3.5/4](https://platform.openai.com/docs/api-reference/chat), [Anthropic Claude 2](https://docs.anthropic.com/claude/docs/getting-started-with-claude) @@ -159,6 +159,26 @@ ELEVEN_LABS_API_KEY= ``` +### 4. (Optional) Prepare self-hosted embedding service - BentoML Deployment Endpoint +
👇click me + +1. Install [Docker](https://docs.docker.com/engine/install/) + +2. Run the text embedding service docker image generated with BentoML: + + ```bash + docker run --rm -p 3001:3001 ghcr.io/bentoml/sentence-embedding-bento:latest --port 3001 + ``` + +3. Set the Text Embedding Endpoint in your .env file: + + ``` + BENTOML_EMBEDDING_ENDPOINT=http://localhost:3001 + ``` + +For cloud deployment options and customizing your own embedding model, check out the source repo [here](https://github.com/bentoml/sentence-embedding-bento) +
+ ## 💿 Installation via Python - **Step 1**. Clone the repo ```sh diff --git a/cli.py b/cli.py index a7f5b0cb2..df2ea7f5b 100755 --- a/cli.py +++ b/cli.py @@ -115,10 +115,17 @@ def image_exists(name): return result.returncode == 0 +@click.command(help="Run BentoML text embedding service locally via Docker at localhost:3000") +def run_embedding_service(): + click.secho("Launching BentoML SentenceEmbedding Service...", fg='green') + subprocess.run(["docker", "run", "--rm", "-p", "3001:3001", "ghcr.io/bentoml/sentence-embedding-bento:latest"]) + + cli.add_command(docker_build) cli.add_command(docker_run) cli.add_command(docker_delete) cli.add_command(run_uvicorn) +cli.add_command(run_embedding_service) cli.add_command(web_build) cli.add_command(docker_next_web_build) diff --git a/realtime_ai_character/database/chroma.py b/realtime_ai_character/database/chroma.py index e8365a2dd..b705c7ae7 100644 --- a/realtime_ai_character/database/chroma.py +++ b/realtime_ai_character/database/chroma.py @@ -1,22 +1,47 @@ import os from dotenv import load_dotenv +from bentoml.client import Client from langchain.vectorstores import Chroma from langchain.embeddings import OpenAIEmbeddings +from langchain.embeddings.base import Embeddings from realtime_ai_character.logger import get_logger load_dotenv() logger = get_logger(__name__) -embedding = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY")) -if os.getenv('OPENAI_API_TYPE') == 'azure': - embedding = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"), deployment=os.getenv( - "OPENAI_API_EMBEDDING_DEPLOYMENT_NAME", "text-embedding-ada-002"), chunk_size=1) + +class BentoEmbeddings(Embeddings): + def __init__(self, embedding_svc_client: Client): + self.client = embedding_svc_client + + def embed_documents(self, texts: list[str]) -> list[list[float]]: + return self.client.encode(texts).tolist() + + def embed_query(self, text: str) -> list[float]: + return self.client.encode([text]).tolist()[0] + + +embedding_endpoint = 
os.getenv("BENTOML_EMBEDDING_ENDPOINT") + +if embedding_endpoint: + # Use self-hosted embedding model via BentoML API endpoint + client = Client.from_url(embedding_endpoint) + embedding_func = BentoEmbeddings(client) +else: + embedding_func = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY")) + if os.getenv('OPENAI_API_TYPE') == 'azure': + embedding_func = OpenAIEmbeddings( + openai_api_key=os.getenv("OPENAI_API_KEY"), + deployment=os.getenv( + "OPENAI_API_EMBEDDING_DEPLOYMENT_NAME", "text-embedding-ada-002" + ), + chunk_size=1) def get_chroma(): chroma = Chroma( collection_name='llm', - embedding_function=embedding, + embedding_function=embedding_func, persist_directory='./chroma.db' ) return chroma diff --git a/requirements.txt b/requirements.txt index ab3cccf7a..75d72c421 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ aioconsole aiofiles alembic anthropic +bentoml>=1.1 chromadb>=0.4.2 click EbookLib diff --git a/sample_cloud_deployment/deployment.yaml b/sample_cloud_deployment/deployment.yaml index 9975021a2..514f00e20 100644 --- a/sample_cloud_deployment/deployment.yaml +++ b/sample_cloud_deployment/deployment.yaml @@ -50,6 +50,8 @@ spec: value: - name: BRUCE_VOICE value: + - name: BENTOML_EMBEDDING_ENDPOINT + value: bentoml-embedding-service..svc.cluster.local --- apiVersion: v1 kind: Service diff --git a/sample_cloud_deployment/embedding_service.yaml b/sample_cloud_deployment/embedding_service.yaml new file mode 100644 index 000000000..47a9cda5f --- /dev/null +++ b/sample_cloud_deployment/embedding_service.yaml @@ -0,0 +1,41 @@ +# For advanced BentoML deployment on kubernetes, see: +# https://www.kubeflow.org/docs/external-add-ons/serving/bentoml/ +# https://github.com/bentoml/yatai +apiVersion: apps/v1 +kind: Deployment +metadata: + name: bentoml-embedding-deployment + labels: + app: bentoml-text-embedding +spec: + replicas: 1 + selector: + matchLabels: + app: bentoml-text-embedding + template: + metadata: + labels: + app: 
bentoml-text-embedding + spec: + containers: + - name: bentoml-text-embedding + image: ghcr.io/bentoml/sentence-embedding-bento:0.1.0 + ports: + - containerPort: 3000 + env: + - name: BENTOML_CONFIG_OPTIONS + value: "api_server.metrics.namespace=realchar,api_server.traffic.timeout=10" +--- +apiVersion: v1 +kind: Service +metadata: + name: bentoml-embedding-service +spec: + type: ClusterIP + selector: + app: bentoml-text-embedding + ports: + - protocol: TCP + port: 80 + targetPort: 3000 +