microsoft · MichaelMcCulloch · Jul 17, 2024 · Jul 18, 2024 · Aug 8, 2024 · Sep 22, 2024
diff --git a/.dockerignore b/.dockerignore
@@ -2,4 +2,8 @@ __pycache__
 *.pyc
 *.pyo
 *.log
-.git
+.git
+.venv
+.uv
+dist
+site
diff --git a/.env.template b/.env.template
@@ -0,0 +1,5 @@
+LITELLM_BASE_URL=http://localhost:4000
+LITELLM_API_KEY=
+LITELLM_MODEL=deepseek-v4-pro
+
+LIDA_MO_INSTANT_ANALYSIS=true
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,6 @@
 output/
 .vscode
-frontend
+
 notebooks/test.ipynb
 notebooks/experimental/*   
 .azure

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -0,0 +1,29 @@
+image: docker:latest
+
+variables:
+  DOCKER_HOST: tcp://docker:2375
+  DOCKER_TLS_CERTDIR: ""
+  DOCKER_DRIVER: overlay2
+
+services:
+  - name: docker:dind
+    alias: docker
+    command: ["--tls=false"]
+
+stages:
+  - build
+
+before_script:
+  - echo "$CI_REGISTRY_PASSWORD" | docker login $CI_REGISTRY -u $CI_REGISTRY_USER --password-stdin
+
+build_frontend:
+  stage: build
+  script:
+    - docker build --target frontend -t $CI_REGISTRY_IMAGE/frontend:latest .
+    - docker push $CI_REGISTRY_IMAGE/frontend:latest
+
+build_backend:
+  stage: build
+  script:
+    - docker build --target backend -t $CI_REGISTRY_IMAGE/backend:latest .
+    - docker push $CI_REGISTRY_IMAGE/backend:latest
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,53 @@
+# Stage 1: Build Frontend
+FROM node:18-alpine AS frontend-builder
+WORKDIR /build
+
+# Copy frontend package files
+COPY src/lida/web/frontend/package.json src/lida/web/frontend/package-lock.json ./
+
+# Install dependencies
+RUN npm ci
+
+# Copy frontend source code
+COPY src/lida/web/frontend ./
+
+# Build the frontend
+RUN npm run build
+
+
+# Stage 2: Serve Frontend with Nginx
+FROM nginx:alpine AS frontend
+COPY --from=frontend-builder /build/dist /usr/share/nginx/html
+COPY nginx.conf /etc/nginx/conf.d/default.conf
+EXPOSE 80
+CMD ["nginx", "-g", "daemon off;"]
+
+
+# Stage 3: Build Backend
+FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS backend
+
+ENV UV_COMPILE_BYTECODE=1
+ENV UV_LINK_MODE=copy
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y git && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+COPY uv.lock pyproject.toml /app/
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --no-install-project --no-dev
+
+COPY . /app
+
+# Note: We do NOT copy frontend dist here anymore.
+# The backend operates purely as an API server.
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --no-dev
+
+ENV PATH="/app/.venv/bin:$PATH"
+
+EXPOSE 8080
+
+ENTRYPOINT ["lida", "ui", "--host", "0.0.0.0", "--port", "8080"]
diff --git a/Makefile b/Makefile
@@ -0,0 +1,20 @@
+install:
+	uv sync
+
+test:
+	uv run pytest tests/
+
+lint:
+	uv run ruff check .
+	uv run ruff format --check .
+
+format:
+	uv run ruff format .
+	uv run ruff check --fix .
+
+clean:
+	rm -rf .venv
+	rm -rf .pytest_cache
+	rm -rf .ruff_cache
+	rm -rf dist
+	find . -type d -name "__pycache__" -exec rm -rf {} +
diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-# LIDA: Automatic Generation of Visualizations and Infographics using Large Language Models
+# LIDA: Automatic Generation of Visualizations and Infographics using Large Language Models 📊📈
 
 [![PyPI version](https://badge.fury.io/py/lida.svg)](https://badge.fury.io/py/lida)
 [![arXiv](https://img.shields.io/badge/arXiv-2303.02927-<COLOR>.svg)](https://arxiv.org/abs/2303.02927)
@@ -10,7 +10,7 @@
 
 <!-- <img src="docs/images/lidascreen.png" width="100%" /> -->
 
-LIDA is a library for generating data visualizations and data-faithful infographics. LIDA is grammar agnostic (will work with any programming language and visualization libraries e.g. matplotlib, seaborn, altair, d3 etc) and works with multiple large language model providers (OpenAI, Azure OpenAI, PaLM, Cohere, Huggingface). Details on the components of LIDA are described in the [paper here](https://arxiv.org/abs/2303.02927) and in this tutorial [notebook](notebooks/tutorial.ipynb). See the project page [here](https://microsoft.github.io/lida/) for updates!.
+LIDA is a library for generating data visualizations and data-faithful infographics. LIDA is grammar agnostic (it works with any programming language and visualization library, e.g. matplotlib, seaborn, altair, d3, etc.) and works with multiple large language model providers (OpenAI, Azure OpenAI, PaLM, Gemini, Cohere, Huggingface). Details on the components of LIDA are described in the [paper here](https://arxiv.org/abs/2303.02927) and in this tutorial [notebook](notebooks/tutorial.ipynb). See the project page [here](https://microsoft.github.io/lida/) for updates!
 
 > **Note on Code Execution:**
 > To create visualizations, LIDA _generates_ and _executes_ code.
@@ -35,7 +35,9 @@ LIDA treats _**visualizations as code**_ and provides a clean api for generating
 
 from lida import Manager, llm
 
-lida = Manager(text_gen = llm("openai")) # palm, cohere ..
+# LIDA automatically uses the default model from your config file (e.g., Gemini).
+# To override, specify a provider: lida = Manager(text_gen = llm("openai"))
+lida = Manager() 
 summary = lida.summarize("data/cars.csv")
 goals = lida.goals(summary, n=2) # exploratory data analysis
 charts = lida.visualize(summary=summary, goal=goals[0]) # exploratory data analysis
@@ -49,16 +51,16 @@ Setup and verify that your python environment is **`python 3.10`** or higher (pr
 pip install -U lida 
 ```
 
-LIDA depends on `llmx` and `openai`. If you had these libraries installed previously, consider updating them.
+LIDA depends on `llmx`. If you had this library installed previously, consider updating it.
 
 ```bash
-pip install -U llmx openai
+pip install -U llmx
 ```
 
-Once requirements are met, setup your api key. Learn more about setting up keys for other LLM providers [here](https://github.com/victordibia/llmx).
+Once requirements are met, set up your API key. Learn more about setting up keys for other LLM providers [here](https://github.com/victordibia/llmx). By default, LIDA is configured to use Gemini.
 
 ```bash
-export OPENAI_API_KEY=<your key>
+export GEMINI_API_KEY=<your key>
 ```
 
 Alternatively you can install the library in dev model by cloning this repo and running `pip install -e .` in the repository root.
@@ -77,7 +79,7 @@ The fastest and recommended way to get started after installation will be to try
 
 ## Building the Web API and UI with Docker
 
-The LIDA web api and ui can be setup using docker and the command below (ensure that you have docker installed, and you have set your `OPENAI_API_KEY` environment variable).
+The LIDA web API and UI can be set up using Docker and the command below (ensure that you have Docker installed and that you have set your `GEMINI_API_KEY` environment variable).
 
 ```bash
 docker compose up
@@ -194,7 +196,7 @@ lida = Manager(text_gen = text_gen)
 - LIDA generates and executes code based on provided input. Ensure that you run LIDA in a secure environment with appropriate permissions.
 - LIDA currently works best with datasets that have a small number of columns (<= 10). This is mainly due to the limited context size for most models. For larger datasets, consider preprocessing your dataset to use a subset of the columns.
 - LIDA assumes the dataset exists and is in a format that can be loaded into a pandas dataframe. For example, a csv file, or a json file with a list of objects. In practices the right dataset may need to be curated and preprocessed to ensure that it is suitable for the task at hand.
-- Smaller LLMs (e.g., OSS LLMs on Huggingface) have limited instruction following capabilities and may not work well with LIDA. LIDA works best with larger LLMs (e.g., OpenAI GPT 3.5, GPT 4).
+- Smaller LLMs (e.g., OSS LLMs on Huggingface) have limited instruction-following capabilities and may not work well with LIDA. LIDA works best with larger LLMs (e.g., Gemini Pro, OpenAI GPT-4).
 - How reliable is the LIDA approach? The LIDA [paper](https://aclanthology.org/2023.acl-demo.11/) describes experiments that evaluate the reliability of LIDA using a visualization error rate metric. With the current version of prompts, data summarization techniques, preprocessing/postprocessing logic and LLMs, LIDA has an error rate of < 3.5% on over 2200 visualizations generated (compared to a baseline of over 10% error rate). This area is work in progress.
 - Can I build my own apps with LIDA? Yes! You can either use the python api directly in your app or setup a web api endpoint and use the web api in your app. See the [web api](#web-api-and-ui) section for more details.
 - How is LIDA related to OpenAI Code Interpreter: LIDA shares several similarities with code interpreter in the sense that both involve writing and executing code to address user intent. LIDA differs in its focus on visualization, providing a modular api for developer reuse and providing evaluation metrics on the visualization use case.
@@ -224,4 +226,4 @@ A short paper describing LIDA (Accepted at ACL 2023 Conference) is available [he
 }
 ```
 
-LIDA builds on insights in automatic generation of visualization from an earlier paper - [Data2Vis: Automatic Generation of Data Visualizations Using Sequence to Sequence Recurrent Neural Networks](https://arxiv.org/abs/1804.03126).
+LIDA builds on insights in automatic generation of visualization from an earlier paper - [Data2Vis: Automatic Generation of Data Visualizations Using Sequence to Sequence Recurrent Neural Networks](https://arxiv.org/abs/1804.03126).
diff --git a/config/cfg.yml b/config/cfg.yml
@@ -0,0 +1,8 @@
+# LiteLLM is the only supported endpoint. Configure the proxy URL, token,
+# and model via environment variables (LITELLM_BASE_URL, LITELLM_API_KEY,
+# LITELLM_MODEL). The values below mirror the in-code defaults.
+model:
+  provider: litellm
+  parameters:
+    base_url: http://localhost:4000
+    model: deepseek-v4-pro
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -1,11 +1,21 @@
-version: '3.8'
 services:
-  web:
-    image: web-ui
+  frontend:
     build:
       context: .
-      dockerfile: Dockerfile
+      target: frontend
     ports:
-      - "8080:8080"
+      - "8080:80"
+    depends_on:
+      - lida-backend
+
+  lida-backend:
+    build:
+      context: .
+      target: backend
+    ports:
+      - "8000:8080"
+    env_file: .env
     environment:
-      - OPENAI_API_KEY
+      - LLMX_CONFIG_PATH=/config/cfg.yml
+    volumes:
+      - ./config:/config
diff --git a/dockerfile b/dockerfile
diff --git a/docs/capabilities.md b/docs/capabilities.md
@@ -6,32 +6,32 @@ These are the fundamental features that form the primary functionality of LIDA r
 
 | Core Feature                      | Description                                                             | Status |
 | --------------------------------- | ----------------------------------------------------------------------- | ------ |
-| Data Summarization                | Generates a compact summary of the data.                                | ✅     |
-| Goal Generation                   | Produces a set of visualization goals from a data summary.              | ✅     |
-| Visualization Generation          | Creates and executes visualization code based on data summary and goal. | ✅     |
-| Visualization Editing             | Modifies visualizations using natural language instructions.            | ✅     |
-| Visualization Explanation         | Generates natural language explanations of visualization code.          | ✅     |
-| Visualization Evaluation & Repair | Evaluates visualizations and provides repair instructions.              | ✅     |
-| Visualization Recommendation      | Recommends a set of visualizations based on a dataset.                  | ✅     |
-| Infographic Generation            | Converts visualizations to data-faithful infographics.                  | 🚧     |
+| Data Summarization                | Generates a compact summary of the data.                                | ✅      |
+| Goal Generation                   | Produces a set of visualization goals from a data summary.              | ✅      |
+| Visualization Generation          | Creates and executes visualization code based on data summary and goal. | ✅      |
+| Visualization Editing             | Modifies visualizations using natural language instructions.            | ✅      |
+| Visualization Explanation         | Generates natural language explanations of visualization code.          | ✅      |
+| Visualization Evaluation & Repair | Evaluates visualizations and provides repair instructions.              | ✅      |
+| Visualization Recommendation      | Recommends a set of visualizations based on a dataset.                  | ✅      |
+| Infographic Generation            | Converts visualizations to data-faithful infographics.                  | 🚧      |
 
 > ⚠️ **Note**: LIDA is currently optimized for generating visualizations i.e. tasks for which the output is a visualization. It may not be the best tool for tasks that do not involve visualizations, such as creating machine learning models (e.g., create a time series model for forecasting), data analysis with a single value answer (what is square root of the smallest value in the dataset). This may be supported in the future.
 
 ### Other Capabilities
 
 These features support the core capabilities and provide additional utility and flexibility.
 
-| Other Feature                 | Description                                                                            | Status | Notes                                                 |
-| ----------------------------- | -------------------------------------------------------------------------------------- | ------ | ----------------------------------------------------- |
-| Grammar-Agnostic              | Works with any programming language and visualization library.                         | ✅     |                                                       |
-| Multi-LLM Provider Support    | Compatible with various large language model providers like OpenAI, Azure OpenAI, etc. | ✅     |                                                       |
-| Python API                    | Provides a Python-based API for generating visualizations & infographics.              | ✅     | Requires Python 3.10 or higher.                       |
-| Web API & UI                  | Optional user interface and web API included for exploration.                          | ✅     | Setup via Docker; accessible via localhost.           |
-| Docker Support                | Can be set up and run using Docker.                                                    | ✅     | Facilitates deployment and containerization.          |
-| HuggingFace Model Integration | Supports using HuggingFace models for text generation.                                 | ✅     | User can opt for direct use or via a local endpoint.  |
-| Security Note                 | Generates and executes code; should be run in a secure environment.                    | ⚠️     | Proper permissions management is crucial.             |
-| Community Expansion           | Encourages community contributions and extensions of the tool.                         | ✅     | Examples available, e.g., lida-streamlit.             |
-| Documentation & Citation      | Well-documented with available academic paper citation.                                | ✅     | Provides theoretical background and use case details. |
+| Other Feature                 | Description                                                                      | Status | Notes                                                 |
+| ----------------------------- | -------------------------------------------------------------------------------- | ------ | ----------------------------------------------------- |
+| Grammar-Agnostic              | Works with any programming language and visualization library.                   | ✅      |                                                       |
+| Multi-LLM Provider Support    | Compatible with various large language model providers like OpenAI, Gemini, etc. | ✅      |                                                       |
+| Python API                    | Provides a Python-based API for generating visualizations & infographics.        | ✅      | Requires Python 3.10 or higher.                       |
+| Web API & UI                  | Optional user interface and web API included for exploration.                    | ✅      | Setup via Docker; accessible via localhost.           |
+| Docker Support                | Can be set up and run using Docker.                                              | ✅      | Facilitates deployment and containerization.          |
+| HuggingFace Model Integration | Supports using HuggingFace models for text generation.                           | ✅      | User can opt for direct use or via a local endpoint.  |
+| Security Note                 | Generates and executes code; should be run in a secure environment.              | ⚠️      | Proper permissions management is crucial.             |
+| Community Expansion           | Encourages community contributions and extensions of the tool.                   | ✅      | Examples available, e.g., lida-streamlit.             |
+| Documentation & Citation      | Well-documented with available academic paper citation.                          | ✅      | Provides theoretical background and use case details. |
 
 Symbols used:
 

diff --git a/lida/__init__.py b/lida/__init__.py
diff --git a/lida/cli.py b/lida/cli.py
diff --git a/lida/components/__init__.py b/lida/components/__init__.py
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,4 +2,8 @@ __pycache__ @@
     *.pyc
     *.pyo
     *.log
-    .git
+    .git
+    .venv
+    .uv
+    dist
+    site