diff --git a/src/content/docs/en/pages/guides/ai-inference-starter-kit/index.mdx b/src/content/docs/en/pages/guides/ai-inference-starter-kit/index.mdx
new file mode 100644
index 0000000000..58ad7d8fa6
--- /dev/null
+++ b/src/content/docs/en/pages/guides/ai-inference-starter-kit/index.mdx
@@ -0,0 +1,114 @@
+---
+title: Deploy AI Inference Starter Kit
+description: Deploy a complete AI Inference environment using the Starter Kit template on Azion.
+meta_tags: >-
+ ai inference, starter kit, template, deployment, artificial intelligence, edge computing
+namespace: docs_guides_ai_inference_starter_kit
+permalink: /documentation/products/guides/ai-inference-starter-kit/
+menu_namespace: AIInferenceMenu
+
+---
+
+import LinkButton from 'azion-webkit/linkbutton';
+
+The **AI Inference Starter Kit** provides a ready-to-use template for deploying AI capabilities on Azion's edge infrastructure. It includes pre-configured applications, functions, and model integrations.
+
+## Overview
+
+The Starter Kit includes:
+
+- An edge application configured for AI workloads
+- Pre-built functions for common AI tasks
+- Integration with available AI models
+- Sample code for quick implementation
+
+## Requirements
+
+Before you begin, ensure you have:
+
+- An Azion account
+- Access to Azion Console
+
+## Deploy the Starter Kit
+
+1. Access the [Azion Console](https://console.azion.com/).
+2. On the **+ Create** page, search for **AI Inference Starter Kit**.
+3. Select the template.
+4. Configure your edge application:
+ - Enter a name for your application
+ - Select the models you want to use
+ - Configure additional settings as needed
+5. Click **Deploy**.
+
+
+
+## Next steps
+
+After deployment, you can:
+
+- [Explore available models](/en/documentation/products/ai/ai-inference/models/)
+- [Build a simple agent](/en/documentation/products/guides/ai-inference-agent/)
+- [Integrate with SQL Database](/en/documentation/products/store/sql-database/) for vector search capabilities
+
+## Example usage
+
+After deploying, test your AI Inference with a simple request:
+
+```javascript
+const response = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", {
+ "stream": false,
+ "messages": [
+ {
+ "role": "user",
+ "content": "Explain edge computing in one sentence."
+ }
+ ]
+})
+
+console.log(response.choices[0].message.content)
+```
+
+Expected response:
+
+```json
+{
+ "id": "chatcmpl-abc123",
+ "choices": [{
+ "message": {
+ "role": "assistant",
+ "content": "Edge computing processes data closer to its source, reducing latency and bandwidth usage by bringing computation and storage resources near the devices or sensors that generate the data."
+ }
+ }]
+}
+```
+
+## Troubleshooting
+
+### Deployment fails
+
+If deployment fails, check:
+
+- Your account has sufficient permissions
+- All required fields are filled correctly
+- Error logs in the console for specific issues
+
+### Model not responding
+
+If the model isn't responding:
+
+- Verify the function is correctly configured
+- Confirm the model name is correct in `Azion.AI.run()`
+- Check the rate limits in the [Limits section](/en/documentation/products/ai/ai-inference/#limits)
+
+### High latency
+
+If you experience high latency:
+
+- Consider using streaming responses (`"stream": true`)
+- Check if your edge application is deployed in the optimal region
+- Review your function's timeout settings
diff --git a/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx b/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx
new file mode 100644
index 0000000000..89dcd478c0
--- /dev/null
+++ b/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx
@@ -0,0 +1,383 @@
+---
+title: Build your first AI agent
+description: Build a simple AI agent in 5 minutes — a conversational assistant that runs on Azion's edge infrastructure.
+meta_tags: >-
+ ai inference, ai agent, artificial intelligence, edge computing, quick start, tutorial
+namespace: docs_guides_ai_inference_build_agent
+permalink: /documentation/products/guides/ai-inference-agent/
+menu_namespace: AIInferenceMenu
+
+---
+
+import LinkButton from 'azion-webkit/linkbutton';
+
+Build AI agents that think, respond, and act. Agents run on Azion's global edge network, providing low-latency responses and seamless scalability.
+
+**What you will build:** A conversational AI agent that answers questions and maintains context.
+
+**Time:** ~5 minutes
+
+---
+
+## Create a new project
+
+Click the button below to create a new project with a pre-configured AI agent:
+
+
+Or follow these steps manually:
+
+1. Access the [Azion Console](https://console.azion.com/).
+2. Click **+ Create** and select **AI Inference Starter Kit**.
+3. Enter a name for your application, such as `my-first-agent`.
+4. Click **Deploy**.
+
+This creates a project with:
+
+- An **Edge Application** configured for AI workloads
+- A **Function** with pre-configured AI Inference integration
+- Example code to get you started
+
+---
+
+## Your first agent
+
+After deployment, navigate to your function and replace the code with this simple agent:
+
+```javascript
+async function handleRequest(request) {
+ // Check if the request is a POST request and has a JSON body
+ if (request.method !== "POST" || request.headers.get("content-type") !== "application/json") {
+ return new Response(JSON.stringify({
+ error: "Request must be a POST request with JSON body",
+ }), {
+ status: 400,
+ headers: { "Content-Type": "application/json" },
+ });
+ }
+
+ let input;
+ try {
+ input = await request.json();
+ } catch (err) {
+ return new Response(JSON.stringify({
+ error: "Invalid JSON in request body",
+ }), {
+ status: 400,
+ headers: { "Content-Type": "application/json" },
+ });
+ }
+
+ // Check if the required "model" field is present in the input
+ if (!input.hasOwnProperty("model")) {
+ return new Response(JSON.stringify({
+ error: "Missing 'model' field in request body",
+ }), {
+ status: 400,
+ headers: { "Content-Type": "application/json" },
+ });
+ }
+
+ const model = input["model"];
+
+ try {
+ const response = await Azion.AI.run(model, input);
+
+ if (input.stream) {
+ const { readable, writable } = new TransformStream();
+ const writer = writable.getWriter();
+ const encoder = new TextEncoder();
+
+ (async () => {
+ for await (const chunk of response) {
+ await writer.write(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`));
+ }
+ await writer.write(encoder.encode("data: [DONE]\n\n"));
+ await writer.close();
+ })();
+
+ return new Response(readable, {
+ headers: { "Content-Type": "text/event-stream" },
+ });
+ } else {
+ return new Response(JSON.stringify(response), {
+ headers: { "Content-Type": "application/json" },
+ });
+ }
+ } catch (e) {
+ console.error(`${e.name}: ${e.message}`);
+
+ if (e.message.includes("validation error")) {
+ return new Response(JSON.stringify({
+ error: `Invalid input for ${model}`,
+ }), {
+ status: 400,
+ headers: { "Content-Type": "application/json" },
+ });
+ }
+
+ if (e.message.includes("model not found")) {
+ return new Response(JSON.stringify({
+ error: `${model} not found or not allowed`,
+ }), {
+ status: 400,
+ headers: { "Content-Type": "application/json" },
+ });
+ }
+
+ return new Response(JSON.stringify({
+ error: "AI internal error",
+ }), {
+ status: 500,
+ headers: { "Content-Type": "application/json" },
+ });
+ }
+}
+
+addEventListener("fetch", (event) => {
+ event.respondWith(handleRequest(event.request));
+});
+```
+
+---
+
+## Test your agent
+
+Send a POST request to your function's endpoint, replacing `https://your-function-url.azion.net` with your function's actual URL:
+
+```bash
+curl -X POST https://your-function-url.azion.net -H "Content-Type: application/json" -d '{"model":"casperhansen/mistral-small-24b-instruct-2501-awq","messages":[{"role":"user","content":"What is edge computing?"}]}'
+```
+
+Expected response:
+
+```json
+{
+ "id": "chatcmpl-123",
+ "object": "chat.completion",
+ "created": 1677652288,
+ "model": "casperhansen/mistral-small-24b-instruct-2501-awq",
+ "choices": [{
+ "index": 0,
+ "message": {
+ "role": "assistant",
+ "content": "Edge computing processes data closer to its source, reducing latency and bandwidth by bringing computation near end users or devices."
+ },
+ "finish_reason": "stop"
+ }],
+ "usage": {
+ "prompt_tokens": 22,
+ "completion_tokens": 24,
+ "total_tokens": 46
+ }
+}
+```
+
+---
+
+## Add conversation memory
+
+To maintain context across messages, you need to manage conversation history. Since edge functions are stateless, you have two options:
+
+### Option 1: Pass history in the request body
+
+```javascript
+async function handler(event) {
+ const body = await event.request.json().catch(() => ({}));
+ const userMessage = body.message || 'Hello!';
+ const conversationHistory = body.history || [];
+
+ // Add user message to history
+ conversationHistory.push({
+ role: "user",
+ content: userMessage
+ });
+
+ const modelResponse = await Azion.AI.run("casperhansen/mistral-small-24b-instruct-2501-awq", {
+ "stream": false,
+ "messages": [
+ {
+ "role": "system",
+ "content": "You are a helpful AI assistant. Be concise and friendly."
+ },
+ ...conversationHistory
+ ],
+ "max_tokens": 500
+ });
+
+ const assistantMessage = modelResponse.choices[0].message.content;
+
+ // Add assistant response to history
+ conversationHistory.push({
+ role: "assistant",
+ content: assistantMessage
+ });
+
+ return new Response(JSON.stringify({
+ response: assistantMessage,
+ history: conversationHistory
+ }), {
+ headers: { "Content-Type": "application/json" }
+ });
+}
+
+addEventListener("fetch", (event) => event.respondWith(handler(event)));
+```
+
+### Option 2: Use KV Store for persistent sessions
+
+For persistent conversation history across requests, use [KV Store](/en/documentation/products/store/kv-database/) to store session data with a unique session ID.
+
+---
+
+## What just happened?
+
+When you sent a message:
+
+1. **Request** arrived at your edge function
+2. **Function** called `Azion.AI.run()` with your message
+3. **Model** processed the request at the edge
+4. **Response** returned to the client with minimal latency
+
+```mermaid
+flowchart LR
+ A[Client] -->|POST request| B[Edge Function]
+ B -->|Azion.AI.run| C[AI Model]
+ C -->|Response| B
+ B -->|JSON response| A
+```
+
+### Key concepts
+
+| Concept | What it means |
+|---------|---------------|
+| **Edge execution** | Code runs on Azion's distributed network, close to users |
+| **Azion.AI.run()** | SDK method to invoke AI models |
+| **Model selection** | Choose from available models based on your use case |
+| **Streaming** | Enable real-time responses with `stream: true` |
+
+---
+
+## Add tool calling
+
+Enable your agent to call external functions:
+
+```javascript
+async function handler(event) {
+ const body = await event.request.json().catch(() => ({}));
+ const userMessage = body.message;
+
+ const tools = [
+ {
+ "type": "function",
+ "function": {
+ "name": "get_weather",
+ "description": "Get current weather for a location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "City name"
+ }
+ },
+ "required": ["location"]
+ }
+ }
+ }
+ ];
+
+ const modelResponse = await Azion.AI.run("casperhansen/mistral-small-24b-instruct-2501-awq", {
+ "stream": false,
+ "messages": [
+ {
+ "role": "system",
+ "content": "You are a helpful assistant with access to tools."
+ },
+ {
+ "role": "user",
+ "content": userMessage
+ }
+ ],
+ "tools": tools
+ });
+
+ // Check if the model wants to call a tool
+ if (modelResponse.choices[0].message.tool_calls) {
+ const toolCall = modelResponse.choices[0].message.tool_calls[0];
+ const args = JSON.parse(toolCall.function.arguments);
+
+ // Execute the tool (you would implement this)
+ const weatherData = await getWeather(args.location);
+
+ return new Response(JSON.stringify({
+ tool: toolCall.function.name,
+ location: args.location,
+ weather: weatherData
+ }), {
+ headers: { "Content-Type": "application/json" }
+ });
+ }
+
+ return new Response(JSON.stringify({
+ response: modelResponse.choices[0].message.content
+ }), {
+ headers: { "Content-Type": "application/json" }
+ });
+}
+
+async function getWeather(location) {
+ // Implement your weather API call here
+ return { location, temperature: "22°C", condition: "Sunny" };
+}
+
+addEventListener("fetch", (event) => event.respondWith(handler(event)));
+```
+
+---
+
+## Troubleshooting
+
+### "Model not found" error
+
+Make sure:
+1. The model name matches exactly (case-sensitive)
+2. The model is listed under [available models](/en/documentation/products/ai/ai-inference/models/)
+
+### High latency
+
+Try these solutions:
+1. Enable streaming: `"stream": true`
+2. Reduce `max_tokens` for shorter responses
+3. Choose a smaller model for faster inference
+
+### Rate limit errors
+
+Check the default limits:
+- **300 requests per minute**
+
+Contact support to increase limits for production workloads.
+
+### Function timeout
+
+If your function times out:
+1. Reduce `max_tokens`
+2. Simplify your prompt
+3. Consider breaking complex tasks into smaller steps
+
+---
+
+## Next steps
+
+Now that you have a working agent, explore:
+
+| Learn how to | Refer to |
+|-------------|----------|
+| Use different models | [Available models](/en/documentation/products/ai/ai-inference/models/) |
+| Implement tool calling | [Tool calling example](/en/documentation/products/ai/ai-inference/models/mistral-3-small/#tool-calling-example) |
+| Build RAG applications | [Vector Search](/en/documentation/products/store/sql-database/vector-search/) |
+| Deploy with templates | [AI Inference Starter Kit](/en/documentation/products/guides/ai-inference-starter-kit/) |
diff --git a/src/content/docs/en/pages/guides/langgraph-ai-agent-boilerplate/index.mdx b/src/content/docs/en/pages/guides/langgraph-ai-agent-boilerplate/index.mdx
new file mode 100644
index 0000000000..b134738411
--- /dev/null
+++ b/src/content/docs/en/pages/guides/langgraph-ai-agent-boilerplate/index.mdx
@@ -0,0 +1,153 @@
+---
+title: Deploy LangGraph AI Agent Boilerplate
+description: Deploy a LangGraph-based AI agent on Azion using the boilerplate template.
+meta_tags: >-
+ ai inference, langgraph, ai agent, boilerplate, template, deployment, artificial intelligence, edge computing
+namespace: docs_guides_ai_inference_langgraph_boilerplate
+permalink: /documentation/products/guides/langgraph-ai-agent-boilerplate/
+menu_namespace: AIInferenceMenu
+
+---
+
+import LinkButton from 'azion-webkit/linkbutton';
+
+The **LangGraph AI Agent Boilerplate** provides a template for deploying AI agents built with LangGraph on Azion's edge infrastructure. LangGraph enables you to build stateful, multi-actor applications with LLMs.
+
+## Overview
+
+LangGraph is a framework for building stateful, multi-actor applications with LLMs. It extends LangChain capabilities by adding the ability to coordinate multiple chains (or actors) across multiple steps of computation.
+
+The boilerplate includes:
+
+- Pre-configured LangGraph agent structure
+- Integration with AI Inference models
+- State management for multi-step workflows
+- Example tool implementations
+
+## Requirements
+
+Before you begin, ensure you have:
+
+- An Azion account
+- Node.js 18+ installed
+- Basic knowledge of LangGraph concepts
+
+## Architecture
+
+The LangGraph boilerplate implements:
+
+- **Graph-based workflow**: Define agent behavior as a graph of nodes and edges
+- **State management**: Maintain context across conversation turns
+- **Tool integration**: Connect external APIs and services
+- **Memory**: Persist conversation history
+
+## Deploy the Boilerplate
+
+1. Access the [Azion Console](https://console.azion.com/).
+2. On the **+ Create** page, search for **LangGraph AI Agent Boilerplate**.
+3. Select the template.
+4. Configure your deployment:
+ - Enter a name for your application
+ - Select the AI model for your agent
+ - Configure environment variables
+5. Click **Deploy**.
+
+## Configure your agent
+
+After deployment, you can customize your LangGraph agent:
+
+### Define the graph structure
+
+```python
+from langgraph.graph import StateGraph, END
+
+def build_graph():
+ graph = StateGraph(AgentState)
+ graph.add_node("reasoning", reasoning_node)
+ graph.add_node("action", action_node)
+ graph.add_edge("reasoning", "action")
+ graph.add_edge("action", END)
+ return graph.compile()
+```
+
+### Connect to AI Inference
+
+Configure your agent to use Azion's AI Inference endpoint:
+
+```python
+from langchain_openai import ChatOpenAI
+
+llm = ChatOpenAI(
+ base_url="https://ai.azion.com/v1",
+ api_key="your-api-key",
+ model="Qwen/Qwen3-30B-A3B-Instruct-2507-FP8"
+)
+```
+
+## Next steps
+
+- [Explore available models](/en/documentation/products/ai/ai-inference/models/)
+- Learn about [tool calling](/en/documentation/products/ai/ai-inference/models/mistral-3-small/#tool-calling-example) capabilities
+- Integrate with [Vector Search](/en/documentation/products/store/sql-database/vector-search/) for RAG implementations
+
+## Example: Multi-step agent
+
+Here's an example of a multi-step agent using LangGraph:
+
+```python
+from langgraph.graph import StateGraph, END
+from typing import TypedDict
+
+class AgentState(TypedDict):
+ messages: list
+ current_step: str
+ result: str
+
+def reasoning_node(state: AgentState):
+ # Process the input and decide next action
+ response = llm.invoke(state["messages"])
+ return {"current_step": "action", "result": response.content}
+
+def action_node(state: AgentState):
+ # Execute the decided action
+ result = execute_action(state["result"])
+ return {"current_step": "complete", "result": result}
+
+def build_agent():
+ graph = StateGraph(AgentState)
+ graph.add_node("reasoning", reasoning_node)
+ graph.add_node("action", action_node)
+ graph.set_entry_point("reasoning")
+ graph.add_edge("reasoning", "action")
+ graph.add_edge("action", END)
+ return graph.compile()
+
+agent = build_agent()
+result = agent.invoke({"messages": ["What's the weather in Tokyo?"]})
+```
+
+## Troubleshooting
+
+### Model connection error
+
+If the agent can't connect to the model:
+
+- Verify the `base_url` is correct
+- Confirm the API key is valid
+- Check that the model name is correct
+
+### State not persisting
+
+If state isn't being maintained between turns:
+
+- Verify the checkpointer is configured correctly
+- Confirm the memory store is functioning
+- Review StateGraph configuration
+
+### Tool calling issues
+
+If tools aren't being called correctly:
+
+- Ensure tool definitions match the expected schema
+- Check that the model supports tool calling (see model capabilities)
+- Verify function parameters are correctly formatted
diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx
index 99ba24511f..2ed521dd49 100644
--- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx
+++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx
@@ -4,12 +4,15 @@ description: >-
Qwen3-30B-A3B-Instruct-2507-FP8 is an instruction-tuned 30B-parameter FP8 causal language model for long-context (256K) text generation and reasoning, supporting chat/QA, summarization, multilingual tasks, math/science problem solving, coding, and tool-augmented workflows.
meta_tags: 'ai inference, ai models, artificial intelligence, edge computing, qwen'
namespace: docs_edge_ai_models_qwen_3_30ba3b
+menu_namespace: AIInferenceMenu
permalink: /documentation/products/ai/ai-inference/models/qwen3-30ba3b/
---
**Qwen3-30B-A3B-Instruct-2507-FP8** is an instruction-tuned 30B-parameter FP8 causal language model for long-context (256K) text generation and reasoning, supporting chat/QA, summarization, multilingual tasks, math/science problem solving, coding, and tool-augmented workflows.
- Category | Details |
+## Model details
+
+| Category | Details |
|----------|---------|
| **Model Name** | Qwen/Qwen3-30B-A3B-Instruct-2507-FP8 |
| **Version** | 32B - FP8 |
@@ -21,12 +24,12 @@ permalink: /documentation/products/ai/ai-inference/models/qwen3-30ba3b/
## Capabilities
-| Feature | Status |
+| Feature | Details |
|---------|--------|
-| Tool Calling | ✅|
+| Tool Calling | ✅ |
| Context Length | 64k |
| Supports LoRA | ✅ |
-| Input data | TEXT |
+| Input data | Text |
## Usage
diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx
index 89db438047..379ab601bb 100644
--- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx
+++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx
@@ -4,6 +4,7 @@ description: >-
BAAI/bge-reranker-v2-m3 is a lightweight reranker model with strong multilingual capabilities.
meta_tags: 'ai inference, ai models, artificial intelligence, edge computing'
namespace: docs_edge_ai_models_baai_bge_reranker_v2_m3
+menu_namespace: AIInferenceMenu
permalink: /documentation/products/ai/ai-inference/models/baai-bge-reranker-v2-m3/
---
@@ -35,7 +36,7 @@ permalink: /documentation/products/ai/ai-inference/models/baai-bge-reranker-v2-m
This is an example of a basic rerank request using this model:
```ts
-const modelResponse = await Azion.AI.run("baai-bge-reranker-v2-m3", {
+const modelResponse = await Azion.AI.run("BAAI/bge-reranker-v2-m3", {
"query": "What is deep learning?",
"documents": [
"Deep learning is a subset of machine learning that uses neural networks with many layers",
@@ -56,7 +57,7 @@ const modelResponse = await Azion.AI.run("baai-bge-reranker-v2-m3", {
This is an example of a basic score request using this model:
```ts
-const modelResponse = await Azion.AI.run("baai-bge-reranker-v2-m3", {
+const modelResponse = await Azion.AI.run("BAAI/bge-reranker-v2-m3", {
"text_1": "What is deep learning?",
"text_2": [
"Deep learning is a subset of machine learning that uses neural networks with many layers",
@@ -77,7 +78,7 @@ Response example:
```json
{
"id": "rerank-356bf11f0e794f3c8f726bec7ba698bb",
- "model": "baai-bge-reranker-v2-m3",
+ "model": "BAAI/bge-reranker-v2-m3",
"usage": {
"total_tokens": 78
},
diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/internvl3.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/internvl3.mdx
index 6a7d24d3c6..74bb0b609c 100644
--- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/internvl3.mdx
+++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/internvl3.mdx
@@ -4,6 +4,7 @@ description: >-
InternVL3 is an advanced multimodal large language model with capabilities to encompass tool usage, GUI agents, industrial image analysis, 3D vision perception, and more.
meta_tags: 'ai inference, ai models, artificial intelligence, edge computing'
namespace: docs_edge_ai_models_internvl3
+menu_namespace: AIInferenceMenu
permalink: /documentation/products/ai/ai-inference/models/internvl3/
---
@@ -37,7 +38,7 @@ permalink: /documentation/products/ai/ai-inference/models/internvl3/
This is a basic chat completion example using this model:
```ts
-const modelResponse = await Azion.AI.run("opengvlab-internvl3-1b-instruct", {
+const modelResponse = await Azion.AI.run("OpenGVLab/InternVL3-1B-Instruct", {
"stream": true,
"messages": [
{
@@ -64,7 +65,7 @@ const modelResponse = await Azion.AI.run("opengvlab-internvl3-1b-instruct", {
This is a multimodal example using this model:
```ts
-const modelResponse = await Azion.AI.run("opengvlab-internvl3-1b-instruct", {
+const modelResponse = await Azion.AI.run("OpenGVLab/InternVL3-1B-Instruct", {
"stream": true,
"messages": [
{
diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/mistral-3-small.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/mistral-3-small.mdx
index 4d75e3bfa0..37c86ba6ff 100644
--- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/mistral-3-small.mdx
+++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/mistral-3-small.mdx
@@ -4,6 +4,7 @@ description: >-
Mistral 3 Small provides a range of capabilities, including text generation, image analysis, embeddings, and more.
meta_tags: 'ai inference, ai models, artificial intelligence, edge computing, mistral'
namespace: docs_edge_ai_models_mistral_3_small
+menu_namespace: AIInferenceMenu
permalink: /documentation/products/ai/ai-inference/models/mistral-3-small/
---
@@ -37,7 +38,7 @@ permalink: /documentation/products/ai/ai-inference/models/mistral-3-small/
This is an example of a basic chat completion request using this model:
```ts
-const modelResponse = await Azion.AI.run("casperhansen-mistral-small-24b-instruct-2501-awq", {
+const modelResponse = await Azion.AI.run("casperhansen/mistral-small-24b-instruct-2501-awq", {
"stream": true,
"max_tokens": 1024,
"messages": [
@@ -68,7 +69,7 @@ Response example:
"id": "chatcmpl-e27716424abf4b3f891ff4850470cb09",
"object": "chat.completion",
"created": 1746821581,
- "model": "casperhansen-mistral-small-24b-instruct-2501-awq",
+ "model": "casperhansen/mistral-small-24b-instruct-2501-awq",
"choices": [
{
"index": 0,
@@ -121,7 +122,7 @@ Response example:
This is an example of a tool calling request using this model:
```ts
-const modelResponse = await Azion.AI.run("casperhansen-mistral-small-24b-instruct-2501-awq", {
+const modelResponse = await Azion.AI.run("casperhansen/mistral-small-24b-instruct-2501-awq", {
"stream": true,
"max_tokens": 1024,
"messages": [
@@ -179,7 +180,7 @@ Response example:
"id": "chatcmpl-88affc4730cf4219a06d2b15aad9ad44",
"object": "chat.completion",
"created": 1746821866,
- "model": "qwen-qwen25-vl-3b-instruct-awq",
+ "model": "casperhansen/mistral-small-24b-instruct-2501-awq",
"choices": [
{
"index": 0,
diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/nanonets-ocr-r.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/nanonets-ocr-r.mdx
index 554f22d9b3..553c0163df 100644
--- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/nanonets-ocr-r.mdx
+++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/nanonets-ocr-r.mdx
@@ -4,17 +4,28 @@ description: >-
Nanonets-OCR-s is an OCR model that converts document images to structured Markdown, preserving layout (headings, lists, tables) and basic tags. The output is easy to parse and feed into LLM pipelines.
meta_tags: 'ai inference, ai models, artificial intelligence, edge computing, qwen'
namespace: docs_edge_ai_models_nanonets_ocr_s
+menu_namespace: AIInferenceMenu
permalink: /documentation/products/ai/ai-inference/models/nanonets-ocr-s/
---
**Nanonets-OCR-s** is an OCR model that converts document images to structured Markdown, preserving layout (headings, lists, tables) and basic tags. The output is easy to parse and feed into LLM pipelines.
+## Model details
+
+| Category | Details |
+|----------|---------|
+| **Model Name** | Nanonets-OCR-s |
+| **Version** | Original |
+| **Model Category** | OCR |
+| **HuggingFace Model** | [nanonets/Nanonets-OCR-s](https://huggingface.co/nanonets/Nanonets-OCR-s) |
+| **OpenAI Compatible Endpoint** | [Chat Completions](https://platform.openai.com/docs/api-reference/chat/create) |
+
## Capabilities
-| Feature | Status |
+| Feature | Details |
|---------|--------|
| Context Length | 32k tokens |
-| Input Data | Text+Image |
+| Input Data | Text + Image |
## Usage
diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx
index d90ac3ffef..db66a7fa74 100644
--- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx
+++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx
@@ -1,13 +1,14 @@
---
title: Qwen2.5 VL AWQ 3B
description: >-
- Qwen2.5 VL AWQ 3B is a vision-language model that supports 3 bilion parameters and offers advanced capabilities such as visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation.
+ Qwen2.5 VL AWQ 3B is a vision-language model that supports 3 billion parameters and offers advanced capabilities such as visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation.
meta_tags: 'ai inference, ai models, artificial intelligence, edge computing, qwen'
namespace: docs_edge_ai_models_qwen_2_5_vl_3b
+menu_namespace: AIInferenceMenu
permalink: /documentation/products/ai/ai-inference/models/qwen-2-5-vl-3b/
---
-**Qwen2.5 VL AWQ 3B** is a vision-language model that offers advanced capabilities such as visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation. It supports 3 bilion parameters.
+**Qwen2.5 VL AWQ 3B** is a vision-language model that offers advanced capabilities such as visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation. It supports 3 billion parameters.
## Model details
@@ -37,7 +38,7 @@ permalink: /documentation/products/ai/ai-inference/models/qwen-2-5-vl-3b/
This is a basic chat completion request example using this model:
```ts
-const modelResponse = await Azion.AI.run("qwen-qwen25-vl-3b-instruct-awq", {
+const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-3B-Instruct-AWQ", {
"stream": true,
"messages": [
{
@@ -66,7 +67,7 @@ Response example:
"id": "chatcmpl-e27716424abf4b3f891ff4850470cb09",
"object": "chat.completion",
"created": 1746821581,
- "model": "qwen-qwen25-vl-3b-instruct-awq",
+ "model": "Qwen/Qwen2.5-VL-3B-Instruct-AWQ",
"choices": [
{
"index": 0,
@@ -118,7 +119,7 @@ Response example:
This is a tool calling request example using this model:
```ts
-const modelResponse = await Azion.AI.run("qwen-qwen25-vl-3b-instruct-awq", {
+const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-3B-Instruct-AWQ", {
"stream": true,
"messages": [
{
@@ -173,7 +174,7 @@ Response example:
"id": "chatcmpl-88affc4730cf4219a06d2b15aad9ad44",
"object": "chat.completion",
"created": 1746821866,
- "model": "qwen-qwen25-vl-3b-instruct-awq",
+ "model": "Qwen/Qwen2.5-VL-3B-Instruct-AWQ",
"choices": [
{
"index": 0,
@@ -235,7 +236,7 @@ Response example:
This is a multimodal request example using this model:
```ts
-const modelResponse = await Azion.AI.run("qwen-qwen25-vl-3b-instruct-awq", {
+const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-3B-Instruct-AWQ", {
"stream": true,
"messages": [
{
diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx
index 28cec39803..7c7960b699 100644
--- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx
+++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx
@@ -4,6 +4,7 @@ description: >-
Qwen2.5 VL AWQ 7B is a vision-language model that supports 7 billion parameters, offering advanced capabilities such as visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation.
meta_tags: 'ai inference, ai models, artificial intelligence, edge computing, qwen'
namespace: docs_edge_ai_models_qwen_2_5_vl_7b
+menu_namespace: AIInferenceMenu
permalink: /documentation/products/ai/ai-inference/models/qwen-2-5-vl-7b/
---
@@ -17,7 +18,7 @@ permalink: /documentation/products/ai/ai-inference/models/qwen-2-5-vl-7b/
| **Version** | AWQ 7B |
| **Model Category** | VLM |
| **Size** | 7B params |
-| **HuggingFace Model** | [Qwen/Qwen2.5-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct-AWQ) |
+| **HuggingFace Model** | [Qwen/Qwen2.5-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct-AWQ) |
| **OpenAI Compatible endpoint** | [Chat Completions](https://platform.openai.com/docs/api-reference/chat/create) |
| **License** | [Apache 2.0](https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md) |
@@ -37,7 +38,7 @@ permalink: /documentation/products/ai/ai-inference/models/qwen-2-5-vl-7b/
This is a basic chat completion example using this model:
```ts
-const modelResponse = await Azion.AI.run("qwen-qwen25-vl-7b-instruct-awq", {
+const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-7B-Instruct-AWQ", {
"stream": true,
"messages": [
{
@@ -67,7 +68,7 @@ Response example:
"id": "chatcmpl-e27716424abf4b3f891ff4850470cb09",
"object": "chat.completion",
"created": 1746821581,
- "model": "qwen-qwen25-vl-7b-instruct-awq",
+ "model": "Qwen/Qwen2.5-VL-7B-Instruct-AWQ",
"choices": [
{
"index": 0,
@@ -121,7 +122,7 @@ Response example:
This is a tool calling example using this model:
```ts
-const modelResponse = await Azion.AI.run("qwen-qwen25-vl-7b-instruct-awq", {
+const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-7B-Instruct-AWQ", {
"stream": true,
"messages": [
{
@@ -176,7 +177,7 @@ Response example:
"id": "chatcmpl-88affc4730cf4219a06d2b15aad9ad44",
"object": "chat.completion",
"created": 1746821866,
- "model": "qwen-qwen25-vl-7b-instruct-awq",
+ "model": "Qwen/Qwen2.5-VL-7B-Instruct-AWQ",
"choices": [
{
"index": 0,
@@ -240,7 +241,7 @@ Response example:
This is a multimodal example using this model:
```ts
-const modelResponse = await Azion.AI.run("qwen-qwen25-vl-7b-instruct-awq", {
+const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-7B-Instruct-AWQ", {
"stream": true,
"messages": [
{
diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen3-embedding-4b.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen3-embedding-4b.mdx
index 706ecde2b2..50d76764b2 100644
--- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen3-embedding-4b.mdx
+++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen3-embedding-4b.mdx
@@ -4,6 +4,7 @@ description: >-
Qwen3 Embedding 4B is a 4B-parameter multilingual embedding model (36 layers, 32K context) that outputs 2560‑dim vectors for text/code retrieval, classification, clustering, and bitext mining. It supports instruction-conditioned embeddings and is optimized for efficient, cross-lingual representation learning.
meta_tags: 'ai inference, ai models, artificial intelligence, edge computing, qwen'
namespace: docs_edge_ai_models_qwen_3_embedding_4b
+menu_namespace: AIInferenceMenu
permalink: /documentation/products/ai/ai-inference/models/qwen3-embedding-4b/
---
@@ -49,7 +50,7 @@ Response example:
Different dimensions can be selected by setting the `dimensions` parameter:
```ts
-const modelResponse = await Azion.AI.run("qwen-qwen3-embedding-4b", {
+const modelResponse = await Azion.AI.run("Qwen/Qwen3-Embedding-4B", {
"input": "The food was delicious and the waiter...",
"encoding_format": "float",
"dimensions": 256
diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx
index d9086690f7..3607342558 100644
--- a/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx
+++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx
@@ -1,59 +1,74 @@
---
title: AI Inference
description: >-
- Azion AI Inference empowers you to build and deploy intelligent applications that process data close to where it is generated.
-meta_tags: 'ai inference, artificial intelligence, edge computing'
+ AI Inference enables you to run AI models directly on Azion's highly distributed infrastructure.
+meta_tags: 'ai inference, artificial intelligence, edge computing, ai assistant, ai agent'
namespace: docs_edge_ai_reference
permalink: /documentation/products/ai/ai-inference/
+menu_namespace: AIInferenceMenu
+
---
import LinkButton from 'azion-webkit/linkbutton';
-**AI Inference** empowers you to build and deploy intelligent applications that process data close to where it is generated. By combining artificial intelligence with edge computing, it eliminates the complexities of scaling and infrastructure management, enabling real-time decision-making and enhanced performance.
-
-With Azion AI Inference, you can seamlessly integrate AI capabilities into your applications, leveraging tools like Functions, Applications, and the Azion API to create scalable, secure, and efficient solutions.
+**AI Inference** enables you to run AI models directly on Azion's highly distributed infrastructure. You can integrate AI capabilities into your applications, leveraging tools like Functions, Applications, Vector Search, and the Azion API to create scalable, secure, and efficient solutions.
-AI Inference gives you access to:
+Get started by deploying the AI Inference Starter Kit Template:
-- **Run AI models on Edge Runtime**, enabling advanced AI architectures to execute directly at the edge for minimal latency and maximum performance.
-- **Deploy autonomous AI agents** that analyze data and make decisions at the edge.
-- **Real-time processing** with reduced latency and enhanced efficiency.
-- All as part of a **complete platform**, including Applications, Functions, SQL Database vector search, and more.
+
---
## Features
-### Available Models
+### OpenAI-Compatible API
+
+Connect applications using Azion’s OpenAI-compatible endpoint format.
+
+### Run edge-optimized models
-Access our catalog of open-source AI models that you can run directly on Azion Runtime. These models are optimized for edge deployment with minimal resource requirements.
+- Run AI models on Azion’s globally distributed edge to minimize latency and enable real-time inference.
+- Access a curated catalog of open-source models, ready to run on Azion Runtime and optimized for distributed deployment with low resource footprints.
+- Native inference support for large language models (LLMs) and vision-language models (VLMs).
-### Model customization
+### Fine-tune models with LoRA
-AI Inference allows you to fine-tune, train, and specialize models using **Low-Rank Adaptation (LoRA)**. This capability enables you to optimize models for specific tasks, ensuring they are both efficient and accurate for your business needs.
+You can fine-tune, train, and specialize models with your own data and parameters using **Low-Rank Adaptation (LoRA)**. This capability enables you to optimize models for specific tasks, ensuring they're both efficient and accurate for your business needs.
-### AI Agents
+---
-AI Inference supports deploying AI agents like ReAct (Reasoning + Acting) at the edge, enabling advanced tasks such as context-aware responses, semantic search, and intelligent data processing.
+### Examples of what you can build with AI Inference
-### Integration with SQL Database
+- **AI Assistants**: Build and deploy AI assistants that serve thousands of users simultaneously with low latency, delivering real-time support, dynamic FAQs, and customer assistance without cloud overload.
-Integrate with **SQL Database** to enable vector search capabilities, allowing for semantic queries and hybrid search. This integration enhances AI-powered applications by providing precise, contextually relevant results and supporting efficient Retrieval-Augmented Generation (RAG) implementations.
+- **AI Agents**: Build AI agents that automate multi‑step workflows, collapse days of manual effort into minutes, and free teams for higher‑value work—boosting productivity across operations.
----
+- **Automate Threat Detection and Takedown with AI**: Combine LLMs and vision-language models (VLMs) to monitor digital assets, spot phishing/abuse patterns in text and imagery, and automate threat classification and takedown across distributed environments.
-## Related products
+## Integration with SQL Database
-- [Applications](/en/documentation/products/build/applications/): build applications that run directly on Azion's distributed network, delivering exceptional performance and customization options.
-- [Functions](/en/documentation/products/build/applications/functions/): execute code closer to end users, enhancing performance and enabling custom logic for handling requests and responses.
-- [SQL Database](/en/documentation/products/store/sql-database/): an edge-native SQL solution designed for serverless applications, providing data storage and querying capabilities at the edge.
-- [Vector Search](/en/documentation/products/store/sql-database/vector-search/): enable semantic search engines and AI-powered recommendations through vector embeddings at the edge.
+Integrate your application with **SQL Database** to enable [vector search](/en/documentation/products/store/sql-database/vector-search/) capabilities, allowing for semantic queries and hybrid search. This integration enhances AI-powered applications by providing precise, contextually relevant results and supporting efficient Retrieval-Augmented Generation (RAG) implementations.
+
+## Limits
+
+These are the **default limits**:
+
+| Scope | Limit |
+| ----- | ----- |
+| Requests per minute | 300 |
---
-Explore practical examples of how to implement AI solutions with Azion:
+## Related products
+
+- [Applications](/en/documentation/products/build/applications/): build applications that run directly on Azion's distributed infrastructure, delivering exceptional performance and customization options.
+- [Functions](/en/documentation/products/build/applications/functions/): execute code closer to end users, enhancing performance and enabling custom logic for handling requests and responses.
+- [SQL Database](/en/documentation/products/store/sql-database/): an edge-native SQL solution designed for serverless applications, providing data storage and querying capabilities at the edge. It also enables [Vector Search](/en/documentation/products/store/sql-database/vector-search/) for semantic search and AI-powered recommendations through vector embeddings.
-
-
diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx
index 3fa5bf5a01..d9a4054238 100644
--- a/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx
+++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx
@@ -4,14 +4,15 @@ description: >-
AI Inference offers a diverse range of edge-optimized models for various AI domains, ensuring efficient deployment and performance.
meta_tags: 'ai inference, ai models, artificial intelligence, edge computing'
namespace: docs_edge_ai_models
+menu_namespace: AIInferenceMenu
permalink: /documentation/products/ai/ai-inference/models/
---
import LinkButton from 'azion-webkit/linkbutton';
-Azion's edge-optimized models span multiple AI domains including text generation, image analysis, embeddings, and more. Each model is designed to balance performance and resource efficiency for edge deployment.
+Azion's edge-optimized models span multiple AI domains including text generation, image analysis, embeddings, and more. Each model is designed to balance performance and resource efficiency for distributed deployment.
-This page provides a list of models available for use with **AI Inference**. To learn more about it, visit the [AI Inference Reference](/en/documentation/products/ai/ai-inference/).
+This page provides a list of models available for use with AI Inference. To learn more about it, visit the [AI Inference Reference](/en/documentation/products/ai/ai-inference/).
## Available Models
@@ -41,7 +42,7 @@ A Vision Language Model (VLM) that offers advanced capabilities such as visual a
### Qwen2.5 VL AWQ 7B
-An instruction-tuned 30B-parameter FP8 causal language model for long-context (256K) text generation and reasoning, supporting chat/QA, summarization, multilingual tasks, math/science problem solving, coding, and tool-augmented workflows.
+A vision-language model (VLM) with advanced capabilities including visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation. Optimized for edge deployment with efficient resource usage.
diff --git a/src/content/docs/pt-br/pages/guias/ai-inference-starter-kit/index.mdx b/src/content/docs/pt-br/pages/guias/ai-inference-starter-kit/index.mdx
new file mode 100644
index 0000000000..23ac2162cd
--- /dev/null
+++ b/src/content/docs/pt-br/pages/guias/ai-inference-starter-kit/index.mdx
@@ -0,0 +1,74 @@
+---
+title: Implantar AI Inference Starter Kit
+description: Implante um ambiente completo de AI Inference usando o template Starter Kit na Azion.
+meta_tags: >-
+ ai inference, starter kit, template, implantação, inteligência artificial, computação de borda
+namespace: docs_guides_ai_inference_starter_kit
+permalink: /documentacao/produtos/guias/ai-inference-starter-kit/
+menu_namespace: AIInferenceMenu
+
+---
+
+import LinkButton from 'azion-webkit/linkbutton';
+
+O **AI Inference Starter Kit** oferece um template pronto para uso para implantar capacidades de AI na infraestrutura de edge da Azion. Inclui aplicações, funções e integrações de modelos pré-configuradas.
+
+## Visão geral
+
+O Starter Kit inclui:
+
+- Uma edge application configurada para workloads de AI
+- Funções pré-construídas para tarefas comuns de AI
+- Integração com modelos de AI disponíveis
+- Código de exemplo para implementação rápida
+
+## Requisitos
+
+Antes de começar, certifique-se de ter:
+
+- Uma conta na Azion
+- Acesso ao Console da Azion
+
+## Implantar o Starter Kit
+
+1. Acesse o [Console da Azion](https://console.azion.com/).
+2. Na página **+ Create**, procure por **AI Inference Starter Kit**.
+3. Selecione o template.
+4. Configure sua edge application:
+ - Digite um nome para sua aplicação
+ - Selecione os modelos que deseja usar
+ - Configure definições adicionais conforme necessário
+5. Clique em **Deploy**.
+
+
+
+## Próximos passos
+
+Após a implantação, você pode:
+
+- [Explorar modelos disponíveis](/pt-br/documentacao/produtos/ai/ai-inference/modelos/)
+- [Construir um agente simples](/pt-br/documentacao/produtos/guias/ai-inference-agent/)
+- [Integrar com Banco de Dados SQL](/pt-br/documentacao/produtos/store/sql-database/) para capacidades de vector search
+
+## Solução de problemas
+
+### Erro de implantação
+
+Se a implantação falhar, verifique:
+
+- Se sua conta tem permissões suficientes
+- Se todos os campos obrigatórios foram preenchidos corretamente
+- Os logs de erro no console para identificar problemas específicos
+
+### Modelo não responde
+
+Se o modelo não estiver respondendo:
+
+- Verifique se a função está corretamente configurada
+- Confirme se o nome do modelo está correto na chamada `Azion.AI.run()`
+- Verifique os limites de requisições na seção **Limites** da [referência do AI Inference](/pt-br/documentacao/produtos/ai/ai-inference/)
diff --git a/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx b/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx
new file mode 100644
index 0000000000..75f4ef87a6
--- /dev/null
+++ b/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx
@@ -0,0 +1,383 @@
+---
+title: Construa seu primeiro agente de AI
+description: Construa um agente de AI simples em 5 minutos — um assistente conversacional que roda na infraestrutura de edge da Azion.
+meta_tags: >-
+ ai inference, agente ai, inteligência artificial, computação de borda, guia rápido, tutorial
+namespace: docs_guides_ai_inference_build_agent
+permalink: /documentacao/produtos/guias/ai-inference-agent/
+menu_namespace: AIInferenceMenu
+
+---
+
+import LinkButton from 'azion-webkit/linkbutton';
+
+Construa agentes de AI que pensam, respondem e agem. Os agentes rodam na rede global de edge da Azion, fornecendo respostas com baixa latência e escalabilidade transparente.
+
+**O que você vai construir:** Um agente de AI conversacional que responde perguntas e mantém contexto.
+
+**Tempo:** ~5 minutos
+
+---
+
+## Criar um novo projeto
+
+Clique no botão abaixo para criar um novo projeto com um agente de AI pré-configurado:
+
+
+Ou siga estas etapas manualmente:
+
+1. Acesse o [Console da Azion](https://console.azion.com/).
+2. Clique em **+ Create** e selecione **AI Inference Starter Kit**.
+3. Digite um nome para sua aplicação, como `meu-primeiro-agente`.
+4. Clique em **Deploy**.
+
+Isso cria um projeto com:
+
+- Uma **Edge Application** configurada para workloads de AI
+- Uma **Function** com integração de AI Inference pré-configurada
+- Código de exemplo para começar
+
+---
+
+## Seu primeiro agente
+
+Após a implantação, navegue até sua função e substitua o código por este agente simples:
+
+```javascript
+async function handleRequest(request) {
+ // Verifica se a requisição é POST e tem corpo JSON
+ if (request.method !== "POST" || request.headers.get("content-type") !== "application/json") {
+ return new Response(JSON.stringify({
+ error: "A requisição deve ser POST com corpo JSON",
+ }), {
+ status: 400,
+ headers: { "Content-Type": "application/json" },
+ });
+ }
+
+ let input;
+ try {
+ input = await request.json();
+ } catch (err) {
+ return new Response(JSON.stringify({
+ error: "JSON inválido no corpo da requisição",
+ }), {
+ status: 400,
+ headers: { "Content-Type": "application/json" },
+ });
+ }
+
+ // Verifica se o campo "model" está presente
+ if (!input.hasOwnProperty("model")) {
+ return new Response(JSON.stringify({
+ error: "Campo 'model' obrigatório não encontrado",
+ }), {
+ status: 400,
+ headers: { "Content-Type": "application/json" },
+ });
+ }
+
+ const model = input["model"];
+
+ try {
+ const response = await Azion.AI.run(model, input);
+
+ if (input.stream) {
+ const { readable, writable } = new TransformStream();
+ const writer = writable.getWriter();
+ const encoder = new TextEncoder();
+
+ (async () => {
+ for await (const chunk of response) {
+          await writer.write(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`));
+        }
+        await writer.write(encoder.encode("data: [DONE]\n\n"));
+ await writer.close();
+ })();
+
+ return new Response(readable, {
+ headers: { "Content-Type": "text/event-stream" },
+ });
+ } else {
+ return new Response(JSON.stringify(response), {
+ headers: { "Content-Type": "application/json" },
+ });
+ }
+ } catch (e) {
+ console.error(`${e.name}: ${e.message}`);
+
+ if (e.message.includes("validation error")) {
+ return new Response(JSON.stringify({
+ error: `Entrada inválida para ${model}`,
+ }), {
+ status: 400,
+ headers: { "Content-Type": "application/json" },
+ });
+ }
+
+ if (e.message.includes("model not found")) {
+ return new Response(JSON.stringify({
+ error: `${model} não encontrado ou não permitido`,
+ }), {
+ status: 400,
+ headers: { "Content-Type": "application/json" },
+ });
+ }
+
+ return new Response(JSON.stringify({
+ error: "Erro interno de AI",
+ }), {
+ status: 500,
+ headers: { "Content-Type": "application/json" },
+ });
+ }
+}
+
+addEventListener("fetch", (event) => {
+ event.respondWith(handleRequest(event.request));
+});
+```
+
+---
+
+## Testar seu agente
+
+Envie uma requisição POST para o endpoint da sua função substituindo `https://url-da-sua-funcao.azion.net` pela URL real da sua função:
+
+```bash
+curl -X POST https://url-da-sua-funcao.azion.net -H "Content-Type: application/json" -d '{"model":"casperhansen/mistral-small-24b-instruct-2501-awq","messages":[{"role":"user","content":"O que é computação de borda?"}]}'
+```
+
+Resposta esperada:
+
+```json
+{
+ "id": "chatcmpl-123",
+ "object": "chat.completion",
+ "created": 1677652288,
+ "model": "casperhansen/mistral-small-24b-instruct-2501-awq",
+ "choices": [{
+ "index": 0,
+ "message": {
+ "role": "assistant",
+ "content": "Computação de borda processa dados mais próximo de sua origem, reduzindo latência e uso de banda ao trazer computação perto dos usuários finais ou dispositivos."
+ },
+ "finish_reason": "stop"
+ }],
+ "usage": {
+ "prompt_tokens": 22,
+ "completion_tokens": 24,
+ "total_tokens": 46
+ }
+}
+```
+
+---
+
+## Adicionar memória de conversação
+
+Para manter contexto entre mensagens, você precisa gerenciar o histórico de conversação. Como as edge functions são stateless, você tem duas opções:
+
+### Opção 1: Passar histórico no corpo da requisição
+
+```javascript
+async function handler(event) {
+  const body = await event.request.json().catch(() => ({}));
+ const userMessage = body.message || 'Olá!';
+ const conversationHistory = body.history || [];
+
+ // Adiciona mensagem do usuário ao histórico
+ conversationHistory.push({
+ role: "user",
+ content: userMessage
+ });
+
+ const modelResponse = await Azion.AI.run("casperhansen/mistral-small-24b-instruct-2501-awq", {
+ "stream": false,
+ "messages": [
+ {
+ "role": "system",
+ "content": "Você é um assistente de AI útil. Seja conciso e amigável."
+ },
+ ...conversationHistory
+ ],
+ "max_tokens": 500
+ });
+
+ const assistantMessage = modelResponse.choices[0].message.content;
+
+ // Adiciona resposta do assistente ao histórico
+ conversationHistory.push({
+ role: "assistant",
+ content: assistantMessage
+ });
+
+ return new Response(JSON.stringify({
+ response: assistantMessage,
+ history: conversationHistory
+ }), {
+ headers: { "Content-Type": "application/json" }
+ });
+}
+
+addEventListener("fetch", (event) => event.respondWith(handler(event)));
+```
+
+### Opção 2: Usar KV Store para sessões persistentes
+
+Para histórico de conversação persistente entre requisições, use [KV Store](/pt-br/documentacao/produtos/store/kv-database/) para armazenar dados de sessão com um ID de sessão único.
+
+---
+
+## O que acabou de acontecer?
+
+Quando você enviou uma mensagem:
+
+1. **Requisição** chegou na sua edge function
+2. **Function** chamou `Azion.AI.run()` com sua mensagem
+3. **Modelo** processou a requisição no edge
+4. **Resposta** retornou ao cliente com latência mínima
+
+```mermaid
+flowchart LR
+ A[Cliente] -->|Requisição POST| B[Edge Function]
+ B -->|Azion.AI.run| C[Modelo AI]
+ C -->|Resposta| B
+ B -->|Resposta JSON| A
+```
+
+### Conceitos-chave
+
+| Conceito | O que significa |
+|----------|----------------|
+| **Execução no edge** | Código roda na rede distribuída da Azion, perto dos usuários |
+| **Azion.AI.run()** | Método do SDK para invocar modelos de AI |
+| **Seleção de modelo** | Escolha entre modelos disponíveis baseado no seu caso de uso |
+| **Streaming** | Habilite respostas em tempo real com `stream: true` |
+
+---
+
+## Adicionar chamada de ferramentas
+
+Habilite seu agente a chamar funções externas:
+
+```javascript
+async function handler(event) {
+  const body = await event.request.json().catch(() => ({}));
+ const userMessage = body.message;
+
+ const tools = [
+ {
+ "type": "function",
+ "function": {
+ "name": "get_weather",
+ "description": "Obter clima atual para um local",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "Nome da cidade"
+ }
+ },
+ "required": ["location"]
+ }
+ }
+ }
+ ];
+
+ const modelResponse = await Azion.AI.run("casperhansen/mistral-small-24b-instruct-2501-awq", {
+ "stream": false,
+ "messages": [
+ {
+ "role": "system",
+ "content": "Você é um assistente útil com acesso a ferramentas."
+ },
+ {
+ "role": "user",
+ "content": userMessage
+ }
+ ],
+ "tools": tools
+ });
+
+ // Verifica se o modelo quer chamar uma ferramenta
+ if (modelResponse.choices[0].message.tool_calls) {
+ const toolCall = modelResponse.choices[0].message.tool_calls[0];
+ const args = JSON.parse(toolCall.function.arguments);
+
+ // Executa a ferramenta (você implementaria isso)
+ const weatherData = await getWeather(args.location);
+
+ return new Response(JSON.stringify({
+ tool: toolCall.function.name,
+ location: args.location,
+ weather: weatherData
+ }), {
+ headers: { "Content-Type": "application/json" }
+ });
+ }
+
+ return new Response(JSON.stringify({
+ response: modelResponse.choices[0].message.content
+ }), {
+ headers: { "Content-Type": "application/json" }
+ });
+}
+
+async function getWeather(location) {
+ // Implemente sua chamada de API de clima aqui
+ return { location, temperature: "22°C", condition: "Ensolarado" };
+}
+
+addEventListener("fetch", (event) => event.respondWith(handler(event)));
+```
+
+---
+
+## Solução de problemas
+
+### Erro "Model not found"
+
+Verifique:
+1. O nome do modelo corresponde exatamente (case-sensitive)
+2. Consulte os [modelos disponíveis](/pt-br/documentacao/produtos/ai/ai-inference/modelos/) para nomes corretos
+
+### Alta latência
+
+Tente estas soluções:
+1. Habilite streaming: `"stream": true`
+2. Reduza `max_tokens` para respostas mais curtas
+3. Escolha um modelo menor para inferência mais rápida
+
+### Erros de rate limit
+
+Verifique os limites padrão:
+- **300 requisições por minuto**
+
+Contate o suporte para aumentar limites em produção.
+
+### Timeout na função
+
+Se sua função atinge timeout:
+1. Reduza `max_tokens`
+2. Simplifique seu prompt
+3. Considere dividir tarefas complexas em etapas menores
+
+---
+
+## Próximos passos
+
+Agora que você tem um agente funcionando, explore:
+
+| Aprenda a | Consulte |
+|-----------|----------|
+| Usar diferentes modelos | [Modelos disponíveis](/pt-br/documentacao/produtos/ai/ai-inference/modelos/) |
+| Implementar tool calling | [Exemplo de tool calling](/pt-br/documentacao/produtos/ai/ai-inference/modelos/mistral-3-small/#exemplo-de-tool-calling) |
+| Construir aplicações RAG | [Vector Search](/pt-br/documentacao/produtos/store/sql-database/vector-search/) |
+| Implantar com templates | [AI Inference Starter Kit](/pt-br/documentacao/produtos/guias/ai-inference-starter-kit/) |
diff --git a/src/content/docs/pt-br/pages/guias/langgraph-ai-agent-boilerplate/index.mdx b/src/content/docs/pt-br/pages/guias/langgraph-ai-agent-boilerplate/index.mdx
new file mode 100644
index 0000000000..3338591924
--- /dev/null
+++ b/src/content/docs/pt-br/pages/guias/langgraph-ai-agent-boilerplate/index.mdx
@@ -0,0 +1,109 @@
+---
+title: Implantar LangGraph AI Agent Boilerplate
+description: Implante um agente de AI baseado em LangGraph na Azion usando o template boilerplate.
+meta_tags: >-
+ ai inference, langgraph, agente ai, boilerplate, template, implantação, inteligência artificial, computação de borda
+namespace: docs_guides_ai_inference_langgraph_boilerplate
+permalink: /documentacao/produtos/guias/langgraph-ai-agent-boilerplate/
+menu_namespace: AIInferenceMenu
+
+---
+
+import LinkButton from 'azion-webkit/linkbutton';
+
+O **LangGraph AI Agent Boilerplate** oferece um template para implantar agentes de AI construídos com LangGraph na infraestrutura de edge da Azion. O LangGraph permite construir aplicações stateful e multi-ator com LLMs.
+
+## Visão geral
+
+O LangGraph é um framework para construir aplicações stateful e multi-ator com LLMs. Ele estende as capacidades do LangChain adicionando a habilidade de coordenar múltiplas chains (ou atores) através de múltiplos passos de computação.
+
+O boilerplate inclui:
+
+- Estrutura de agente LangGraph pré-configurada
+- Integração com modelos de AI Inference
+- Gerenciamento de estado para workflows de múltiplas etapas
+- Implementações de ferramentas de exemplo
+
+## Requisitos
+
+Antes de começar, certifique-se de ter:
+
+- Uma conta na Azion
+- Node.js 18+ instalado
+- Conhecimento básico de conceitos do LangGraph
+
+## Arquitetura
+
+O boilerplate LangGraph implementa:
+
+- **Workflow baseado em grafo**: Define o comportamento do agente como um grafo de nós e arestas
+- **Gerenciamento de estado**: Mantém contexto através de turnos de conversação
+- **Integração de ferramentas**: Conecta APIs e serviços externos
+- **Memória**: Persiste histórico de conversação
+
+## Implantar o Boilerplate
+
+1. Acesse o [Console da Azion](https://console.azion.com/).
+2. Na página **+ Create**, procure por **LangGraph AI Agent Boilerplate**.
+3. Selecione o template.
+4. Configure sua implantação:
+ - Digite um nome para sua aplicação
+ - Selecione o modelo de AI para seu agente
+ - Configure as variáveis de ambiente
+5. Clique em **Deploy**.
+
+## Configurar seu agente
+
+Após a implantação, você pode personalizar seu agente LangGraph:
+
+### Definir a estrutura do grafo
+
+```python
+from langgraph.graph import StateGraph, END
+
+def build_graph():
+ graph = StateGraph(AgentState)
+ graph.add_node("reasoning", reasoning_node)
+ graph.add_node("action", action_node)
+ graph.add_edge("reasoning", "action")
+ graph.add_edge("action", END)
+ return graph.compile()
+```
+
+### Conectar ao AI Inference
+
+Configure seu agente para usar o endpoint de AI Inference da Azion:
+
+```python
+from langchain_openai import ChatOpenAI
+
+llm = ChatOpenAI(
+ base_url="https://ai.azion.com/v1",
+ api_key="sua-api-key",
+ model="Qwen/Qwen3-30B-A3B-Instruct-2507-FP8"
+)
+```
+
+## Próximos passos
+
+- [Explorar modelos disponíveis](/pt-br/documentacao/produtos/ai/ai-inference/modelos/)
+- Saiba mais sobre [tool calling](/pt-br/documentacao/produtos/ai/ai-inference/modelos/mistral-3-small/#exemplo-de-tool-calling)
+- Integre com [Vector Search](/pt-br/documentacao/produtos/store/sql-database/vector-search/) para implementações RAG
+
+## Solução de problemas
+
+### Erro de conexão com o modelo
+
+Se o agente não conseguir conectar ao modelo:
+
+- Verifique se a `base_url` está correta
+- Confirme se a API key é válida
+- Verifique se o nome do modelo está correto
+
+### Estado não persistindo
+
+Se o estado não estiver sendo mantido entre turnos:
+
+- Verifique se o checkpoint está configurado corretamente
+- Confirme se o memory store está funcionando
+- Revise a configuração do StateGraph
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx
index 6254adfed0..2362968c46 100644
--- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx
+++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx
@@ -5,10 +5,13 @@ description: >-
meta_tags: 'ai inference, ai models, inteligência artificial, edge computing, qwen'
namespace: docs_edge_ai_models_qwen_3_30ba3b
permalink: /documentacao/produtos/ai/ai-inference/modelos/qwen3-30ba3b/
+menu_namespace: AIInferenceMenu
---
**Qwen3-30B-A3B-Instruct-2507-FP8** é um modelo de linguagem causal FP8 ajustado por instruções com 30 bilhões de parâmetros para geração de texto de longo contexto (256K) e raciocínio, suportando chat/QA, sumarização, tarefas multilíngues, resolução de problemas de matemática/ciência, codificação e fluxos de trabalho aumentados por ferramentas.
+## Detalhes do modelo
+
| Categoria | Detalhes |
|----------|---------|
| **Nome do modelo** | Qwen/Qwen3-30B-A3B-Instruct-2507-FP8 |
@@ -21,12 +24,12 @@ permalink: /documentacao/produtos/ai/ai-inference/modelos/qwen3-30ba3b/
## Capacidades
-| Recurso | Status |
+| Recurso | Detalhes |
|---------|--------|
-| Chamada de ferramentas | ✅|
+| Chamada de ferramentas | ✅ |
| Comprimento do contexto | 64k |
| Suporta LoRA | ✅ |
-| Dados de entrada | TEXTO |
+| Dados de entrada | Texto |
## Uso
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx
index 5c3bebb63b..d61f334a09 100644
--- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx
+++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx
@@ -5,6 +5,7 @@ description: >-
meta_tags: 'ai inference, ai models, artificial intelligence, edge computing'
namespace: docs_edge_ai_models_baai_bge_reranker_v2_m3
permalink: /documentacao/produtos/ai/ai-inference/modelos/baai-bge-reranker-v2-m3/
+menu_namespace: AIInferenceMenu
---
**BAAI/bge-reranker-v2-m3** é um modelo de reranking leve com fortes capacidades multilíngues. Ele é fácil de implementar e oferece inferência rápida.
@@ -35,7 +36,7 @@ permalink: /documentacao/produtos/ai/ai-inference/modelos/baai-bge-reranker-v2-m
Este é um exemplo de uma requisição básica de reranking usando este modelo:
```ts
-const modelResponse = await Azion.AI.run("baai-bge-reranker-v2-m3", {
+const modelResponse = await Azion.AI.run("BAAI/bge-reranker-v2-m3", {
"query": "What is deep learning?",
"documents": [
"Deep learning is a subset of machine learning that uses neural networks with many layers",
@@ -56,7 +57,7 @@ const modelResponse = await Azion.AI.run("baai-bge-reranker-v2-m3", {
Este é um exemplo de uma requisição básica de pontuação usando este modelo:
```ts
-const modelResponse = await Azion.AI.run("baai-bge-reranker-v2-m3", {
+const modelResponse = await Azion.AI.run("BAAI/bge-reranker-v2-m3", {
"text_1": "What is deep learning?",
"text_2": [
"Deep learning is a subset of machine learning that uses neural networks with many layers",
@@ -77,7 +78,7 @@ Exemplo de resposta:
```json
{
"id": "rerank-356bf11f0e794f3c8f726bec7ba698bb",
- "model": "baai-bge-reranker-v2-m3",
+ "model": "BAAI/bge-reranker-v2-m3",
"usage": {
"total_tokens": 78
},
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/internvl3.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/internvl3.mdx
index f73ce0e88e..da8d048ecd 100644
--- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/internvl3.mdx
+++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/internvl3.mdx
@@ -5,6 +5,7 @@ description: >-
meta_tags: 'ai inference, modelos de ia, inteligência artificial, edge computing'
namespace: docs_edge_ai_models_internvl3
permalink: /documentacao/produtos/ai/ai-inference/modelos/internvl3/
+menu_namespace: AIInferenceMenu
---
**InternVL3** é um Multimodal Large Language Model avançado (MLLM) com capacidades para abranger tool calling, agentes GUI, análise de imagem industrial, percepção de visão 3D e mais.
@@ -37,7 +38,7 @@ permalink: /documentacao/produtos/ai/ai-inference/modelos/internvl3/
Este é um exemplo básico de chat completion usando este modelo:
```ts
-const modelResponse = await Azion.AI.run("opengvlab-internvl3-1b-instruct", {
+const modelResponse = await Azion.AI.run("OpenGVLab/InternVL3-1B-Instruct", {
"stream": true,
"messages": [
{
@@ -64,7 +65,7 @@ const modelResponse = await Azion.AI.run("opengvlab-internvl3-1b-instruct", {
Este é um exemplo de requisição multimodal usando este modelo:
```ts
-const modelResponse = await Azion.AI.run("opengvlab-internvl3-1b-instruct", {
+const modelResponse = await Azion.AI.run("OpenGVLab/InternVL3-1B-Instruct", {
"stream": true,
"messages": [
{
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/mistral-3-small.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/mistral-3-small.mdx
index ee108738a1..f7215b040c 100644
--- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/mistral-3-small.mdx
+++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/mistral-3-small.mdx
@@ -5,6 +5,7 @@ description: >-
meta_tags: 'ai inference, modelos ai, inteligência artificial, computação edge, mistral'
namespace: docs_edge_ai_models_mistral_3_small
permalink: /documentacao/produtos/ai/ai-inference/modelos/mistral-3-small/
+menu_namespace: AIInferenceMenu
---
**Mistral 3 Small** é um modelo de linguagem que, embora sendo compacto, oferece capacidades comparáveis às de modelos maiores. Ele é ideal para agentes conversacionais, chamada de função, ajuste fino e inferência local com dados sensíveis.
@@ -37,7 +38,7 @@ permalink: /documentacao/produtos/ai/ai-inference/modelos/mistral-3-small/
Este é um exemplo de uma requisição básica de chat completion usando este modelo:
```ts
-const modelResponse = await Azion.AI.run("casperhansen-mistral-small-24b-instruct-2501-awq", {
+const modelResponse = await Azion.AI.run("casperhansen/mistral-small-24b-instruct-2501-awq", {
"stream": true,
"max_tokens": 1024,
"messages": [
@@ -68,7 +69,7 @@ Exemplo de resposta:
"id": "chatcmpl-e27716424abf4b3f891ff4850470cb09",
"object": "chat.completion",
"created": 1746821581,
- "model": "casperhansen-mistral-small-24b-instruct-2501-awq",
+ "model": "casperhansen/mistral-small-24b-instruct-2501-awq",
"choices": [
{
"index": 0,
@@ -121,7 +122,7 @@ Exemplo de resposta:
Este é um exemplo de uma requisição de Tool Calling usando este modelo:
```ts
-const modelResponse = await Azion.AI.run("casperhansen-mistral-small-24b-instruct-2501-awq", {
+const modelResponse = await Azion.AI.run("casperhansen/mistral-small-24b-instruct-2501-awq", {
"stream": true,
"max_tokens": 1024,
"messages": [
@@ -179,7 +180,7 @@ Exemplo de resposta:
"id": "chatcmpl-88affc4730cf4219a06d2b15aad9ad44",
"object": "chat.completion",
"created": 1746821866,
- "model": "qwen-qwen25-vl-3b-instruct-awq",
+ "model": "casperhansen/mistral-small-24b-instruct-2501-awq",
"choices": [
{
"index": 0,
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/nanonets-ocr-r.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/nanonets-ocr-r.mdx
index f10aec2d95..3db79a0af5 100644
--- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/nanonets-ocr-r.mdx
+++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/nanonets-ocr-r.mdx
@@ -5,17 +5,27 @@ description: >-
meta_tags: 'ai inference, ai models, inteligência artificial, edge computing, qwen'
namespace: docs_edge_ai_models_nanonets_ocr_s
permalink: /documentacao/produtos/ai/ai-inference/modelos/nanonets-ocr-s/
+menu_namespace: AIInferenceMenu
---
**Nanonets-OCR-s** é um modelo OCR que converte imagens de documentos em Markdown estruturado, preservando o layout (títulos, listas, tabelas) e tags básicas. A saída é fácil de analisar e alimentar em pipelines de LLM.
+## Detalhes do modelo
+
+| Categoria | Detalhes |
+|----------|---------|
+| **Nome do modelo** | Nanonets-OCR-s |
+| **Versão** | Original |
+| **Categoria do modelo** | OCR |
+| **Modelo HuggingFace** | [nanonets/Nanonets-OCR-s](https://huggingface.co/nanonets/Nanonets-OCR-s) |
+| **Endpoint compatível com OpenAI** | [Chat Completions](https://platform.openai.com/docs/api-reference/chat/create) |
+
## Capacidades
-| Recurso | Status |
+| Recurso | Detalhes |
|---------|--------|
-| Suporte de Longo Prazo da Azion (LTS) | ❌ |
| Comprimento do contexto | 32k tokens |
-| Dados de entrada | Texto+Imagem |
+| Dados de entrada | Texto + Imagem |
## Uso
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-3b.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-3b.mdx
index 5ba0cc2434..18a22e834e 100644
--- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-3b.mdx
+++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-3b.mdx
@@ -5,6 +5,7 @@ description: >-
meta_tags: 'ai inference, modelos ai, inteligência artificial, edge computing, qwen'
namespace: docs_edge_ai_models_qwen_2_5_vl_3b
permalink: /documentacao/produtos/ai/ai-inference/modelos/qwen-2-5-vl-3b/
+menu_namespace: AIInferenceMenu
---
O **Qwen 2.5 VL AWQ 3B** é um modelo de linguagem e visão que oferece capacidades avançadas como análise visual, raciocínio de agente, compreensão de vídeo longo, localização visual e geração de saída estruturada. Ele suporta 3 bilhões de parâmetros.
@@ -37,7 +38,7 @@ O **Qwen 2.5 VL AWQ 3B** é um modelo de linguagem e visão que oferece capacida
Este é um exemplo básico de uma requisição de chat completion usando este modelo:
```ts
-const modelResponse = await Azion.AI.run("qwen-qwen25-vl-3b-instruct-awq", {
+const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-3B-Instruct-AWQ", {
"stream": true,
"messages": [
{
@@ -66,7 +67,7 @@ Exemplo de resposta:
"id": "chatcmpl-e27716424abf4b3f891ff4850470cb09",
"object": "chat.completion",
"created": 1746821581,
- "model": "qwen-qwen25-vl-3b-instruct-awq",
+ "model": "Qwen/Qwen2.5-VL-3B-Instruct-AWQ",
"choices": [
{
"index": 0,
@@ -118,7 +119,7 @@ Exemplo de resposta:
Este é um exemplo de uma requisição de Tool Calling usando este modelo:
```ts
-const modelResponse = await Azion.AI.run("qwen-qwen25-vl-3b-instruct-awq", {
+const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-3B-Instruct-AWQ", {
"stream": true,
"messages": [
{
@@ -173,7 +174,7 @@ Exemplo de resposta:
"id": "chatcmpl-88affc4730cf4219a06d2b15aad9ad44",
"object": "chat.completion",
"created": 1746821866,
- "model": "qwen-qwen25-vl-3b-instruct-awq",
+ "model": "Qwen/Qwen2.5-VL-3B-Instruct-AWQ",
"choices": [
{
"index": 0,
@@ -235,7 +236,7 @@ Exemplo de resposta:
Este é um exemplo de uma requisição multimodal usando este modelo:
```ts
-const modelResponse = await Azion.AI.run("qwen-qwen25-vl-3b-instruct-awq", {
+const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-3B-Instruct-AWQ", {
"stream": true,
"messages": [
{
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx
index 37086b812c..d13a31fca0 100644
--- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx
+++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx
@@ -5,6 +5,7 @@ description: >-
meta_tags: 'ai inference, modelos ai, inteligência artificial, computação edge, qwen'
namespace: docs_edge_ai_models_qwen_2_5_vl_7b
permalink: /documentacao/produtos/ai/ai-inference/modelos/qwen-2-5-vl-7b/
+menu_namespace: AIInferenceMenu
---
O **Qwen 2.5 VL AWQ 7B** é um modelo de linguagem e visão que suporta 7 bilhões de parâmetros, oferecendo capacidades avançadas como análise visual, raciocínio de agente, compreensão de vídeo longo, localização visual e geração de saída estruturada.
@@ -17,7 +18,7 @@ O **Qwen 2.5 VL AWQ 7B** é um modelo de linguagem e visão que suporta 7 bilhõ
| **Versão** | AWQ 7B |
| **Categoria do modelo** | VLM |
| **Tamanho** | 7B parâmetros |
-| **Modelo HuggingFace** | [Qwen/Qwen2.5-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct-AWQ) |
+| **Modelo HuggingFace** | [Qwen/Qwen2.5-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct-AWQ) |
| **Endpoint compatível com a OpenAI** | [Chat Completions](https://platform.openai.com/docs/api-reference/chat/create) |
| **Licença** | [Apache 2.0](https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md) |
@@ -37,7 +38,7 @@ O **Qwen 2.5 VL AWQ 7B** é um modelo de linguagem e visão que suporta 7 bilhõ
Este é um exemplo básico de uma requisição de chat completion usando este modelo:
```ts
-const modelResponse = await Azion.AI.run("qwen-qwen25-vl-7b-instruct-awq", {
+const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-7B-Instruct-AWQ", {
"stream": true,
"messages": [
{
@@ -67,7 +68,7 @@ Exemplo de resposta:
"id": "chatcmpl-e27716424abf4b3f891ff4850470cb09",
"object": "chat.completion",
"created": 1746821581,
- "model": "qwen-qwen25-vl-7b-instruct-awq",
+ "model": "Qwen/Qwen2.5-VL-7B-Instruct-AWQ",
"choices": [
{
"index": 0,
@@ -122,7 +123,7 @@ Exemplo de resposta:
Este é um exemplo de uma requisição de Tool Calling usando este modelo:
```ts
-const modelResponse = await Azion.AI.run("qwen-qwen25-vl-7b-instruct-awq", {
+const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-7B-Instruct-AWQ", {
"stream": true,
"messages": [
{
@@ -177,7 +178,7 @@ Exemplo de resposta:
"id": "chatcmpl-88affc4730cf4219a06d2b15aad9ad44",
"object": "chat.completion",
"created": 1746821866,
- "model": "qwen-qwen25-vl-7b-instruct-awq",
+ "model": "Qwen/Qwen2.5-VL-7B-Instruct-AWQ",
"choices": [
{
"index": 0,
@@ -242,7 +243,7 @@ Exemplo de resposta:
Este é um exemplo multimodal usando este modelo:
```ts
-const modelResponse = await Azion.AI.run("qwen-qwen25-vl-7b-instruct-awq", {
+const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-7B-Instruct-AWQ", {
"stream": true,
"messages": [
{
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen3-embedding-4b.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen3-embedding-4b.mdx
index b986fe2e2c..86adb764da 100644
--- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen3-embedding-4b.mdx
+++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen3-embedding-4b.mdx
@@ -5,6 +5,7 @@ description: >-
meta_tags: 'ai inference, ai modelos, inteligência artificial, edge computing, qwen'
namespace: docs_edge_ai_models_qwen_3_embedding_4b
permalink: /documentacao/produtos/ai/ai-inference/modelos/qwen3-embedding-4b/
+menu_namespace: AIInferenceMenu
---
**Qwen3 Embedding 4B** é um modelo de embedding multilíngue com 4 bilhões de parâmetros (36 camadas, 32K de contexto) que gera vetores de 2560 dimensões para recuperação de texto/código, classificação, agrupamento e mineração de bitexto. Ele suporta embeddings condicionados por instrução e é otimizado para aprendizado de representação eficiente e multilíngue.
@@ -49,7 +50,7 @@ Exemplo de resposta:
Diferentes dimensões podem ser selecionadas definindo o parâmetro `dimensions`:
```ts
-const modelResponse = await Azion.AI.run("qwen-qwen3-embedding-4b", {
+const modelResponse = await Azion.AI.run("Qwen/Qwen3-Embedding-4B", {
"input": "A comida estava deliciosa e o garçom...",
"encoding_format": "float",
"dimensions": 256
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx
index 49f4147687..3a1d2bbbc3 100644
--- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx
+++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx
@@ -1,59 +1,73 @@
---
title: AI Inference
description: >-
- O AI Inference da Azion capacita você a construir e implementar aplicações inteligentes que processam dados perto de onde são gerados.
-meta_tags: 'ai inference, inteligência artificial, edge computing'
+ O AI Inference permite que você execute modelos de AI diretamente na infraestrutura altamente distribuída da Azion.
+meta_tags: 'inferência de ia, inteligência artificial, computação de borda, assistente de ia, agente de ia'
namespace: docs_edge_ai_reference
permalink: /documentacao/produtos/ai/ai-inference/
+menu_namespace: AIInferenceMenu
+
---
import LinkButton from 'azion-webkit/linkbutton';
-O **AI Inference** capacita você a construir e implementar aplicações inteligentes que processam dados perto de onde são gerados. Ao combinar inteligência artificial com edge computing, o AI Inference elimina as complexidades de escalabilidade e gerenciamento de infraestrutura, permitindo tomadas de decisão em tempo real e desempenho aprimorado.
-
-Com o AI Inference da Azion, você pode integrar perfeitamente capacidades de AI em suas aplicações, aproveitando ferramentas como Functions, Applications e a API da Azion para criar soluções escaláveis, seguras e eficientes.
+**AI Inference** permite que você execute modelos de AI diretamente na infraestrutura altamente distribuída da Azion. Você pode integrar capacidades de AI em suas aplicações, aproveitando ferramentas como Functions, Applications, Vector Search e a API da Azion para criar soluções escaláveis, seguras e eficientes.
-O AI Inference possibilita:
+Comece implantando o Template do Starter Kit do AI Inference:
-- **Executar modelos de AI no Edge Runtime**, permitindo que arquiteturas avançadas de AI sejam executadas diretamente no edge para latência mínima e desempenho máximo.
-- **Implementar agentes de AI autônomos** que analisam dados e tomam decisões no edge.
-- **Processamento em tempo real** com latência reduzida e eficiência aprimorada.
-- Tudo como parte de uma **plataforma completa**, incluindo Applications, Functions, busca vetorial do SQL Database e muito mais.
+
---
-## Recursos
+## Funcionalidades
+
+### API Compatível com OpenAI
-### Modelos disponíveis
+Conecte aplicações usando o formato de endpoint compatível com OpenAI da Azion.
-Acesse nosso catálogo de modelos de AI de código aberto que você pode executar diretamente no Runtime da Azion. Esses modelos são otimizados para implementação no edge com requisitos mínimos de recursos.
+### Execute modelos otimizados para edge
-
+- Execute modelos de AI no edge, utilizando a infraestrutura globalmente distribuída da Azion para minimizar a latência e permitir inferência em tempo real.
+- Acesse um catálogo selecionado de modelos de código aberto, prontos para rodar no Azion Runtime e otimizados para implantação distribuída com baixo consumo de recursos.
+- Suporte nativo para inferência de modelos de linguagem de grande porte (LLMs) e modelos de visão-linguagem (VLMs).
-### Personalização de modelos
+
-O AI Inference permite que você ajuste, treine e especialize modelos usando **Low-Rank Adaptation (LoRA)**. Esse recurso permite que você otimize modelos para tarefas específicas, garantindo que sejam eficientes e precisos para as necessidades do seu negócio.
+### Ajuste fino de modelos com LoRA
-### Agentes de AI
+Você pode ajustar, treinar e especializar modelos com seus próprios dados e parâmetros. Essa capacidade permite otimizar modelos para tarefas específicas, garantindo que sejam eficientes e precisos para as necessidades do seu negócio.
-O AI Inference suporta a implementação de agentes de AI como ReAct (Raciocínio + Ação) no edge, permitindo tarefas avançadas como respostas contextuais, pesquisa semântica e processamento inteligente de dados.
+---
-### Integração com SQL Database
+### Exemplos do que você pode construir com o AI Inference
-Integre o AI Inference com o **SQL Database** para habilitar capacidades de busca vetorial, permitindo consultas semânticas e busca híbrida. Essa integração aprimora aplicativos alimentados por AI, fornecendo resultados precisos e contextualmente relevantes e suportando implementações eficientes de Retrieval Augmented Generation (RAG).
+- **Assistentes de AI**: Construa e implante assistentes de AI que atendem milhares de usuários simultaneamente com baixa latência, oferecendo suporte em tempo real, FAQs dinâmicas e assistência ao cliente sem sobrecarga na nuvem.
----
+- **Agentes de AI**: Construa agentes de AI que automatizam fluxos de trabalho de múltiplas etapas, reduzindo dias de esforço manual para minutos, e liberando equipes para trabalhos de maior valor—impulsionando a produtividade em todas as operações.
-## Produtos relacionados
+- **Automatize a Detecção e Remoção de Ameaças com AI**: Combine LLMs e modelos de visão-linguagem (VLMs) para monitorar ativos digitais, identificar padrões de phishing/abuso em texto e imagens, e automatizar a classificação e remoção de ameaças em ambientes distribuídos.
-- [Applications](/pt-br/documentacao/produtos/build/applications/): construa aplicações que executam diretamente na rede distribuída da Azion, oferecendo desempenho e opções de personalização excepcionais.
-- [Functions](/pt-br/documentacao/produtos/build/applications/functions/): execute código mais próximo dos usuários finais, melhorando o desempenho e permitindo lógica personalizada para lidar com requisições e respostas.
-- [SQL Database](/pt-br/documentacao/produtos/store/sql-database/): uma solução SQL edge-native projetada para aplicações serverless, fornecendo capacidades de armazenamento e consulta de dados no edge.
-- [Vector Search](/pt-br/documentacao/produtos/store/sql-database/vector-search/): ative motores de busca semântica e recomendações impulsionadas por AI através de embeddings vetoriais no edge.
+## Integração com SQL Database
+
+Integre sua aplicação com o **SQL Database** para habilitar capacidades de [Vector Search](/pt-br/documentacao/produtos/store/sql-database/vector-search/), permitindo consultas semânticas e busca híbrida. Essa integração melhora as aplicações impulsionadas por AI ao fornecer resultados precisos e contextualmente relevantes e ao suportar implementações eficientes de Geração Aumentada por Recuperação (RAG).
+
+## Limites
+
+Estes são os **limites padrão**:
+
+| Escopo | Limite |
+| ----- | ----- |
+| Requisições por minuto | 300 |
---
-Explore exemplos práticos de como implementar soluções de AI com a Azion:
+## Produtos relacionados
-
-
+- [Applications](/pt-br/documentacao/produtos/build/applications/): construa aplicações que rodam diretamente na infraestrutura distribuída da Azion, oferecendo desempenho excepcional e opções de personalização.
+- [Functions](/pt-br/documentacao/produtos/build/applications/functions/): execute código mais próximo dos usuários finais, melhorando o desempenho e permitindo lógica personalizada para lidar com solicitações e respostas.
+- [SQL Database](/pt-br/documentacao/produtos/store/sql-database/): uma solução SQL nativa do edge projetada para aplicações serverless, fornecendo capacidades de armazenamento e consulta de dados no edge. Também habilita [Vector Search](/pt-br/documentacao/produtos/store/sql-database/vector-search/) para realizar busca semântica e recomendações impulsionadas por AI por meio de embeddings vetoriais.
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/modelos.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/modelos.mdx
index 6290c97595..55ec783a2b 100644
--- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/modelos.mdx
+++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/modelos.mdx
@@ -5,13 +5,14 @@ description: >-
meta_tags: 'ai inference, modelos ai, inteligência artificial, edge computing'
namespace: docs_edge_ai_models
permalink: /documentacao/produtos/ai/ai-inference/modelos/
+menu_namespace: AIInferenceMenu
---
import LinkButton from 'azion-webkit/linkbutton';
Os modelos otimizados para o edge da Azion abrangem múltiplos domínios de AI, incluindo geração de texto, análise de imagem, embeddings e mais. Cada modelo é projetado para equilibrar o desempenho e a eficiência de recursos para implementação no edge.
-Esta página fornece uma lista de modelos disponíveis para uso no **AI Inference**. Para saber mais, visite a página de [referência do AI Inference](/pt-br/documentacao/produtos/ai/ai-inference/).
+Esta página fornece uma lista de modelos disponíveis para uso com AI Inference. Para saber mais, visite a página de [referência do AI Inference](/pt-br/documentacao/produtos/ai/ai-inference/).
## Modelos disponíveis
diff --git a/src/data/availableMenu.ts b/src/data/availableMenu.ts
index 4767b19787..a28011e4f0 100644
--- a/src/data/availableMenu.ts
+++ b/src/data/availableMenu.ts
@@ -11,5 +11,6 @@ export const availableMenus = [
{ name: 'deployMenu', langs: ['en', 'pt-br'] },
{ name: 'storeMenu', langs: ['en', 'pt-br'] },
{ name: 'libMenu', langs: ['en', 'pt-br'] },
+ { name: 'AIInferenceMenu', langs: ['en', 'pt-br'] },
{ name: 'mcpMenu', langs: ['en', 'pt-br'] }
]
diff --git a/src/i18n/en/AIInferenceMenu.ts b/src/i18n/en/AIInferenceMenu.ts
new file mode 100644
index 0000000000..9f52ab79ef
--- /dev/null
+++ b/src/i18n/en/AIInferenceMenu.ts
@@ -0,0 +1,41 @@
+/**
+ * This configures the navigation sidebar.
+ * All other languages follow this ordering/structure and will fall back to
+ * English for any entries they haven’t translated.
+ *
+ * - All entries MUST include `text` and `key`
+ * - Heading entries MUST include `header: true` and `type`
+ * - Link entries MUST include `slug` (which excludes the language code)
+ */
+export default [
+ { text: 'Documentation', header: true, onlyMobile: true, anchor: true, type: 'learn', slug: '/documentation/', key: 'documentation' },
+ { text: 'Guides', header: true, onlyMobile: true, anchor: true, type: 'learn', slug: '/documentation/products/guides/', key: 'guides' },
+ { text: 'Dev Tools', header: true, onlyMobile: true, anchor: true, type: 'learn', slug: '/documentation/devtools/', key: 'devTools' },
+
+ /// START HERE :::: DO NOT REMOVE the strings above, it's a work around for header on mobile ///
+
+ { text: 'Overview', header: true, anchor: true, type: 'learn', key: 'overview-aiinference', slug: '/documentation/products/ai/ai-inference/', hasLabel: 'menu.aiinference' },
+
+ { text: 'Get Started', header: true, type: 'learn', key: 'aiinference/get-started', items: [
+ { text: 'Build a simple AI agent', slug: '/documentation/products/guides/ai-inference-agent/', key: 'aiinference/build-agent' },
+ ]},
+
+ // Model slugs end with '/' like every other slug in this menu, so each link uses the same trailing-slash URL form.
+ { text: 'Available Models', header: true, type: 'learn', key: 'aiinference.models', slug: '/documentation/products/ai/ai-inference/models/', items: [
+ { text: 'BAAI/bge reranker v2 m3', slug: '/documentation/products/ai/ai-inference/models/baai-bge-reranker-v2-m3/', key: 'aiinference/BAAI/bge-reranker-v2-m3' },
+ { text: 'InternVL3', slug: '/documentation/products/ai/ai-inference/models/internvl3/', key: 'aiinference/InternVL3' },
+ { text: 'Mistral 3 Small (24B AWQ)', slug: '/documentation/products/ai/ai-inference/models/mistral-3-small/', key: 'aiinference/mistral-3-small' },
+ { text: 'Qwen2.5 VL AWQ 3B', slug: '/documentation/products/ai/ai-inference/models/qwen-2-5-vl-3b/', key: 'aiinference/qwen-2-5-vl-awq-3b' },
+ { text: 'Qwen2.5 VL AWQ 7B', slug: '/documentation/products/ai/ai-inference/models/qwen-2-5-vl-7b/', key: 'aiinference/qwen-2-5-vl-awq-7b' },
+ { text: 'Qwen3 30B A3B Instruct 2507 FP8', slug: '/documentation/products/ai/ai-inference/models/qwen3-30ba3b/', key: 'aiinference/qwen-3-instruct' },
+ { text: 'Qwen3 Embedding 4B', slug: '/documentation/products/ai/ai-inference/models/qwen3-embedding-4b/', key: 'aiinference/qwen3-embedding' },
+ { text: 'Nanonets-OCR-s', slug: '/documentation/products/ai/ai-inference/models/nanonets-ocr-s/', key: 'aiinference/nanonets-OCR-s' },
+ ]},
+
+ { text: 'Guides', header: true, type: 'learn', key: 'aiinference/guides', items: [
+ { text: 'Deploy AI Inference Starter Kit', slug: '/documentation/products/guides/ai-inference-starter-kit/', key: 'aiinference/starter-kit' },
+ { text: 'Deploy LangGraph AI Agent Boilerplate', slug: '/documentation/products/guides/langgraph-ai-agent-boilerplate/', key: 'aiinference/langgraph-boilerplate' },
+ ]},
+
+
+] as const;
diff --git a/src/i18n/en/ui.ts b/src/i18n/en/ui.ts
index e70f24de44..6e91cf74b2 100644
--- a/src/i18n/en/ui.ts
+++ b/src/i18n/en/ui.ts
@@ -167,7 +167,8 @@ export default {
'menu.runtime': 'Azion Runtime',
'menu.store': 'Store',
'menu.storage': 'Object Storage',
- 'menu.edgeSQL': 'SQL Database'
+ 'menu.edgeSQL': 'SQL Database',
+ 'menu.aiinference': 'AI Inference'
};
diff --git a/src/i18n/pt-br/AIInferenceMenu.ts b/src/i18n/pt-br/AIInferenceMenu.ts
new file mode 100644
index 0000000000..5fbf6afc8c
--- /dev/null
+++ b/src/i18n/pt-br/AIInferenceMenu.ts
@@ -0,0 +1,41 @@
+/**
+ * This configures the navigation sidebar.
+ * All other languages follow this ordering/structure and will fall back to
+ * English for any entries they haven’t translated.
+ *
+ * - All entries MUST include `text` and `key`
+ * - Heading entries MUST include `header: true` and `type`
+ * - Link entries MUST include `slug` (which excludes the language code)
+ */
+export default [
+ // Mobile-only header links; like every heading entry they carry `type`, as the rules in the file header require.
+ { text: 'Documentação', header: true, onlyMobile: true, anchor: true, type: 'learn', slug: '/documentacao/', key: 'documentation' },
+ { text: 'Guias', header: true, onlyMobile: true, anchor: true, type: 'learn', slug: '/documentacao/produtos/guias/', key: 'guides' },
+ { text: 'Dev Tools', header: true, onlyMobile: true, anchor: true, type: 'learn', slug: '/documentacao/produtos/dev-tools/', key: 'devTools' },
+
+ /// START HERE :::: DO NOT REMOVE the strings above, it's a work around for header on mobile ///
+
+ { text: 'Visão Geral', header: true, anchor: true, type: 'learn', key: 'overview-aiinference', slug: '/documentacao/produtos/ai/ai-inference/', hasLabel: 'menu.aiinference' },
+
+ { text: 'Comece Agora', header: true, type: 'learn', key: 'aiinference/get-started', items: [
+ { text: 'Construa um agente de AI simples', slug: '/documentacao/produtos/guias/ai-inference-agent/', key: 'aiinference/build-agent' },
+ ]},
+
+ { text: 'Modelos disponíveis', header: true, type: 'learn', key: 'aiinference.models', slug: '/documentacao/produtos/ai/ai-inference/modelos/', items: [
+ { text: 'BAAI/bge reranker v2 m3', slug: '/documentacao/produtos/ai/ai-inference/modelos/baai-bge-reranker-v2-m3/', key: 'aiinference/BAAI/bge-reranker-v2-m3' },
+ { text: 'InternVL3', slug: '/documentacao/produtos/ai/ai-inference/modelos/internvl3/', key: 'aiinference/InternVL3' },
+ { text: 'Mistral 3 Small (24B AWQ)', slug: '/documentacao/produtos/ai/ai-inference/modelos/mistral-3-small/', key: 'aiinference/mistral-3-small' },
+ { text: 'Qwen2.5 VL AWQ 3B', slug: '/documentacao/produtos/ai/ai-inference/modelos/qwen-2-5-vl-3b/', key: 'aiinference/qwen-2-5-vl-awq-3b' },
+ { text: 'Qwen2.5 VL AWQ 7B', slug: '/documentacao/produtos/ai/ai-inference/modelos/qwen-2-5-vl-7b/', key: 'aiinference/qwen-2-5-vl-awq-7b' },
+ { text: 'Qwen3 30B A3B Instruct 2507 FP8', slug: '/documentacao/produtos/ai/ai-inference/modelos/qwen3-30ba3b/', key: 'aiinference/qwen-3-instruct' },
+ { text: 'Qwen3 Embedding 4B', slug: '/documentacao/produtos/ai/ai-inference/modelos/qwen3-embedding-4b/', key: 'aiinference/qwen3-embedding' },
+ { text: 'Nanonets-OCR-s', slug: '/documentacao/produtos/ai/ai-inference/modelos/nanonets-ocr-s/', key: 'aiinference/nanonets-OCR-s' },
+ ]},
+
+ { text: 'Guias', header: true, type: 'learn', key: 'aiinference/guides', items: [
+ { text: 'Implemente o AI Inference Starter Kit', slug: '/documentacao/produtos/guias/ai-inference-starter-kit/', key: 'aiinference/starter-kit' },
+ { text: 'Implemente LangGraph AI Agent Boilerplate', slug: '/documentacao/produtos/guias/langgraph-ai-agent-boilerplate/', key: 'aiinference/langgraph-boilerplate' },
+ ]},
+
+
+] as const;