From 9060492686e4c9ade368842b9ab658712c6b1a62 Mon Sep 17 00:00:00 2001 From: Luiza Vargas do Santos Date: Mon, 15 Sep 2025 13:55:29 -0300 Subject: [PATCH 01/12] menu test --- .../ai-inference/edge-ai-reference.mdx | 2 + src/data/availableMenu.ts | 3 +- src/i18n/en/AIInferenceMenu.ts | 44 +++++++++++++++++++ src/i18n/en/ui.ts | 3 +- 4 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 src/i18n/en/AIInferenceMenu.ts diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx index d15d1e9c8b..84bb485e3c 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx @@ -5,6 +5,8 @@ description: >- meta_tags: 'ai inference, artificial intelligence, edge computing' namespace: docs_edge_ai_reference permalink: /documentation/products/ai/ai-inference/ +menu_namespace: AIInferenceMenu + --- import LinkButton from 'azion-webkit/linkbutton'; diff --git a/src/data/availableMenu.ts b/src/data/availableMenu.ts index 2d8569e1ec..5972012090 100644 --- a/src/data/availableMenu.ts +++ b/src/data/availableMenu.ts @@ -10,5 +10,6 @@ export const availableMenus = [ { name: 'observeMenu', langs: ['en', 'pt-br'] }, { name: 'deployMenu', langs: ['en', 'pt-br'] }, { name: 'storeMenu', langs: ['en', 'pt-br'] }, - { name: 'libMenu', langs: ['en', 'pt-br'] } + { name: 'libMenu', langs: ['en', 'pt-br'] }, + { name: 'AIInferenceMenu', langs: ['en', 'pt-br'] } ] diff --git a/src/i18n/en/AIInferenceMenu.ts b/src/i18n/en/AIInferenceMenu.ts new file mode 100644 index 0000000000..76da2ae568 --- /dev/null +++ b/src/i18n/en/AIInferenceMenu.ts @@ -0,0 +1,44 @@ +/** + * This configures the navigation sidebar. + * All other languages follow this ordering/structure and will fall back to + * English for any entries they haven’t translated. 
+ * + * - All entries MUST include `text` and `key` + * - Heading entries MUST include `header: true` and `type` + * - Link entries MUST include `slug` (which excludes the language code) + */ +export default [ + { text: 'Documentation', header: true, onlyMobile: true, anchor: true, type: 'learn', slug: '/documentation/', key: 'documentation' }, + { text: 'Guides', header: true, onlyMobile: true, anchor: true, type: 'learn', slug: '/documentation/products/guides/', key: 'guides' }, + { text: 'Dev Tools', header: true, onlyMobile: true, anchor: true, type: 'learn', slug: '/documentation/devtools/', key: 'devTools' }, + + /// START HERE :::: DO NOT REMOVE the strings above, it's a work around for header on mobile /// + + { text: 'Overview', header: true, anchor: true, type: 'learn', key: 'overview', slug: '/documentation/products/ai/ai-inference/', hasLabel: 'menu.aiinference' }, + { text: 'Get Started', header: true, anchor: true, type: 'learn', key: 'get-started', slug: '/documentation/products/ai/ai-inference/' }, + { text: 'Models', header: true, type: 'learn', key: 'aiinference.models', items: [ + { text: 'BAAI/bge reranker v2 m3', slug: '/documentation/products/ai/ai-inference/models/baai-bge-reranker-v2-m3', key: 'aiinference/BAAI/bge-reranker-v2-m3' }, + { text: 'InternVL3', slug: '/documentation/products/ai/ai-inference/models/internvl3', key: 'aiinference/InternVL3' }, + { text: 'Mistral 3 Small (24B AWQ)', slug: '/documentation/products/ai/ai-inference/models/mistral-3-small', key: 'aiinference/mistral-3-small' }, + { text: 'Qwen2.5 VL AWQ 3B', slug: '/documentation/products/ai/ai-inference/models/qwen-2-5-vl-3b', key: 'aiinference/qwen-2-5-vl-awq-3b' }, + { text: 'Qwen2.5 VL AWQ 7B', slug: '/documentation/products/ai/ai-inference/models/qwen-2-5-vl-7b', key: 'aiinference/qwen-2-5-vl-awq-7b' }, + { text: 'Qwen3 30B A3B Instruct 2507 FP8', slug: '/documentation/products/ai/ai-inference/models/qwen3-30ba3b', key: 'aiinference/qwen-3-instruct' }, + { text: 
'Qwen3 Embedding 4B', slug: '/documentation/products/ai/ai-inference/models/qwen3-embedding-4b', key: 'aiinference/qwen3-embedding' }, + { text: 'Nanonets-OCR-s', slug: '/documentation/products/ai/ai-inference/models/nanonets-ocr-s/', key: 'aiinference/nanonets-OCR-s' }, + ]}, + + { text: 'Guides', header: true, type: 'learn', key: 'aiinference/guides', items: [ + { text: 'Deploy AI Inference Starter kit', header: true, anchor: true, type: 'learn', key: 'aiinference/starter-kit', slug: '/documentation/products/guides/ai-inference-starter-kit' }, + { text: 'Deploy LangGraph AI Agent Boilerplate', header: true, anchor: true, type: 'learn', key: 'aiinference/langgraph-boilerplate', slug: '/documentation/products/guides/langgraph-ai-agent-boilerplate' }, + { text: 'Implement a Copilot assistant', header: true, anchor: true, type: 'learn', key: 'aiinference/copilot-architecture', slug: '/documentation/architectures/artificial-intelligence/ai-agent-copilot-assistant' } + ]}, + { text: 'Solutions', header: true, type: 'learn', key: 'aiinference/solutions', items: [ + { text: 'Implement a Copilot assistant', header: true, anchor: true, type: 'learn', key: 'aiinference/copilot-architecture', slug: '/documentation/architectures/artificial-intelligence/ai-agent-copilot-assistant' } + ]}, + { text: 'Reference', header: true, type: 'learn', key: 'aiinference/reference', items: [ + { text: 'AI Inference', header: true, anchor: true, type: 'learn', key: 'aiinference/copilot-architecture', slug: '/documentation/products/ai/ai-inference/' } + ]}, + + + +] as const; diff --git a/src/i18n/en/ui.ts b/src/i18n/en/ui.ts index 934e9420ee..4c7edfb1cc 100644 --- a/src/i18n/en/ui.ts +++ b/src/i18n/en/ui.ts @@ -167,7 +167,8 @@ export default { 'menu.runtime': 'Azion Runtime', 'menu.store': 'Store', 'menu.storage': 'Edge Storage', - 'menu.edgeSQL': 'Edge SQL' + 'menu.edgeSQL': 'Edge SQL', + 'menu.aiinference':'AI Inference' }; From fe44371fa2671f90fe17694bccd71989f2cd12de Mon Sep 17 00:00:00 
2001 From: Luiza Vargas do Santos Date: Tue, 16 Sep 2025 09:33:19 -0300 Subject: [PATCH 02/12] fixes --- .../ai-inference/ai-inference-overview.mdx | 50 +++++++++++++++++++ .../ai-inference/edge-ai-reference.mdx | 16 +++--- src/i18n/en/AIInferenceMenu.ts | 11 ++-- src/i18n/en/nav.ts | 2 +- 4 files changed, 67 insertions(+), 12 deletions(-) create mode 100644 src/content/docs/en/pages/main-menu/reference/ai-inference/ai-inference-overview.mdx diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-inference-overview.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-inference-overview.mdx new file mode 100644 index 0000000000..de8679942b --- /dev/null +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-inference-overview.mdx @@ -0,0 +1,50 @@ +--- +title: AI Inference Overview +description: >- + Azion AI Inference empowers you to build and deploy intelligent applications that process data close to where it is generated. +meta_tags: 'ai inference, artificial intelligence, edge computing' +namespace: docs_ai_inference_overview +permalink: /documentation/products/ai/ai-inference/overview/ +menu_namespace: AIInferenceMenu + +--- + +import LinkButton from 'azion-webkit/linkbutton'; + +Build AI-powered applications by running AI models on Azion’s highly distributed infrastructure to deliver scalable, low-latency, and cost-effective inference. + +- Execute AI models directly on Azion’s globally distributed infrastructure to reduce latency and enable real-time responses. +- Use state-of-the-art large language and vision-language models available natively on the Azion platform. +- Connect applications using Azion’s OpenAI-compatible endpoint format. +- Apply LoRA fine-tuning to pre-trained models using your own data and parameters. 
+ + +Get started by deploying the AI Inference Quick Start: + + + +--- + +### What you can build with AI Inference + +- **Deploy Scalable 24/7 AI Assistants**: Build and deploy AI assistants that serve thousands of users simultaneously with low latency, delivering real-time support, dynamic FAQs, and customer assistance without cloud overload. + +- **Build AI Agents**: Built AI agents that automate multi‑step workflows, collapse days of manual effort into minutes, and free teams for higher‑value work—boosting productivity across operations. + +- **Build and Scale AI Applications**: Build scalable, low-latency AI applications that support advanced models, fine-tuning, and seamless integration—enabling real-time processing and interconnected AI solutions that drive innovation and operational efficiency worldwide. + +- **Automate Threat Detection and Takedown with AI**: Combine LLMs and vision-language models (VLMs) to monitor digital assets, spot phishing/abuse patterns in text and imagery, and automate threat classification and takedown across distributed environments. + +## Related products + +Related products you can leverage to power your AI Applications: + +- [Applications](/en/documentation/products/build/applications/): build applications that run directly on Azion's distributed network, delivering exceptional performance and customization options. +- [Functions](/en/documentation/products/build/applications/functions/): execute code closer to end users, enhancing performance and enabling custom logic for handling requests and responses. +- [SQL Database](/en/documentation/products/store/sql-database/): an edge-native SQL solution designed for serverless applications, providing data storage and querying capabilities. +- [Vector Search](/en/documentation/products/store/sql-database/vector-search/): enable semantic search engines and AI-powered recommendations through vector embeddings.. 
diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx index 84bb485e3c..de1cdb1e1b 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx @@ -11,13 +11,13 @@ menu_namespace: AIInferenceMenu import LinkButton from 'azion-webkit/linkbutton'; -**AI Inference** empowers you to build and deploy intelligent applications that process data close to where it is generated. By combining artificial intelligence with edge computing, it eliminates the complexities of scaling and infrastructure management, enabling real-time decision-making and enhanced performance. +**AI Inference** enables you you to build and deploy intelligent applications that process data close to where it is generated. By combining artificial intelligence with edge computing, it eliminates the complexities of scaling and infrastructure management. -With Azion AI Inference, you can seamlessly integrate AI capabilities into your applications, leveraging tools like Edge Functions, Edge Application, and the Azion API to create scalable, secure, and efficient solutions. +With Azion AI Inference, you can integrate AI capabilities into your applications, leveraging tools like **Functions**, **Applications**, and the Azion API to create scalable, secure, and efficient solutions. AI Inference gives you access to: -- **Run AI models on Edge Runtime**, enabling advanced AI architectures to execute directly at the edge for minimal latency and maximum performance. +- **Run AI models closer to your users**, enabling advanced AI architectures to execute directly at the edge for minimal latency and maximum performance. - **Deploy autonomous AI agents** that analyze data and make decisions at the edge. - **Real-time processing** with reduced latency and enhanced efficiency. 
- All as part of a **complete platform**, including Edge Applications, Edge Functions, Edge SQL vector search, and more. @@ -36,16 +36,20 @@ Access our catalog of open-source AI models that you can run directly on Azion R AI Inference allows you to fine-tune, train, and specialize models using **Low-Rank Adaptation (LoRA)**. This capability enables you to optimize models for specific tasks, ensuring they are both efficient and accurate for your business needs. + + +### ISSOAQUI E UM USE CASE ### AI Agents AI Inference supports deploying AI agents like ReAct (Reasoning + Acting) at the edge, enabling advanced tasks such as context-aware responses, semantic search, and intelligent data processing. -### Integration with Edge SQL - -Integrate with **Edge SQL** to enable vector search capabilities, allowing for semantic queries and hybrid search. This integration enhances AI-powered applications by providing precise, contextually relevant results and supporting efficient Retrieval-Augmented Generation (RAG) implementations. --- + +### Integration with Edge SQL + +Integrate with **SQL Database** to enable vector search capabilities, allowing for semantic queries and hybrid search. This integration enhances AI-powered applications by providing precise, contextually relevant results and supporting efficient Retrieval-Augmented Generation (RAG) implementations. ## Related products - [Edge Application](/en/documentation/products/build/edge-application/): build applications that run directly on Azion's distributed network, delivering exceptional performance and customization options. 
diff --git a/src/i18n/en/AIInferenceMenu.ts b/src/i18n/en/AIInferenceMenu.ts index 76da2ae568..15e4c9b0d7 100644 --- a/src/i18n/en/AIInferenceMenu.ts +++ b/src/i18n/en/AIInferenceMenu.ts @@ -14,9 +14,12 @@ export default [ /// START HERE :::: DO NOT REMOVE the strings above, it's a work around for header on mobile /// - { text: 'Overview', header: true, anchor: true, type: 'learn', key: 'overview', slug: '/documentation/products/ai/ai-inference/', hasLabel: 'menu.aiinference' }, + { text: 'Overview', header: true, anchor: true, type: 'learn', key: 'overview', slug: '/documentation/products/ai/ai-inference/overview/', hasLabel: 'menu.aiinference' }, { text: 'Get Started', header: true, anchor: true, type: 'learn', key: 'get-started', slug: '/documentation/products/ai/ai-inference/' }, - { text: 'Models', header: true, type: 'learn', key: 'aiinference.models', items: [ + { text: 'Reference', header: true, type: 'learn', key: 'aiinference/reference', items: [ + { text: 'AI Inference', header: true, anchor: true, type: 'learn', key: 'aiinference/copilot-architecture', slug: '/documentation/products/ai/ai-inference/' } + ]}, + { text: ' Available Models', header: true, type: 'learn', key: 'aiinference.models', items: [ { text: 'BAAI/bge reranker v2 m3', slug: '/documentation/products/ai/ai-inference/models/baai-bge-reranker-v2-m3', key: 'aiinference/BAAI/bge-reranker-v2-m3' }, { text: 'InternVL3', slug: '/documentation/products/ai/ai-inference/models/internvl3', key: 'aiinference/InternVL3' }, { text: 'Mistral 3 Small (24B AWQ)', slug: '/documentation/products/ai/ai-inference/models/mistral-3-small', key: 'aiinference/mistral-3-small' }, @@ -35,9 +38,7 @@ export default [ { text: 'Solutions', header: true, type: 'learn', key: 'aiinference/solutions', items: [ { text: 'Implement a Copilot assistant', header: true, anchor: true, type: 'learn', key: 'aiinference/copilot-architecture', slug: '/documentation/architectures/artificial-intelligence/ai-agent-copilot-assistant' } 
]}, - { text: 'Reference', header: true, type: 'learn', key: 'aiinference/reference', items: [ - { text: 'AI Inference', header: true, anchor: true, type: 'learn', key: 'aiinference/copilot-architecture', slug: '/documentation/products/ai/ai-inference/' } - ]}, + diff --git a/src/i18n/en/nav.ts b/src/i18n/en/nav.ts index 52405ff09e..f892800c52 100644 --- a/src/i18n/en/nav.ts +++ b/src/i18n/en/nav.ts @@ -597,7 +597,7 @@ export default [ anchor: true, type: 'learn', key: 'reference/edgeAI', - slug: '/documentation/products/ai/ai-inference/', + slug: '/documentation/products/ai/ai-inference/overview/', }, { text: 'Edge Application', From 95fcd0d219d17b1188d721ce2f6161e18a24787a Mon Sep 17 00:00:00 2001 From: Luiza Vargas do Santos Date: Thu, 18 Sep 2025 14:57:31 -0300 Subject: [PATCH 03/12] more fixes --- .../ai-inference/edge-ai-reference.mdx | 57 +++++++++++++------ .../reference/ai-inference/models.mdx | 4 +- 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx index de1cdb1e1b..e919943a5a 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx @@ -1,7 +1,7 @@ --- title: AI Inference description: >- - Azion AI Inference empowers you to build and deploy intelligent applications that process data close to where it is generated. + AI Inference enables you to run AI models directly on Azion’s highly distributed infrastructure. 
meta_tags: 'ai inference, artificial intelligence, edge computing' namespace: docs_edge_ai_reference permalink: /documentation/products/ai/ai-inference/ @@ -11,16 +11,16 @@ menu_namespace: AIInferenceMenu import LinkButton from 'azion-webkit/linkbutton'; -**AI Inference** enables you you to build and deploy intelligent applications that process data close to where it is generated. By combining artificial intelligence with edge computing, it eliminates the complexities of scaling and infrastructure management. +**AI Inference** enables you to run AI models directly on Azion’s highly distributed infrastructure. -With Azion AI Inference, you can integrate AI capabilities into your applications, leveraging tools like **Functions**, **Applications**, and the Azion API to create scalable, secure, and efficient solutions. +With Azion AI Inference, you can integrate AI capabilities into your applications, leveraging tools like **Functions**, **Applications**, **Vector Search**, and the Azion API to create scalable, secure, and efficient solutions. -AI Inference gives you access to: + +AI Inference gives allows you to: - **Run AI models closer to your users**, enabling advanced AI architectures to execute directly at the edge for minimal latency and maximum performance. -- **Deploy autonomous AI agents** that analyze data and make decisions at the edge. -- **Real-time processing** with reduced latency and enhanced efficiency. -- All as part of a **complete platform**, including Edge Applications, Edge Functions, Edge SQL vector search, and more. +- **Deploy autonomous AI agents** that analyze data and make decisions. +- **Peform Real-time data processing** with reduced latency and enhanced efficiency. --- @@ -28,7 +28,7 @@ AI Inference gives you access to: ### Available Models -Access our catalog of open-source AI models that you can run directly on Azion Runtime. These models are optimized for edge deployment with minimal resource requirements. 
+Access our catalog of open-source AI models that you can run directly on Azion Runtime. These models are optimized for distributed deployment with minimal resource requirements. @@ -36,26 +36,47 @@ Access our catalog of open-source AI models that you can run directly on Azion R AI Inference allows you to fine-tune, train, and specialize models using **Low-Rank Adaptation (LoRA)**. This capability enables you to optimize models for specific tasks, ensuring they are both efficient and accurate for your business needs. +--- + +## Usage +AI Inference can be used in a [Function] -### ISSOAQUI E UM USE CASE -### AI Agents +This function receives a POST request to the desired AI model and returns the response. -AI Inference supports deploying AI agents like ReAct (Reasoning + Acting) at the edge, enabling advanced tasks such as context-aware responses, semantic search, and intelligent data processing. +```javascript +const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { + "stream": true, + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Name the european capitals" + } + ] +}) +return modelResponse +``` +This example uses the Qwen3 model. You can change the model and the request parameters according to your preferences. Check the [AI models reference](/en/documentation/products/ai/ai-inference/models/) for more information about the available models and how to use them in your application. ---- +## Integration with SQL Database -### Integration with Edge SQL +Integrate your application with **SQL Database** to enable [vector search](/en/documentation/products/store/sql-database/vector-search/) capabilities, allowing for semantic queries and hybrid search. This integration enhances AI-powered applications by providing precise, contextually relevant results and supporting efficient Retrieval-Augmented Generation (RAG) implementations. 
+ + +--- -Integrate with **SQL Database** to enable vector search capabilities, allowing for semantic queries and hybrid search. This integration enhances AI-powered applications by providing precise, contextually relevant results and supporting efficient Retrieval-Augmented Generation (RAG) implementations. ## Related products -- [Edge Application](/en/documentation/products/build/edge-application/): build applications that run directly on Azion's distributed network, delivering exceptional performance and customization options. -- [Edge Functions](/en/documentation/products/build/edge-application/edge-functions/): execute code closer to end users, enhancing performance and enabling custom logic for handling requests and responses. -- [Edge SQL](/en/documentation/products/store/edge-sql/): an edge-native SQL solution designed for serverless applications, providing data storage and querying capabilities at the edge. -- [Vector Search](/en/documentation/products/store/edge-sql/vector-search/): enable semantic search engines and AI-powered recommendations through vector embeddings at the edge. +- [Applications](/en/documentation/products/build/applications/): build applications that run directly on Azion's distributed network, delivering exceptional performance and customization options. +- [Functions](/en/documentation/products/build/applications/functions/): execute code closer to end users, enhancing performance and enabling custom logic for handling requests and responses. +- [SQL Database](/en/documentation/products/store/sql-database/): an edge-native SQL solution designed for serverless applications, providing data storage and querying capabilities at the edge. +- [Vector Search](/en/documentation/products/store/sql-database/vector-search/): enable semantic search engines and AI-powered recommendations through vector embeddings at the edge. 
--- diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx index 939d33c850..d33a91a9fe 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx @@ -9,9 +9,9 @@ permalink: /documentation/products/ai/ai-inference/models/ import LinkButton from 'azion-webkit/linkbutton'; -Azion's edge-optimized models span multiple AI domains including text generation, image analysis, embeddings, and more. Each model is designed to balance performance and resource efficiency for edge deployment. +Azion's edge-optimized models span multiple AI domains including text generation, image analysis, embeddings, and more. Each model is designed to balance performance and resource efficiency for distributed deployment. -This page provides a list of models available for use with **Edge AI**. To learn more about it, visit the [Edge AI Reference](/en/documentation/products/ai/ai-inference/). +This page provides a list of models available for use with **AI Inference**. To learn more about it, visit the [AI Inference Reference](/en/documentation/products/ai/ai-inference/). 
## Available Models From 4f87a4077b50aad7a90e390cc7e3d5fa7856aa53 Mon Sep 17 00:00:00 2001 From: Luiza Vargas do Santos Date: Fri, 19 Sep 2025 15:42:35 -0300 Subject: [PATCH 04/12] updated --- .../ai-inference/ai-inference-overview.mdx | 50 ---------------- .../Qwen3-30b-a3b-Instruct-2507-fp8.mdx | 1 + .../ai-models/baai-bge-reranker-v2-m3.mdx | 1 + .../ai-inference/ai-models/internvl3.mdx | 1 + .../ai-models/mistral-3-small.mdx | 1 + .../ai-inference/ai-models/nanonets-ocr-r.mdx | 1 + .../ai-inference/ai-models/qwen-2-5-vl-3b.mdx | 1 + .../ai-inference/ai-models/qwen-2-5-vl-7b.mdx | 1 + .../ai-models/qwen3-embedding-4b.mdx | 1 + .../ai-inference/edge-ai-reference.mdx | 57 ++++++++++++++----- .../reference/ai-inference/models.mdx | 1 + src/i18n/en/AIInferenceMenu.ts | 12 +--- src/i18n/en/nav.ts | 2 +- 13 files changed, 56 insertions(+), 74 deletions(-) delete mode 100644 src/content/docs/en/pages/main-menu/reference/ai-inference/ai-inference-overview.mdx diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-inference-overview.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-inference-overview.mdx deleted file mode 100644 index de8679942b..0000000000 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-inference-overview.mdx +++ /dev/null @@ -1,50 +0,0 @@ ---- -title: AI Inference Overview -description: >- - Azion AI Inference empowers you to build and deploy intelligent applications that process data close to where it is generated. -meta_tags: 'ai inference, artificial intelligence, edge computing' -namespace: docs_ai_inference_overview -permalink: /documentation/products/ai/ai-inference/overview/ -menu_namespace: AIInferenceMenu - ---- - -import LinkButton from 'azion-webkit/linkbutton'; - -Build AI-powered applications by running AI models on Azion’s highly distributed infrastructure to deliver scalable, low-latency, and cost-effective inference. 
- -- Execute AI models directly on Azion’s globally distributed infrastructure to reduce latency and enable real-time responses. -- Use state-of-the-art large language and vision-language models available natively on the Azion platform. -- Connect applications using Azion’s OpenAI-compatible endpoint format. -- Apply LoRA fine-tuning to pre-trained models using your own data and parameters. - - -Get started by deploying the AI Inference Quick Start: - - - ---- - -### What you can build with AI Inference - -- **Deploy Scalable 24/7 AI Assistants**: Build and deploy AI assistants that serve thousands of users simultaneously with low latency, delivering real-time support, dynamic FAQs, and customer assistance without cloud overload. - -- **Build AI Agents**: Built AI agents that automate multi‑step workflows, collapse days of manual effort into minutes, and free teams for higher‑value work—boosting productivity across operations. - -- **Build and Scale AI Applications**: Build scalable, low-latency AI applications that support advanced models, fine-tuning, and seamless integration—enabling real-time processing and interconnected AI solutions that drive innovation and operational efficiency worldwide. - -- **Automate Threat Detection and Takedown with AI**: Combine LLMs and vision-language models (VLMs) to monitor digital assets, spot phishing/abuse patterns in text and imagery, and automate threat classification and takedown across distributed environments. - -## Related products - -Related products you can leverage to power your AI Applications: - -- [Applications](/en/documentation/products/build/applications/): build applications that run directly on Azion's distributed network, delivering exceptional performance and customization options. -- [Functions](/en/documentation/products/build/applications/functions/): execute code closer to end users, enhancing performance and enabling custom logic for handling requests and responses. 
-- [SQL Database](/en/documentation/products/store/sql-database/): an edge-native SQL solution designed for serverless applications, providing data storage and querying capabilities. -- [Vector Search](/en/documentation/products/store/sql-database/vector-search/): enable semantic search engines and AI-powered recommendations through vector embeddings.. diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx index 99ba24511f..a47998ca5b 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx @@ -4,6 +4,7 @@ description: >- Qwen3-30B-A3B-Instruct-2507-FP8 is an instruction-tuned 30B-parameter FP8 causal language model for long-context (256K) text generation and reasoning, supporting chat/QA, summarization, multilingual tasks, math/science problem solving, coding, and tool-augmented workflows. meta_tags: 'ai inference, ai models, artificial intelligence, edge computing, qwen' namespace: docs_edge_ai_models_qwen_3_30ba3b +menu_namespace: AIInferenceMenu permalink: /documentation/products/ai/ai-inference/models/qwen3-30ba3b/ --- diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx index 89db438047..4ccb27195e 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx @@ -4,6 +4,7 @@ description: >- BAAI/bge-reranker-v2-m3 is a lightweight reranker model with strong multilingual capabilities. 
meta_tags: 'ai inference, ai models, artificial intelligence, edge computing' namespace: docs_edge_ai_models_baai_bge_reranker_v2_m3 +menu_namespace: AIInferenceMenu permalink: /documentation/products/ai/ai-inference/models/baai-bge-reranker-v2-m3/ --- diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/internvl3.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/internvl3.mdx index 6a7d24d3c6..5d4de8f99a 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/internvl3.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/internvl3.mdx @@ -4,6 +4,7 @@ description: >- InternVL3 is an advanced multimodal large language model with capabilities to encompass tool usage, GUI agents, industrial image analysis, 3D vision perception, and more. meta_tags: 'ai inference, ai models, artificial intelligence, edge computing' namespace: docs_edge_ai_models_internvl3 +menu_namespace: AIInferenceMenu permalink: /documentation/products/ai/ai-inference/models/internvl3/ --- diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/mistral-3-small.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/mistral-3-small.mdx index 4d75e3bfa0..d4389ad7e2 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/mistral-3-small.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/mistral-3-small.mdx @@ -4,6 +4,7 @@ description: >- Mistral 3 Small provides a range of capabilities, including text generation, image analysis, embeddings, and more. 
meta_tags: 'ai inference, ai models, artificial intelligence, edge computing, mistral' namespace: docs_edge_ai_models_mistral_3_small +menu_namespace: AIInferenceMenu permalink: /documentation/products/ai/ai-inference/models/mistral-3-small/ --- diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/nanonets-ocr-r.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/nanonets-ocr-r.mdx index 554f22d9b3..e4df693992 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/nanonets-ocr-r.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/nanonets-ocr-r.mdx @@ -4,6 +4,7 @@ description: >- Nanonets-OCR-s is an OCR model that converts document images to structured Markdown, preserving layout (headings, lists, tables) and basic tags. The output is easy to parse and feed into LLM pipelines. meta_tags: 'ai inference, ai models, artificial intelligence, edge computing, qwen' namespace: docs_edge_ai_models_nanonets_ocr_s +menu_namespace: AIInferenceMenu permalink: /documentation/products/ai/ai-inference/models/nanonets-ocr-s/ --- diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx index d90ac3ffef..471c370b34 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx @@ -4,6 +4,7 @@ description: >- Qwen2.5 VL AWQ 3B is a vision-language model that supports 3 bilion parameters and offers advanced capabilities such as visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation. 
meta_tags: 'ai inference, ai models, artificial intelligence, edge computing, qwen' namespace: docs_edge_ai_models_qwen_2_5_vl_3b +menu_namespace: AIInferenceMenu permalink: /documentation/products/ai/ai-inference/models/qwen-2-5-vl-3b/ --- diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx index 28cec39803..e72c648e7f 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx @@ -4,6 +4,7 @@ description: >- Qwen2.5 VL AWQ 7B is a vision-language model that supports 7 billion parameters, offering advanced capabilities such as visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation. meta_tags: 'ai inference, ai models, artificial intelligence, edge computing, qwen' namespace: docs_edge_ai_models_qwen_2_5_vl_7b +menu_namespace: AIInferenceMenu permalink: /documentation/products/ai/ai-inference/models/qwen-2-5-vl-7b/ --- diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen3-embedding-4b.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen3-embedding-4b.mdx index 706ecde2b2..904f43400c 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen3-embedding-4b.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen3-embedding-4b.mdx @@ -4,6 +4,7 @@ description: >- Qwen3 Embedding 4B is a 4B-parameter multilingual embedding model (36 layers, 32K context) that outputs 2560‑dim vectors for text/code retrieval, classification, clustering, and bitext mining. It supports instruction-conditioned embeddings and is optimized for efficient, cross-lingual representation learning. 
meta_tags: 'ai inference, ai models, artificial intelligence, edge computing, qwen' namespace: docs_edge_ai_models_qwen_3_embedding_4b +menu_namespace: AIInferenceMenu permalink: /documentation/products/ai/ai-inference/models/qwen3-embedding-4b/ --- diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx index e919943a5a..84417214d7 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx @@ -15,35 +15,59 @@ import LinkButton from 'azion-webkit/linkbutton'; With Azion AI Inference, you can integrate AI capabilities into your applications, leveraging tools like **Functions**, **Applications**, **Vector Search**, and the Azion API to create scalable, secure, and efficient solutions. +Get started by deploying the AI Inference Quick Start: -AI Inference gives allows you to: + -- **Run AI models closer to your users**, enabling advanced AI architectures to execute directly at the edge for minimal latency and maximum performance. -- **Deploy autonomous AI agents** that analyze data and make decisions. -- **Peform Real-time data processing** with reduced latency and enhanced efficiency. --- ## Features -### Available Models +### Run AI Inference at the Edge +Execute AI models directly on Azion’s globally distributed infrastructure to reduce latency and enable real-time responses. Access our catalog of open-source AI models that you can run directly on Azion Runtime. These models are optimized for distributed deployment with minimal resource requirements. -### Model customization +### Pre-Trained LLMs and VLMs -AI Inference allows you to fine-tune, train, and specialize models using **Low-Rank Adaptation (LoRA)**. 
This capability enables you to optimize models for specific tasks, ensuring they are both efficient and accurate for your business needs. +Use state-of-the-art large language and vision-language models available natively on the Azion platform. + +### OpenAI-Compatible API + +Connect applications using Azion’s OpenAI-compatible endpoint format. + +### Fine-Tune Models with LoRA + +AI Inference allows you to fine-tune, train, and specialize models your own data and parameters.. This capability enables you to optimize models for specific tasks, ensuring they are both efficient and accurate for your business needs. --- +### Examples of what you can build with AI Inference + +- **Deploy Scalable 24/7 AI Assistants**: Build and deploy AI assistants that serve thousands of users simultaneously with low latency, delivering real-time support, dynamic FAQs, and customer assistance without cloud overload. + +- **Build AI Agents**: Build AI agents that automate multi‑step workflows, collapse days of manual effort into minutes, and free teams for higher‑value work—boosting productivity across operations. + +- **Build and Scale AI Applications**: Build scalable, low-latency AI applications that support advanced models, fine-tuning, and seamless integration—enabling real-time processing and interconnected AI solutions that drive innovation and operational efficiency worldwide. + +- **Automate Threat Detection and Takedown with AI**: Combine LLMs and vision-language models (VLMs) to monitor digital assets, spot phishing/abuse patterns in text and imagery, and automate threat classification and takedown across distributed environments. + + ## Usage AI Inference can be used in a [Function] This function receives a POST request to the desired AI model and returns the response. + ```javascript const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { "stream": true, @@ -68,19 +92,24 @@ This example uses the Qwen3 model. 
You can change the model and the request para Integrate your application with **SQL Database** to enable [vector search](/en/documentation/products/store/sql-database/vector-search/) capabilities, allowing for semantic queries and hybrid search. This integration enhances AI-powered applications by providing precise, contextually relevant results and supporting efficient Retrieval-Augmented Generation (RAG) implementations. +## Limits + +300 reqs/minute + + +Azion Plans: +- Free: 300 reqs/minute +- Pro: 300 reqs/minute +- Business: 300 reqs/minute +- Enterprise: 720 reqs/minute" --- ## Related products -- [Applications](/en/documentation/products/build/applications/): build applications that run directly on Azion's distributed network, delivering exceptional performance and customization options. +- [Applications](/en/documentation/products/build/applications/): build applications that run directly on Azion's distributed infrastructure, delivering exceptional performance and customization options. - [Functions](/en/documentation/products/build/applications/functions/): execute code closer to end users, enhancing performance and enabling custom logic for handling requests and responses. -- [SQL Database](/en/documentation/products/store/sql-database/): an edge-native SQL solution designed for serverless applications, providing data storage and querying capabilities at the edge. -- [Vector Search](/en/documentation/products/store/sql-database/vector-search/): enable semantic search engines and AI-powered recommendations through vector embeddings at the edge. +- [SQL Database](/en/documentation/products/store/sql-database/): an edge-native SQL solution designed for serverless applications, providing data storage and querying capabilities at the edge. Also enables [Vector Search](/en/documentation/products/store/sql-database/vector-search/) for performing semantic search and AI-powered recommendations through vector embedding. 
--- -Explore practical examples of how to implement AI solutions with Azion: - - - \ No newline at end of file diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx index d33a91a9fe..e63ffb4833 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx @@ -4,6 +4,7 @@ description: >- Edge AI offers a diverse range of edge-optimized models for various AI domains, ensuring efficient deployment and performance. meta_tags: 'ai inference, ai models, artificial intelligence, edge computing' namespace: docs_edge_ai_models +menu_namespace: AIInferenceMenu permalink: /documentation/products/ai/ai-inference/models/ --- diff --git a/src/i18n/en/AIInferenceMenu.ts b/src/i18n/en/AIInferenceMenu.ts index 15e4c9b0d7..7ff623251d 100644 --- a/src/i18n/en/AIInferenceMenu.ts +++ b/src/i18n/en/AIInferenceMenu.ts @@ -14,12 +14,11 @@ export default [ /// START HERE :::: DO NOT REMOVE the strings above, it's a work around for header on mobile /// - { text: 'Overview', header: true, anchor: true, type: 'learn', key: 'overview', slug: '/documentation/products/ai/ai-inference/overview/', hasLabel: 'menu.aiinference' }, + { text: 'Overview', header: true, anchor: true, type: 'learn', key: 'overview', slug: '/documentation/products/ai/ai-inference/', hasLabel: 'menu.aiinference' }, { text: 'Get Started', header: true, anchor: true, type: 'learn', key: 'get-started', slug: '/documentation/products/ai/ai-inference/' }, - { text: 'Reference', header: true, type: 'learn', key: 'aiinference/reference', items: [ - { text: 'AI Inference', header: true, anchor: true, type: 'learn', key: 'aiinference/copilot-architecture', slug: '/documentation/products/ai/ai-inference/' } - ]}, + { text: ' Available Models', header: true, type: 'learn', key: 'aiinference.models', items: [ + { text: 'Models', slug: 
'/documentation/products/ai/ai-inference/models/', key: 'aiinference/models' }, { text: 'BAAI/bge reranker v2 m3', slug: '/documentation/products/ai/ai-inference/models/baai-bge-reranker-v2-m3', key: 'aiinference/BAAI/bge-reranker-v2-m3' }, { text: 'InternVL3', slug: '/documentation/products/ai/ai-inference/models/internvl3', key: 'aiinference/InternVL3' }, { text: 'Mistral 3 Small (24B AWQ)', slug: '/documentation/products/ai/ai-inference/models/mistral-3-small', key: 'aiinference/mistral-3-small' }, @@ -33,12 +32,7 @@ export default [ { text: 'Guides', header: true, type: 'learn', key: 'aiinference/guides', items: [ { text: 'Deploy AI Inference Starter kit', header: true, anchor: true, type: 'learn', key: 'aiinference/starter-kit', slug: '/documentation/products/guides/ai-inference-starter-kit' }, { text: 'Deploy LangGraph AI Agent Boilerplate', header: true, anchor: true, type: 'learn', key: 'aiinference/langgraph-boilerplate', slug: '/documentation/products/guides/langgraph-ai-agent-boilerplate' }, - { text: 'Implement a Copilot assistant', header: true, anchor: true, type: 'learn', key: 'aiinference/copilot-architecture', slug: '/documentation/architectures/artificial-intelligence/ai-agent-copilot-assistant' } ]}, - { text: 'Solutions', header: true, type: 'learn', key: 'aiinference/solutions', items: [ - { text: 'Implement a Copilot assistant', header: true, anchor: true, type: 'learn', key: 'aiinference/copilot-architecture', slug: '/documentation/architectures/artificial-intelligence/ai-agent-copilot-assistant' } - ]}, - diff --git a/src/i18n/en/nav.ts b/src/i18n/en/nav.ts index f892800c52..52405ff09e 100644 --- a/src/i18n/en/nav.ts +++ b/src/i18n/en/nav.ts @@ -597,7 +597,7 @@ export default [ anchor: true, type: 'learn', key: 'reference/edgeAI', - slug: '/documentation/products/ai/ai-inference/overview/', + slug: '/documentation/products/ai/ai-inference/', }, { text: 'Edge Application', From bd1175b3837b4b1111d04048e64ccdbed1f7765e Mon Sep 17 00:00:00 2001 
From: Luiza Vargas do Santos Date: Wed, 24 Sep 2025 16:58:21 -0300 Subject: [PATCH 05/12] added ptbr and minor fixes --- .../pages/guides/ai-inference/quick-start.mdx | 40 ++++++++++ .../ai-inference/edge-ai-reference.mdx | 73 +++++-------------- .../pages/guias/ai-inference/quick-start.mdx | 40 ++++++++++ .../Qwen3-30b-a3b-Instruct-2507-fp8.mdx | 1 + .../ai-models/baai-bge-reranker-v2-m3.mdx | 1 + .../ai-inference/ai-models/internvl3.mdx | 1 + .../ai-models/mistral-3-small.mdx | 1 + .../ai-inference/ai-models/nanonets-ocr-r.mdx | 1 + .../ai-inference/ai-models/qwen-2-5-vl-3b.mdx | 1 + .../ai-inference/ai-models/qwen-2-5-vl-7b.mdx | 1 + .../ai-models/qwen3-embedding-4b.mdx | 1 + .../ai-inference/edge-ai-reference.mdx | 72 +++++++++++------- .../referencia/ai-inference/modelos.mdx | 1 + src/i18n/en/AIInferenceMenu.ts | 7 +- src/i18n/pt-br/AIInferenceMenu.ts | 38 ++++++++++ 15 files changed, 191 insertions(+), 88 deletions(-) create mode 100644 src/content/docs/en/pages/guides/ai-inference/quick-start.mdx create mode 100644 src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx create mode 100644 src/i18n/pt-br/AIInferenceMenu.ts diff --git a/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx b/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx new file mode 100644 index 0000000000..67040805bf --- /dev/null +++ b/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx @@ -0,0 +1,40 @@ +--- +title: How to build a simple agent with AI Inference +description: The "+ Create" button accelerates your journey to start building with Azion. +meta_tags: >- + building, onboarding, create resources, Azion Web Platform, import from + GitHub +namespace: docs_guides_ai_inference_build_agent +permalink: /documentation/products/guides/ai-inference-agent/ +menu_namespace: AIInferenceMenu + +--- + + + +## Usage + +AI Inference can be used in a [Function] + +This function receives a POST request to the desired AI model and returns the response. 
+ +```javascript +const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { + "stream": true, + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Name the european capitals" + } + ] +}) +return modelResponse +``` + +This example uses the Qwen3 model. You can change the model and the request parameters according to your preferences. Check the [AI models reference](/en/documentation/products/ai/ai-inference/models/) for more information about the available models and how to use them in your application. + diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx index 84417214d7..119d075993 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx @@ -2,7 +2,7 @@ title: AI Inference description: >- AI Inference enables you to run AI models directly on Azion’s highly distributed infrastructure. -meta_tags: 'ai inference, artificial intelligence, edge computing' +meta_tags: 'ai inference, artificial intelligence, edge computing, ai assistant, ai agent' namespace: docs_edge_ai_reference permalink: /documentation/products/ai/ai-inference/ menu_namespace: AIInferenceMenu @@ -15,93 +15,56 @@ import LinkButton from 'azion-webkit/linkbutton'; With Azion AI Inference, you can integrate AI capabilities into your applications, leveraging tools like **Functions**, **Applications**, **Vector Search**, and the Azion API to create scalable, secure, and efficient solutions.
-Get started by deploying the AI Inference Quick Start: +Get started by deploying the AI Inference Starter Kit Template: - --- ## Features -### Run AI Inference at the Edge - -Execute AI models directly on Azion’s globally distributed infrastructure to reduce latency and enable real-time responses. -Access our catalog of open-source AI models that you can run directly on Azion Runtime. These models are optimized for distributed deployment with minimal resource requirements. +### OpenAI-Compatible API - +Connect applications using Azion’s OpenAI-compatible endpoint format. -### Pre-Trained LLMs and VLMs +### Run Edge-Optimized Models -Use state-of-the-art large language and vision-language models available natively on the Azion platform. +- Run AI models on Azion’s globally distributed edge to minimize latency and enable real-time inference. +- Access a curated catalog of open-source models, ready to run on Azion Runtime and optimized for distributed deployment with low resource footprints. +- Native inference support for large language models (LLMs) and vision-language models (VLMs). -### OpenAI-Compatible API - -Connect applications using Azion’s OpenAI-compatible endpoint format. + ### Fine-Tune Models with LoRA -AI Inference allows you to fine-tune, train, and specialize models your own data and parameters.. This capability enables you to optimize models for specific tasks, ensuring they are both efficient and accurate for your business needs. +AI Inference allows you to fine-tune, train, and specialize models with your own data and parameters. This capability enables you to optimize models for specific tasks, ensuring they are both efficient and accurate for your business needs. --- ### Examples of what you can build with AI Inference -- **Deploy Scalable 24/7 AI Assistants**: Build and deploy AI assistants that serve thousands of users simultaneously with low latency, delivering real-time support, dynamic FAQs, and customer assistance without cloud overload.
+- **AI Assistants**: Build and deploy AI assistants that serve thousands of users simultaneously with low latency, delivering real-time support, dynamic FAQs, and customer assistance without cloud overload. -- **Build AI Agents**: Build AI agents that automate multi‑step workflows, collapse days of manual effort into minutes, and free teams for higher‑value work—boosting productivity across operations. - -- **Build and Scale AI Applications**: Build scalable, low-latency AI applications that support advanced models, fine-tuning, and seamless integration—enabling real-time processing and interconnected AI solutions that drive innovation and operational efficiency worldwide. +- **AI Agents**: Build AI agents that automate multi‑step workflows, collapse days of manual effort into minutes, and free teams for higher‑value work—boosting productivity across operations. - **Automate Threat Detection and Takedown with AI**: Combine LLMs and vision-language models (VLMs) to monitor digital assets, spot phishing/abuse patterns in text and imagery, and automate threat classification and takedown across distributed environments. - -## Usage - -AI Inference can be used in a [Function] - -This function receives a POST request to the desired AI model and returns the response. - - -```javascript -const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { - "stream": true, - "messages": [ - { - "role": "system", - "content": "You are a helpful assistant." - }, - { - "role": "user", - "content": "Name the european capitals" - } - ] -}) -return modelResponse -``` - -This example uses the Qwen3 model. You can change the model and the request parameters according to your preferences. Check the [AI models reference](/en/documentation/products/ai/ai-inference/models/) for more information about the available models and how to use them in your application. 
- - ## Integration with SQL Database Integrate your application with **SQL Database** to enable [vector search](/en/documentation/products/store/sql-database/vector-search/) capabilities, allowing for semantic queries and hybrid search. This integration enhances AI-powered applications by providing precise, contextually relevant results and supporting efficient Retrieval-Augmented Generation (RAG) implementations. ## Limits -300 reqs/minute - +These are the **default limits**: -Azion Plans: -- Free: 300 reqs/minute -- Pro: 300 reqs/minute -- Business: 300 reqs/minute -- Enterprise: 720 reqs/minute" +| Scope | Limit | +| ----- | ----- | +| Requests per minute | 300 | --- @@ -111,5 +74,3 @@ Azion Plans: - [Functions](/en/documentation/products/build/applications/functions/): execute code closer to end users, enhancing performance and enabling custom logic for handling requests and responses. - [SQL Database](/en/documentation/products/store/sql-database/): an edge-native SQL solution designed for serverless applications, providing data storage and querying capabilities at the edge. Also enables [Vector Search](/en/documentation/products/store/sql-database/vector-search/) for performing semantic search and AI-powered recommendations through vector embedding. ---- - diff --git a/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx b/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx new file mode 100644 index 0000000000..773c1107e3 --- /dev/null +++ b/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx @@ -0,0 +1,40 @@ +--- +title: How to build a simple agent with AI Inference +description: The "+ Create" button accelerates your journey to start building with Azion. 
+meta_tags: >- + building, onboarding, create resources, Azion Web Platform, import from + GitHub +namespace: docs_guides_ai_inference_build_agent +permalink: /documentacao/produtos/guias/ai-inference-agent/ +menu_namespace: AIInferenceMenu + +--- + + + +## Usage + +AI Inference can be used in a [Function] + +This function receives a POST request to the desired AI model and returns the response. + + +```javascript +const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { + "stream": true, + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Name the european capitals" + } + ] +}) +return modelResponse +``` + +This example uses the Qwen3 model. You can change the model and the request parameters according to your preferences. Check the [AI models reference](/en/documentation/products/ai/ai-inference/models/) for more information about the available models and how to use them in your application. 
+ diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx index 6254adfed0..61ecb3ccfb 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx @@ -5,6 +5,7 @@ description: >- meta_tags: 'ai inference, ai models, inteligência artificial, edge computing, qwen' namespace: docs_edge_ai_models_qwen_3_30ba3b permalink: /documentacao/produtos/ai/ai-inference/modelos/qwen3-30ba3b/ +menu_namespace: AIInferenceMenu --- **Qwen3-30B-A3B-Instruct-2507-FP8** é um modelo de linguagem causal FP8 ajustado por instruções com 30 bilhões de parâmetros para geração de texto de longo contexto (256K) e raciocínio, suportando chat/QA, sumarização, tarefas multilíngues, resolução de problemas de matemática/ciência, codificação e fluxos de trabalho aumentados por ferramentas. 
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx index 5c3bebb63b..191e346f64 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx @@ -5,6 +5,7 @@ description: >- meta_tags: 'ai inference, ai models, artificial intelligence, edge computing' namespace: docs_edge_ai_models_baai_bge_reranker_v2_m3 permalink: /documentacao/produtos/ai/ai-inference/modelos/baai-bge-reranker-v2-m3/ +menu_namespace: AIInferenceMenu --- **BAAI/bge-reranker-v2-m3** é um modelo de reranking leve com fortes capacidades multilíngues. Ele é fácil de implementar e oferece inferência rápida. diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/internvl3.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/internvl3.mdx index f73ce0e88e..1975b951ff 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/internvl3.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/internvl3.mdx @@ -5,6 +5,7 @@ description: >- meta_tags: 'ai inference, modelos de ia, inteligência artificial, edge computing' namespace: docs_edge_ai_models_internvl3 permalink: /documentacao/produtos/ai/ai-inference/modelos/internvl3/ +menu_namespace: AIInferenceMenu --- **InternVL3** é um Multimodal Large Language Model avançado (MLLM) com capacidades para abranger tool calling, agentes GUI, análise de imagem industrial, percepção de visão 3D e mais. 
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/mistral-3-small.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/mistral-3-small.mdx index ee108738a1..1b416aacae 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/mistral-3-small.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/mistral-3-small.mdx @@ -5,6 +5,7 @@ description: >- meta_tags: 'ai inference, modelos ai, inteligência artificial, computação edge, mistral' namespace: docs_edge_ai_models_mistral_3_small permalink: /documentacao/produtos/ai/ai-inference/modelos/mistral-3-small/ +menu_namespace: AIInferenceMenu --- **Mistral 3 Small** é um modelo de linguagem que, embora sendo compacto, oferece capacidades comparáveis às de modelos maiores. Ele é ideal para agentes conversacionais, chamada de função, ajuste fino e inferência local com dados sensíveis. diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/nanonets-ocr-r.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/nanonets-ocr-r.mdx index f10aec2d95..1c63f04b65 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/nanonets-ocr-r.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/nanonets-ocr-r.mdx @@ -5,6 +5,7 @@ description: >- meta_tags: 'ai inference, ai models, inteligência artificial, edge computing, qwen' namespace: docs_edge_ai_models_nanonets_ocr_s permalink: /documentacao/produtos/ai/ai-inference/modelos/nanonets-ocr-s/ +menu_namespace: AIInferenceMenu --- **Nanonets-OCR-s** é um modelo OCR que converte imagens de documentos em Markdown estruturado, preservando o layout (títulos, listas, tabelas) e tags básicas. A saída é fácil de analisar e alimentar em pipelines de LLM. 
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-3b.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-3b.mdx index 5ba0cc2434..725715e80e 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-3b.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-3b.mdx @@ -5,6 +5,7 @@ description: >- meta_tags: 'ai inference, modelos ai, inteligência artificial, edge computing, qwen' namespace: docs_edge_ai_models_qwen_2_5_vl_3b permalink: /documentacao/produtos/ai/ai-inference/modelos/qwen-2-5-vl-3b/ +menu_namespace: AIInferenceMenu --- O **Qwen 2.5 VL AWQ 3B** é um modelo de linguagem e visão que oferece capacidades avançadas como análise visual, raciocínio de agente, compreensão de vídeo longo, localização visual e geração de saída estruturada. Ele suporta 3 bilhões de parâmetros. diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx index 37086b812c..e0a1fa07d4 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx @@ -5,6 +5,7 @@ description: >- meta_tags: 'ai inference, modelos ai, inteligência artificial, computação edge, qwen' namespace: docs_edge_ai_models_qwen_2_5_vl_7b permalink: /documentacao/produtos/ai/ai-inference/modelos/qwen-2-5-vl-7b/ +menu_namespace: AIInferenceMenu --- O **Qwen 2.5 VL AWQ 7B** é um modelo de linguagem e visão que suporta 7 bilhões de parâmetros, oferecendo capacidades avançadas como análise visual, raciocínio de agente, compreensão de vídeo longo, localização visual e geração de saída estruturada. 
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen3-embedding-4b.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen3-embedding-4b.mdx index b986fe2e2c..376c2a25b4 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen3-embedding-4b.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen3-embedding-4b.mdx @@ -5,6 +5,7 @@ description: >- meta_tags: 'ai inference, ai modelos, inteligência artificial, edge computing, qwen' namespace: docs_edge_ai_models_qwen_3_embedding_4b permalink: /documentacao/produtos/ai/ai-inference/modelos/qwen3-embedding-4b/ +menu_namespace: AIInferenceMenu --- **Qwen3 Embedding 4B** é um modelo de embedding multilíngue com 4 bilhões de parâmetros (36 camadas, 32K de contexto) que gera vetores de 2560 dimensões para recuperação de texto/código, classificação, agrupamento e mineração de bitexto. Ele suporta embeddings condicionados por instrução e é otimizado para aprendizado de representação eficiente e multilíngue. diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx index 8be09bd7ed..cb5a0522fb 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx @@ -1,59 +1,75 @@ --- title: AI Inference description: >- - O AI Inference da Azion capacita você a construir e implementar aplicações inteligentes que processam dados perto de onde são gerados. -meta_tags: 'ai inference, inteligência artificial, edge computing' + A AI Inference permite que você execute modelos de AI diretamente na infraestrutura altamente distribuída da Azion. 
+meta_tags: 'inferência de ia, inteligência artificial, computação de borda, assistente de ia, agente de ia' namespace: docs_edge_ai_reference permalink: /documentacao/produtos/ai/ai-inference/ +menu_namespace: AIInferenceMenu + --- import LinkButton from 'azion-webkit/linkbutton'; -O **AI Inference** capacita você a construir e implementar aplicações inteligentes que processam dados perto de onde são gerados. Ao combinar inteligência artificial com edge computing, o AI Inference elimina as complexidades de escalabilidade e gerenciamento de infraestrutura, permitindo tomadas de decisão em tempo real e desempenho aprimorado. +**AI Inference** permite que você execute modelos de AI diretamente na infraestrutura altamente distribuída da Azion. -Com o AI Inference da Azion, você pode integrar perfeitamente capacidades de AI em suas aplicações, aproveitando ferramentas como Edge Functions, Edge Application e a API da Azion para criar soluções escaláveis, seguras e eficientes. +Com o AI Inference da Azion, você pode integrar capacidades de AI em suas aplicações, aproveitando ferramentas como **Functions**, **Applications**, **Vector Search** e a API da Azion para criar soluções escaláveis, seguras e eficientes. -O AI Inference possibilita: +Comece implementando o Template do Starter Kit do AI Inference: -- **Executar modelos de AI no Edge Runtime**, permitindo que arquiteturas avançadas de AI sejam executadas diretamente no edge para latência mínima e desempenho máximo. -- **Implementar agentes de AI autônomos** que analisam dados e tomam decisões no edge. -- **Processamento em tempo real** com latência reduzida e eficiência aprimorada. -- Tudo como parte de uma **plataforma completa**, incluindo Edge Application, Edge Functions, busca vetorial do Edge SQL e muito mais. 
+ --- -## Recursos +## Funcionalidades -### Modelos disponíveis +### API Compatível com OpenAI -Acesse nosso catálogo de modelos de AI de código aberto que você pode executar diretamente no Runtime da Azion. Esses modelos são otimizados para implementação no edge com requisitos mínimos de recursos. +Conecte aplicações usando o formato de endpoint compatível com OpenAI da Azion. - +### Execute modelos otimizados para o edge -### Personalização de modelos +- Execute modelos de AI no edge, utilizando a infraestrutura globalmente distribuída da Azion para minimizar a latência e permitir inferência em tempo real. +- Acesse um catálogo selecionado de modelos de código aberto, prontos para rodar no Azion Runtime e otimizados para implantação distribuída com baixo consumo de recursos. +- Suporte nativo para inferência de modelos de linguagem de grande porte (LLMs) e modelos de visão-linguagem (VLMs). -O AI Inference permite que você ajuste, treine e especialize modelos usando **Low-Rank Adaptation (LoRA)**. Esse recurso permite que você otimize modelos para tarefas específicas, garantindo que sejam eficientes e precisos para as necessidades do seu negócio. + -### Agentes de AI +### Ajuste Fino de Modelos com LoRA -O AI Inference suporta a implementação de agentes de AI como ReAct (Raciocínio + Ação) no edge, permitindo tarefas avançadas como respostas contextuais, pesquisa semântica e processamento inteligente de dados. +O AI Inference permite que você ajuste, treine e especialize modelos com seus próprios dados e parâmetros. Essa capacidade permite otimizar modelos para tarefas específicas, garantindo que sejam eficientes e precisos para as necessidades do seu negócio. -### Integração com Edge SQL +--- -Integre o AI Inference com o **Edge SQL** para habilitar capacidades de busca vetorial, permitindo consultas semânticas e busca híbrida. 
Essa integração aprimora aplicativos alimentados por AI, fornecendo resultados precisos e contextualmente relevantes e suportando implementações eficientes de Retrieval Augmented Generation (RAG). +### Exemplos do que você pode construir com o AI Inference ---- +- **Assistentes de AI**: Construa e implante assistentes de AI que atendem milhares de usuários simultaneamente com baixa latência, oferecendo suporte em tempo real, FAQs dinâmicas e assistência ao cliente sem sobrecarga na nuvem. -## Produtos relacionados +- **Agentes de AI**: Construa agentes de AI que automatizam fluxos de trabalho de múltiplas etapas, reduzindo dias de esforço manual para minutos, e liberando equipes para trabalhos de maior valor—impulsionando a produtividade em todas as operações. + +- **Automatize a Detecção e Remoção de Ameaças com AI**: Combine LLMs e modelos de visão-linguagem (VLMs) para monitorar ativos digitais, identificar padrões de phishing/abuso em texto e imagens, e automatizar a classificação e remoção de ameaças em ambientes distribuídos. -- [Edge Application](/pt-br/documentacao/produtos/build/edge-application/): construa aplicações que executam diretamente na rede distribuída da Azion, oferecendo desempenho e opções de personalização excepcionais. -- [Edge Functions](/pt-br/documentacao/produtos/build/edge-application/edge-functions/): execute código mais próximo dos usuários finais, melhorando o desempenho e permitindo lógica personalizada para lidar com requisições e respostas. -- [Edge SQL](/pt-br/documentacao/produtos/store/edge-sql/): uma solução SQL edge-native projetada para aplicações serverless, fornecendo capacidades de armazenamento e consulta de dados no edge. -- [Vector Search](/pt-br/documentacao/produtos/store/edge-sql/vector-search/): ative motores de busca semântica e recomendações impulsionadas por AI através de embeddings vetoriais no edge. 
+## Integração com Banco de Dados SQL + +Integre sua aplicação com o **Banco de Dados SQL** para habilitar capacidades de [Vector Search](/pt-br/documentacao/produtos/store/sql-database/vector-search/), permitindo consultas semânticas e busca híbrida. Essa integração melhora as aplicações impulsionadas por AI ao fornecer resultados precisos e contextualmente relevantes e ao suportar implementações eficientes de Geração Aumentada por Recuperação (RAG). + +## Limites + +Estes são os **limites padrão**: + +| Escopo | Limite | +| ----- | ----- | +| Requests por minuto | 300 | --- -Explore exemplos práticos de como implementar soluções de AI com a Azion: +## Produtos relacionados - - \ No newline at end of file +- [Applications](/pt-br/documentacao/produtos/build/applications/): construa aplicações que rodam diretamente na infraestrutura distribuída da Azion, oferecendo desempenho excepcional e opções de personalização. +- [Functions](/pt-br/documentacao/produtos/build/applications/functions/): execute código mais próximo dos usuários finais, melhorando o desempenho e permitindo lógica personalizada para lidar com solicitações e respostas. +- [SQL Database](/pt-br/documentacao/produtos/store/sql-database/): uma solução SQL nativa de borda projetada para aplicações serverless, fornecendo capacidades de armazenamento e consulta de dados na borda. Também habilita [Vector Search](/pt-br/documentacao/produtos/store/sql-database/vector-search/) para realizar busca semântica e recomendações impulsionadas por AI através de incorporação vetorial. 
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/modelos.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/modelos.mdx index 2c71fd3350..3d6a457ac9 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/modelos.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/modelos.mdx @@ -5,6 +5,7 @@ description: >- meta_tags: 'ai inference, modelos ai, inteligência artificial, edge computing' namespace: docs_edge_ai_models permalink: /documentacao/produtos/ai/ai-inference/modelos/ +menu_namespace: AIInferenceMenu --- import LinkButton from 'azion-webkit/linkbutton'; diff --git a/src/i18n/en/AIInferenceMenu.ts b/src/i18n/en/AIInferenceMenu.ts index 7ff623251d..6a426121d2 100644 --- a/src/i18n/en/AIInferenceMenu.ts +++ b/src/i18n/en/AIInferenceMenu.ts @@ -14,11 +14,10 @@ export default [ /// START HERE :::: DO NOT REMOVE the strings above, it's a work around for header on mobile /// - { text: 'Overview', header: true, anchor: true, type: 'learn', key: 'overview', slug: '/documentation/products/ai/ai-inference/', hasLabel: 'menu.aiinference' }, - { text: 'Get Started', header: true, anchor: true, type: 'learn', key: 'get-started', slug: '/documentation/products/ai/ai-inference/' }, + { text: 'Overview', header: true, anchor: true, type: 'learn', key: 'overview-aiinference', slug: '/documentation/products/ai/ai-inference/', hasLabel: 'menu.aiinference' }, + { text: 'Get Started', header: true, anchor: true, type: 'learn', key: 'get-started-aiinference', slug: '/documentation/products/guides/ai-inference-agent/' }, - { text: ' Available Models', header: true, type: 'learn', key: 'aiinference.models', items: [ - { text: 'Models', slug: '/documentation/products/ai/ai-inference/models/', key: 'aiinference/models' }, + { text: ' Available Models', header: true, type: 'learn', key: 'aiinference.models',slug: '/documentation/products/ai/ai-inference/models/', items: [ { text: 
'BAAI/bge reranker v2 m3', slug: '/documentation/products/ai/ai-inference/models/baai-bge-reranker-v2-m3', key: 'aiinference/BAAI/bge-reranker-v2-m3' }, { text: 'InternVL3', slug: '/documentation/products/ai/ai-inference/models/internvl3', key: 'aiinference/InternVL3' }, { text: 'Mistral 3 Small (24B AWQ)', slug: '/documentation/products/ai/ai-inference/models/mistral-3-small', key: 'aiinference/mistral-3-small' }, diff --git a/src/i18n/pt-br/AIInferenceMenu.ts b/src/i18n/pt-br/AIInferenceMenu.ts new file mode 100644 index 0000000000..bd8e8630b9 --- /dev/null +++ b/src/i18n/pt-br/AIInferenceMenu.ts @@ -0,0 +1,38 @@ +/** + * This configures the navigation sidebar. + * All other languages follow this ordering/structure and will fall back to + * English for any entries they haven’t translated. + * + * - All entries MUST include `text` and `key` + * - Heading entries MUST include `header: true` and `type` + * - Link entries MUST include `slug` (which excludes the language code) + */ +export default [ + { text: 'Documentação', header: true, onlyMobile: true, anchor: true, slug: '/documentacao/', key: 'documentation' }, + { text: 'Guias',header: true, onlyMobile: true, anchor: true, slug: '/documentacao/produtos/guias/', key: 'guides' }, + { text: 'Dev Tools',header: true, onlyMobile: true, anchor: true, slug: '/documentacao/produtos/dev-tools/', key: 'devTools' }, + + /// START HERE :::: DO NOT REMOVE the strings above, it's a work around for header on mobile /// + + { text: 'Visão Geral', header: true, anchor: true, type: 'learn', key: 'overview-aiinference', slug: '/documentacao/produtos/ai/ai-inference/', hasLabel: 'menu.aiinference' }, + { text: 'Comece Agora', header: true, anchor: true, type: 'learn', key: 'get-started-aiinference', slug: '/documentacao/produtos/guias/ai-inference-agent/' }, + + { text: ' Modelos disponiveis', header: true, type: 'learn', key: 'aiinference.models',slug: '/documentacao/produtos/ai/ai-inference/modelos/', items: [ + { text: 
'BAAI/bge reranker v2 m3', slug: '/documentacao/produtos/ai/ai-inference/modelos/baai-bge-reranker-v2-m3/', key: 'aiinference/BAAI/bge-reranker-v2-m3' }, + { text: 'InternVL3', slug: '/documentacao/produtos/ai/ai-inference/modelos/internvl3/', key: 'aiinference/InternVL3' }, + { text: 'Mistral 3 Small (24B AWQ)', slug: '/documentacao/produtos/ai/ai-inference/modelos/mistral-3-small/', key: 'aiinference/mistral-3-small' }, + { text: 'Qwen2.5 VL AWQ 3B', slug: '/documentacao/produtos/ai/ai-inference/modelos/qwen-2-5-vl-3b/', key: 'aiinference/qwen-2-5-vl-awq-3b' }, + { text: 'Qwen2.5 VL AWQ 7B', slug: '/documentacao/produtos/ai/ai-inference/modelos/qwen-2-5-vl-7b/', key: 'aiinference/qwen-2-5-vl-awq-7b' }, + { text: 'Qwen3 30B A3B Instruct 2507 FP8', slug: '/documentacao/produtos/ai/ai-inference/modelos/qwen3-30ba3b/', key: 'aiinference/qwen-3-instruct' }, + { text: 'Qwen3 Embedding 4B', slug: '/documentacao/produtos/ai/ai-inference/modelos/qwen3-embedding-4b/', key: 'aiinference/qwen3-embedding' }, + { text: 'Nanonets-OCR-s', slug: '/documentacao/produtos/ai/ai-inference/modelos/nanonets-ocr-s/', key: 'aiinference/nanonets-OCR-s' }, + ]}, + + { text: 'Guias', header: true, type: 'learn', key: 'aiinference/guides', items: [ + { text: 'Implemente o AI Inference Starter kit', header: true, anchor: true, type: 'learn', key: 'aiinference/starter-kit', slug: '/documentacao/produtos/guias/ai-inference-starter-kit/' }, + { text: 'Implemente LangGraph AI Agent Boilerplate', header: true, anchor: true, type: 'learn', key: 'aiinference/langgraph-boilerplate', slug: '/documentacao/produtos/guias/langgraph-ai-agent-boilerplate/' }, + ]}, + + + +] as const; From 4aa2474134aba764807b8c09d56c958328ca2cac Mon Sep 17 00:00:00 2001 From: Luiza Vargas do Santos Date: Fri, 26 Sep 2025 13:18:35 -0300 Subject: [PATCH 06/12] updated terms --- .../referencia/ai-inference/edge-ai-reference.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx index cb5a0522fb..3123cf3502 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx @@ -15,10 +15,10 @@ import LinkButton from 'azion-webkit/linkbutton'; Com o AI Inference da Azion, você pode integrar capacidades de AI em suas aplicações, aproveitando ferramentas como **Functions**, **Applications**, **Vector Search** e a API da Azion para criar soluções escaláveis, seguras e eficientes. -Comece implementando o Template do Starter Kit do AI Inference: +Comece implantando o Template do Starter Kit do AI Inference: Date: Mon, 29 Sep 2025 15:41:59 -0300 Subject: [PATCH 07/12] removed guide from menu --- src/i18n/en/AIInferenceMenu.ts | 2 +- src/i18n/pt-br/AIInferenceMenu.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/i18n/en/AIInferenceMenu.ts b/src/i18n/en/AIInferenceMenu.ts index 6a426121d2..a48cb1da42 100644 --- a/src/i18n/en/AIInferenceMenu.ts +++ b/src/i18n/en/AIInferenceMenu.ts @@ -15,7 +15,7 @@ export default [ /// START HERE :::: DO NOT REMOVE the strings above, it's a work around for header on mobile /// { text: 'Overview', header: true, anchor: true, type: 'learn', key: 'overview-aiinference', slug: '/documentation/products/ai/ai-inference/', hasLabel: 'menu.aiinference' }, - { text: 'Get Started', header: true, anchor: true, type: 'learn', key: 'get-started-aiinference', slug: '/documentation/products/guides/ai-inference-agent/' }, + //{ text: 'Get Started', header: true, anchor: true, type: 'learn', key: 'get-started-aiinference', slug: '/documentation/products/guides/ai-inference-agent/' }, { text: ' Available Models', header: true, type: 'learn', key: 'aiinference.models',slug: 
'/documentation/products/ai/ai-inference/models/', items: [ { text: 'BAAI/bge reranker v2 m3', slug: '/documentation/products/ai/ai-inference/models/baai-bge-reranker-v2-m3', key: 'aiinference/BAAI/bge-reranker-v2-m3' }, diff --git a/src/i18n/pt-br/AIInferenceMenu.ts b/src/i18n/pt-br/AIInferenceMenu.ts index bd8e8630b9..114e8c2114 100644 --- a/src/i18n/pt-br/AIInferenceMenu.ts +++ b/src/i18n/pt-br/AIInferenceMenu.ts @@ -15,7 +15,7 @@ export default [ /// START HERE :::: DO NOT REMOVE the strings above, it's a work around for header on mobile /// { text: 'Visão Geral', header: true, anchor: true, type: 'learn', key: 'overview-aiinference', slug: '/documentacao/produtos/ai/ai-inference/', hasLabel: 'menu.aiinference' }, - { text: 'Comece Agora', header: true, anchor: true, type: 'learn', key: 'get-started-aiinference', slug: '/documentacao/produtos/guias/ai-inference-agent/' }, + //{ text: 'Comece Agora', header: true, anchor: true, type: 'learn', key: 'get-started-aiinference', slug: '/documentacao/produtos/guias/ai-inference-agent/' }, { text: ' Modelos disponiveis', header: true, type: 'learn', key: 'aiinference.models',slug: '/documentacao/produtos/ai/ai-inference/modelos/', items: [ { text: 'BAAI/bge reranker v2 m3', slug: '/documentacao/produtos/ai/ai-inference/modelos/baai-bge-reranker-v2-m3/', key: 'aiinference/BAAI/bge-reranker-v2-m3' }, From 391e44ac5c46eef481164b88d461779ec531c0ec Mon Sep 17 00:00:00 2001 From: "gabriel.alves" Date: Thu, 21 May 2026 17:02:34 -0300 Subject: [PATCH 08/12] update-05-21-26 --- .../guides/ai-inference-starter-kit/index.mdx | 114 +++++++ .../pages/guides/ai-inference/quick-start.mdx | 305 ++++++++++++++++-- .../langgraph-ai-agent-boilerplate/index.mdx | 153 +++++++++ .../Qwen3-30b-a3b-Instruct-2507-fp8.mdx | 10 +- .../ai-inference/ai-models/nanonets-ocr-r.mdx | 14 +- .../ai-inference/ai-models/qwen-2-5-vl-3b.mdx | 4 +- .../ai-inference/ai-models/qwen-2-5-vl-7b.mdx | 2 +- .../ai-inference/edge-ai-reference.mdx | 14 +- 
.../reference/ai-inference/models.mdx | 4 +- .../guias/ai-inference-starter-kit/index.mdx | 74 +++++ .../pages/guias/ai-inference/quick-start.mdx | 305 ++++++++++++++++-- .../langgraph-ai-agent-boilerplate/index.mdx | 109 +++++++ .../Qwen3-30b-a3b-Instruct-2507-fp8.mdx | 8 +- .../ai-inference/ai-models/nanonets-ocr-r.mdx | 15 +- .../ai-inference/ai-models/qwen-2-5-vl-7b.mdx | 2 +- .../ai-inference/edge-ai-reference.mdx | 10 +- .../referencia/ai-inference/modelos.mdx | 2 +- src/i18n/en/AIInferenceMenu.ts | 9 +- src/i18n/pt-br/AIInferenceMenu.ts | 9 +- 19 files changed, 1084 insertions(+), 79 deletions(-) create mode 100644 src/content/docs/en/pages/guides/ai-inference-starter-kit/index.mdx create mode 100644 src/content/docs/en/pages/guides/langgraph-ai-agent-boilerplate/index.mdx create mode 100644 src/content/docs/pt-br/pages/guias/ai-inference-starter-kit/index.mdx create mode 100644 src/content/docs/pt-br/pages/guias/langgraph-ai-agent-boilerplate/index.mdx diff --git a/src/content/docs/en/pages/guides/ai-inference-starter-kit/index.mdx b/src/content/docs/en/pages/guides/ai-inference-starter-kit/index.mdx new file mode 100644 index 0000000000..58ad7d8fa6 --- /dev/null +++ b/src/content/docs/en/pages/guides/ai-inference-starter-kit/index.mdx @@ -0,0 +1,114 @@ +--- +title: Deploy AI Inference Starter Kit +description: Deploy a complete AI Inference environment using the Starter Kit template on Azion. +meta_tags: >- + ai inference, starter kit, template, deployment, artificial intelligence, edge computing +namespace: docs_guides_ai_inference_starter_kit +permalink: /documentation/products/guides/ai-inference-starter-kit/ +menu_namespace: AIInferenceMenu + +--- + +import LinkButton from 'azion-webkit/linkbutton'; + +The **AI Inference Starter Kit** provides a ready-to-use template for deploying AI capabilities on Azion's edge infrastructure. It includes pre-configured applications, functions, and model integrations. 
+ +## Overview + +The Starter Kit includes: + +- An edge application configured for AI workloads +- Pre-built functions for common AI tasks +- Integration with available AI models +- Sample code for quick implementation + +## Requirements + +Before you begin, ensure you have: + +- An Azion account +- Access to the [Azion Console](https://console.azion.com/) + +## Deploy the Starter Kit + +1. Access the [Azion Console](https://console.azion.com/). +2. On the **+ Create** page, search for **AI Inference Starter Kit**. +3. Select the template. +4. Configure your edge application: + - Enter a name for your application + - Select the models you want to use + - Configure additional settings as needed +5. Click **Deploy**. + + + +## Next steps + +After deployment, you can: + +- [Explore available models](/en/documentation/products/ai/ai-inference/models/) +- [Build a simple agent](/en/documentation/products/guides/ai-inference-agent/) +- [Integrate with SQL Database](/en/documentation/products/store/sql-database/) for vector search capabilities + +## Example usage + +After deploying, test your AI Inference with a simple request: + +```javascript +const response = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { + "stream": false, + "messages": [ + { + "role": "user", + "content": "Explain edge computing in one sentence." + } + ] +}) + +console.log(response.choices[0].message.content) +``` + +Expected response: + +```json +{ + "id": "chatcmpl-abc123", + "choices": [{ + "message": { + "role": "assistant", + "content": "Edge computing processes data closer to its source, reducing latency and bandwidth usage by bringing computation and storage resources near the devices or sensors that generate the data." 
+ } + }] +} +``` + +## Troubleshooting + +### Deployment fails + +If deployment fails, check: + +- Your account has sufficient permissions +- All required fields are filled correctly +- Error logs in the console for specific issues + +### Model not responding + +If the model isn't responding: + +- Verify the function is correctly configured +- Confirm the model name is correct in `Azion.AI.run()` +- Check the rate limits in the [Limits section](/en/documentation/products/ai/ai-inference/#limits) + +### High latency + +If you experience high latency: + +- Consider using streaming responses (`"stream": true`) +- Check if your edge application is deployed in the optimal region +- Review your function's timeout settings diff --git a/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx b/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx index 67040805bf..075c710df5 100644 --- a/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx +++ b/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx @@ -1,40 +1,305 @@ --- -title: How to build a simple agent with AI Inference -description: The "+ Create" button accelerates your journey to start building with Azion. +title: Build your first AI agent +description: Build a simple AI agent in 5 minutes — a conversational assistant that runs on Azion's edge infrastructure. meta_tags: >- - building, onboarding, create resources, Azion Web Platform, import from - GitHub + ai inference, ai agent, artificial intelligence, edge computing, quick start, tutorial namespace: docs_guides_ai_inference_build_agent permalink: /documentation/products/guides/ai-inference-agent/ menu_namespace: AIInferenceMenu --- +import LinkButton from 'azion-webkit/linkbutton'; +Build AI agents that think, respond, and act. Agents run on Azion's global edge network, providing low-latency responses and seamless scalability. -## Usage +**What you will build:** A conversational AI agent that answers questions and maintains context. 
-AI Inference can be used in a [Function] +**Time:** ~5 minutes -This function receives a POST request to the desired AI model and returns the response. +--- + +## Create a new project + + + +1. Access the [Azion Console](https://console.azion.com/). +2. Click **+ Create**. +3. Search for **AI Inference Starter Kit** and select it. +4. Enter a name for your application, such as `my-first-agent`. +5. Click **Deploy**. + +This creates a project with: +- An **Edge Application** configured for AI workloads +- A **Function** with pre-configured AI Inference integration +- Example code to get you started + +--- + +## Your first agent + +After deployment, navigate to your function and replace the code with this simple agent: ```javascript -const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { - "stream": true, - "messages": [ - { - "role": "system", - "content": "You are a helpful assistant." - }, +async function handler(event) { + const body = JSON.parse(event.request.body || '{}'); + const userMessage = body.message || 'Hello!'; + + const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { + "stream": false, + "messages": [ + { + "role": "system", + "content": "You are a helpful AI assistant. Be concise and friendly." 
+ }, + { + "role": "user", + "content": userMessage + } + ], + "max_tokens": 500 + }); + + return new Response(JSON.stringify({ + response: modelResponse.choices[0].message.content, + model: modelResponse.model, + usage: modelResponse.usage + }), { + headers: { "Content-Type": "application/json" } + }); +} + +addEventListener("fetch", handler); +``` + +--- + +## Test your agent + +Send a POST request to your function's endpoint: + +```bash +curl -X POST https://your-function-url.azion.net \ + -H "Content-Type: application/json" \ + -d '{"message": "What is edge computing?"}' +``` + +Expected response: + +```json +{ + "response": "Edge computing processes data closer to its source, reducing latency and bandwidth by bringing computation near end users or devices.", + "model": "Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", + "usage": { + "prompt_tokens": 22, + "completion_tokens": 24, + "total_tokens": 46 + } +} +``` + +--- + +## Add conversation memory + +To maintain context across messages, add conversation history. This example keeps the history in a module-level array, so it is shared by every request the same function instance handles and is lost when that instance restarts; for real users, store the history per session in persistent storage: + +```javascript +let conversationHistory = []; + +async function handler(event) { + const body = JSON.parse(event.request.body || '{}'); + const userMessage = body.message || 'Hello!'; + + // Add user message to history + conversationHistory.push({ + role: "user", + content: userMessage + }); + + const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { + "stream": false, + "messages": [ + { + "role": "system", + "content": "You are a helpful AI assistant. Be concise and friendly." 
+ }, + ...conversationHistory + ], + "max_tokens": 500 + }); + + const assistantMessage = modelResponse.choices[0].message.content; + + // Add assistant response to history + conversationHistory.push({ + role: "assistant", + content: assistantMessage + }); + + return new Response(JSON.stringify({ + response: assistantMessage, + history: conversationHistory + }), { + headers: { "Content-Type": "application/json" } + }); +} + +addEventListener("fetch", handler); +``` + +--- + +## What just happened? + +When you sent a message: + +1. **Request** arrived at your edge function +2. **Function** called `Azion.AI.run()` with your message +3. **Model** processed the request at the edge +4. **Response** returned to the client with minimal latency + +```mermaid +flowchart LR + A[Client] -->|POST request| B[Edge Function] + B -->|Azion.AI.run| C[AI Model] + C -->|Response| B + B -->|JSON response| A +``` + +### Key concepts + +| Concept | What it means | +|---------|---------------| +| **Edge execution** | Code runs on Azion's distributed network, close to users | +| **Azion.AI.run()** | SDK method to invoke AI models | +| **Model selection** | Choose from available models based on your use case | +| **Streaming** | Enable real-time responses with `stream: true` | + +--- + +## Add tool calling + +Enable your agent to call external functions: + +```javascript +async function handler(event) { + const body = JSON.parse(event.request.body || '{}'); + const userMessage = body.message; + + const tools = [ { - "role": "user", - "content": "Name the european capitals" + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "City name" + } + }, + "required": ["location"] + } + } } - ] -}) -return modelResponse + ]; + + const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { + "stream": 
false, + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant with access to tools." + }, + { + "role": "user", + "content": userMessage + } + ], + "tools": tools + }); + + // Check if the model wants to call a tool + if (modelResponse.choices[0].message.tool_calls) { + const toolCall = modelResponse.choices[0].message.tool_calls[0]; + const args = JSON.parse(toolCall.function.arguments); + + // Execute the tool (you would implement this) + const weatherData = await getWeather(args.location); + + return new Response(JSON.stringify({ + tool: toolCall.function.name, + location: args.location, + weather: weatherData + }), { + headers: { "Content-Type": "application/json" } + }); + } + + return new Response(JSON.stringify({ + response: modelResponse.choices[0].message.content + }), { + headers: { "Content-Type": "application/json" } + }); +} + +async function getWeather(location) { + // Implement your weather API call here + return { location, temperature: "22°C", condition: "Sunny" }; +} + +addEventListener("fetch", handler); ``` -This example uses the Qwen3 model. You can change the model and the request parameters according to your preferences. Check the [AI models reference](/en/documentation/products/ai/ai-inference/models/) for more information about the available models and how to use them in your application. +--- + +## Troubleshooting + +### "Model not found" error + +Make sure: +1. The model name matches exactly (case-sensitive) +2. Check [available models](/en/documentation/products/ai/ai-inference/models/) for correct names + +### High latency + +Try these solutions: +1. Enable streaming: `"stream": true` +2. Reduce `max_tokens` for shorter responses +3. Choose a smaller model for faster inference + +### Rate limit errors + +Check the default limits: +- **300 requests per minute** + +Contact support to increase limits for production workloads. + +### Function timeout + +If your function times out: +1. Reduce `max_tokens` +2. 
Simplify your prompt +3. Consider breaking complex tasks into smaller steps + +--- + +## Next steps + +Now that you have a working agent, explore: +| Learn how to | Refer to | +|-------------|----------| +| Use different models | [Available models](/en/documentation/products/ai/ai-inference/models/) | +| Implement tool calling | [Tool calling example](/en/documentation/products/ai/ai-inference/models/mistral-3-small/#tool-calling-example) | +| Build RAG applications | [Vector Search](/en/documentation/products/store/sql-database/vector-search/) | +| Deploy with templates | [AI Inference Starter Kit](/en/documentation/products/guides/ai-inference-starter-kit/) | diff --git a/src/content/docs/en/pages/guides/langgraph-ai-agent-boilerplate/index.mdx b/src/content/docs/en/pages/guides/langgraph-ai-agent-boilerplate/index.mdx new file mode 100644 index 0000000000..4972780f9f --- /dev/null +++ b/src/content/docs/en/pages/guides/langgraph-ai-agent-boilerplate/index.mdx @@ -0,0 +1,153 @@ +--- +title: Deploy LangGraph AI Agent Boilerplate +description: Deploy a LangGraph-based AI agent on Azion using the boilerplate template. +meta_tags: >- + ai inference, langgraph, ai agent, boilerplate, template, deployment, artificial intelligence, edge computing +namespace: docs_guides_ai_inference_langgraph_boilerplate +permalink: /documentation/products/guides/langgraph-ai-agent-boilerplate/ +menu_namespace: AIInferenceMenu + +--- + +import LinkButton from 'azion-webkit/linkbutton'; + +The **LangGraph AI Agent Boilerplate** provides a template for deploying AI agents built with LangGraph on Azion's edge infrastructure. LangGraph enables you to build stateful, multi-actor applications with LLMs. + +## Overview + +LangGraph is a framework for building stateful, multi-actor applications with LLMs. It extends LangChain capabilities by adding the ability to coordinate multiple chains (or actors) across multiple steps of computation. 
+ +The boilerplate includes: + +- Pre-configured LangGraph agent structure +- Integration with AI Inference models +- State management for multi-step workflows +- Example tool implementations + +## Requirements + +Before you begin, ensure you have: + +- An Azion account +- Node.js 18+ installed +- Basic knowledge of LangGraph concepts + +## Architecture + +The LangGraph boilerplate implements: + +- **Graph-based workflow**: Define agent behavior as a graph of nodes and edges +- **State management**: Maintain context across conversation turns +- **Tool integration**: Connect external APIs and services +- **Memory**: Persist conversation history + +## Deploy the Boilerplate + +1. Access the [Azion Console](https://console.azion.com/). +2. On the **+ Create** page, search for **LangGraph AI Agent Boilerplate**. +3. Select the template. +4. Configure your deployment: + - Enter a name for your application + - Select the AI model for your agent + - Configure environment variables +5. Click **Deploy**. 
+ +## Configure your agent + +After deployment, you can customize your LangGraph agent: + +### Define the graph structure + +```python +from langgraph.graph import StateGraph, END + +def build_graph(): + graph = StateGraph(AgentState) + graph.add_node("reasoning", reasoning_node) + graph.add_node("action", action_node) + graph.add_edge("reasoning", "action") + graph.add_edge("action", END) + return graph.compile() +``` + +### Connect to AI Inference + +Configure your agent to use Azion's AI Inference endpoint: + +```python +from langchain_openai import ChatOpenAI + +llm = ChatOpenAI( + base_url="https://api.azion.net/v1", + api_key="your-api-key", + model="Qwen/Qwen3-30B-A3B-Instruct-2507-FP8" +) +``` + +## Next steps + +- [Explore available models](/en/documentation/products/ai/ai-inference/models/) +- Learn about [tool calling](/en/documentation/products/ai/ai-inference/models/mistral-3-small/#tool-calling-example) capabilities +- Integrate with [Vector Search](/en/documentation/products/store/sql-database/vector-search/) for RAG implementations + +## Example: Multi-step agent + +Here's an example of a multi-step agent using LangGraph: + +```python +from langgraph.graph import StateGraph, END +from typing import TypedDict + +class AgentState(TypedDict): + messages: list + current_step: str + result: str + +def reasoning_node(state: AgentState): + # Process the input and decide next action + response = llm.invoke(state["messages"]) + return {"current_step": "action", "result": response.content} + +def action_node(state: AgentState): + # Execute the decided action + result = execute_action(state["result"]) + return {"current_step": "complete", "result": result} + +def build_agent(): + graph = StateGraph(AgentState) + graph.add_node("reasoning", reasoning_node) + graph.add_node("action", action_node) + graph.set_entry_point("reasoning") + graph.add_edge("reasoning", "action") + graph.add_edge("action", END) + return graph.compile() + +agent = build_agent() +result = 
agent.invoke({"messages": ["What's the weather in Tokyo?"]}) +``` + +## Troubleshooting + +### Model connection error + +If the agent can't connect to the model: + +- Verify the `base_url` is correct +- Confirm the API key is valid +- Check that the model name is correct + +### State not persisting + +If state isn't being maintained between turns: + +- Verify the checkpoint is configured correctly +- Confirm the memory store is functioning +- Review the StateGraph configuration + +### Tool calling issues + +If tools aren't being called correctly: + +- Ensure tool definitions match the expected schema +- Check that the model supports tool calling (see model capabilities) +- Verify function parameters are correctly formatted diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx index a47998ca5b..2ed521dd49 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx @@ -10,7 +10,9 @@ permalink: /documentation/products/ai/ai-inference/models/qwen3-30ba3b/ **Qwen3-30B-A3B-Instruct-2507-FP8** is an instruction-tuned 30B-parameter FP8 causal language model for long-context (256K) text generation and reasoning, supporting chat/QA, summarization, multilingual tasks, math/science problem solving, coding, and tool-augmented workflows. 
- Category | Details | +## Model details + +| Category | Details | |----------|---------| | **Model Name** | Qwen/Qwen3-30B-A3B-Instruct-2507-FP8 | | **Version** | 32B - FP8 | @@ -22,12 +24,12 @@ permalink: /documentation/products/ai/ai-inference/models/qwen3-30ba3b/ ## Capabilities -| Feature | Status | +| Feature | Details | |---------|--------| -| Tool Calling | ✅| +| Tool Calling | ✅ | | Context Length | 64k | | Supports LoRA | ✅ | -| Input data | TEXT | +| Input data | Text | ## Usage diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/nanonets-ocr-r.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/nanonets-ocr-r.mdx index e4df693992..553c0163df 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/nanonets-ocr-r.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/nanonets-ocr-r.mdx @@ -10,12 +10,22 @@ permalink: /documentation/products/ai/ai-inference/models/nanonets-ocr-s/ **Nanonets-OCR-s** is an OCR model that converts document images to structured Markdown, preserving layout (headings, lists, tables) and basic tags. The output is easy to parse and feed into LLM pipelines. 
+## Model details + +| Category | Details | +|----------|---------| +| **Model Name** | Nanonets-OCR-s | +| **Version** | Original | +| **Model Category** | OCR | +| **HuggingFace Model** | [nanonets/Nanonets-OCR-s](https://huggingface.co/nanonets/Nanonets-OCR-s) | +| **OpenAI Compatible Endpoint** | [Chat Completions](https://platform.openai.com/docs/api-reference/chat/create) | + ## Capabilities -| Feature | Status | +| Feature | Details | |---------|--------| | Context Length | 32k tokens | -| Input Data | Text+Image | +| Input Data | Text + Image | ## Usage diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx index 471c370b34..f77cc6b606 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx @@ -1,14 +1,14 @@ --- title: Qwen2.5 VL AWQ 3B description: >- - Qwen2.5 VL AWQ 3B is a vision-language model that supports 3 bilion parameters and offers advanced capabilities such as visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation. + Qwen2.5 VL AWQ 3B is a vision-language model that supports 3 billion parameters and offers advanced capabilities such as visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation. meta_tags: 'ai inference, ai models, artificial intelligence, edge computing, qwen' namespace: docs_edge_ai_models_qwen_2_5_vl_3b menu_namespace: AIInferenceMenu permalink: /documentation/products/ai/ai-inference/models/qwen-2-5-vl-3b/ --- -**Qwen2.5 VL AWQ 3B** is a vision-language model that offers advanced capabilities such as visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation. 
It supports 3 bilion parameters. +**Qwen2.5 VL AWQ 3B** is a vision-language model that offers advanced capabilities such as visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation. It supports 3 billion parameters. ## Model details diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx index e72c648e7f..1858b38512 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx @@ -18,7 +18,7 @@ permalink: /documentation/products/ai/ai-inference/models/qwen-2-5-vl-7b/ | **Version** | AWQ 7B | | **Model Category** | VLM | | **Size** | 7B params | -| **HuggingFace Model** | [Qwen/Qwen2.5-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct-AWQ) | +| **HuggingFace Model** | [Qwen/Qwen2.5-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct-AWQ) | | **OpenAI Compatible endpoint** | [Chat Completions](https://platform.openai.com/docs/api-reference/chat/create) | | **License** | [Apache 2.0](https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md) | diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx index 119d075993..3607342558 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx @@ -1,8 +1,8 @@ --- title: AI Inference description: >- - AI Inference enables you to run AI models directly on Azion’s highly distributed infrastructure. 
-meta_tags: 'ai inference, artificial intelligence, edge computing, ai assistant, ai agente' + AI Inference enables you to run AI models directly on Azion's highly distributed infrastructure. +meta_tags: 'ai inference, artificial intelligence, edge computing, ai assistant, ai agent' namespace: docs_edge_ai_reference permalink: /documentation/products/ai/ai-inference/ menu_namespace: AIInferenceMenu @@ -11,9 +11,7 @@ menu_namespace: AIInferenceMenu import LinkButton from 'azion-webkit/linkbutton'; -**AI Inference** enables you to run AI models directly on Azion’s highly distributed infrastructure. - -With Azion AI Inference, you can integrate AI capabilities into your applications, leveraging tools like **Functions**, **Applications**, **Vector Search**, and the Azion API to create scalable, secure, and efficient solutions. +**AI Inference** enables you to run AI models directly on Azion's highly distributed infrastructure. You can integrate AI capabilities into your applications, leveraging tools like Functions, Applications, Vector Search, and the Azion API to create scalable, secure, and efficient solutions. Get started by deploying the AI Inference Starter Kit Template: @@ -32,7 +30,7 @@ Get started by deploying the AI Inference Starter Kit Template: Connect applications using Azion’s OpenAI-compatible endpoint format. -### Run Edge optimized models +### Run edge-optimized models - Run AI models on Azion’s globally distributed edge to minimize latency and enable real-time inference. - Access a curated catalog of open-source models, ready to run on Azion Runtime and optimized for distributed deployment with low resource footprints. @@ -40,9 +38,9 @@ Connect applications using Azion’s OpenAI-compatible endpoint format. -### Fine-Tune Models with LoRA +### Fine-tune models with LoRA -AI Inference allows you to fine-tune, train, and specialize models your own data and parameters. 
This capability enables you to optimize models for specific tasks, ensuring they are both efficient and accurate for your business needs. +You can fine-tune, train, and specialize models with your own data and parameters. This capability enables you to optimize models for specific tasks, ensuring they're both efficient and accurate for your business needs. --- diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx index 0db8828e7f..d9a4054238 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx @@ -12,7 +12,7 @@ import LinkButton from 'azion-webkit/linkbutton'; Azion's edge-optimized models span multiple AI domains including text generation, image analysis, embeddings, and more. Each model is designed to balance performance and resource efficiency for distributed deployment. -This page provides a list of models available for use with **AI Inference**. To learn more about it, visit the [AI Inference Reference](/en/documentation/products/ai/ai-inference/). +This page provides a list of models available for use with AI Inference. To learn more about it, visit the [AI Inference Reference](/en/documentation/products/ai/ai-inference/). ## Available Models @@ -42,7 +42,7 @@ A Vision Language Model (VLM) that offers advanced capabilities such as visual a ### Qwen2.5 VL AWQ 7B -An instruction-tuned 30B-parameter FP8 causal language model for long-context (256K) text generation and reasoning, supporting chat/QA, summarization, multilingual tasks, math/science problem solving, coding, and tool-augmented workflows. +A vision-language model (VLM) with advanced capabilities including visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation. Optimized for edge deployment with efficient resource usage. 
diff --git a/src/content/docs/pt-br/pages/guias/ai-inference-starter-kit/index.mdx b/src/content/docs/pt-br/pages/guias/ai-inference-starter-kit/index.mdx new file mode 100644 index 0000000000..23ac2162cd --- /dev/null +++ b/src/content/docs/pt-br/pages/guias/ai-inference-starter-kit/index.mdx @@ -0,0 +1,74 @@ +--- +title: Implantar AI Inference Starter Kit +description: Implante um ambiente completo de AI Inference usando o template Starter Kit na Azion. +meta_tags: >- + ai inference, starter kit, template, implantação, inteligência artificial, computação de borda +namespace: docs_guides_ai_inference_starter_kit +permalink: /documentacao/produtos/guias/ai-inference-starter-kit/ +menu_namespace: AIInferenceMenu + +--- + +import LinkButton from 'azion-webkit/linkbutton'; + +O **AI Inference Starter Kit** oferece um template pronto para uso para implantar capacidades de AI na infraestrutura de edge da Azion. Inclui aplicações, funções e integrações de modelos pré-configuradas. + +## Visão geral + +O Starter Kit inclui: + +- Uma edge application configurada para workloads de AI +- Funções pré-construídas para tarefas comuns de AI +- Integração com modelos de AI disponíveis +- Código de exemplo para implementação rápida + +## Requisitos + +Antes de começar, certifique-se de ter: + +- Uma conta na Azion +- Acesso ao Console da Azion + +## Implantar o Starter Kit + +1. Acesse o [Console da Azion](https://console.azion.com/). +2. Na página **+ Create**, procure por **AI Inference Starter Kit**. +3. Selecione o template. +4. Configure sua edge application: + - Digite um nome para sua aplicação + - Selecione os modelos que deseja usar + - Configure definições adicionais conforme necessário +5. Clique em **Deploy**. 
+ + + +## Próximos passos + +Após a implantação, você pode: + +- [Explorar modelos disponíveis](/pt-br/documentacao/produtos/ai/ai-inference/modelos/) +- [Construir um agente simples](/pt-br/documentacao/produtos/guias/ai-inference-agent/) +- [Integrar com Banco de Dados SQL](/pt-br/documentacao/produtos/store/sql-database/) para capacidades de vector search + +## Solução de problemas + +### Erro de implantação + +Se a implantação falhar, verifique: + +- Se sua conta tem permissões suficientes +- Se todos os campos obrigatórios foram preenchidos corretamente +- Os logs de erro no console para identificar problemas específicos + +### Modelo não responde + +Se o modelo não estiver respondendo: + +- Verifique se a função está corretamente configurada +- Confirme se o nome do modelo está correto na chamada `Azion.AI.run()` +- Verifique os limites de requisições na seção Limits da documentação diff --git a/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx b/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx index 773c1107e3..aa97bc73d3 100644 --- a/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx +++ b/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx @@ -1,40 +1,305 @@ --- -title: How to build a simple agent with AI Inference -description: The "+ Create" button accelerates your journey to start building with Azion. +title: Construa seu primeiro agente de AI +description: Construa um agente de AI simples em 5 minutos — um assistente conversacional que roda na infraestrutura de edge da Azion. 
meta_tags: >- - building, onboarding, create resources, Azion Web Platform, import from - GitHub + ai inference, agente ai, inteligência artificial, computação de borda, guia rápido, tutorial namespace: docs_guides_ai_inference_build_agent permalink: /documentacao/produtos/guias/ai-inference-agent/ menu_namespace: AIInferenceMenu --- +import LinkButton from 'azion-webkit/linkbutton'; +Construa agentes de AI que pensam, respondem e agem. Os agentes rodam na rede global de edge da Azion, fornecendo respostas com baixa latência e escalabilidade seamless. -## Usage +**O que você vai construir:** Um agente de AI conversacional que responde perguntas e mantém contexto. -AI Inference can be used in a [Function] +**Tempo:** ~5 minutos -This function receives a POST request to the desired AI model and returns the response. +--- + +## Criar um novo projeto + + + +1. Acesse o [Console da Azion](https://console.azion.com/). +2. Clique em **+ Create**. +3. Procure por **AI Inference Starter Kit** e selecione-o. +4. Digite um nome para sua aplicação, como `meu-primeiro-agente`. +5. Clique em **Deploy**. + +Isso cria um projeto com: +- Uma **Edge Application** configurada para workloads de AI +- Uma **Function** com integração de AI Inference pré-configurada +- Código de exemplo para começar + +--- + +## Seu primeiro agente + +Após a implantação, navegue até sua função e substitua o código por este agente simples: ```javascript -const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { - "stream": true, - "messages": [ - { - "role": "system", - "content": "You are a helpful assistant." - }, +async function handler(event) { + const body = JSON.parse(event.request.body || '{}'); + const userMessage = body.message || 'Olá!'; + + const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { + "stream": false, + "messages": [ + { + "role": "system", + "content": "Você é um assistente de AI útil. Seja conciso e amigável." 
+ }, + { + "role": "user", + "content": userMessage + } + ], + "max_tokens": 500 + }); + + return new Response(JSON.stringify({ + response: modelResponse.choices[0].message.content, + model: modelResponse.model, + usage: modelResponse.usage + }), { + headers: { "Content-Type": "application/json" } + }); +} + +addEventListener("fetch", handler); +``` + +--- + +## Testar seu agente + +Envie uma requisição POST para o endpoint da sua função: + +```bash +curl -X POST https://url-da-sua-funcao.azion.net/v1/chat/completions + -H "Content-Type: application/json" + -d '{"message": "O que é computação de borda?"}' +``` + +Resposta esperada: + +```json +{ + "response": "Computação de borda processa dados mais próximo de sua origem, reduzindo latência e uso de banda ao trazer computação perto dos usuários finais ou dispositivos.", + "model": "Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", + "usage": { + "prompt_tokens": 22, + "completion_tokens": 24, + "total_tokens": 46 + } +} +``` + +--- + +## Adicionar memória de conversação + +Para manter contexto entre mensagens, adicione histórico de conversação: + +```javascript +let conversationHistory = []; + +async function handler(event) { + const body = JSON.parse(event.request.body || '{}'); + const userMessage = body.message || 'Olá!'; + + // Adiciona mensagem do usuário ao histórico + conversationHistory.push({ + role: "user", + content: userMessage + }); + + const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { + "stream": false, + "messages": [ + { + "role": "system", + "content": "Você é um assistente de AI útil. Seja conciso e amigável." 
+ }, + ...conversationHistory + ], + "max_tokens": 500 + }); + + const assistantMessage = modelResponse.choices[0].message.content; + + // Adiciona resposta do assistente ao histórico + conversationHistory.push({ + role: "assistant", + content: assistantMessage + }); + + return new Response(JSON.stringify({ + response: assistantMessage, + history: conversationHistory + }), { + headers: { "Content-Type": "application/json" } + }); +} + +addEventListener("fetch", handler); +``` + +--- + +## O que acabou de acontecer? + +Quando você enviou uma mensagem: + +1. **Requisição** chegou na sua edge function +2. **Function** chamou `Azion.AI.run()` com sua mensagem +3. **Modelo** processou a requisição no edge +4. **Resposta** retornou ao cliente com latência mínima + +```mermaid +flowchart LR + A[Cliente] -->|Requisição POST| B[Edge Function] + B -->|Azion.AI.run| C[Modelo AI] + C -->|Resposta| B + B -->|Resposta JSON| A +``` + +### Conceitos-chave + +| Conceito | O que significa | +|----------|----------------| +| **Execução no edge** | Código roda na rede distribuída da Azion, perto dos usuários | +| **Azion.AI.run()** | Método SDK para invocar modelos de AI | +| **Seleção de modelo** | Escolha entre modelos disponíveis baseado no seu caso de uso | +| **Streaming** | Habilite respostas em tempo real com `stream: true` | + +--- + +## Adicionar chamada de ferramentas + +Habilite seu agente a chamar funções externas: + +```javascript +async function handler(event) { + const body = JSON.parse(event.request.body || '{}'); + const userMessage = body.message; + + const tools = [ { - "role": "user", - "content": "Name the european capitals" + "type": "function", + "function": { + "name": "get_weather", + "description": "Obter clima atual para um local", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "Nome da cidade" + } + }, + "required": ["location"] + } + } } - ] -}) -return modelResponse + ]; + + const modelResponse 
= await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { + "stream": false, + "messages": [ + { + "role": "system", + "content": "Você é um assistente útil com acesso a ferramentas." + }, + { + "role": "user", + "content": userMessage + } + ], + "tools": tools + }); + + // Verifica se o modelo quer chamar uma ferramenta + if (modelResponse.choices[0].message.tool_calls) { + const toolCall = modelResponse.choices[0].message.tool_calls[0]; + const args = JSON.parse(toolCall.function.arguments); + + // Executa a ferramenta (você implementaria isso) + const weatherData = await getWeather(args.location); + + return new Response(JSON.stringify({ + tool: toolCall.function.name, + location: args.location, + weather: weatherData + }), { + headers: { "Content-Type": "application/json" } + }); + } + + return new Response(JSON.stringify({ + response: modelResponse.choices[0].message.content + }), { + headers: { "Content-Type": "application/json" } + }); +} + +async function getWeather(location) { + // Implemente sua chamada de API de clima aqui + return { location, temperature: "22°C", condition: "Ensolarado" }; +} + +addEventListener("fetch", handler); ``` -This example uses the Qwen3 model. You can change the model and the request parameters according to your preferences. Check the [AI models reference](/en/documentation/products/ai/ai-inference/models/) for more information about the available models and how to use them in your application. +--- + +## Solução de problemas + +### Erro "Model not found" + +Verifique: +1. O nome do modelo corresponde exatamente (case-sensitive) +2. Consulte os [modelos disponíveis](/pt-br/documentacao/produtos/ai/ai-inference/modelos/) para nomes corretos + +### Alta latência + +Tente estas soluções: +1. Habilite streaming: `"stream": true` +2. Reduza `max_tokens` para respostas mais curtas +3. 
Escolha um modelo menor para inferência mais rápida + +### Erros de rate limit + +Verifique os limites padrão: +- **300 requisições por minuto** + +Contate o suporte para aumentar limites em produção. + +### Timeout na função + +Se sua função atinge timeout: +1. Reduza `max_tokens` +2. Simplifique seu prompt +3. Considere dividir tarefas complexas em etapas menores + +--- + +## Próximos passos + +Agora que você tem um agente funcionando, explore: +| Aprenda a | Consulte | +|-----------|----------| +| Usar diferentes modelos | [Modelos disponíveis](/pt-br/documentacao/produtos/ai/ai-inference/modelos/) | +| Implementar tool calling | [Exemplo de tool calling](/pt-br/documentacao/produtos/ai/ai-inference/modelos/mistral-3-small/#exemplo-de-tool-calling) | +| Construir aplicações RAG | [Vector Search](/pt-br/documentacao/produtos/store/sql-database/vector-search/) | +| Implantar com templates | [AI Inference Starter Kit](/pt-br/documentacao/produtos/guias/ai-inference-starter-kit/) | diff --git a/src/content/docs/pt-br/pages/guias/langgraph-ai-agent-boilerplate/index.mdx b/src/content/docs/pt-br/pages/guias/langgraph-ai-agent-boilerplate/index.mdx new file mode 100644 index 0000000000..a8f888546e --- /dev/null +++ b/src/content/docs/pt-br/pages/guias/langgraph-ai-agent-boilerplate/index.mdx @@ -0,0 +1,109 @@ +--- +title: Implantar LangGraph AI Agent Boilerplate +description: Implante um agente de AI baseado em LangGraph na Azion usando o template boilerplate. +meta_tags: >- + ai inference, langgraph, agente ai, boilerplate, template, implantação, inteligência artificial, computação de borda +namespace: docs_guides_ai_inference_langgraph_boilerplate +permalink: /documentacao/produtos/guias/langgraph-ai-agent-boilerplate/ +menu_namespace: AIInferenceMenu + +--- + +import LinkButton from 'azion-webkit/linkbutton'; + +O **LangGraph AI Agent Boilerplate** oferece um template para implantar agentes de AI construídos com LangGraph na infraestrutura de edge da Azion. 
O LangGraph permite construir aplicações stateful e multi-ator com LLMs. + +## Visão geral + +O LangGraph é um framework para construir aplicações stateful e multi-ator com LLMs. Ele estende as capacidades do LangChain adicionando a habilidade de coordenar múltiplas chains (ou atores) através de múltiplos passos de computação. + +O boilerplate inclui: + +- Estrutura de agente LangGraph pré-configurada +- Integração com modelos de AI Inference +- Gerenciamento de estado para workflows de múltiplas etapas +- Implementações de ferramentas de exemplo + +## Requisitos + +Antes de começar, certifique-se de ter: + +- Uma conta na Azion +- Node.js 18+ instalado +- Conhecimento básico de conceitos do LangGraph + +## Arquitetura + +O boilerplate LangGraph implementa: + +- **Workflow baseado em grafo**: Define o comportamento do agente como um grafo de nós e arestas +- **Gerenciamento de estado**: Mantém contexto através de turnos de conversação +- **Integração de ferramentas**: Conecta APIs e serviços externos +- **Memória**: Persiste histórico de conversação + +## Implantar o Boilerplate + +1. Acesse o [Console da Azion](https://console.azion.com/). +2. Na página **+ Create**, procure por **LangGraph AI Agent Boilerplate**. +3. Selecione o template. +4. Configure sua implantação: + - Digite um nome para sua aplicação + - Selecione o modelo de AI para seu agente + - Configure as variáveis de ambiente +5. Clique em **Deploy**. 
+ +## Configurar seu agente + +Após a implantação, você pode personalizar seu agente LangGraph: + +### Definir a estrutura do grafo + +```python +from langgraph.graph import StateGraph, END + +def build_graph(): + graph = StateGraph(AgentState) + graph.add_node("reasoning", reasoning_node) + graph.add_node("action", action_node) + graph.add_edge("reasoning", "action") + graph.add_edge("action", END) + return graph.compile() +``` + +### Conectar ao AI Inference + +Configure seu agente para usar o endpoint de AI Inference da Azion: + +```python +from langchain_openai import ChatOpenAI + +llm = ChatOpenAI( + base_url="https://api.azion.net/v1", + api_key="sua-api-key", + model="Qwen/Qwen3-30B-A3B-Instruct-2507-FP8" +) +``` + +## Próximos passos + +- [Explorar modelos disponíveis](/pt-br/documentacao/produtos/ai/ai-inference/modelos/) +- Saiba mais sobre [tool calling](/pt-br/documentacao/produtos/ai/ai-inference/modelos/mistral-3-small/#exemplo-de-tool-calling) +- Integre com [Vector Search](/pt-br/documentacao/produtos/store/sql-database/vector-search/) para implementações RAG + +## Solução de problemas + +### Erro de conexão com o modelo + +Se o agente não conseguir conectar ao modelo: + +- Verifique se a `base_url` está correta +- Confirme se a API key é válida +- Verifique se o nome do modelo está correto + +### Estado não persistindo + +Se o estado não estiver sendo mantido entre turnos: + +- Verifique se o checkpoint está configurado corretamente +- Confirme se o memory store está funcionando +- Revise a configuração do StateGraph diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx index 61ecb3ccfb..2362968c46 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx +++ 
b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx @@ -10,6 +10,8 @@ menu_namespace: AIInferenceMenu **Qwen3-30B-A3B-Instruct-2507-FP8** é um modelo de linguagem causal FP8 ajustado por instruções com 30 bilhões de parâmetros para geração de texto de longo contexto (256K) e raciocínio, suportando chat/QA, sumarização, tarefas multilíngues, resolução de problemas de matemática/ciência, codificação e fluxos de trabalho aumentados por ferramentas. +## Detalhes do modelo + | Categoria | Detalhes | |----------|---------| | **Nome do modelo** | Qwen/Qwen3-30B-A3B-Instruct-2507-FP8 | @@ -22,12 +24,12 @@ menu_namespace: AIInferenceMenu ## Capacidades -| Recurso | Status | +| Recurso | Detalhes | |---------|--------| -| Chamada de ferramentas | ✅| +| Chamada de ferramentas | ✅ | | Comprimento do contexto | 64k | | Suporta LoRA | ✅ | -| Dados de entrada | TEXTO | +| Dados de entrada | Texto | ## Uso diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/nanonets-ocr-r.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/nanonets-ocr-r.mdx index 1c63f04b65..3db79a0af5 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/nanonets-ocr-r.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/nanonets-ocr-r.mdx @@ -10,13 +10,22 @@ menu_namespace: AIInferenceMenu **Nanonets-OCR-s** é um modelo OCR que converte imagens de documentos em Markdown estruturado, preservando o layout (títulos, listas, tabelas) e tags básicas. A saída é fácil de analisar e alimentar em pipelines de LLM. 
+## Detalhes do modelo + +| Categoria | Detalhes | +|----------|---------| +| **Nome do modelo** | Nanonets-OCR-s | +| **Versão** | Original | +| **Categoria do modelo** | OCR | +| **Modelo HuggingFace** | [nanonets/Nanonets-OCR-s](https://huggingface.co/nanonets/Nanonets-OCR-s) | +| **Endpoint compatível com OpenAI** | [Chat Completions](https://platform.openai.com/docs/api-reference/chat/create) | + ## Capacidades -| Recurso | Status | +| Recurso | Detalhes | |---------|--------| -| Suporte de Longo Prazo da Azion (LTS) | ❌ | | Comprimento do contexto | 32k tokens | -| Dados de entrada | Texto+Imagem | +| Dados de entrada | Texto + Imagem | ## Uso diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx index e0a1fa07d4..5dff269399 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx @@ -18,7 +18,7 @@ O **Qwen 2.5 VL AWQ 7B** é um modelo de linguagem e visão que suporta 7 bilhõ | **Versão** | AWQ 7B | | **Categoria do modelo** | VLM | | **Tamanho** | 7B parâmetros | -| **Modelo HuggingFace** | [Qwen/Qwen2.5-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct-AWQ) | +| **Modelo HuggingFace** | [Qwen/Qwen2.5-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct-AWQ) | | **Endpoint compatível com a OpenAI** | [Chat Completions](https://platform.openai.com/docs/api-reference/chat/create) | | **Licença** | [Apache 2.0](https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md) | diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx index 
3123cf3502..08167d3dd5 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx @@ -11,9 +11,7 @@ menu_namespace: AIInferenceMenu import LinkButton from 'azion-webkit/linkbutton'; -**AI Inference** permite que você execute modelos de AI diretamente na infraestrutura altamente distribuída da Azion. - -Com o AI Inference da Azion, você pode integrar capacidades de AI em suas aplicações, aproveitando ferramentas como **Functions**, **Applications**, **Vector Search** e a API da Azion para criar soluções escaláveis, seguras e eficientes. +**AI Inference** permite que você execute modelos de AI diretamente na infraestrutura altamente distribuída da Azion. Você pode integrar capacidades de AI em suas aplicações, aproveitando ferramentas como Functions, Applications, Vector Search e a API da Azion para criar soluções escaláveis, seguras e eficientes. Comece implantando o Template do Starter Kit do AI Inference: @@ -32,7 +30,7 @@ Comece implantando o Template do Starter Kit do AI Inference: Conecte aplicações usando o formato de endpoint compatível com OpenAI da Azion. -### Execute modelos otimizados para o edge +### Execute modelos otimizados para edge - Execute modelos de AI no edge, utilizando a infraestrutura globalmente distribuída da Azion para minimizar a latência e permitir inferência em tempo real. - Acesse um catálogo selecionado de modelos de código aberto, prontos para rodar no Azion Runtime e otimizados para implantação distribuída com baixo consumo de recursos. @@ -40,9 +38,9 @@ Conecte aplicações usando o formato de endpoint compatível com OpenAI da Azio -### Ajuste Fino de Modelos com LoRA +### Ajuste fino de modelos com LoRA -O AI Inference permite que você ajuste, treine e especialize modelos com seus próprios dados e parâmetros. 
Essa capacidade permite otimizar modelos para tarefas específicas, garantindo que sejam eficientes e precisos para as necessidades do seu negócio. +Você pode ajustar, treinar e especializar modelos com seus próprios dados e parâmetros. Essa capacidade permite otimizar modelos para tarefas específicas, garantindo que sejam eficientes e precisos para as necessidades do seu negócio. --- diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/modelos.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/modelos.mdx index fa5c6ad421..55ec783a2b 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/modelos.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/modelos.mdx @@ -12,7 +12,7 @@ import LinkButton from 'azion-webkit/linkbutton'; Os modelos otimizados para o edge da Azion abrangem múltiplos domínios de AI, incluindo geração de texto, análise de imagem, embeddings e mais. Cada modelo é projetado para equilibrar o desempenho e a eficiência de recursos para implementação no edge. -Esta página fornece uma lista de modelos disponíveis para uso no **AI Inference**. Para saber mais, visite a página de [referência do AI Inference](/pt-br/documentacao/produtos/ai/ai-inference/). +Esta página fornece uma lista de modelos disponíveis para uso com AI Inference. Para saber mais, visite a página de [referência do AI Inference](/pt-br/documentacao/produtos/ai/ai-inference/). 
## Modelos disponíveis diff --git a/src/i18n/en/AIInferenceMenu.ts b/src/i18n/en/AIInferenceMenu.ts index a48cb1da42..9f52ab79ef 100644 --- a/src/i18n/en/AIInferenceMenu.ts +++ b/src/i18n/en/AIInferenceMenu.ts @@ -15,7 +15,10 @@ export default [ /// START HERE :::: DO NOT REMOVE the strings above, it's a work around for header on mobile /// { text: 'Overview', header: true, anchor: true, type: 'learn', key: 'overview-aiinference', slug: '/documentation/products/ai/ai-inference/', hasLabel: 'menu.aiinference' }, - //{ text: 'Get Started', header: true, anchor: true, type: 'learn', key: 'get-started-aiinference', slug: '/documentation/products/guides/ai-inference-agent/' }, + + { text: 'Get Started', header: true, type: 'learn', key: 'aiinference/get-started', items: [ + { text: 'Build a simple AI agent', slug: '/documentation/products/guides/ai-inference-agent/', key: 'aiinference/build-agent' }, + ]}, { text: ' Available Models', header: true, type: 'learn', key: 'aiinference.models',slug: '/documentation/products/ai/ai-inference/models/', items: [ { text: 'BAAI/bge reranker v2 m3', slug: '/documentation/products/ai/ai-inference/models/baai-bge-reranker-v2-m3', key: 'aiinference/BAAI/bge-reranker-v2-m3' }, @@ -29,8 +32,8 @@ export default [ ]}, { text: 'Guides', header: true, type: 'learn', key: 'aiinference/guides', items: [ - { text: 'Deploy AI Inference Starter kit', header: true, anchor: true, type: 'learn', key: 'aiinference/starter-kit', slug: '/documentation/products/guides/ai-inference-starter-kit' }, - { text: 'Deploy LangGraph AI Agent Boilerplate', header: true, anchor: true, type: 'learn', key: 'aiinference/langgraph-boilerplate', slug: '/documentation/products/guides/langgraph-ai-agent-boilerplate' }, + { text: 'Deploy AI Inference Starter kit', slug: '/documentation/products/guides/ai-inference-starter-kit/', key: 'aiinference/starter-kit' }, + { text: 'Deploy LangGraph AI Agent Boilerplate', slug: 
'/documentation/products/guides/langgraph-ai-agent-boilerplate/', key: 'aiinference/langgraph-boilerplate' }, ]}, diff --git a/src/i18n/pt-br/AIInferenceMenu.ts b/src/i18n/pt-br/AIInferenceMenu.ts index 114e8c2114..5fbf6afc8c 100644 --- a/src/i18n/pt-br/AIInferenceMenu.ts +++ b/src/i18n/pt-br/AIInferenceMenu.ts @@ -15,7 +15,10 @@ export default [ /// START HERE :::: DO NOT REMOVE the strings above, it's a work around for header on mobile /// { text: 'Visão Geral', header: true, anchor: true, type: 'learn', key: 'overview-aiinference', slug: '/documentacao/produtos/ai/ai-inference/', hasLabel: 'menu.aiinference' }, - //{ text: 'Comece Agora', header: true, anchor: true, type: 'learn', key: 'get-started-aiinference', slug: '/documentacao/produtos/guias/ai-inference-agent/' }, + + { text: 'Comece Agora', header: true, type: 'learn', key: 'aiinference/get-started', items: [ + { text: 'Construa um agente de AI simples', slug: '/documentacao/produtos/guias/ai-inference-agent/', key: 'aiinference/build-agent' }, + ]}, - { text: ' Modelos disponiveis', header: true, type: 'learn', key: 'aiinference.models',slug: '/documentacao/produtos/ai/ai-inference/modelos/', items: [ + { text: ' Modelos disponíveis', header: true, type: 'learn', key: 'aiinference.models',slug: '/documentacao/produtos/ai/ai-inference/modelos/', items: [ { text: 'BAAI/bge reranker v2 m3', slug: '/documentacao/produtos/ai/ai-inference/modelos/baai-bge-reranker-v2-m3/', key: 'aiinference/BAAI/bge-reranker-v2-m3' }, @@ -29,8 +32,8 @@ export default [ ]}, { text: 'Guias', header: true, type: 'learn', key: 'aiinference/guides', items: [ - { text: 'Implemente o AI Inference Starter kit', header: true, anchor: true, type: 'learn', key: 'aiinference/starter-kit', slug: '/documentacao/produtos/guias/ai-inference-starter-kit/' }, - { text: 'Implemente LangGraph AI Agent Boilerplate', header: true, anchor: true, type: 'learn', key: 'aiinference/langgraph-boilerplate', slug: '/documentacao/produtos/guias/langgraph-ai-agent-boilerplate/' }, + { text: 'Implemente o AI Inference Starter kit', slug: '/documentacao/produtos/guias/ai-inference-starter-kit/', key: 
'aiinference/starter-kit' }, + { text: 'Implemente LangGraph AI Agent Boilerplate', slug: '/documentacao/produtos/guias/langgraph-ai-agent-boilerplate/', key: 'aiinference/langgraph-boilerplate' }, ]}, From 5037c8d8769bcdca89ed11e23b52f458482c9225 Mon Sep 17 00:00:00 2001 From: "gabriel.alves" Date: Fri, 22 May 2026 11:37:56 -0300 Subject: [PATCH 09/12] update-05-22-26 --- .../docs/en/pages/guides/ai-inference/quick-start.mdx | 11 ++++++++--- .../guides/langgraph-ai-agent-boilerplate/index.mdx | 2 +- .../ai-models/baai-bge-reranker-v2-m3.mdx | 6 +++--- .../reference/ai-inference/ai-models/internvl3.mdx | 4 ++-- .../ai-inference/ai-models/mistral-3-small.mdx | 8 ++++---- .../ai-inference/ai-models/qwen-2-5-vl-3b.mdx | 10 +++++----- .../ai-inference/ai-models/qwen-2-5-vl-7b.mdx | 10 +++++----- .../ai-inference/ai-models/qwen3-embedding-4b.mdx | 2 +- .../referencia/ai-inference/edge-ai-reference.mdx | 2 +- src/data/availableMenu.ts | 2 +- 10 files changed, 31 insertions(+), 26 deletions(-) diff --git a/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx b/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx index 075c710df5..d4c13c200f 100644 --- a/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx +++ b/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx @@ -108,14 +108,15 @@ Expected response: ## Add conversation memory -To maintain context across messages, add conversation history: +To maintain context across messages, you need to manage conversation history. 
Since edge functions are stateless, you have two options: -```javascript -let conversationHistory = []; +### Option 1: Pass history in the request body +```javascript async function handler(event) { const body = JSON.parse(event.request.body || '{}'); const userMessage = body.message || 'Hello!'; + const conversationHistory = body.history || []; // Add user message to history conversationHistory.push({ @@ -154,6 +155,10 @@ async function handler(event) { addEventListener("fetch", handler); ``` +### Option 2: Use KV Store for persistent sessions + +For persistent conversation history across requests, use [KV Store](/en/documentation/products/store/kv-database/) to store session data with a unique session ID. + --- ## What just happened? diff --git a/src/content/docs/en/pages/guides/langgraph-ai-agent-boilerplate/index.mdx b/src/content/docs/en/pages/guides/langgraph-ai-agent-boilerplate/index.mdx index 4972780f9f..b134738411 100644 --- a/src/content/docs/en/pages/guides/langgraph-ai-agent-boilerplate/index.mdx +++ b/src/content/docs/en/pages/guides/langgraph-ai-agent-boilerplate/index.mdx @@ -78,7 +78,7 @@ Configure your agent to use Azion's AI Inference endpoint: from langchain_openai import ChatOpenAI llm = ChatOpenAI( - base_url="https://api.azion.net/v1", + base_url="https://ai.azion.com/v1", api_key="your-api-key", model="Qwen/Qwen3-30B-A3B-Instruct-2507-FP8" ) diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx index 4ccb27195e..379ab601bb 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx @@ -36,7 +36,7 @@ permalink: /documentation/products/ai/ai-inference/models/baai-bge-reranker-v2-m This is an example of a basic rerank request using this 
model: ```ts -const modelResponse = await Azion.AI.run("baai-bge-reranker-v2-m3", { +const modelResponse = await Azion.AI.run("BAAI/bge-reranker-v2-m3", { "query": "What is deep learning?", "documents": [ "Deep learning is a subset of machine learning that uses neural networks with many layers", @@ -57,7 +57,7 @@ const modelResponse = await Azion.AI.run("baai-bge-reranker-v2-m3", { This is an example of a basic score request using this model: ```ts -const modelResponse = await Azion.AI.run("baai-bge-reranker-v2-m3", { +const modelResponse = await Azion.AI.run("BAAI/bge-reranker-v2-m3", { "text_1": "What is deep learning?", "text_2": [ "Deep learning is a subset of machine learning that uses neural networks with many layers", @@ -78,7 +78,7 @@ Response example: ```json { "id": "rerank-356bf11f0e794f3c8f726bec7ba698bb", - "model": "baai-bge-reranker-v2-m3", + "model": "BAAI/bge-reranker-v2-m3", "usage": { "total_tokens": 78 }, diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/internvl3.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/internvl3.mdx index 5d4de8f99a..74bb0b609c 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/internvl3.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/internvl3.mdx @@ -38,7 +38,7 @@ permalink: /documentation/products/ai/ai-inference/models/internvl3/ This is a basic chat completion example using this model: ```ts -const modelResponse = await Azion.AI.run("opengvlab-internvl3-1b-instruct", { +const modelResponse = await Azion.AI.run("OpenGVLab/InternVL3-1B-Instruct", { "stream": true, "messages": [ { @@ -65,7 +65,7 @@ const modelResponse = await Azion.AI.run("opengvlab-internvl3-1b-instruct", { This is a multimodal example using this model: ```ts -const modelResponse = await Azion.AI.run("opengvlab-internvl3-1b-instruct", { +const modelResponse = await Azion.AI.run("OpenGVLab/InternVL3-1B-Instruct", { "stream": true, 
"messages": [ { diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/mistral-3-small.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/mistral-3-small.mdx index d4389ad7e2..37c86ba6ff 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/mistral-3-small.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/mistral-3-small.mdx @@ -38,7 +38,7 @@ permalink: /documentation/products/ai/ai-inference/models/mistral-3-small/ This is an example of a basic chat completion request using this model: ```ts -const modelResponse = await Azion.AI.run("casperhansen-mistral-small-24b-instruct-2501-awq", { +const modelResponse = await Azion.AI.run("casperhansen/mistral-small-24b-instruct-2501-awq", { "stream": true, "max_tokens": 1024, "messages": [ @@ -69,7 +69,7 @@ Response example: "id": "chatcmpl-e27716424abf4b3f891ff4850470cb09", "object": "chat.completion", "created": 1746821581, - "model": "casperhansen-mistral-small-24b-instruct-2501-awq", + "model": "casperhansen/mistral-small-24b-instruct-2501-awq", "choices": [ { "index": 0, @@ -122,7 +122,7 @@ Response example: This is an example of a tool calling request using this model: ```ts -const modelResponse = await Azion.AI.run("casperhansen-mistral-small-24b-instruct-2501-awq", { +const modelResponse = await Azion.AI.run("casperhansen/mistral-small-24b-instruct-2501-awq", { "stream": true, "max_tokens": 1024, "messages": [ @@ -180,7 +180,7 @@ Response example: "id": "chatcmpl-88affc4730cf4219a06d2b15aad9ad44", "object": "chat.completion", "created": 1746821866, - "model": "qwen-qwen25-vl-3b-instruct-awq", + "model": "casperhansen/mistral-small-24b-instruct-2501-awq", "choices": [ { "index": 0, diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx index f77cc6b606..db66a7fa74 100644 --- 
a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx @@ -38,7 +38,7 @@ permalink: /documentation/products/ai/ai-inference/models/qwen-2-5-vl-3b/ This is a basic chat completion request example using this model: ```ts -const modelResponse = await Azion.AI.run("qwen-qwen25-vl-3b-instruct-awq", { +const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-3B-Instruct-AWQ", { "stream": true, "messages": [ { @@ -67,7 +67,7 @@ Response example: "id": "chatcmpl-e27716424abf4b3f891ff4850470cb09", "object": "chat.completion", "created": 1746821581, - "model": "qwen-qwen25-vl-3b-instruct-awq", + "model": "Qwen/Qwen2.5-VL-3B-Instruct-AWQ", "choices": [ { "index": 0, @@ -119,7 +119,7 @@ Response example: This is a tool calling request example using this model: ```ts -const modelResponse = await Azion.AI.run("qwen-qwen25-vl-3b-instruct-awq", { +const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-3B-Instruct-AWQ", { "stream": true, "messages": [ { @@ -174,7 +174,7 @@ Response example: "id": "chatcmpl-88affc4730cf4219a06d2b15aad9ad44", "object": "chat.completion", "created": 1746821866, - "model": "qwen-qwen25-vl-3b-instruct-awq", + "model": "Qwen/Qwen2.5-VL-3B-Instruct-AWQ", "choices": [ { "index": 0, @@ -236,7 +236,7 @@ Response example: This is a multimodal request example using this model: ```ts -const modelResponse = await Azion.AI.run("qwen-qwen25-vl-3b-instruct-awq", { +const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-3B-Instruct-AWQ", { "stream": true, "messages": [ { diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx index 1858b38512..7c7960b699 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx +++ 
b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx @@ -38,7 +38,7 @@ permalink: /documentation/products/ai/ai-inference/models/qwen-2-5-vl-7b/ This is a basic chat completion example using this model: ```ts -const modelResponse = await Azion.AI.run("qwen-qwen25-vl-7b-instruct-awq", { +const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-7B-Instruct-AWQ", { "stream": true, "messages": [ { @@ -68,7 +68,7 @@ Response example: "id": "chatcmpl-e27716424abf4b3f891ff4850470cb09", "object": "chat.completion", "created": 1746821581, - "model": "qwen-qwen25-vl-7b-instruct-awq", + "model": "Qwen/Qwen2.5-VL-7B-Instruct-AWQ", "choices": [ { "index": 0, @@ -122,7 +122,7 @@ Response example: This is a tool calling example using this model: ```ts -const modelResponse = await Azion.AI.run("qwen-qwen25-vl-7b-instruct-awq", { +const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-7B-Instruct-AWQ", { "stream": true, "messages": [ { @@ -177,7 +177,7 @@ Response example: "id": "chatcmpl-88affc4730cf4219a06d2b15aad9ad44", "object": "chat.completion", "created": 1746821866, - "model": "qwen-qwen25-vl-7b-instruct-awq", + "model": "Qwen/Qwen2.5-VL-7B-Instruct-AWQ", "choices": [ { "index": 0, @@ -241,7 +241,7 @@ Response example: This is a multimodal example using this model: ```ts -const modelResponse = await Azion.AI.run("qwen-qwen25-vl-7b-instruct-awq", { +const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-7B-Instruct-AWQ", { "stream": true, "messages": [ { diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen3-embedding-4b.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen3-embedding-4b.mdx index 904f43400c..50d76764b2 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen3-embedding-4b.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen3-embedding-4b.mdx @@ -50,7 +50,7 @@ Response example: Different 
dimensions can be selected by setting the `dimensions` parameter: ```ts -const modelResponse = await Azion.AI.run("qwen-qwen3-embedding-4b", { +const modelResponse = await Azion.AI.run("Qwen/Qwen3-Embedding-4B", { "input": "The food was delicious and the waiter...", "encoding_format": "float", "dimensions": 256 diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx index 08167d3dd5..3a1d2bbbc3 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx @@ -17,7 +17,7 @@ Comece implantando o Template do Starter Kit do AI Inference: diff --git a/src/data/availableMenu.ts b/src/data/availableMenu.ts index 3f01d610ca..a28011e4f0 100644 --- a/src/data/availableMenu.ts +++ b/src/data/availableMenu.ts @@ -11,6 +11,6 @@ export const availableMenus = [ { name: 'deployMenu', langs: ['en', 'pt-br'] }, { name: 'storeMenu', langs: ['en', 'pt-br'] }, { name: 'libMenu', langs: ['en', 'pt-br'] }, - { name: 'AIInferenceMenu', langs: ['en', 'pt-br'] } + { name: 'AIInferenceMenu', langs: ['en', 'pt-br'] }, { name: 'mcpMenu', langs: ['en', 'pt-br'] } ] From aff34162fa6f38a965a9c3b821443d870a90da54 Mon Sep 17 00:00:00 2001 From: "gabriel.alves" Date: Fri, 22 May 2026 11:56:23 -0300 Subject: [PATCH 10/12] Pt update 05-22-26 --- .../en/pages/guides/ai-inference/quick-start.mdx | 2 +- .../pages/guias/ai-inference/quick-start.mdx | 16 ++++++++++------ .../langgraph-ai-agent-boilerplate/index.mdx | 2 +- .../ai-models/baai-bge-reranker-v2-m3.mdx | 6 +++--- .../ai-inference/ai-models/internvl3.mdx | 4 ++-- .../ai-inference/ai-models/mistral-3-small.mdx | 8 ++++---- .../ai-inference/ai-models/qwen-2-5-vl-3b.mdx | 10 +++++----- .../ai-inference/ai-models/qwen-2-5-vl-7b.mdx | 10 +++++----- 
.../ai-models/qwen3-embedding-4b.mdx | 2 +- 9 files changed, 32 insertions(+), 28 deletions(-) diff --git a/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx b/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx index d4c13c200f..47ca20eb23 100644 --- a/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx +++ b/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx @@ -23,7 +23,7 @@ Build AI agents that think, respond, and act. Agents run on Azion's global edge diff --git a/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx b/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx index aa97bc73d3..bc890a9572 100644 --- a/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx +++ b/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx @@ -30,9 +30,8 @@ Construa agentes de AI que pensam, respondem e agem. Os agentes rodam na rede gl 1. Acesse o [Console da Azion](https://console.azion.com/). 2. Clique em **+ Create**. -3. Procure por **AI Inference Starter Kit** e selecione-o. -4. Digite um nome para sua aplicação, como `meu-primeiro-agente`. -5. Clique em **Deploy**. +3. Digite um nome para sua aplicação, como `meu-primeiro-agente`. +4. Clique em **Deploy**. Isso cria um projeto com: @@ -108,14 +107,15 @@ Resposta esperada: ## Adicionar memória de conversação -Para manter contexto entre mensagens, adicione histórico de conversação: +Para manter contexto entre mensagens, você precisa gerenciar o histórico de conversação. 
Como as edge functions são stateless, você tem duas opções: -```javascript -let conversationHistory = []; +### Opção 1: Passar histórico no corpo da requisição +```javascript async function handler(event) { const body = JSON.parse(event.request.body || '{}'); const userMessage = body.message || 'Olá!'; + const conversationHistory = body.history || []; // Adiciona mensagem do usuário ao histórico conversationHistory.push({ @@ -154,6 +154,10 @@ async function handler(event) { addEventListener("fetch", handler); ``` +### Opção 2: Usar KV Store para sessões persistentes + +Para histórico de conversação persistente entre requisições, use [KV Store](/pt-br/documentacao/produtos/store/kv-database/) para armazenar dados de sessão com um ID de sessão único. + --- ## O que acabou de acontecer? diff --git a/src/content/docs/pt-br/pages/guias/langgraph-ai-agent-boilerplate/index.mdx b/src/content/docs/pt-br/pages/guias/langgraph-ai-agent-boilerplate/index.mdx index a8f888546e..3338591924 100644 --- a/src/content/docs/pt-br/pages/guias/langgraph-ai-agent-boilerplate/index.mdx +++ b/src/content/docs/pt-br/pages/guias/langgraph-ai-agent-boilerplate/index.mdx @@ -78,7 +78,7 @@ Configure seu agente para usar o endpoint de AI Inference da Azion: from langchain_openai import ChatOpenAI llm = ChatOpenAI( - base_url="https://api.azion.net/v1", + base_url="https://ai.azion.com/v1", api_key="sua-api-key", model="Qwen/Qwen3-30B-A3B-Instruct-2507-FP8" ) diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx index 191e346f64..d61f334a09 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx @@ -36,7 +36,7 @@ menu_namespace: AIInferenceMenu Este é um 
exemplo de uma requisição básica de reranking usando este modelo: ```ts -const modelResponse = await Azion.AI.run("baai-bge-reranker-v2-m3", { +const modelResponse = await Azion.AI.run("BAAI/bge-reranker-v2-m3", { "query": "What is deep learning?", "documents": [ "Deep learning is a subset of machine learning that uses neural networks with many layers", @@ -57,7 +57,7 @@ const modelResponse = await Azion.AI.run("baai-bge-reranker-v2-m3", { Este é um exemplo de uma requisição básica de pontuação usando este modelo: ```ts -const modelResponse = await Azion.AI.run("baai-bge-reranker-v2-m3", { +const modelResponse = await Azion.AI.run("BAAI/bge-reranker-v2-m3", { "text_1": "What is deep learning?", "text_2": [ "Deep learning is a subset of machine learning that uses neural networks with many layers", @@ -78,7 +78,7 @@ Exemplo de resposta: ```json { "id": "rerank-356bf11f0e794f3c8f726bec7ba698bb", - "model": "baai-bge-reranker-v2-m3", + "model": "BAAI/bge-reranker-v2-m3", "usage": { "total_tokens": 78 }, diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/internvl3.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/internvl3.mdx index 1975b951ff..da8d048ecd 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/internvl3.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/internvl3.mdx @@ -38,7 +38,7 @@ menu_namespace: AIInferenceMenu Este é um exemplo básico de chat completion usando este modelo: ```ts -const modelResponse = await Azion.AI.run("opengvlab-internvl3-1b-instruct", { +const modelResponse = await Azion.AI.run("OpenGVLab/InternVL3-1B-Instruct", { "stream": true, "messages": [ { @@ -65,7 +65,7 @@ const modelResponse = await Azion.AI.run("opengvlab-internvl3-1b-instruct", { Este é um exemplo de requisição multimodal usando este modelo: ```ts -const modelResponse = await Azion.AI.run("opengvlab-internvl3-1b-instruct", { 
+const modelResponse = await Azion.AI.run("OpenGVLab/InternVL3-1B-Instruct", { "stream": true, "messages": [ { diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/mistral-3-small.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/mistral-3-small.mdx index 1b416aacae..f7215b040c 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/mistral-3-small.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/mistral-3-small.mdx @@ -38,7 +38,7 @@ menu_namespace: AIInferenceMenu Este é um exemplo de uma requisição básica de chat completion usando este modelo: ```ts -const modelResponse = await Azion.AI.run("casperhansen-mistral-small-24b-instruct-2501-awq", { +const modelResponse = await Azion.AI.run("casperhansen/mistral-small-24b-instruct-2501-awq", { "stream": true, "max_tokens": 1024, "messages": [ @@ -69,7 +69,7 @@ Exemplo de resposta: "id": "chatcmpl-e27716424abf4b3f891ff4850470cb09", "object": "chat.completion", "created": 1746821581, - "model": "casperhansen-mistral-small-24b-instruct-2501-awq", + "model": "casperhansen/mistral-small-24b-instruct-2501-awq", "choices": [ { "index": 0, @@ -122,7 +122,7 @@ Exemplo de resposta: Este é um exemplo de uma requisição de Tool Calling usando este modelo: ```ts -const modelResponse = await Azion.AI.run("casperhansen-mistral-small-24b-instruct-2501-awq", { +const modelResponse = await Azion.AI.run("casperhansen/mistral-small-24b-instruct-2501-awq", { "stream": true, "max_tokens": 1024, "messages": [ @@ -180,7 +180,7 @@ Exemplo de resposta: "id": "chatcmpl-88affc4730cf4219a06d2b15aad9ad44", "object": "chat.completion", "created": 1746821866, - "model": "qwen-qwen25-vl-3b-instruct-awq", + "model": "casperhansen/mistral-small-24b-instruct-2501-awq", "choices": [ { "index": 0, diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-3b.mdx 
b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-3b.mdx index 725715e80e..18a22e834e 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-3b.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-3b.mdx @@ -38,7 +38,7 @@ O **Qwen 2.5 VL AWQ 3B** é um modelo de linguagem e visão que oferece capacida Este é um exemplo básico de uma requisição de chat completion usando este modelo: ```ts -const modelResponse = await Azion.AI.run("qwen-qwen25-vl-3b-instruct-awq", { +const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-3B-Instruct-AWQ", { "stream": true, "messages": [ { @@ -67,7 +67,7 @@ Exemplo de resposta: "id": "chatcmpl-e27716424abf4b3f891ff4850470cb09", "object": "chat.completion", "created": 1746821581, - "model": "qwen-qwen25-vl-3b-instruct-awq", + "model": "Qwen/Qwen2.5-VL-3B-Instruct-AWQ", "choices": [ { "index": 0, @@ -119,7 +119,7 @@ Exemplo de resposta: Este é um exemplo de uma requisição de Tool Calling usando este modelo: ```ts -const modelResponse = await Azion.AI.run("qwen-qwen25-vl-3b-instruct-awq", { +const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-3B-Instruct-AWQ", { "stream": true, "messages": [ { @@ -174,7 +174,7 @@ Exemplo de resposta: "id": "chatcmpl-88affc4730cf4219a06d2b15aad9ad44", "object": "chat.completion", "created": 1746821866, - "model": "qwen-qwen25-vl-3b-instruct-awq", + "model": "Qwen/Qwen2.5-VL-3B-Instruct-AWQ", "choices": [ { "index": 0, @@ -236,7 +236,7 @@ Exemplo de resposta: Este é um exemplo de uma requisição multimodal usando este modelo: ```ts -const modelResponse = await Azion.AI.run("qwen-qwen25-vl-3b-instruct-awq", { +const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-3B-Instruct-AWQ", { "stream": true, "messages": [ { diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx 
b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx index 5dff269399..d13a31fca0 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx @@ -38,7 +38,7 @@ O **Qwen 2.5 VL AWQ 7B** é um modelo de linguagem e visão que suporta 7 bilhõ Este é um exemplo básico de uma requisição de chat completion usando este modelo: ```ts -const modelResponse = await Azion.AI.run("qwen-qwen25-vl-7b-instruct-awq", { +const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-7B-Instruct-AWQ", { "stream": true, "messages": [ { @@ -68,7 +68,7 @@ Exemplo de resposta: "id": "chatcmpl-e27716424abf4b3f891ff4850470cb09", "object": "chat.completion", "created": 1746821581, - "model": "qwen-qwen25-vl-7b-instruct-awq", + "model": "Qwen/Qwen2.5-VL-7B-Instruct-AWQ", "choices": [ { "index": 0, @@ -123,7 +123,7 @@ Exemplo de resposta: Este é um exemplo de uma requisição de Tool Calling usando este modelo: ```ts -const modelResponse = await Azion.AI.run("qwen-qwen25-vl-7b-instruct-awq", { +const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-7B-Instruct-AWQ", { "stream": true, "messages": [ { @@ -178,7 +178,7 @@ Exemplo de resposta: "id": "chatcmpl-88affc4730cf4219a06d2b15aad9ad44", "object": "chat.completion", "created": 1746821866, - "model": "qwen-qwen25-vl-7b-instruct-awq", + "model": "Qwen/Qwen2.5-VL-7B-Instruct-AWQ", "choices": [ { "index": 0, @@ -243,7 +243,7 @@ Exemplo de resposta: Este é um exemplo multimodal usando este modelo: ```ts -const modelResponse = await Azion.AI.run("qwen-qwen25-vl-7b-instruct-awq", { +const modelResponse = await Azion.AI.run("Qwen/Qwen2.5-VL-7B-Instruct-AWQ", { "stream": true, "messages": [ { diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen3-embedding-4b.mdx 
b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen3-embedding-4b.mdx index 376c2a25b4..86adb764da 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen3-embedding-4b.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen3-embedding-4b.mdx @@ -50,7 +50,7 @@ Exemplo de resposta: Diferentes dimensões podem ser selecionadas definindo o parâmetro `dimensions`: ```ts -const modelResponse = await Azion.AI.run("qwen-qwen3-embedding-4b", { +const modelResponse = await Azion.AI.run("Qwen/Qwen3-Embedding-4B", { "input": "A comida estava deliciosa e o garçom...", "encoding_format": "float", "dimensions": 256 From 68898154b557e7e0c838c93625cedac74ef1c213 Mon Sep 17 00:00:00 2001 From: "gabriel.alves" Date: Fri, 22 May 2026 12:18:06 -0300 Subject: [PATCH 11/12] AI model change --- .../pages/guides/ai-inference/quick-start.mdx | 129 ++++++++++++++---- .../pages/guias/ai-inference/quick-start.mdx | 129 ++++++++++++++---- 2 files changed, 200 insertions(+), 58 deletions(-) diff --git a/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx b/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx index 47ca20eb23..33894d4457 100644 --- a/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx +++ b/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx @@ -47,35 +47,98 @@ This creates a project with: After deployment, navigate to your function and replace the code with this simple agent: ```javascript -async function handler(event) { - const body = JSON.parse(event.request.body || '{}'); - const userMessage = body.message || 'Hello!'; +async function handleRequest(request) { + // Check if the request is a POST request and has a JSON body + if (request.method !== "POST" || request.headers.get("content-type") !== "application/json") { + return new Response(JSON.stringify({ + error: "Request must be a POST request with JSON body", + }), { + 
status: 400, + headers: { "Content-Type": "application/json" }, + }); + } - const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { - "stream": false, - "messages": [ - { - "role": "system", - "content": "You are a helpful AI assistant. Be concise and friendly." - }, - { - "role": "user", - "content": userMessage - } - ], - "max_tokens": 500 - }); + let input; + try { + input = await request.json(); + } catch (err) { + return new Response(JSON.stringify({ + error: "Invalid JSON in request body", + }), { + status: 400, + headers: { "Content-Type": "application/json" }, + }); + } - return new Response(JSON.stringify({ - response: modelResponse.choices[0].message.content, - model: modelResponse.model, - usage: modelResponse.usage - }), { - headers: { "Content-Type": "application/json" } - }); + // Check if the required "model" field is present in the input + if (!input.hasOwnProperty("model")) { + return new Response(JSON.stringify({ + error: "Missing 'model' field in request body", + }), { + status: 400, + headers: { "Content-Type": "application/json" }, + }); + } + + const model = input["model"]; + + try { + const response = await Azion.AI.run(model, input); + + if (input.stream) { + const { readable, writable } = new TransformStream(); + const writer = writable.getWriter(); + const encoder = new TextEncoder(); + + (async () => { + for await (const chunk of response) { + await writer.write(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`)); + } + await writer.write(encoder.encode("data: [DONE]\n\n")); + await writer.close(); + })(); + + return new Response(readable, { + headers: { "Content-Type": "text/event-stream" }, + }); + } else { + return new Response(JSON.stringify(response), { + headers: { "Content-Type": "application/json" }, + }); + } + } catch (e) { + console.error(`${e.name}: ${e.message}`); + + if (e.message.includes("validation error")) { + return new Response(JSON.stringify({ + error: `Invalid input for ${model}`, + }), { +
status: 400, + headers: { "Content-Type": "application/json" }, + }); + } + + if (e.message.includes("model not found")) { + return new Response(JSON.stringify({ + error: `${model} not found or not allowed`, + }), { + status: 400, + headers: { "Content-Type": "application/json" }, + }); + } + + return new Response(JSON.stringify({ + error: "AI internal error", + }), { + status: 500, + headers: { "Content-Type": "application/json" }, + }); + } } -addEventListener("fetch", handler); +addEventListener("fetch", (event) => { + event.respondWith(handleRequest(event.request)); +}); ``` --- @@ -85,17 +148,25 @@ addEventListener("fetch", handler); Send a POST request to your function's endpoint: ```bash -curl -X POST https://your-function-url.azion.net \ - -H "Content-Type: application/json" \ - -d '{"message": "What is edge computing?"}' +curl -X POST https://your-function-url.azion.net -H "Content-Type: application/json" -d '{"model":"casperhansen/mistral-small-24b-instruct-2501-awq","messages":[{"role":"user","content":"What is edge computing?"}]}' ``` Expected response: ```json { - "response": "Edge computing processes data closer to its source, reducing latency and bandwidth by bringing computation near end users or devices.", + "id": "chatcmpl-123", + "object": "chat.completion", + "created": 1677652288, "model": "Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", + "choices": [{ + "index": 0, + "message": { + "role": "assistant", + "content": "Edge computing processes data closer to its source, reducing latency and bandwidth by bringing computation near end users or devices." 
+ }, + "finish_reason": "stop" + }], "usage": { "prompt_tokens": 22, "completion_tokens": 24, diff --git a/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx b/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx index bc890a9572..643c9b7017 100644 --- a/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx +++ b/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx @@ -46,35 +46,98 @@ Isso cria um projeto com: Após a implantação, navegue até sua função e substitua o código por este agente simples: ```javascript -async function handler(event) { - const body = JSON.parse(event.request.body || '{}'); - const userMessage = body.message || 'Olá!'; +async function handleRequest(request) { + // Verifica se a requisição é POST e tem corpo JSON + if (request.method !== "POST" || request.headers.get("content-type") !== "application/json") { + return new Response(JSON.stringify({ + error: "A requisição deve ser POST com corpo JSON", + }), { + status: 400, + headers: { "Content-Type": "application/json" }, + }); + } - const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { - "stream": false, - "messages": [ - { - "role": "system", - "content": "Você é um assistente de AI útil. Seja conciso e amigável." 
- }, - { - "role": "user", - "content": userMessage - } - ], - "max_tokens": 500 - }); + let input; + try { + input = await request.json(); + } catch (err) { + return new Response(JSON.stringify({ + error: "JSON inválido no corpo da requisição", + }), { + status: 400, + headers: { "Content-Type": "application/json" }, + }); + } - return new Response(JSON.stringify({ - response: modelResponse.choices[0].message.content, - model: modelResponse.model, - usage: modelResponse.usage - }), { - headers: { "Content-Type": "application/json" } - }); + // Verifica se o campo "model" está presente + if (!input.hasOwnProperty("model")) { + return new Response(JSON.stringify({ + error: "Campo 'model' obrigatório não encontrado", + }), { + status: 400, + headers: { "Content-Type": "application/json" }, + }); + } + + const model = input["model"]; + + try { + const response = await Azion.AI.run(model, input); + + if (input.stream) { + const { readable, writable } = new TransformStream(); + const writer = writable.getWriter(); + const encoder = new TextEncoder(); + + (async () => { + for await (const chunk of response) { + await writer.write(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`)); + } + await writer.write(encoder.encode("data: [DONE]\n\n")); + await writer.close(); + })(); + + return new Response(readable, { + headers: { "Content-Type": "text/event-stream" }, + }); + } else { + return new Response(JSON.stringify(response), { + headers: { "Content-Type": "application/json" }, + }); + } + } catch (e) { + console.error(`${e.name}: ${e.message}`); + + if (e.message.includes("validation error")) { + return new Response(JSON.stringify({ + error: `Entrada inválida para ${model}`, + }), { + status: 400, + headers: { "Content-Type": "application/json" }, + }); + } + + if (e.message.includes("model not found")) { + return new Response(JSON.stringify({ + error: `${model} não encontrado ou não permitido`, + }), { + status: 400, + headers: { "Content-Type": "application/json" }, + }); +
} + + return new Response(JSON.stringify({ + error: "Erro interno de AI", + }), { + status: 500, + headers: { "Content-Type": "application/json" }, + }); + } } -addEventListener("fetch", handler); +addEventListener("fetch", (event) => { + event.respondWith(handleRequest(event.request)); +}); ``` --- @@ -84,17 +147,25 @@ addEventListener("fetch", handler); Envie uma requisição POST para o endpoint da sua função: ```bash -curl -X POST https://url-da-sua-funcao.azion.net/v1/chat/completions - -H "Content-Type: application/json" - -d '{"message": "O que é computação de borda?"}' +curl -X POST https://url-da-sua-funcao.azion.net -H "Content-Type: application/json" -d '{"model":"casperhansen/mistral-small-24b-instruct-2501-awq","messages":[{"role":"user","content":"O que é computação de borda?"}]}' ``` Resposta esperada: ```json { - "response": "Computação de borda processa dados mais próximo de sua origem, reduzindo latência e uso de banda ao trazer computação perto dos usuários finais ou dispositivos.", + "id": "chatcmpl-123", + "object": "chat.completion", + "created": 1677652288, "model": "Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", + "choices": [{ + "index": 0, + "message": { + "role": "assistant", + "content": "Computação de borda processa dados mais próximo de sua origem, reduzindo latência e uso de banda ao trazer computação perto dos usuários finais ou dispositivos." 
+ }, + "finish_reason": "stop" + }], "usage": { "prompt_tokens": 22, "completion_tokens": 24, From ec3c549a13f68c9c08a44eb03ad66f38020954a4 Mon Sep 17 00:00:00 2001 From: "gabriel.alves" Date: Fri, 22 May 2026 15:29:00 -0300 Subject: [PATCH 12/12] update model en --- .../pages/guides/ai-inference/quick-start.mdx | 18 ++++++++++-------- .../pages/guias/ai-inference/quick-start.mdx | 13 ++++++++----- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx b/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx index 33894d4457..89dcd478c0 100644 --- a/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx +++ b/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx @@ -21,6 +21,7 @@ Build AI agents that think, respond, and act. Agents run on Azion's global edge ## Create a new project +Click the button below to create a new project with a pre-configured AI agent: +Or follow these steps manually: + 1. Access the [Azion Console](https://console.azion.com/). -2. Click **+ Create**. -3. Search for **AI Inference Starter Kit** and select it. -4. Enter a name for your application, such as `my-first-agent`. -5. Click **Deploy**. +2. Click **+ Create** and select **AI Inference Starter Kit**. +3. Enter a name for your application, such as `my-first-agent`. +4. Click **Deploy**. 
This creates a project with: @@ -145,7 +147,7 @@ addEventListener("fetch", (event) => { ## Test your agent -Send a POST request to your function's endpoint: +Send a POST request to your function's endpoint replacing `https://your-function-url.azion.net` with your function's actual URL: ```bash curl -X POST https://your-function-url.azion.net -H "Content-Type: application/json" -d '{"model":"casperhansen/mistral-small-24b-instruct-2501-awq","messages":[{"role":"user","content":"What is edge computing?"}]}' @@ -158,7 +160,7 @@ Expected response: "id": "chatcmpl-123", "object": "chat.completion", "created": 1677652288, - "model": "Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", + "model": "casperhansen/mistral-small-24b-instruct-2501-awq", "choices": [{ "index": 0, "message": { @@ -195,7 +197,7 @@ async function handler(event) { content: userMessage }); - const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { + const modelResponse = await Azion.AI.run("casperhansen/mistral-small-24b-instruct-2501-awq", { "stream": false, "messages": [ { @@ -289,7 +291,7 @@ async function handler(event) { } ]; - const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { + const modelResponse = await Azion.AI.run("casperhansen/mistral-small-24b-instruct-2501-awq", { "stream": false, "messages": [ { diff --git a/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx b/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx index 643c9b7017..75f4ef87a6 100644 --- a/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx +++ b/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx @@ -21,6 +21,7 @@ Construa agentes de AI que pensam, respondem e agem. Os agentes rodam na rede gl ## Criar um novo projeto +Clique no botão abaixo para criar um novo projeto com um agente de AI pré-configurado: +Ou siga estas etapas manualmente: + 1. Acesse o [Console da Azion](https://console.azion.com/). -2. Clique em **+ Create**. 
+2. Clique em **+ Create** e selecione **AI Inference Starter Kit**. 3. Digite um nome para sua aplicação, como `meu-primeiro-agente`. 4. Clique em **Deploy**. @@ -144,7 +147,7 @@ addEventListener("fetch", (event) => { ## Testar seu agente -Envie uma requisição POST para o endpoint da sua função: +Envie uma requisição POST para o endpoint da sua função substituindo `https://url-da-sua-funcao.azion.net` pela URL real da sua função: ```bash curl -X POST https://url-da-sua-funcao.azion.net -H "Content-Type: application/json" -d '{"model":"casperhansen/mistral-small-24b-instruct-2501-awq","messages":[{"role":"user","content":"O que é computação de borda?"}]}' @@ -157,7 +160,7 @@ Resposta esperada: "id": "chatcmpl-123", "object": "chat.completion", "created": 1677652288, - "model": "Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", + "model": "casperhansen/mistral-small-24b-instruct-2501-awq", "choices": [{ "index": 0, "message": { @@ -194,7 +197,7 @@ async function handler(event) { content: userMessage }); - const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { + const modelResponse = await Azion.AI.run("casperhansen/mistral-small-24b-instruct-2501-awq", { "stream": false, "messages": [ { @@ -288,7 +291,7 @@ async function handler(event) { } ]; - const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { + const modelResponse = await Azion.AI.run("casperhansen/mistral-small-24b-instruct-2501-awq", { "stream": false, "messages": [ {