-
Notifications
You must be signed in to change notification settings - Fork 150
feat: support Qwen3-next on npu device. #989
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
e13f43e
7d7787a
a028513
1be2d5c
75a3417
dc8dd64
0589a7f
8f16375
b06794f
86ac26a
8515dcf
5c89042
e1f3b0c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -286,19 +286,29 @@ void WorkerService::AllocateKVCache( | |
| threadpool_->schedule([this, controller, request, response, done]() mutable { | ||
| brpc::ClosureGuard done_guard(done); | ||
| std::vector<std::vector<int64_t>> kv_cache_shape; | ||
| // Reserve for key, value, and optionally index shape | ||
| kv_cache_shape.reserve(3); | ||
| kv_cache_shape.emplace_back( | ||
| std::vector<int64_t>(request->kv_cache_shape().key_shape().begin(), | ||
| request->kv_cache_shape().key_shape().end())); | ||
| kv_cache_shape.emplace_back( | ||
| std::vector<int64_t>(request->kv_cache_shape().value_shape().begin(), | ||
| request->kv_cache_shape().value_shape().end())); | ||
| const bool has_index_shape = request->kv_cache_shape().index_shape_size() > 0; | ||
| const bool has_conv_shape = request->kv_cache_shape().conv_shape_size() > 0; | ||
| const bool has_ssm_shape = request->kv_cache_shape().ssm_shape_size() > 0; | ||
| CHECK(!(has_index_shape && (has_conv_shape || has_ssm_shape))) | ||
| << "KVCacheShape does not support index_shape with conv/ssm shapes " | ||
| << "simultaneously."; | ||
|
Comment on lines
+292
to
+294
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The |
||
| // Reserve for key, value, and optional extra shapes | ||
| kv_cache_shape.reserve(has_conv_shape || has_ssm_shape ? 4 : 3); | ||
| kv_cache_shape.emplace_back(std::vector<int64_t>( | ||
| request->kv_cache_shape().key_shape().begin(), request->kv_cache_shape().key_shape().end())); | ||
| kv_cache_shape.emplace_back(std::vector<int64_t>( | ||
| request->kv_cache_shape().value_shape().begin(), request->kv_cache_shape().value_shape().end())); | ||
| // add index shape if exists | ||
| if (request->kv_cache_shape().index_shape_size() > 0) { | ||
| kv_cache_shape.emplace_back( | ||
| std::vector<int64_t>(request->kv_cache_shape().index_shape().begin(), | ||
| request->kv_cache_shape().index_shape().end())); | ||
| if (has_index_shape) { | ||
| kv_cache_shape.emplace_back(std::vector<int64_t>( | ||
| request->kv_cache_shape().index_shape().begin(), request->kv_cache_shape().index_shape().end())); | ||
| } else if (has_conv_shape || has_ssm_shape) { | ||
| CHECK(has_conv_shape && has_ssm_shape) | ||
| << "conv_shape and ssm_shape must be provided together."; | ||
|
Comment on lines
+306
to
+307
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The
Comment on lines
+292
to
+307
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The |
||
| kv_cache_shape.emplace_back(std::vector<int64_t>( | ||
| request->kv_cache_shape().conv_shape().begin(), request->kv_cache_shape().conv_shape().end())); | ||
| kv_cache_shape.emplace_back(std::vector<int64_t>( | ||
| request->kv_cache_shape().ssm_shape().begin(), request->kv_cache_shape().ssm_shape().end())); | ||
| } | ||
|
|
||
| auto future = worker_->allocate_kv_cache_async(kv_cache_shape); | ||
|
|
@@ -316,18 +326,34 @@ void WorkerService::AllocateKVCacheWithTransfer( | |
| threadpool_->schedule([this, controller, req, resp, done]() mutable { | ||
| brpc::ClosureGuard done_guard(done); | ||
| std::vector<std::vector<int64_t>> kv_cache_shape; | ||
| kv_cache_shape.reserve(2); | ||
| const auto& shape_req = req->kv_cache_shape(); | ||
| const bool has_index_shape = shape_req.index_shape_size() > 0; | ||
| const bool has_conv_shape = shape_req.conv_shape_size() > 0; | ||
| const bool has_ssm_shape = shape_req.ssm_shape_size() > 0; | ||
| CHECK(!(has_index_shape && (has_conv_shape || has_ssm_shape))) | ||
| << "KVCacheShape does not support index_shape with conv/ssm shapes " | ||
| << "simultaneously."; | ||
|
Comment on lines
+333
to
+335
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The |
||
| kv_cache_shape.reserve(has_conv_shape || has_ssm_shape ? 4 : 3); | ||
| kv_cache_shape.emplace_back( | ||
| std::vector<int64_t>(req->kv_cache_shape().key_shape().begin(), | ||
| req->kv_cache_shape().key_shape().end())); | ||
| std::vector<int64_t>(shape_req.key_shape().begin(), | ||
| shape_req.key_shape().end())); | ||
| kv_cache_shape.emplace_back( | ||
| std::vector<int64_t>(req->kv_cache_shape().value_shape().begin(), | ||
| req->kv_cache_shape().value_shape().end())); | ||
| std::vector<int64_t>(shape_req.value_shape().begin(), | ||
| shape_req.value_shape().end())); | ||
| // add index shape if exists | ||
| if (req->kv_cache_shape().index_shape_size() > 0) { | ||
| if (has_index_shape) { | ||
| kv_cache_shape.emplace_back( | ||
| std::vector<int64_t>(shape_req.index_shape().begin(), | ||
| shape_req.index_shape().end())); | ||
| } else if (has_conv_shape || has_ssm_shape) { | ||
| CHECK(has_conv_shape && has_ssm_shape) | ||
| << "conv_shape and ssm_shape must be provided together."; | ||
|
Comment on lines
+349
to
+350
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The
Comment on lines
+333
to
+350
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The |
||
| kv_cache_shape.emplace_back( | ||
| std::vector<int64_t>(shape_req.conv_shape().begin(), | ||
| shape_req.conv_shape().end())); | ||
| kv_cache_shape.emplace_back( | ||
| std::vector<int64_t>(req->kv_cache_shape().index_shape().begin(), | ||
| req->kv_cache_shape().index_shape().end())); | ||
| std::vector<int64_t>(shape_req.ssm_shape().begin(), | ||
| shape_req.ssm_shape().end())); | ||
| } | ||
|
|
||
| auto future = | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Need to check whether there is a better way to do this