Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions ggml/src/ggml-openvino/ggml-decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1391,7 +1391,6 @@ void GgmlOvDecoder::compute_node_dynamic_dims() {
m_node_dynamic_dims[node] = -1;
if (m_node_dynamic_dims[node->src[1]] != -1) {
auto dynamic_dim_idx = m_node_dynamic_dims[node->src[1]];
auto dynamic_dim_value = node->src[1]->ne[dynamic_dim_idx];
if (dynamic_dim_idx == 0) {
m_node_dynamic_dims[node] = 1;
} else {
Expand All @@ -1404,9 +1403,9 @@ void GgmlOvDecoder::compute_node_dynamic_dims() {
}
}
}
OPENVINO_ASSERT(dynamic_dim_value == node->ne[m_node_dynamic_dims[node]],
"Dynamic dim value mismatch for node: " + std::string(node->name) +
" and its src[1]: " + std::string(node->src[1]->name));
// OPENVINO_ASSERT(dynamic_dim_value == node->ne[m_node_dynamic_dims[node]],
// "Dynamic dim value mismatch for node: " + std::string(node->name) +
// " and its src[1]: " + std::string(node->src[1]->name));
}
break;
case GGML_OP_MUL:
Expand Down Expand Up @@ -1458,8 +1457,8 @@ void GgmlOvDecoder::compute_node_dynamic_dims() {
}
if (m_node_dynamic_dims[node] != -1 && dynamic_dim_value != node->ne[m_node_dynamic_dims[node]]) {
m_node_dynamic_dims[node] = -1;
std::cout << "Warning: Dynamic dim value mismatch for node: " << node->name
<< " and its src[0]: " << node->src[0]->name << std::endl;
// std::cout << "Warning: Dynamic dim value mismatch for node: " << node->name
// << " and its src[0]: " << node->src[0]->name << std::endl;
}
}
break;
Expand Down Expand Up @@ -1562,7 +1561,7 @@ void GgmlOvDecoder::compute_node_dynamic_dims() {
m_node_dynamic_dims[node] = -1;
break;
default:
std::cout << "Doesn't handle node name: " << node->name << " op: " << ggml_op_name(node->op) << std::endl;
// std::cout << "Doesn't handle node name: " << node->name << " op: " << ggml_op_name(node->op) << std::endl;
break;
}
};
Expand Down
15 changes: 15 additions & 0 deletions ggml/src/ggml-openvino/ggml-openvino.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,18 @@ static bool has_view_op_input(const ggml_tensor * op) {
return false;
}

static bool has_non_contiguous_view_input(const ggml_tensor * op) {
for (int i = 0; i < GGML_MAX_SRC; i++) {
if (op->src[i] == nullptr) {
break;
}
if (op->src[i]->op == GGML_OP_VIEW && !ggml_is_contiguous(op->src[i])) {
return true;
}
}
return false;
}

static bool is_supported_flash_attn_pattern(const ggml_tensor * op) {
// pattern of q,k,v should be q->op==PERMUTE, q->src[0]->op==VIEW, q->src[0]->src[0]->view_src==nullptr
for (int i = 0; i < 3; i++) {
Expand Down Expand Up @@ -1156,6 +1168,9 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
// GGML_LOG_WARN("OpenVINO backend does not support op %s with view input\n", ggml_op_name(op->op));
return false;
}
if (op->op == GGML_OP_RMS_NORM && has_non_contiguous_view_input(op)) {
return false;
}
}
}

Expand Down
67 changes: 39 additions & 28 deletions ggml/src/ggml-openvino/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -748,6 +748,43 @@ enum ggml_status naive_compute(ggml_cgraph * cgraph,
}

namespace {
// Builds a densely packed ov::Tensor from a ggml tensor whose memory layout is
// described by byte strides (nb[]) rather than by a contiguous buffer.
//
// The backing tensor (view_src when set, otherwise the tensor itself) is first
// copied to a host staging buffer with ggml_backend_tensor_get(); elements are
// then gathered in i0-fastest order starting at view_offs (0 when there is no
// view_src) and written back-to-back into a new tensor of shape `input_shape`.
//
// @param ggml_decoder  decoder used to map the ggml type to an OpenVINO element type
// @param ggml_tensor   tensor to gather; must use a type with block size 1
// @param input_shape   shape of the returned tensor; must provide room for every
//                      element of `ggml_tensor`
// @return              the newly allocated, filled ov::Tensor
//
// Aborts via GGML_ASSERT when the type is block-quantized, when the destination
// is too small, or when the strides would read past the staged source data.
ov::Tensor make_contiguous_split_input_tensor(std::shared_ptr<GgmlOvDecoder> ggml_decoder,
                                              const struct ggml_tensor * ggml_tensor,
                                              const ov::Shape & input_shape) {
    const size_t element_size = ggml_type_size(ggml_tensor->type);
    const size_t block_size = ggml_blck_size(ggml_tensor->type);

    GGML_ASSERT(block_size == 1 && "non-contiguous split inputs must be plain element types");

    const struct ggml_tensor * source_tensor = ggml_tensor->view_src != nullptr ? ggml_tensor->view_src : ggml_tensor;
    const size_t source_offset = ggml_tensor->view_src != nullptr ? ggml_tensor->view_offs : 0;

    // Stage the whole backing tensor on the host so strided reads are plain pointer arithmetic.
    std::vector<uint8_t> source_data(ggml_nbytes(source_tensor));
    ggml_backend_tensor_get(source_tensor, source_data.data(), 0, source_data.size());

    ov::Tensor input_tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape);

    const size_t ne0 = static_cast<size_t>(ggml_tensor->ne[0]);
    const size_t ne1 = static_cast<size_t>(ggml_tensor->ne[1]);
    const size_t ne2 = static_cast<size_t>(ggml_tensor->ne[2]);
    const size_t ne3 = static_cast<size_t>(ggml_tensor->ne[3]);

    const size_t nb0 = ggml_tensor->nb[0];
    const size_t nb1 = ggml_tensor->nb[1];
    const size_t nb2 = ggml_tensor->nb[2];
    const size_t nb3 = ggml_tensor->nb[3];

    const size_t row_bytes = ne0 * element_size;
    const size_t total_bytes = ne3 * ne2 * ne1 * row_bytes;

    // The loops below write exactly total_bytes; refuse to overrun the destination.
    GGML_ASSERT(input_tensor.get_byte_size() >= total_bytes &&
                "destination tensor is too small for the gathered input");

    if (total_bytes == 0) {
        // Nothing to gather (some dimension is zero).
        return input_tensor;
    }

    // Strides are unsigned, so the element at the highest index in every
    // dimension has the largest byte offset; checking it bounds every read.
    const size_t last_src_offset =
        source_offset + (ne3 - 1) * nb3 + (ne2 - 1) * nb2 + (ne1 - 1) * nb1 + (ne0 - 1) * nb0;
    GGML_ASSERT(last_src_offset + element_size <= source_data.size() &&
                "tensor strides reach past the end of the source data");

    const uint8_t * src_base = source_data.data() + source_offset;
    auto * dst = static_cast<uint8_t *>(input_tensor.data());

    // When elements within a row are already adjacent, a row is one memcpy.
    const bool rows_are_packed = nb0 == element_size;

    for (size_t i3 = 0; i3 < ne3; ++i3) {
        for (size_t i2 = 0; i2 < ne2; ++i2) {
            for (size_t i1 = 0; i1 < ne1; ++i1) {
                const uint8_t * src_row = src_base + i3 * nb3 + i2 * nb2 + i1 * nb1;
                if (rows_are_packed) {
                    std::memcpy(dst, src_row, row_bytes);
                    dst += row_bytes;
                    continue;
                }
                for (size_t i0 = 0; i0 < ne0; ++i0) {
                    std::memcpy(dst, src_row + i0 * nb0, element_size);
                    dst += element_size;
                }
            }
        }
    }

    return input_tensor;
}

ov::Tensor convert_ggml_input_to_ov(std::shared_ptr<GgmlOvDecoder> ggml_decoder, const std::string & name) {
const auto * ggml_tensor = ggml_decoder->get_input_ggml_tensor(name);

Expand All @@ -774,34 +811,8 @@ ov::Tensor convert_ggml_input_to_ov(std::shared_ptr<GgmlOvDecoder> ggml_decoder,
input_shape = ggml_decoder->get_shape(ggml_tensor);
}

// Add explicit strided-copy reconstruction for PERMUTE and VIEW tensors in split
// models: iterate over all 4 dimensions using `nb[]` strides and `view_offs` to
// copy non-contiguous source data into a contiguous `ov::Tensor` buffer
if ((ggml_tensor->op == GGML_OP_PERMUTE) && ggml_decoder->is_splited_model()) {
// Create the OpenVINO input tensor; the data needs to be reconstructed based on the view tensor's shape & strides
ov::Tensor input_tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape);
const auto * src_tensor = ggml_tensor->view_src;
std::vector<uint8_t> data;
auto n_bytes = ggml_nbytes(src_tensor);
data.resize(n_bytes);
ggml_backend_tensor_get(src_tensor, data.data(), 0, n_bytes);

size_t des_index = 0;
for (size_t i0 = 0; i0 < static_cast<size_t>(ggml_tensor->ne[3]); i0++) {
for (size_t i1 = 0; i1 < static_cast<size_t>(ggml_tensor->ne[2]); i1++) {
for (size_t i2 = 0; i2 < static_cast<size_t>(ggml_tensor->ne[1]); i2++) {
for (size_t i3 = 0; i3 < static_cast<size_t>(ggml_tensor->ne[0]); i3++) {
size_t src_index = ggml_tensor->view_offs + i0 * ggml_tensor->nb[3] + i1 * ggml_tensor->nb[2] +
i2 * ggml_tensor->nb[1] + i3 * ggml_tensor->nb[0];

memcpy(static_cast<char *>(input_tensor.data()) + des_index,
reinterpret_cast<const char *>(data.data()) + src_index, ggml_tensor->nb[0]);
des_index += ggml_tensor->nb[0];
}
}
}
}
return input_tensor;
if (ggml_decoder->is_splited_model() && !ggml_is_contiguous(ggml_tensor)) {
return make_contiguous_split_input_tensor(ggml_decoder, ggml_tensor, input_shape);
}

auto input_tensor = ov::Tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape, input_data);
Expand Down
Loading