Support for reshaping ov tensor
jatinwadhwa921 committed Jan 30, 2025
1 parent 732bf1b commit 2a17ad5
Showing 10 changed files with 278 additions and 12 deletions.
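This commit teaches the OpenVINO EP to reshape the OV model to user-supplied input shapes before compilation, validating them against the ONNX graph inputs and, at inference time, against the compiled model's partial shapes. A minimal usage sketch follows. It is not taken from this commit: it assumes the shapes reach SessionContext::shape through a provider option named reshape_input (the name appears in the new error messages below, but the option-parsing files are not expanded in this view), that the value uses an input_name[d1,d2,...] format, and that the build exposes Ort::SessionOptions::AppendExecutionProvider_OpenVINO_V2.

// Hypothetical usage sketch, not part of this commit.
// The "reshape_input" key and its value format are assumptions.
#include <onnxruntime_cxx_api.h>
#include <string>
#include <unordered_map>

int main() {
  Ort::Env env{ORT_LOGGING_LEVEL_WARNING, "ov_reshape_demo"};
  Ort::SessionOptions session_options;

  std::unordered_map<std::string, std::string> ov_options;
  ov_options["device_type"] = "CPU";
  // Assumed format: pin the symbolic input "data" to a concrete 1x3x224x224 shape
  // so the OpenVINO model can be reshaped before compilation.
  ov_options["reshape_input"] = "data[1,3,224,224]";

  session_options.AppendExecutionProvider_OpenVINO_V2(ov_options);
  Ort::Session session{env, ORT_TSTR("model.onnx"), session_options};
  return 0;
}

With a concrete shape supplied this way, the backend manager below skips the symbolic-dims path and compiles a statically shaped OV model.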
45 changes: 44 additions & 1 deletion onnxruntime/core/providers/openvino/backend_manager.cc
@@ -76,6 +76,11 @@ BackendManager::BackendManager(const SessionContext& session_context,
}

const std::vector<const NodeArg*>& graph_inputs = subgraph.GetInputs();

if (!session_context.shape.empty()) {
ValidateInputShapes(session_context_.shape, graph_inputs);
}

for (auto input : graph_inputs) {
auto it = subgraph_context_.input_names.find(input->Name());
if (it == subgraph_context_.input_names.end()) {
@@ -101,7 +106,7 @@ BackendManager::BackendManager(const SessionContext& session_context,
}
std::string device_type = session_context_.device_type;

if (ModelHasSymbolicInputDims(subgraph)) {
if (ModelHasSymbolicInputDims(subgraph) && session_context.shape.empty()) {
subgraph_context_.has_dynamic_input_shape = true;
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims";
if ((session_context_.device_type.find("CPU") != std::string::npos ||
@@ -138,6 +143,7 @@ BackendManager::BackendManager(const SessionContext& session_context,
subgraph_context_,
model_stream);
} catch (const OnnxRuntimeException& ex) {
std::cout<<"It failed in creating a backend"<<std::endl;
std::string exception_str = ex.what();
bool eligible_for_cpu_fallback = device_type.find("NPU") != std::string::npos &&
!session_context_.disable_cpu_fallback &&
@@ -303,6 +309,41 @@ bool BackendManager::ModelHasSymbolicInputDims(const onnxruntime::GraphViewer& s
return has_sym_dims;
}

void BackendManager::ValidateInputShapes(const std::map<std::string, ov::PartialShape>& shape,
const std::vector<const NodeArg*>& graph_inputs) const {

for (const auto& [tensor_name, requested_shape] : shape) {

// Find the matching input in the graph
const NodeArg* graph_input = nullptr;
for (const auto* input : graph_inputs) {
if (input->Name() == tensor_name) {
graph_input = input;
break;
}
}

if (!graph_input) {
ORT_THROW("Input " + tensor_name + " specified in reshape_input does not exist");
}

const ONNX_NAMESPACE::TensorShapeProto* graph_shape = graph_input->Shape();
if (!graph_shape) {
ORT_THROW("Graph input " + tensor_name + " has no shape information");
}

// Check that the dimension counts match
size_t graph_dim_count = graph_shape->dim_size();
size_t requested_dim_count = requested_shape.get_max_shape().size();
if (graph_dim_count != requested_dim_count) {
ORT_THROW("Dimension mismatch for input " + tensor_name +
": graph expects " + std::to_string(graph_dim_count) +
" dimensions but reshape_input specifies " +
std::to_string(requested_dim_count) + " dimensions");
}
}
}

// Check to see if the graph is QDQ
static bool IsQDQGraph(const onnxruntime::GraphViewer& graph_viewer) {
std::unordered_set<std::string> qdq_ops = {"QuantizeLinear", "DequantizeLinear"};
@@ -523,6 +564,8 @@ void BackendManager::Compute(OrtKernelContext* context) {

dynamic_backend->Infer(context);
} else {
std::cout<<" call is being made at line no 585 for infer "<<std::endl;

concrete_backend_->Infer(context);
}
#ifdef OPENVINO_FIL_ENABLED
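For reference, here is a hypothetical sketch of how a reshape_input option string could be turned into the std::map<std::string, ov::PartialShape> that ValidateInputShapes consumes. This helper is not part of the commit; the function name, the input_name[d1,d2,...] entry format, and the delimiter choices are assumptions.

// Hypothetical helper, not from this commit: parses "data[1,3,224,224] mask[1,128]"
// into the map stored in SessionContext::shape.
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include <openvino/core/partial_shape.hpp>

std::map<std::string, ov::PartialShape> ParseReshapeInput(const std::string& option) {
  std::map<std::string, ov::PartialShape> shapes;
  std::stringstream ss(option);
  std::string entry;
  while (std::getline(ss, entry, ' ')) {       // assume space-separated entries
    auto open = entry.find('[');
    auto close = entry.find(']');
    if (open == std::string::npos || close == std::string::npos) continue;
    std::string name = entry.substr(0, open);
    std::string dims = entry.substr(open + 1, close - open - 1);
    std::vector<ov::Dimension> parsed;
    std::stringstream ds(dims);
    std::string dim;
    while (std::getline(ds, dim, ',')) {
      parsed.emplace_back(std::stoll(dim));    // concrete dimension value
    }
    shapes.emplace(name, ov::PartialShape(parsed));
  }
  return shapes;
}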
3 changes: 2 additions & 1 deletion onnxruntime/core/providers/openvino/backend_manager.h
@@ -39,7 +39,8 @@ class BackendManager {

bool ModelHasSymbolicInputDims(const onnxruntime::GraphViewer& subgraph) const;
bool ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const;

void ValidateInputShapes(const std::map<std::string, ov::PartialShape>& shape,
const std::vector<const NodeArg*>& graph_inputs) const;
std::shared_ptr<ONNX_NAMESPACE::ModelProto>
ReWriteBatchDimWithOne(const ONNX_NAMESPACE::ModelProto& model_proto);

12 changes: 8 additions & 4 deletions onnxruntime/core/providers/openvino/backend_utils.cc
@@ -50,6 +50,11 @@ CreateOVModel(const std::string model,
try {
auto ov_model = session_context.ie_core.ReadModel(model, session_context.onnx_model_path_name);

if (!session_context.shape.empty()) {
LOGS_DEFAULT(INFO) << log_tag << "Reshaping the OV model to the user-specified input shape";
ov_model->reshape(session_context.shape);
}

// Check for Constant Folding
if ((session_context.device_type != "NPU") && !subgraph_context.is_wholly_supported_graph) {
ov::pass::ConstantFolding pass_const_obj;
@@ -66,13 +71,12 @@ CreateOVModel(const std::string model,
--index;
}
}
#ifndef NDEBUG
if (IsDebugEnabled()) {

std::string name = ov_model->get_friendly_name();
ov::pass::Serialize serializer(name + ".xml", name + ".bin");
serializer.run_on_model(ov_model);
}
#endif


return ov_model;
} catch (std::string const& msg) {
ORT_THROW(msg);
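The reshape call added to CreateOVModel relies on the OpenVINO ov::Model::reshape overload that takes a map from input tensor name to ov::PartialShape. Below is a standalone sketch of that API, independent of the EP code; the model path and the input name "data" are placeholders.

// Standalone OpenVINO sketch (not from this commit) showing the reshape API
// that session_context.shape is applied through.
#include <map>
#include <memory>
#include <openvino/openvino.hpp>

int main() {
  ov::Core core;
  std::shared_ptr<ov::Model> model = core.read_model("model.onnx");

  // Replace a symbolic/dynamic input shape with a concrete one before compiling.
  std::map<std::string, ov::PartialShape> new_shapes;
  new_shapes["data"] = ov::PartialShape{1, 3, 224, 224};
  model->reshape(new_shapes);

  ov::CompiledModel compiled = core.compile_model(model, "CPU");
  return 0;
}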
120 changes: 116 additions & 4 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -95,6 +95,7 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
} else if (!subgraph_context_.has_external_weights &&
!subgraph_context_.has_dynamic_input_shape &&
!session_context_.export_ep_ctx_blob &&
session_context.shape.empty() &&
auto_unified_compile){
// Unified OV compile_model is efficient when ov model caching is enabled
// Unified OV compile_model API is supported with AUTO from version 2024.3 and above
@@ -344,6 +345,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
int input_idx = 0;
for (auto input_info_iter = graph_input_info.begin();
input_info_iter != graph_input_info.end(); ++input_info_iter) {
std::cout<<"does it reahces here again as index increments"<<std::endl;
auto input_names = input_info_iter->get_names();
std::string onnx_input_name;
std::string input_name;
@@ -407,42 +409,106 @@
} else {
auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));
ort_tensor_key_t ort_tensor_key{input_name};

const auto& ort_dims = tensor.GetTensorTypeAndShapeInfo().GetShape();
std::cout << "ORT Tensor Shape for " << input_name << ": [";
for (size_t i = 0; i < ort_dims.size(); ++i) {
std::cout << ort_dims[i];
if (i < ort_dims.size() - 1) std::cout << ",";
}
std::cout << "]" << std::endl;
std::cout<<"at line no 419"<<std::endl;
auto it = ort_ov_tensor_map.find(ort_tensor_key);
std::cout<<"at line no 421"<<std::endl;
if ((it == ort_ov_tensor_map.end()) ||
(it != ort_ov_tensor_map.end() && (it->second.ort_ptr != tensor.GetTensorRawData()))) {

ov_tensor_data_t ov_tensor_data;
std::cout<<"at line no 426"<<std::endl;
const auto& input = graph_input_info.at(input_idx);
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input.get_shape(),
std::cout<<"at line no 428"<<std::endl;
//auto ov_shape = input.get_shape();

if (!session_context_.shape.empty()) {
ov::PartialShape partial_shape = input.get_partial_shape();
ValidateOrtDimsAgainstPartialShape(ort_dims, partial_shape);
ov::Shape concrete_shape;
for (size_t i = 0; i < ort_dims.size(); ++i) {
concrete_shape.push_back(ort_dims[i]);
}
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), concrete_shape,
const_cast<void*>(tensor.GetTensorRawData()));
} else {
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input.get_shape(),
const_cast<void*>(tensor.GetTensorRawData()));
}
std::cout<<"at line no 440"<<std::endl;
ov_tensor_data.ort_ptr = tensor.GetTensorRawData();
ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;

std::cout<<"at line no 463"<<std::endl;
try {
std::cout<<"at line no 465 in try block"<<std::endl;
infer_request->SetTensor(std::move(input_name), ov_tensor_data.tensor_ptr);
} catch (const char* msg) {
std::cout<<"just making sure not in the catch block"<<std::endl;
ORT_THROW(msg);
}
}
}
}
std::cout<<"at lijne no 473 in basic_backend"<<std::endl;
input_idx++;
}
if (session_context_.device_type.find("NPU") != std::string::npos) {
// Set the output blob as remote blob
auto graph_output_info = exe_network_.Get().outputs();


for (const auto& output : graph_output_info) {
auto partial_shape = output.get_partial_shape();

std::cout << "Output partial shape: ";

if (partial_shape.is_dynamic()) {
std::cout << "Dynamic shape: " << partial_shape << std::endl;
// Check individual dimensions
for (int i = 0; i < partial_shape.rank().get_length(); i++) {
auto dim = partial_shape[i];
if (dim.is_dynamic()) {
std::cout << "Dimension " << i << " is dynamic" << std::endl;
} else {
std::cout << "Dimension " << i << ": " << dim.get_length() << std::endl;
}
}
} else {
auto static_shape = partial_shape.get_shape();
std::cout << "Static shape: [";
for (size_t i = 0; i < static_shape.size(); ++i) {
std::cout << static_shape[i];
if (i < static_shape.size() - 1) {
std::cout << ", ";
}
}
std::cout << "]" << std::endl;
}
}

auto output_idx = 0;
for (auto output_info_iter = graph_output_info.begin();
output_info_iter != graph_output_info.end(); ++output_info_iter) {
auto output_names = output_info_iter->get_names();
std::string onnx_output_name;
std::string output_name;
std::cout<<" am i reaching line no 514"<<std::endl;
// using the output name retrieved from ONNX original to match with the output names returned by OV tensors
for (auto it = subgraph_context_.output_names.begin(); it != subgraph_context_.output_names.end(); ++it) {
onnx_output_name = it->first;
if (output_names.find(onnx_output_name) != output_names.end()) {
// Assigning the output_name
output_name = it->first;
std::cout<<" am i reaching line no 521"<<std::endl;
break;
}
}
@@ -452,18 +518,36 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
infer_request,
output_name,
subgraph_context_.output_names);
const auto& ort_dims = tensor.GetTensorTypeAndShapeInfo().GetShape();
ort_tensor_key_t ort_tensor_key{output_name};
const auto& it = ort_ov_tensor_map.find(ort_tensor_key);
if ((it == ort_ov_tensor_map.end()) ||
(it != ort_ov_tensor_map.end() && (it->second.ort_ptr != tensor.GetTensorRawData()))) {
ov_tensor_data_t ov_tensor_data;
const auto& output = graph_output_info.at(output_idx);

ov::Shape concrete_shape;
for (const auto& dim : ort_dims) {
concrete_shape.push_back(static_cast<size_t>(dim));
}

std::cout << "Output concrete shape being used for ov to ort tenosr mapping: [";
for (size_t i = 0; i < concrete_shape.size(); ++i) {
std::cout << concrete_shape[i];
if (i < concrete_shape.size() - 1) std::cout << ", ";
}
std::cout << "]" << std::endl;



std::cout<<" am i reaching line no 537"<<std::endl;
ov_tensor_data.ort_ptr = tensor.GetTensorRawData();
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output.get_element_type(), concrete_shape,
const_cast<void*>(tensor.GetTensorRawData()));
ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;

try {
std::cout<<" am i reaching line no 544"<<std::endl;
infer_request->SetTensor(std::move(output_name), ov_tensor_data.tensor_ptr);
} catch (const char* msg) {
ORT_THROW(msg);
@@ -474,6 +558,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
}

// Start Async inference
std::cout<<" am i reaching line no 555"<<std::endl;
infer_request->StartAsync();
} catch (const char* msg) {
ORT_THROW(msg);
@@ -663,6 +748,33 @@ void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRe
}
}


void BasicBackend::ValidateOrtDimsAgainstPartialShape(const std::vector<int64_t>& ort_dims, const ov::PartialShape& partial_shape) {
// Check if the number of dimensions matches
if (static_cast<int64_t>(ort_dims.size()) != partial_shape.rank().get_length()) {
ORT_THROW("Mismatch in number of dimensions between ORT tensor and OpenVINO PartialShape.");
}

// Validate each dimension
for (size_t i = 0; i < ort_dims.size(); ++i) {
const auto& ov_dim = partial_shape[i]; // OpenVINO dimension at index i
int64_t ort_dim = ort_dims[i]; // ORT dimension at index i

if (ov_dim.is_dynamic()) {
// If the OpenVINO dimension is dynamic, any ORT dimension is allowed
continue;
}

// Check if the ORT dimension is within the specified range
int64_t min_dim = ov_dim.get_min_length();
int64_t max_dim = ov_dim.get_max_length();
if (ort_dim < min_dim || ort_dim > max_dim) {
ORT_THROW("ORT dimension " + std::to_string(ort_dim) + " at index " + std::to_string(i) +
" is outside the range allowed by the OpenVINO PartialShape");
}
}
}


void BasicBackend::Infer(OrtKernelContext* ctx) {
// Preliminary Thread safety mechanism
// currently allows a maximum of 8 Infer request's to parallel execute at the same time
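For illustration, here is a simplified, self-contained analogue of the validation helper added above, showing how dynamic and static ov::Dimension values are treated against a runtime ORT shape. It returns a bool instead of throwing and is not the EP implementation.

// Simplified sketch (not the EP code): dynamic dimensions accept any runtime
// value, static dimensions must match exactly.
#include <cstdint>
#include <iostream>
#include <vector>
#include <openvino/core/partial_shape.hpp>

bool DimsFitPartialShape(const std::vector<int64_t>& ort_dims, const ov::PartialShape& ps) {
  if (static_cast<int64_t>(ort_dims.size()) != ps.rank().get_length()) return false;
  for (size_t i = 0; i < ort_dims.size(); ++i) {
    const auto& d = ps[i];
    if (d.is_dynamic()) continue;                      // dynamic: any runtime value is accepted
    if (ort_dims[i] != d.get_length()) return false;   // static: must match exactly
  }
  return true;
}

int main() {
  // Dynamic batch dimension, fixed channel/spatial dimensions.
  ov::PartialShape ps{ov::Dimension::dynamic(), 3, 224, 224};
  std::cout << DimsFitPartialShape({4, 3, 224, 224}, ps) << "\n";  // prints 1
  std::cout << DimsFitPartialShape({4, 3, 128, 128}, ps) << "\n";  // prints 0
  return 0;
}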
onnxruntime/core/providers/openvino/backends/basic_backend.h
@@ -47,6 +47,7 @@ class BasicBackend : public IBackend {
void EnableGPUThrottling(ov::AnyMap& device_config);
void EnableStreams();
void SetNumThreads(ov::AnyMap& device_config);
void ValidateOrtDimsAgainstPartialShape(const std::vector<int64_t>& ort_dims, const ov::PartialShape& partial_shape);
void StartAsyncInference(Ort::KernelContext& context, std::shared_ptr<OVInferRequest> infer_request);

#ifdef IO_BUFFER_ENABLED
1 change: 1 addition & 0 deletions onnxruntime/core/providers/openvino/contexts.h
@@ -26,6 +26,7 @@ struct SessionContext {
std::string device_type;
std::string precision_str;
std::filesystem::path cache_dir;
std::map<std::string, ov::PartialShape> shape;
std::map<std::string, ov::AnyMap> load_config;
std::string model_priority = "DEFAULT";
int num_streams;
@@ -33,6 +33,7 @@ openvino_ep::SessionContext GetSessionContext(const OpenVINOExecutionProviderInf
.device_type = info.device_type_,
.precision_str = info.precision_,
.cache_dir = info.cache_dir_,
.shape = info.shape_,
.load_config = info.load_config_,
.model_priority = info.model_priority_,
.num_streams = info.num_streams_,
@@ -82,6 +82,7 @@ struct OpenVINOExecutionProviderInfo {
size_t num_of_threads_{0};
std::map<std::string, ov::AnyMap> load_config_{};
std::string cache_dir_{""};
std::map<std::string, ov::PartialShape> shape_{};
std::string model_priority_{""};
int num_streams_{1};
void* context_{NULL};
@@ -98,6 +99,7 @@ struct OpenVINOExecutionProviderInfo {
size_t num_of_threads,
const std::map<std::string, ov::AnyMap>& load_config,
const std::string& cache_dir,
const std::map<std::string, ov::PartialShape>& shape,
const std::string& model_priority, int num_streams,
void* context, bool enable_opencl_throttling,
bool disable_dynamic_shapes, bool export_ep_ctx_blob,
@@ -107,6 +109,7 @@ struct OpenVINOExecutionProviderInfo {
num_of_threads_(num_of_threads),
load_config_(std::move(load_config)),
cache_dir_(std::move(cache_dir)),
shape_(std::move(shape)),
model_priority_(std::move(model_priority)),
num_streams_(num_streams),
context_(context),
(Remaining changed files are not expanded in this view.)