Support for reshaping ov tensor
jatinwadhwa921 committed Jan 30, 2025
1 parent 732bf1b commit 2a17ad5
Showing 10 changed files with 278 additions and 12 deletions.
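This commit teaches the OpenVINO EP to reshape the OV model to user-supplied input shapes before compilation, validating them against the ONNX graph inputs and, at inference time, against the compiled model's partial shapes. A minimal usage sketch follows. It is not taken from this commit: it assumes the shapes reach SessionContext::shape through a provider option named reshape_input (the name appears in the new error messages below, but the option-parsing files are not expanded in this view), that the value uses an input_name[d1,d2,...] format, and that the build exposes Ort::SessionOptions::AppendExecutionProvider_OpenVINO_V2.

// Hypothetical usage sketch, not part of this commit.
// The "reshape_input" key and its value format are assumptions.
#include <onnxruntime_cxx_api.h>
#include <string>
#include <unordered_map>

int main() {
  Ort::Env env{ORT_LOGGING_LEVEL_WARNING, "ov_reshape_demo"};
  Ort::SessionOptions session_options;

  std::unordered_map<std::string, std::string> ov_options;
  ov_options["device_type"] = "CPU";
  // Assumed format: pin the symbolic input "data" to a concrete 1x3x224x224 shape
  // so the OpenVINO model can be reshaped before compilation.
  ov_options["reshape_input"] = "data[1,3,224,224]";

  session_options.AppendExecutionProvider_OpenVINO_V2(ov_options);
  Ort::Session session{env, ORT_TSTR("model.onnx"), session_options};
  return 0;
}

With a concrete shape supplied this way, the backend manager below skips the symbolic-dims path and compiles a statically shaped OV model.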
45 changes: 44 additions & 1 deletion onnxruntime/core/providers/openvino/backend_manager.cc
@@ -76,6 +76,11 @@ BackendManager::BackendManager(const SessionContext& session_context,
}

const std::vector<const NodeArg*>& graph_inputs = subgraph.GetInputs();

if (!session_context.shape.empty()) {
ValidateInputShapes(session_context_.shape, graph_inputs);
}

for (auto input : graph_inputs) {
auto it = subgraph_context_.input_names.find(input->Name());
if (it == subgraph_context_.input_names.end()) {
@@ -101,7 +106,7 @@ BackendManager::BackendManager(const SessionContext& session_context,
}
std::string device_type = session_context_.device_type;

if (ModelHasSymbolicInputDims(subgraph)) {
if (ModelHasSymbolicInputDims(subgraph) && session_context.shape.empty()) {
subgraph_context_.has_dynamic_input_shape = true;
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims";
if ((session_context_.device_type.find("CPU") != std::string::npos ||
@@ -138,6 +143,7 @@ BackendManager::BackendManager(const SessionContext& session_context,
subgraph_context_,
model_stream);
} catch (const OnnxRuntimeException& ex) {
std::cout<<"It failed in creating a backend"<<std::endl;
std::string exception_str = ex.what();
bool eligible_for_cpu_fallback = device_type.find("NPU") != std::string::npos &&
!session_context_.disable_cpu_fallback &&
@@ -303,6 +309,41 @@ bool BackendManager::ModelHasSymbolicInputDims(const onnxruntime::GraphViewer& s
return has_sym_dims;
}

void BackendManager::ValidateInputShapes(const std::map<std::string, ov::PartialShape>& shape,
const std::vector<const NodeArg*>& graph_inputs) const {

for (const auto& [tensor_name, requested_shape] : shape) {

// Find the matching input in the graph
const NodeArg* graph_input = nullptr;
for (const auto* input : graph_inputs) {
if (input->Name() == tensor_name) {
graph_input = input;
break;
}
}

if (!graph_input) {
ORT_THROW("Input " + tensor_name + " specified in reshape_input does not exist");
}

const ONNX_NAMESPACE::TensorShapeProto* graph_shape = graph_input->Shape();
if (!graph_shape) {
ORT_THROW("Graph input " + tensor_name + " has no shape information");
}

// Check that the dimension counts match
size_t graph_dim_count = graph_shape->dim_size();
size_t requested_dim_count = requested_shape.get_max_shape().size();
if (graph_dim_count != requested_dim_count) {
ORT_THROW("Dimension mismatch for input " + tensor_name +
": graph expects " + std::to_string(graph_dim_count) +
" dimensions but reshape_input specifies " +
std::to_string(requested_dim_count) + " dimensions");
}
}
}

// Check to see if the graph is QDQ
static bool IsQDQGraph(const onnxruntime::GraphViewer& graph_viewer) {
std::unordered_set<std::string> qdq_ops = {"QuantizeLinear", "DequantizeLinear"};
@@ -523,6 +564,8 @@ void BackendManager::Compute(OrtKernelContext* context) {

dynamic_backend->Infer(context);
} else {
std::cout<<" call is being made at line no 585 for infer "<<std::endl;

concrete_backend_->Infer(context);
}
#ifdef OPENVINO_FIL_ENABLED
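For reference, here is a hypothetical sketch of how a reshape_input option string could be turned into the std::map<std::string, ov::PartialShape> that ValidateInputShapes consumes. This helper is not part of the commit; the function name, the input_name[d1,d2,...] entry format, and the delimiter choices are assumptions.

// Hypothetical helper, not from this commit: parses "data[1,3,224,224] mask[1,128]"
// into the map stored in SessionContext::shape.
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include <openvino/core/partial_shape.hpp>

std::map<std::string, ov::PartialShape> ParseReshapeInput(const std::string& option) {
  std::map<std::string, ov::PartialShape> shapes;
  std::stringstream ss(option);
  std::string entry;
  while (std::getline(ss, entry, ' ')) {       // assume space-separated entries
    auto open = entry.find('[');
    auto close = entry.find(']');
    if (open == std::string::npos || close == std::string::npos) continue;
    std::string name = entry.substr(0, open);
    std::string dims = entry.substr(open + 1, close - open - 1);
    std::vector<ov::Dimension> parsed;
    std::stringstream ds(dims);
    std::string dim;
    while (std::getline(ds, dim, ',')) {
      parsed.emplace_back(std::stoll(dim));    // concrete dimension value
    }
    shapes.emplace(name, ov::PartialShape(parsed));
  }
  return shapes;
}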
3 changes: 2 additions & 1 deletion onnxruntime/core/providers/openvino/backend_manager.h
@@ -39,7 +39,8 @@ class BackendManager {

bool ModelHasSymbolicInputDims(const onnxruntime::GraphViewer& subgraph) const;
bool ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const;

void ValidateInputShapes(const std::map<std::string, ov::PartialShape>& shape,
const std::vector<const NodeArg*>& graph_inputs) const;
std::shared_ptr<ONNX_NAMESPACE::ModelProto>
ReWriteBatchDimWithOne(const ONNX_NAMESPACE::ModelProto& model_proto);

12 changes: 8 additions & 4 deletions onnxruntime/core/providers/openvino/backend_utils.cc
@@ -50,6 +50,11 @@ CreateOVModel(const std::string model,
try {
auto ov_model = session_context.ie_core.ReadModel(model, session_context.onnx_model_path_name);

if (!session_context.shape.empty()) {
LOGS_DEFAULT(INFO) << log_tag << "Reshaping the OV model to the user-specified input shape";
ov_model->reshape(session_context.shape);
}

// Check for Constant Folding
if ((session_context.device_type != "NPU") && !subgraph_context.is_wholly_supported_graph) {
ov::pass::ConstantFolding pass_const_obj;
@@ -66,13 +71,12 @@ CreateOVModel(const std::string model,
--index;
}
}
#ifndef NDEBUG
if (IsDebugEnabled()) {

std::string name = ov_model->get_friendly_name();
ov::pass::Serialize serializer(name + ".xml", name + ".bin");
serializer.run_on_model(ov_model);
}
#endif


return ov_model;
} catch (std::string const& msg) {
ORT_THROW(msg);
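The reshape call added to CreateOVModel relies on the OpenVINO ov::Model::reshape overload that takes a map from input tensor name to ov::PartialShape. Below is a standalone sketch of that API, independent of the EP code; the model path and the input name "data" are placeholders.

// Standalone OpenVINO sketch (not from this commit) showing the reshape API
// that session_context.shape is applied through.
#include <map>
#include <memory>
#include <openvino/openvino.hpp>

int main() {
  ov::Core core;
  std::shared_ptr<ov::Model> model = core.read_model("model.onnx");

  // Replace a symbolic/dynamic input shape with a concrete one before compiling.
  std::map<std::string, ov::PartialShape> new_shapes;
  new_shapes["data"] = ov::PartialShape{1, 3, 224, 224};
  model->reshape(new_shapes);

  ov::CompiledModel compiled = core.compile_model(model, "CPU");
  return 0;
}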
120 changes: 116 additions & 4 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -95,6 +95,7 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
} else if (!subgraph_context_.has_external_weights &&
!subgraph_context_.has_dynamic_input_shape &&
!session_context_.export_ep_ctx_blob &&
session_context.shape.empty() &&
auto_unified_compile){
// Unified OV compile_model is efficient when ov model caching is enabled
// Unified OV compile_model API is supported with AUTO from version 2024.3 and above
@@ -344,6 +345,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
int input_idx = 0;
for (auto input_info_iter = graph_input_info.begin();
input_info_iter != graph_input_info.end(); ++input_info_iter) {
std::cout<<"does it reahces here again as index increments"<<std::endl;
auto input_names = input_info_iter->get_names();
std::string onnx_input_name;
std::string input_name;
@@ -407,42 +409,106 @@
} else {
auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));
ort_tensor_key_t ort_tensor_key{input_name};

const auto& ort_dims = tensor.GetTensorTypeAndShapeInfo().GetShape();
std::cout << "ORT Tensor Shape for " << input_name << ": [";
for (size_t i = 0; i < ort_dims.size(); ++i) {
std::cout << ort_dims[i];
if (i < ort_dims.size() - 1) std::cout << ",";
}
std::cout << "]" << std::endl;
std::cout<<"at line no 419"<<std::endl;
auto it = ort_ov_tensor_map.find(ort_tensor_key);
std::cout<<"at line no 421"<<std::endl;
if ((it == ort_ov_tensor_map.end()) ||
(it != ort_ov_tensor_map.end() && (it->second.ort_ptr != tensor.GetTensorRawData()))) {

ov_tensor_data_t ov_tensor_data;
std::cout<<"at line no 426"<<std::endl;
const auto& input = graph_input_info.at(input_idx);
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input.get_shape(),
std::cout<<"at line no 428"<<std::endl;
//auto ov_shape = input.get_shape();

if (!session_context_.shape.empty()) {
ov::PartialShape partial_shape = input.get_partial_shape();
ValidateOrtDimsAgainstPartialShape(ort_dims, partial_shape);
ov::Shape concrete_shape;
for (size_t i = 0; i < ort_dims.size(); ++i) {
concrete_shape.push_back(ort_dims[i]);
}
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), concrete_shape,
const_cast<void*>(tensor.GetTensorRawData()));
} else {
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input.get_shape(),
const_cast<void*>(tensor.GetTensorRawData()));
}
std::cout<<"at line no 440"<<std::endl;
ov_tensor_data.ort_ptr = tensor.GetTensorRawData();
ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;

std::cout<<"at line no 463"<<std::endl;
try {
std::cout<<"at line no 465 in try block"<<std::endl;
infer_request->SetTensor(std::move(input_name), ov_tensor_data.tensor_ptr);
} catch (const char* msg) {
std::cout<<"just making sure not in the catch block"<<std::endl;
ORT_THROW(msg);
}
}
}
}
std::cout<<"at lijne no 473 in basic_backend"<<std::endl;
input_idx++;
}
if (session_context_.device_type.find("NPU") != std::string::npos) {
// Set the output blob as remote blob
auto graph_output_info = exe_network_.Get().outputs();


for (const auto& output : graph_output_info) {
auto partial_shape = output.get_partial_shape();

std::cout << "Output partial shape: ";

if (partial_shape.is_dynamic()) {
std::cout << "Dynamic shape: " << partial_shape << std::endl;
// Check individual dimensions
for (int i = 0; i < partial_shape.rank().get_length(); i++) {
auto dim = partial_shape[i];
if (dim.is_dynamic()) {
std::cout << "Dimension " << i << " is dynamic" << std::endl;
} else {
std::cout << "Dimension " << i << ": " << dim.get_length() << std::endl;
}
}
} else {
auto static_shape = partial_shape.get_shape();
std::cout << "Static shape: [";
for (size_t i = 0; i < static_shape.size(); ++i) {
std::cout << static_shape[i];
if (i < static_shape.size() - 1) {
std::cout << ", ";
}
}
std::cout << "]" << std::endl;
}
}

auto output_idx = 0;
for (auto output_info_iter = graph_output_info.begin();
output_info_iter != graph_output_info.end(); ++output_info_iter) {
auto output_names = output_info_iter->get_names();
std::string onnx_output_name;
std::string output_name;
std::cout<<" am i reaching line no 514"<<std::endl;
// using the output name retrieved from ONNX original to match with the output names returned by OV tensors
for (auto it = subgraph_context_.output_names.begin(); it != subgraph_context_.output_names.end(); ++it) {
onnx_output_name = it->first;
if (output_names.find(onnx_output_name) != output_names.end()) {
// Assigning the output_name
output_name = it->first;
std::cout<<" am i reaching line no 521"<<std::endl;
break;
}
}
@@ -452,18 +518,36 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
infer_request,
output_name,
subgraph_context_.output_names);
const auto& ort_dims = tensor.GetTensorTypeAndShapeInfo().GetShape();
ort_tensor_key_t ort_tensor_key{output_name};
const auto& it = ort_ov_tensor_map.find(ort_tensor_key);
if ((it == ort_ov_tensor_map.end()) ||
(it != ort_ov_tensor_map.end() && (it->second.ort_ptr != tensor.GetTensorRawData()))) {
ov_tensor_data_t ov_tensor_data;
const auto& output = graph_output_info.at(output_idx);

ov::Shape concrete_shape;
for (const auto& dim : ort_dims) {
concrete_shape.push_back(static_cast<size_t>(dim));
}

std::cout << "Output concrete shape being used for ov to ort tenosr mapping: [";
for (size_t i = 0; i < concrete_shape.size(); ++i) {
std::cout << concrete_shape[i];
if (i < concrete_shape.size() - 1) std::cout << ", ";
}
std::cout << "]" << std::endl;



std::cout<<" am i reaching line no 537"<<std::endl;
ov_tensor_data.ort_ptr = tensor.GetTensorRawData();
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output.get_element_type(), concrete_shape,
const_cast<void*>(tensor.GetTensorRawData()));
ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;

try {
std::cout<<" am i reaching line no 544"<<std::endl;
infer_request->SetTensor(std::move(output_name), ov_tensor_data.tensor_ptr);
} catch (const char* msg) {
ORT_THROW(msg);
@@ -474,6 +558,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
}

// Start Async inference
std::cout<<" am i reaching line no 555"<<std::endl;
infer_request->StartAsync();
} catch (const char* msg) {
ORT_THROW(msg);
@@ -663,6 +748,33 @@ void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRe
}
}


void BasicBackend::ValidateOrtDimsAgainstPartialShape(const std::vector<int64_t>& ort_dims, const ov::PartialShape& partial_shape) {
// Check if the number of dimensions matches
if (static_cast<int64_t>(ort_dims.size()) != partial_shape.rank().get_length()) {
ORT_THROW("Mismatch in number of dimensions between ORT tensor and OpenVINO PartialShape.");
}

// Validate each dimension
for (size_t i = 0; i < ort_dims.size(); ++i) {
const auto& ov_dim = partial_shape[i]; // OpenVINO dimension at index i
int64_t ort_dim = ort_dims[i]; // ORT dimension at index i

if (ov_dim.is_dynamic()) {
// If the OpenVINO dimension is dynamic, any ORT dimension is allowed
continue;
}

// Check if the ORT dimension is within the specified range
int64_t min_dim = ov_dim.get_min_length();
int64_t max_dim = ov_dim.get_max_length();
if (ort_dim < min_dim || ort_dim > max_dim) {
ORT_THROW("ORT dimension " + std::to_string(ort_dim) + " at index " + std::to_string(i) +
" is outside the range allowed by the OpenVINO PartialShape");
}
}
}


void BasicBackend::Infer(OrtKernelContext* ctx) {
// Preliminary Thread safety mechanism
// currently allows a maximum of 8 Infer request's to parallel execute at the same time
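For illustration, here is a simplified, self-contained analogue of the validation helper added above, showing how dynamic and static ov::Dimension values are treated against a runtime ORT shape. It returns a bool instead of throwing and is not the EP implementation.

// Simplified sketch (not the EP code): dynamic dimensions accept any runtime
// value, static dimensions must match exactly.
#include <cstdint>
#include <iostream>
#include <vector>
#include <openvino/core/partial_shape.hpp>

bool DimsFitPartialShape(const std::vector<int64_t>& ort_dims, const ov::PartialShape& ps) {
  if (static_cast<int64_t>(ort_dims.size()) != ps.rank().get_length()) return false;
  for (size_t i = 0; i < ort_dims.size(); ++i) {
    const auto& d = ps[i];
    if (d.is_dynamic()) continue;                      // dynamic: any runtime value is accepted
    if (ort_dims[i] != d.get_length()) return false;   // static: must match exactly
  }
  return true;
}

int main() {
  // Dynamic batch dimension, fixed channel/spatial dimensions.
  ov::PartialShape ps{ov::Dimension::dynamic(), 3, 224, 224};
  std::cout << DimsFitPartialShape({4, 3, 224, 224}, ps) << "\n";  // prints 1
  std::cout << DimsFitPartialShape({4, 3, 128, 128}, ps) << "\n";  // prints 0
  return 0;
}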
onnxruntime/core/providers/openvino/backends/basic_backend.h
@@ -47,6 +47,7 @@ class BasicBackend : public IBackend {
void EnableGPUThrottling(ov::AnyMap& device_config);
void EnableStreams();
void SetNumThreads(ov::AnyMap& device_config);
void ValidateOrtDimsAgainstPartialShape(const std::vector<int64_t>& ort_dims, const ov::PartialShape& partial_shape);
void StartAsyncInference(Ort::KernelContext& context, std::shared_ptr<OVInferRequest> infer_request);

#ifdef IO_BUFFER_ENABLED
1 change: 1 addition & 0 deletions onnxruntime/core/providers/openvino/contexts.h
@@ -26,6 +26,7 @@ struct SessionContext {
std::string device_type;
std::string precision_str;
std::filesystem::path cache_dir;
std::map<std::string, ov::PartialShape> shape;
std::map<std::string, ov::AnyMap> load_config;
std::string model_priority = "DEFAULT";
int num_streams;
@@ -33,6 +33,7 @@ openvino_ep::SessionContext GetSessionContext(const OpenVINOExecutionProviderInf
.device_type = info.device_type_,
.precision_str = info.precision_,
.cache_dir = info.cache_dir_,
.shape = info.shape_,
.load_config = info.load_config_,
.model_priority = info.model_priority_,
.num_streams = info.num_streams_,
@@ -82,6 +82,7 @@ struct OpenVINOExecutionProviderInfo {
size_t num_of_threads_{0};
std::map<std::string, ov::AnyMap> load_config_{};
std::string cache_dir_{""};
std::map<std::string, ov::PartialShape> shape_{};
std::string model_priority_{""};
int num_streams_{1};
void* context_{NULL};
@@ -98,6 +99,7 @@ struct OpenVINOExecutionProviderInfo {
size_t num_of_threads,
const std::map<std::string, ov::AnyMap>& load_config,
const std::string& cache_dir,
const std::map<std::string, ov::PartialShape>& shape,
const std::string& model_priority, int num_streams,
void* context, bool enable_opencl_throttling,
bool disable_dynamic_shapes, bool export_ep_ctx_blob,
@@ -107,6 +109,7 @@ struct OpenVINOExecutionProviderInfo {
num_of_threads_(num_of_threads),
load_config_(std::move(load_config)),
cache_dir_(std::move(cache_dir)),
shape_(std::move(shape)),
model_priority_(std::move(model_priority)),
num_streams_(num_streams),
context_(context),
(Remaining changed files are not expanded in this view.)