add performance statistics for image generation #1405

Draft
wants to merge 11 commits into master
@@ -105,6 +105,10 @@ int32_t main(int32_t argc, char* argv[]) try {
ov::genai::num_inference_steps(number_of_inference_steps_per_image));

imwrite("image_" + std::to_string(imagei) + ".bmp", image, true);
auto perf_metrics = pipe.get_perfomance_metrics();
std::cout << "pipeline generate duration ms:" << perf_metrics.generate_duration / 1000.0f << std::endl;
std::cout << "pipeline inference duration ms:" << perf_metrics.get_inference_total_duration() << std::endl;
std::cout << "pipeline iteration:" << perf_metrics.raw_metrics.iteration_durations.size() << std::endl;
}

return EXIT_SUCCESS;
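The three reporting lines above recur verbatim in every sample this PR touches; below is a minimal sketch of a shared helper, using only the accessors declared later in this diff (the helper itself is not part of the PR, and it takes the metrics by value because the getters are declared non-const):

```cpp
#include <iostream>

#include "openvino/genai/image_generation/image_generation_perf_metrics.hpp"

// Shared reporting helper for the samples; prints the same three values the
// samples in this PR print, with the same / 1000.0f conversion.
void print_image_generation_perf(ov::genai::ImageGenerationPerfMetrics perf_metrics) {
    std::cout << "pipeline generate duration ms:" << perf_metrics.generate_duration / 1000.0f << std::endl;
    std::cout << "pipeline inference duration ms:" << perf_metrics.get_inference_total_duration() << std::endl;
    std::cout << "pipeline iteration:" << perf_metrics.raw_metrics.iteration_durations.size() << std::endl;
}
```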
4 changes: 4 additions & 0 deletions samples/cpp/image_generation/image2image.cpp
@@ -21,6 +21,10 @@ int32_t main(int32_t argc, char* argv[]) try {

// writes `num_images_per_prompt` images by pattern name
imwrite("image_%d.bmp", generated_image, true);
auto perf_metrics = pipe.get_perfomance_metrics();
std::cout << "pipeline generate duration ms:" << perf_metrics.generate_duration / 1000.0f << std::endl;
std::cout << "pipeline inference duration ms:" << perf_metrics.get_inference_total_duration() << std::endl;
std::cout << "pipeline iteration:" << perf_metrics.raw_metrics.iteration_durations.size() << std::endl;

return EXIT_SUCCESS;
} catch (const std::exception& error) {
5 changes: 5 additions & 0 deletions samples/cpp/image_generation/inpainting.cpp
@@ -21,6 +21,11 @@ int32_t main(int32_t argc, char* argv[]) try {
// writes `num_images_per_prompt` images by pattern name
imwrite("image_%d.bmp", generated_image, true);

auto perf_metrics = pipe.get_perfomance_metrics();
std::cout << "pipeline generate duration ms:" << perf_metrics.generate_duration / 1000.0f << std::endl;
std::cout << "pipeline inference duration ms:" << perf_metrics.get_inference_total_duration() << std::endl;
std::cout << "pipeline iteration:" << perf_metrics.raw_metrics.iteration_durations.size() << std::endl;

return EXIT_SUCCESS;
} catch (const std::exception& error) {
try {
8 changes: 8 additions & 0 deletions samples/cpp/image_generation/lora_text2image.cpp
@@ -29,6 +29,10 @@ int32_t main(int32_t argc, char* argv[]) try {
ov::genai::num_inference_steps(20),
ov::genai::rng_seed(42));
imwrite("lora.bmp", image, true);
auto perf_metrics = pipe.get_perfomance_metrics();
std::cout << "pipeline generate duration ms:" << perf_metrics.generate_duration / 1000.0f << std::endl;
std::cout << "pipeline inference duration ms:" << perf_metrics.get_inference_total_duration() << std::endl;
std::cout << "pipeline iteration:" << perf_metrics.raw_metrics.iteration_durations.size() << std::endl;

std::cout << "Generating image without LoRA adapters applied, resulting image will be in baseline.bmp\n";
image = pipe.generate(prompt,
@@ -38,6 +42,10 @@
ov::genai::num_inference_steps(20),
ov::genai::rng_seed(42));
imwrite("baseline.bmp", image, true);
perf_metrics = pipe.get_perfomance_metrics();
std::cout << "pipeline generate duration ms:" << perf_metrics.generate_duration / 1000.0f << std::endl;
std::cout << "pipeline inference duration ms:" << perf_metrics.get_inference_total_duration() << std::endl;
std::cout << "pipeline iteration:" << perf_metrics.raw_metrics.iteration_durations.size() << std::endl;

return EXIT_SUCCESS;
} catch (const std::exception& error) {
5 changes: 5 additions & 0 deletions samples/cpp/image_generation/text2image.cpp
@@ -21,6 +21,11 @@ int32_t main(int32_t argc, char* argv[]) try {
// writes `num_images_per_prompt` images by pattern name
imwrite("image_%d.bmp", image, true);

auto perf_metrics = pipe.get_perfomance_metrics();
std::cout << "pipeline generate duration ms:" << perf_metrics.generate_duration / 1000.0f << std::endl;
std::cout << "pipeline inference duration ms:" << perf_metrics.get_inference_total_duration() << std::endl;
std::cout << "pipeline iteration:" << perf_metrics.raw_metrics.iteration_durations.size() << std::endl;

return EXIT_SUCCESS;
} catch (const std::exception& error) {
try {
@@ -127,7 +127,7 @@ class OPENVINO_GENAI_EXPORTS AutoencoderKL {
return compile(device, ov::AnyMap{std::forward<Properties>(properties)...});
}

ov::Tensor decode(ov::Tensor latent);
ov::Tensor decode(ov::Tensor latent, float& infer_duration);

ov::Tensor encode(ov::Tensor image, std::shared_ptr<Generator> generator);

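The header changes that follow all add the same `float& infer_duration` out-parameter to `infer()`/`decode()`. A sketch of the resulting calling convention, assuming the callee reports microseconds (consistent with the `/ 1000.0f` conversions to milliseconds in `flux_pipeline.hpp` below); `pipe_vae`, `latent`, and the include path are hypothetical placeholders:

```cpp
#include <iostream>

#include "openvino/genai/image_generation/autoencoder_kl.hpp"  // assumed header path

// Hypothetical caller: the callee writes its own wall-clock inference time
// (microseconds, by this PR's convention) into infer_duration.
ov::Tensor decode_and_report(ov::genai::AutoencoderKL& pipe_vae, const ov::Tensor& latent) {
    float infer_duration = 0.0f;
    ov::Tensor image = pipe_vae.decode(latent, infer_duration);
    std::cout << "vae decode: " << infer_duration / 1000.0f << " ms" << std::endl;
    return image;
}
```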
@@ -9,6 +9,7 @@
#include "openvino/genai/visibility.hpp"
#include "openvino/genai/tokenizer.hpp"
#include "openvino/genai/lora_adapter.hpp"
#include "openvino/genai/perf_metrics.hpp"

#include "openvino/core/any.hpp"
#include "openvino/runtime/tensor.hpp"
@@ -84,7 +85,7 @@ class OPENVINO_GENAI_EXPORTS CLIPTextModel {

void set_adapters(const std::optional<AdapterConfig>& adapters);

ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance);
ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, float& infer_duration);

ov::Tensor get_output_tensor(const size_t idx);

@@ -9,6 +9,7 @@
#include "openvino/genai/visibility.hpp"
#include "openvino/genai/tokenizer.hpp"
#include "openvino/genai/lora_adapter.hpp"
#include "openvino/genai/perf_metrics.hpp"

#include "openvino/core/any.hpp"
#include "openvino/runtime/tensor.hpp"
@@ -84,7 +85,7 @@ class OPENVINO_GENAI_EXPORTS CLIPTextModelWithProjection {

void set_adapters(const std::optional<AdapterConfig>& adapters);

ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance);
ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, float& infer_duration);

ov::Tensor get_output_tensor(const size_t idx);

@@ -12,6 +12,7 @@
#include "openvino/runtime/tensor.hpp"

#include "openvino/genai/visibility.hpp"
#include "openvino/genai/perf_metrics.hpp"

namespace ov {
namespace genai {
@@ -75,7 +76,7 @@ class OPENVINO_GENAI_EXPORTS FluxTransformer2DModel {

void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states);

ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep);
ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep, float& infer_duration);

private:
Config m_config;
@@ -12,6 +12,7 @@

#include "openvino/genai/lora_adapter.hpp"
#include "openvino/genai/visibility.hpp"
#include "openvino/genai/perf_metrics.hpp"

namespace ov {
namespace genai {
@@ -87,8 +87,11 @@ class OPENVINO_GENAI_EXPORTS Image2ImagePipeline {

ov::Tensor decode(const ov::Tensor latent);

ImageGenerationPerfMetrics get_perfomance_metrics();

private:
std::shared_ptr<DiffusionPipeline> m_impl;
ImageGenerationPerfMetrics m_perf_metrics;

explicit Image2ImagePipeline(const std::shared_ptr<DiffusionPipeline>& impl);

@@ -0,0 +1,45 @@
#pragma once

#include <vector>
#include <chrono>
#include <map>
#include <string>

#include "openvino/genai/visibility.hpp"
#include "openvino/genai/perf_metrics.hpp"

namespace ov::genai {

struct OPENVINO_GENAI_EXPORTS RawImageGenerationPerfMetrics {
std::vector<MicroSeconds> unet_inference_durations; // unet inference durations for each step
std::vector<MicroSeconds> transformer_inference_durations; // transformer inference durations for each step
std::vector<MicroSeconds> iteration_durations; // durations of each step
};

struct OPENVINO_GENAI_EXPORTS ImageGenerationPerfMetrics {
float load_time; // model load time (includes reshape & read_model time), ms
float generate_duration; // duration of method generate(...), ms

MeanStdPair iteration_duration; // Mean-Std time of one generation iteration, ms
std::map<std::string, float> encoder_inference_duration; // inference durations for each encoder, ms
MeanStdPair unet_inference_duration; // inference duration for unet model, should be filled with zeros if we don't have unet, ms
MeanStdPair transformer_inference_duration; // inference duration for transformer model, should be filled with zeros if we don't have transformer, ms
float vae_encoder_inference_duration; // inference duration of vae_encoder model, should be filled with zeros if we don't use it, ms
float vae_decoder_inference_duration; // inference duration of vae_decoder model, ms

bool m_evaluated = false;

RawImageGenerationPerfMetrics raw_metrics;

void clean_up();
void evaluate_statistics();

MeanStdPair get_unet_inference_duration();
MeanStdPair get_transformer_inference_duration();
MeanStdPair get_iteration_duration();
float get_inference_total_duration();
float get_load_time();
float get_generate_duration();

};
}
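A minimal consumer sketch for the struct above, assuming `evaluate_statistics()` aggregates `raw_metrics` into the `MeanStdPair` fields before the getters are read (the `mean`/`std` members come from the existing `perf_metrics.hpp`):

```cpp
#include <iostream>

#include "openvino/genai/image_generation/image_generation_perf_metrics.hpp"

// Aggregate the raw per-step durations once, then read the derived statistics.
void report(ov::genai::ImageGenerationPerfMetrics& metrics) {
    metrics.evaluate_statistics();  // assumed to fill the MeanStdPair fields from raw_metrics
    ov::genai::MeanStdPair iteration = metrics.get_iteration_duration();
    std::cout << "load: " << metrics.get_load_time() << " ms, generate: "
              << metrics.get_generate_duration() << " ms" << std::endl;
    std::cout << "iteration: " << iteration.mean << " +/- " << iteration.std << " ms" << std::endl;
    std::cout << "total inference: " << metrics.get_inference_total_duration() << " ms" << std::endl;
}
```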
@@ -13,6 +13,7 @@

#include "openvino/genai/image_generation/scheduler.hpp"
#include "openvino/genai/image_generation/generation_config.hpp"
#include "openvino/genai/image_generation/image_generation_perf_metrics.hpp"

#include "openvino/genai/image_generation/clip_text_model.hpp"
#include "openvino/genai/image_generation/clip_text_model_with_projection.hpp"
@@ -110,8 +111,11 @@ class OPENVINO_GENAI_EXPORTS InpaintingPipeline {

ov::Tensor decode(const ov::Tensor latent);

ImageGenerationPerfMetrics get_perfomance_metrics();

private:
std::shared_ptr<DiffusionPipeline> m_impl;
ImageGenerationPerfMetrics m_perf_metrics;

explicit InpaintingPipeline(const std::shared_ptr<DiffusionPipeline>& impl);

@@ -13,6 +13,7 @@
#include "openvino/runtime/tensor.hpp"

#include "openvino/genai/visibility.hpp"
#include "openvino/genai/perf_metrics.hpp"

namespace ov {
namespace genai {
@@ -77,7 +78,7 @@ class OPENVINO_GENAI_EXPORTS SD3Transformer2DModel {

void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states);

ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep);
ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep, float& infer_duration);

private:
Config m_config;
@@ -9,6 +9,7 @@
#include "openvino/genai/visibility.hpp"
#include "openvino/genai/tokenizer.hpp"
#include "openvino/genai/lora_adapter.hpp"
#include "openvino/genai/perf_metrics.hpp"

#include "openvino/core/any.hpp"
#include "openvino/runtime/tensor.hpp"
@@ -68,7 +69,8 @@ class OPENVINO_GENAI_EXPORTS T5EncoderModel {
ov::Tensor infer(const std::string& pos_prompt,
const std::string& neg_prompt,
bool do_classifier_free_guidance,
int max_sequence_length);
int max_sequence_length,
float& infer_duration);

ov::Tensor get_output_tensor(const size_t idx);

@@ -221,6 +221,8 @@ class OPENVINO_GENAI_EXPORTS Text2ImagePipeline {
*/
ov::Tensor decode(const ov::Tensor latent);

ImageGenerationPerfMetrics get_perfomance_metrics();

private:
std::shared_ptr<DiffusionPipeline> m_impl;

@@ -16,6 +16,7 @@

#include "openvino/genai/visibility.hpp"
#include "openvino/genai/lora_adapter.hpp"
#include "openvino/genai/perf_metrics.hpp"

namespace ov {
namespace genai {
@@ -89,7 +90,7 @@ class OPENVINO_GENAI_EXPORTS UNet2DConditionModel {

void set_adapters(const std::optional<AdapterConfig>& adapters);

ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep);
ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, float& infer_duration);

bool do_classifier_free_guidance(float guidance_scale) const {
return guidance_scale > 1.0f && m_config.time_cond_proj_dim < 0;
4 changes: 3 additions & 1 deletion src/cpp/src/image_generation/diffusion_pipeline.hpp
@@ -88,7 +88,9 @@ class DiffusionPipeline {

virtual ov::Tensor generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties) = 0;

virtual ov::Tensor decode(const ov::Tensor latent) = 0;
virtual ov::Tensor decode(const ov::Tensor latent, float& infer_duration) = 0;

virtual ImageGenerationPerfMetrics get_perfomance_metrics() = 0;

virtual ~DiffusionPipeline() = default;

37 changes: 30 additions & 7 deletions src/cpp/src/image_generation/flux_pipeline.hpp
@@ -254,12 +254,15 @@ class FluxPipeline : public DiffusionPipeline {
}

void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) override {
float infer_duration;
// encode_prompt
std::string prompt_2_str = generation_config.prompt_2 != std::nullopt ? *generation_config.prompt_2 : positive_prompt;

m_clip_text_encoder->infer(positive_prompt, {}, false);
m_clip_text_encoder->infer(positive_prompt, {}, false, infer_duration);
m_perf_metrics.encoder_inference_duration["text_encoder"] = infer_duration / 1000.0f;
ov::Tensor pooled_prompt_embeds = m_clip_text_encoder->get_output_tensor(1);
ov::Tensor prompt_embeds = m_t5_text_encoder->infer(prompt_2_str, "", false, generation_config.max_sequence_length);
ov::Tensor prompt_embeds = m_t5_text_encoder->infer(prompt_2_str, "", false, generation_config.max_sequence_length, infer_duration);
m_perf_metrics.encoder_inference_duration["text_encoder_2"] = infer_duration / 1000.0f;

pooled_prompt_embeds = numpy_utils::repeat(pooled_prompt_embeds, generation_config.num_images_per_prompt);
prompt_embeds = numpy_utils::repeat(prompt_embeds, generation_config.num_images_per_prompt);
@@ -319,6 +322,9 @@ class FluxPipeline : public DiffusionPipeline {
ov::Tensor initial_image,
ov::Tensor mask_image,
const ov::AnyMap& properties) override {
const auto gen_start = std::chrono::steady_clock::now();
float infer_duration;
m_perf_metrics.clean_up();
m_custom_generation_config = m_generation_config;
m_custom_generation_config.update_generation_config(properties);

@@ -358,28 +364,44 @@
float* timestep_data = timestep.data<float>();

for (size_t inference_step = 0; inference_step < timesteps.size(); ++inference_step) {
auto step_start = std::chrono::steady_clock::now();
timestep_data[0] = timesteps[inference_step] / 1000;

ov::Tensor noise_pred_tensor = m_transformer->infer(latents, timestep);
ov::Tensor noise_pred_tensor = m_transformer->infer(latents, timestep, infer_duration);
m_perf_metrics.raw_metrics.transformer_inference_durations.emplace_back(MicroSeconds(infer_duration));

auto scheduler_step_result = m_scheduler->step(noise_pred_tensor, latents, inference_step, m_custom_generation_config.generator);
latents = scheduler_step_result["latent"];

auto step_duration = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - step_start);
m_perf_metrics.raw_metrics.iteration_durations.emplace_back(MicroSeconds(step_duration));
if (callback && callback(inference_step, timesteps.size(), latents)) {
return ov::Tensor(ov::element::u8, {});
auto image = ov::Tensor(ov::element::u8, {});
m_perf_metrics.generate_duration =
std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - gen_start)
.count();
return image;
}
}

latents = unpack_latents(latents, m_custom_generation_config.height, m_custom_generation_config.width, vae_scale_factor);
return m_vae->decode(latents);
auto image = m_vae->decode(latents, infer_duration);
m_perf_metrics.vae_decoder_inference_duration = infer_duration / 1000.0f;
m_perf_metrics.generate_duration =
std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - gen_start).count();
return image;
}

ov::Tensor decode(const ov::Tensor latent) override {
ov::Tensor decode(const ov::Tensor latent, float& infer_duration) override {
ov::Tensor unpacked_latent = unpack_latents(latent,
m_custom_generation_config.height,
m_custom_generation_config.width,
m_vae->get_vae_scale_factor());
return m_vae->decode(unpacked_latent);
return m_vae->decode(unpacked_latent, infer_duration);
}

ImageGenerationPerfMetrics get_perfomance_metrics() override {
return m_perf_metrics;
}

private:
Expand Down Expand Up @@ -477,6 +499,7 @@ class FluxPipeline : public DiffusionPipeline {
std::shared_ptr<T5EncoderModel> m_t5_text_encoder = nullptr;
std::shared_ptr<AutoencoderKL> m_vae = nullptr;
ImageGenerationConfig m_custom_generation_config;
ImageGenerationPerfMetrics m_perf_metrics;
};

} // namespace genai
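All of the instrumentation in `generate()` above follows one pattern: bracket a call with `steady_clock`, convert the delta to microseconds, and hand it back through the out-parameter. A hypothetical generic wrapper illustrating that pattern (not part of this PR):

```cpp
#include <chrono>
#include <utility>

// Hypothetical wrapper showing the measurement pattern used by the pipeline:
// elapsed wall-clock time in microseconds is written to the out-parameter
// while the wrapped call's result is returned unchanged.
template <typename F>
auto timed_call(F&& f, float& infer_duration_us) {
    const auto start = std::chrono::steady_clock::now();
    auto result = std::forward<F>(f)();
    infer_duration_us = std::chrono::duration_cast<std::chrono::microseconds>(
                            std::chrono::steady_clock::now() - start)
                            .count();
    return result;
}
```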
7 changes: 6 additions & 1 deletion src/cpp/src/image_generation/image2image_pipeline.cpp
@@ -120,7 +120,12 @@ ov::Tensor Image2ImagePipeline::generate(const std::string& positive_prompt, ov:
}

ov::Tensor Image2ImagePipeline::decode(const ov::Tensor latent) {
return m_impl->decode(latent);
float infer_duration;
return m_impl->decode(latent, infer_duration);
}

ImageGenerationPerfMetrics Image2ImagePipeline::get_perfomance_metrics() {
return m_impl->get_perfomance_metrics();
}

} // namespace genai