Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] LoRA for FLUX #1353

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "openvino/runtime/infer_request.hpp"
#include "openvino/runtime/properties.hpp"
#include "openvino/runtime/tensor.hpp"
#include "openvino/genai/lora_adapter.hpp"

#include "openvino/genai/visibility.hpp"

Expand All @@ -22,6 +23,7 @@ class OPENVINO_GENAI_EXPORTS FluxTransformer2DModel {
size_t in_channels = 64;
bool guidance_embeds = false;
size_t m_default_sample_size = 128;
std::vector<std::string> m_model_input_names;

explicit Config(const std::filesystem::path& config_path);
};
Expand Down Expand Up @@ -75,10 +77,13 @@ class OPENVINO_GENAI_EXPORTS FluxTransformer2DModel {

void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states);

void set_adapters(const std::optional<AdapterConfig>& adapters);

ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep);

private:
Config m_config;
AdapterController m_adapter_controller;
ov::InferRequest m_request;
std::shared_ptr<ov::Model> m_model;
size_t m_vae_scale_factor;
Expand Down
7 changes: 6 additions & 1 deletion src/cpp/src/image_generation/flux_pipeline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ class FluxPipeline : public DiffusionPipeline {

// initialize generation config
initialize_generation_config(data["_class_name"].get<std::string>());
update_adapters_from_properties(properties, m_generation_config.adapters);
}

FluxPipeline(PipelineType pipeline_type,
Expand Down Expand Up @@ -232,12 +233,13 @@ class FluxPipeline : public DiffusionPipeline {
}

void compile(const std::string& device, const ov::AnyMap& properties) override {
update_adapters_from_properties(properties, m_generation_config.adapters);
m_clip_text_encoder->compile(device, properties);
m_t5_text_encoder->compile(device, properties);
m_vae->compile(device, properties);
m_transformer->compile(device, properties);
}

void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) override {
// encode_prompt
std::string prompt_2_str =
Expand Down Expand Up @@ -321,6 +323,9 @@ class FluxPipeline : public DiffusionPipeline {

check_inputs(m_custom_generation_config, initial_image);

m_clip_text_encoder->set_adapters(m_custom_generation_config.adapters);
m_transformer->set_adapters(m_custom_generation_config.adapters);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please, add LoRA adapters here:

void set_lora_adapters(std::optional<AdapterConfig> adapters) override {
OPENVINO_THROW("LORA adapters are not implemented for FLUX pipeline yet");
}


compute_hidden_states(positive_prompt, m_custom_generation_config);

ov::Tensor latents = prepare_latents(initial_image, m_custom_generation_config);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@

#include "json_utils.hpp"
#include "utils.hpp"
#include "lora_helper.hpp"

namespace {
// Collects the (primary) tensor name of every model input port into `input_names`.
// Appends to the vector rather than replacing its contents, so callers that
// accumulate names across models keep working. Reserves capacity up front to
// avoid repeated reallocations while appending.
// NOTE(review): get_any_name() throws if a port has no names — presumably every
// compiled-model input here is named; confirm against the calling site.
void get_input_names(std::vector<std::string>& input_names, const std::vector<ov::Output<const ov::Node>>& inputs_info) {
    input_names.reserve(input_names.size() + inputs_info.size());
    for (const auto& port : inputs_info) {
        input_names.push_back(port.get_any_name());
    }
}
}

namespace ov {
namespace genai {
Expand Down Expand Up @@ -108,7 +117,16 @@ FluxTransformer2DModel& FluxTransformer2DModel::reshape(int batch_size,

FluxTransformer2DModel& FluxTransformer2DModel::compile(const std::string& device, const ov::AnyMap& properties) {
OPENVINO_ASSERT(m_model, "Model has been already compiled. Cannot re-compile already compiled model");
ov::CompiledModel compiled_model = utils::singleton_core().compile_model(m_model, device, properties);
std::optional<AdapterConfig> adapters;
ov::CompiledModel compiled_model;
if (auto filtered_properties = extract_adapters_from_properties(properties, &adapters)) {
adapters->set_tensor_name_prefix(adapters->get_tensor_name_prefix().value_or("transformer"));
m_adapter_controller = AdapterController(m_model, *adapters, device);
compiled_model = utils::singleton_core().compile_model(m_model, device, *filtered_properties);
} else {
compiled_model = utils::singleton_core().compile_model(m_model, device, properties);
}
get_input_names(m_config.m_model_input_names, compiled_model.inputs());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

obsolete code

m_request = compiled_model.create_infer_request();
// release the original model
m_model.reset();
Expand All @@ -121,6 +139,13 @@ void FluxTransformer2DModel::set_hidden_states(const std::string& tensor_name, o
m_request.set_tensor(tensor_name, encoder_hidden_states);
}

// Applies the given LoRA adapter configuration to the compiled transformer.
// Requires compile() to have been called first (the infer request must exist).
// A disengaged optional is a no-op: the currently active adapters are left as-is.
void FluxTransformer2DModel::set_adapters(const std::optional<AdapterConfig>& adapters) {
    OPENVINO_ASSERT(m_request, "Transformer model must be compiled first");
    if (!adapters) {
        return;
    }
    m_adapter_controller.apply(m_request, *adapters);
}

ov::Tensor FluxTransformer2DModel::infer(const ov::Tensor latent_model_input, const ov::Tensor timestep) {
OPENVINO_ASSERT(m_request, "Transformer model must be compiled first. Cannot infer non-compiled model");

Expand Down
7 changes: 6 additions & 1 deletion src/cpp/src/image_generation/models/t5_encoder_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "json_utils.hpp"
#include "lora_helper.hpp"
#include "utils.hpp"
#include "lora_helper.hpp"

namespace ov {
namespace genai {
Expand Down Expand Up @@ -62,7 +63,11 @@ T5EncoderModel& T5EncoderModel::compile(const std::string& device, const ov::Any
OPENVINO_ASSERT(m_model, "Model has been already compiled. Cannot re-compile already compiled model");
ov::Core core = utils::singleton_core();
ov::CompiledModel compiled_model;
compiled_model = core.compile_model(m_model, device, properties);
if (auto filtered_properties = extract_adapters_from_properties(properties)) {
compiled_model = core.compile_model(m_model, device, *filtered_properties);
} else {
compiled_model = core.compile_model(m_model, device, properties);
}
m_request = compiled_model.create_infer_request();
// release the original model
m_model.reset();
Expand Down
Loading