Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arendjan/vlm #84

Merged
merged 15 commits into from
Jan 23, 2025
184 changes: 184 additions & 0 deletions lib/interop/generated_bindings.dart
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,128 @@
// Dart-callable wrapper bound to the native 'ttiInferenceClose' symbol.
late final _ttiInferenceClose = _ttiInferenceClosePtr
    .asFunction<ffi.Pointer<Status> Function(CLLMInference)>();

/// Opens a VLM inference pipeline for the model at [model_path] on [device].
///
/// Returns a pointer to a StatusOrVLMInference; callers must check `status`
/// before using `value`.
ffi.Pointer<StatusOrVLMInference> vlmInferenceOpen(
  ffi.Pointer<pkg_ffi.Utf8> model_path,
  ffi.Pointer<pkg_ffi.Utf8> device,
) {
  return _vlmInferenceOpen(
    model_path,
    device,
  );
}

late final _vlmInferenceOpenPtr = _lookup<
    ffi.NativeFunction<
        ffi.Pointer<StatusOrVLMInference> Function(ffi.Pointer<pkg_ffi.Utf8>,
            ffi.Pointer<pkg_ffi.Utf8>)>>('vlmInferenceOpen');
late final _vlmInferenceOpen = _vlmInferenceOpenPtr.asFunction<
    ffi.Pointer<StatusOrVLMInference> Function(
        ffi.Pointer<pkg_ffi.Utf8>, ffi.Pointer<pkg_ffi.Utf8>)>();

/// Registers [callback] as the streaming-token listener for [instance].
ffi.Pointer<Status> vlmInferenceSetListener(
  CVLMInference instance,
  VLMInferenceCallbackFunction callback,
) {
  return _vlmInferenceSetListener(
    instance,
    callback,
  );
}

late final _vlmInferenceSetListenerPtr = _lookup<
    ffi.NativeFunction<
        ffi.Pointer<Status> Function(CVLMInference,
            VLMInferenceCallbackFunction)>>('vlmInferenceSetListener');
late final _vlmInferenceSetListener = _vlmInferenceSetListenerPtr.asFunction<
    ffi.Pointer<Status> Function(
        CVLMInference, VLMInferenceCallbackFunction)>();

/// Runs one prompt against [instance], generating up to [max_new_tokens]
/// tokens, and returns the response (text plus metrics) wrapped in a status.
ffi.Pointer<StatusOrVLMModelResponse> vlmInferencePrompt(
  CVLMInference instance,
  ffi.Pointer<pkg_ffi.Utf8> message,
  int max_new_tokens,
) {
  return _vlmInferencePrompt(
    instance,
    message,
    max_new_tokens,
  );
}

late final _vlmInferencePromptPtr = _lookup<
    ffi.NativeFunction<
        ffi.Pointer<StatusOrVLMModelResponse> Function(CVLMInference,
            ffi.Pointer<pkg_ffi.Utf8>, ffi.Int)>>('vlmInferencePrompt');
late final _vlmInferencePrompt = _vlmInferencePromptPtr.asFunction<
    ffi.Pointer<StatusOrVLMModelResponse> Function(
        CVLMInference, ffi.Pointer<pkg_ffi.Utf8>, int)>();

/// Hands [length] image paths (array of C strings in [paths]) to [instance]
/// for use by subsequent prompts.
ffi.Pointer<Status> vlmInferenceSetImagePaths(
  CVLMInference instance,
  ffi.Pointer<ffi.Pointer<pkg_ffi.Utf8>> paths,
  int length,
) {
  return _vlmInferenceSetImagePaths(
    instance,
    paths,
    length,
  );
}

late final _vlmInferenceSetImagePathsPtr = _lookup<
    ffi.NativeFunction<
        ffi.Pointer<Status> Function(
            CVLMInference,
            ffi.Pointer<ffi.Pointer<pkg_ffi.Utf8>>,
            ffi.Int)>>('vlmInferenceSetImagePaths');
late final _vlmInferenceSetImagePaths =
    _vlmInferenceSetImagePathsPtr.asFunction<
        ffi.Pointer<Status> Function(
            CVLMInference, ffi.Pointer<ffi.Pointer<pkg_ffi.Utf8>>, int)>();

/// Queries whether [instance]'s model ships a model index.
ffi.Pointer<StatusOrBool> vlmInferenceHasModelIndex(
  CVLMInference instance,
) {
  return _vlmInferenceHasModelIndex(
    instance,
  );
}

late final _vlmInferenceHasModelIndexPtr = _lookup<
        ffi.NativeFunction<ffi.Pointer<StatusOrBool> Function(CVLMInference)>>(
    'vlmInferenceHasModelIndex');
late final _vlmInferenceHasModelIndex = _vlmInferenceHasModelIndexPtr
    .asFunction<ffi.Pointer<StatusOrBool> Function(CVLMInference)>();

/// Requests that [instance] interrupt any in-flight generation.
ffi.Pointer<Status> vlmInferenceStop(
  CVLMInference instance,
) {
  return _vlmInferenceStop(
    instance,
  );
}

late final _vlmInferenceStopPtr =
    _lookup<ffi.NativeFunction<ffi.Pointer<Status> Function(CVLMInference)>>(
        'vlmInferenceStop');
late final _vlmInferenceStop = _vlmInferenceStopPtr
    .asFunction<ffi.Pointer<Status> Function(CVLMInference)>();

/// Destroys [instance] on the native side; the handle must not be used after
/// a successful close.
ffi.Pointer<Status> vlmInferenceClose(
  CVLMInference instance,
) {
  return _vlmInferenceClose(
    instance,
  );
}

late final _vlmInferenceClosePtr =
    _lookup<ffi.NativeFunction<ffi.Pointer<Status> Function(CVLMInference)>>(
        'vlmInferenceClose');
late final _vlmInferenceClose = _vlmInferenceClosePtr
    .asFunction<ffi.Pointer<Status> Function(CVLMInference)>();

ffi.Pointer<StatusOrGraphRunner> graphRunnerOpen(
ffi.Pointer<pkg_ffi.Utf8> graph,
) {
Expand Down Expand Up @@ -861,6 +983,41 @@
external TTIMetrics metrics;
}

/// Performance metrics reported by the native VLM pipeline for one run.
///
/// Field names mirror the native struct. Time units are not stated in this
/// file — presumably milliseconds; confirm against the C header.
final class VLMMetrics extends ffi.Struct {
  // Time spent loading the model.
  @ffi.Float()
  external double load_time;

  // Total generation time for the request.
  @ffi.Float()
  external double generate_time;

  @ffi.Float()
  external double tokenization_time;

  @ffi.Float()
  external double detokenization_time;

  // NOTE(review): by convention ttft = time to first token and tpot = time
  // per output token — confirm against the native implementation.
  @ffi.Float()
  external double ttft;

  @ffi.Float()
  external double tpot;

  // Generation throughput (tokens per unit time, per the native side).
  @ffi.Float()
  external double throughput;

  @ffi.Int()
  external int number_of_generated_tokens;

  @ffi.Int()
  external int number_of_input_tokens;
}

/// A generated string paired with the [VLMMetrics] of the run that
/// produced it.
final class VLMStringWithMetrics extends ffi.Struct {
  // UTF-8 encoded generated text.
  external ffi.Pointer<pkg_ffi.Utf8> string;

  external VLMMetrics metrics;
}

final class Device extends ffi.Struct {
external ffi.Pointer<pkg_ffi.Utf8> id;

Expand Down Expand Up @@ -966,6 +1123,15 @@
external CLLMInference value;
}

/// Result wrapper for [vlmInferenceOpen]: a status code, an optional error
/// message, and on success an opaque VLM inference handle.
final class StatusOrVLMInference extends ffi.Struct {
  @ffi.Int()
  external int status;

  // Error details when status is non-OK.
  external ffi.Pointer<pkg_ffi.Utf8> message;

  // Opaque handle to the VLM pipeline. Typed CVLMInference for consistency
  // with the other VLM bindings; ABI-identical to the previous CLLMInference
  // (both are Pointer<Void> typedefs).
  external CVLMInference value;
}

final class StatusOrModelResponse extends ffi.Struct {
@ffi.Int()
external int status;
Expand Down Expand Up @@ -1004,6 +1170,17 @@
external ffi.Pointer<pkg_ffi.Utf8> value;
}

/// Result wrapper for [vlmInferencePrompt]: status code, optional error
/// message, generation metrics, and the generated text on success.
final class StatusOrVLMModelResponse extends ffi.Struct {
  @ffi.Int()
  external int status;

  // Error details when status is non-OK — presumably unset on success;
  // confirm against the native implementation.
  external ffi.Pointer<pkg_ffi.Utf8> message;

  external VLMMetrics metrics;

  // UTF-8 encoded generated text when status is OK.
  external ffi.Pointer<pkg_ffi.Utf8> value;
}

final class StatusOrDevices extends ffi.Struct {
@ffi.Int()
external int status;
Expand All @@ -1029,3 +1206,10 @@
// Dart-side signature of the LLM streaming callback.
typedef DartLLMInferenceCallbackFunctionFunction = void Function(
    ffi.Pointer<StatusOrString>);
// Opaque native handle to a text-to-image inference instance.
typedef CTTIInference = ffi.Pointer<ffi.Void>;
// Opaque native handle to a VLM inference instance.
typedef CVLMInference = ffi.Pointer<ffi.Void>;
// Native function-pointer type passed to vlmInferenceSetListener.
typedef VLMInferenceCallbackFunction
    = ffi.Pointer<ffi.NativeFunction<VLMInferenceCallbackFunctionFunction>>;
// Native-side signature of the VLM streaming callback.
typedef VLMInferenceCallbackFunctionFunction = ffi.Void Function(
    ffi.Pointer<StatusOrString>);
// Dart-side signature of the VLM streaming callback.
typedef DartVLMInferenceCallbackFunctionFunction = void Function(
    ffi.Pointer<StatusOrString>);
7 changes: 7 additions & 0 deletions lib/interop/openvino_bindings.dart
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@
const TTIModelResponse(this.content, this.metrics);
}

/// Response from a VLM prompt: the generated text plus the metrics reported
/// by the native pipeline.
class VLMModelResponse {
  final String content;
  final VLMMetrics metrics;

  const VLMModelResponse(this.content, this.metrics);
}


String getLibraryPath() {
if (Platform.isWindows) {
Expand Down
123 changes: 123 additions & 0 deletions lib/interop/vlm_inference.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// Copyright (c) 2024 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0

import 'dart:ffi';
import 'dart:isolate';

import 'package:ffi/ffi.dart';
import 'package:inference/interop/openvino_bindings.dart';

final vlmOV = getBindings();

/// Dart-side wrapper around the native VLM (vision-language model)
/// inference pipeline exposed through [vlmOV].
class VLMInference {
  // Native callable kept alive while a listener is registered; closed and
  // replaced on every setListener call, and released in close().
  NativeCallable<VLMInferenceCallbackFunctionFunction>? nativeListener;

  // Result of vlmInferenceOpen; `instance.ref.value` is the opaque handle
  // passed to every subsequent native call.
  final Pointer<StatusOrVLMInference> instance;

  late bool chatEnabled;

  VLMInference(this.instance) {
    chatEnabled = true;
  }

  /// Opens a VLM pipeline for [modelPath] on [device].
  ///
  /// The native open call runs in a worker isolate so model loading does not
  /// block the caller. Throws if the native side reports a non-OK status.
  static Future<VLMInference> init(String modelPath, String device) async {
    final result = await Isolate.run(() {
      final modelPathPtr = modelPath.toNativeUtf8();
      final devicePtr = device.toNativeUtf8();
      final status = vlmOV.vlmInferenceOpen(modelPathPtr, devicePtr);
      calloc.free(modelPathPtr);
      calloc.free(devicePtr);

      return status;
    });

    if (StatusEnum.fromValue(result.ref.status) != StatusEnum.OkStatus) {
      throw "VLMInference open error: ${result.ref.status} ${result.ref.message.toDartString()}";
    }

    return VLMInference(result);
  }

  /// Registers [callback] to receive streamed tokens from the native side.
  Future<void> setListener(void Function(String) callback) async {
    // Only the raw address is captured; the handle itself is not sendable.
    int instanceAddress = instance.ref.value.address;
    void localCallback(Pointer<StatusOrString> ptr) {
      if (StatusEnum.fromValue(ptr.ref.status) != StatusEnum.OkStatus) {
        // TODO(RHeckerIntel): instead of throw, call an onError callback.
        throw "VLM Callback error: ${ptr.ref.status} ${ptr.ref.message.toDartString()}";
      }
      callback(ptr.ref.value.toDartString());
      vlmOV.freeStatusOrString(ptr);
    }

    // Replace any previous listener so only one native callable stays alive.
    nativeListener?.close();
    nativeListener =
        NativeCallable<VLMInferenceCallbackFunctionFunction>.listener(
            localCallback);
    final status = vlmOV.vlmInferenceSetListener(
        Pointer<Void>.fromAddress(instanceAddress),
        nativeListener!.nativeFunction);
    if (StatusEnum.fromValue(status.ref.status) != StatusEnum.OkStatus) {
      // TODO(RHeckerIntel): instead of throw, call an onError callback.
      throw "VLM setListener error: ${status.ref.status} ${status.ref.message.toDartString()}";
    }
    vlmOV.freeStatus(status);
  }

  /// Runs one prompt, generating at most [maxNewTokens] tokens, and returns
  /// the complete response with metrics.
  ///
  /// The native call runs in a worker isolate; only the handle's raw address
  /// is captured so the closure stays sendable.
  Future<VLMModelResponse> prompt(String message, int maxNewTokens) async {
    int instanceAddress = instance.ref.value.address;
    final result = await Isolate.run(() {
      final messagePtr = message.toNativeUtf8();
      final status = vlmOV.vlmInferencePrompt(
          Pointer<Void>.fromAddress(instanceAddress),
          messagePtr,
          maxNewTokens);
      calloc.free(messagePtr);
      return status;
    });

    if (StatusEnum.fromValue(result.ref.status) != StatusEnum.OkStatus) {
      throw "VLMInference prompt error: ${result.ref.status} ${result.ref.message.toDartString()}";
    }

    // NOTE(review): `result` is not released here; the bindings expose no
    // free function for StatusOrVLMModelResponse — confirm upstream whether
    // one should be added.
    return VLMModelResponse(
        result.ref.value.toDartString(), result.ref.metrics);
  }

  /// Passes the image [paths] to the native pipeline for use by subsequent
  /// prompts.
  void setImagePaths(List<String> paths) {
    // Convert Dart strings to C strings.
    final cStrings = paths.map((str) => str.toNativeUtf8()).toList();

    // Build a C array of pointers to those strings.
    final pointerToCStrings = malloc<Pointer<Utf8>>(cStrings.length);
    for (var i = 0; i < cStrings.length; i++) {
      pointerToCStrings[i] = cStrings[i];
    }

    final status = vlmOV.vlmInferenceSetImagePaths(
        instance.ref.value, pointerToCStrings, cStrings.length);

    // The native call is synchronous, so the allocations can be released now
    // (assumes the native side copies the paths during the call — confirm).
    for (final cString in cStrings) {
      malloc.free(cString);
    }
    malloc.free(pointerToCStrings);

    if (StatusEnum.fromValue(status.ref.status) != StatusEnum.OkStatus) {
      throw "SetImagePaths error: ${status.ref.status} ${status.ref.message.toDartString()}";
    }
    vlmOV.freeStatus(status);
  }

  /// Asks the native side to interrupt an in-flight generation.
  void forceStop() {
    final status = vlmOV.vlmInferenceStop(instance.ref.value);

    if (StatusEnum.fromValue(status.ref.status) != StatusEnum.OkStatus) {
      throw "VLM Force Stop error: ${status.ref.status} ${status.ref.message.toDartString()}";
    }
    vlmOV.freeStatus(status);
  }

  /// Destroys the native pipeline and releases the listener callable.
  void close() {
    // Release the native callable so the isolate can shut down cleanly.
    nativeListener?.close();
    nativeListener = null;

    final status = vlmOV.vlmInferenceClose(instance.ref.value);

    if (StatusEnum.fromValue(status.ref.status) != StatusEnum.OkStatus) {
      throw "Close error: ${status.ref.status} ${status.ref.message.toDartString()}";
    }
    vlmOV.freeStatus(status);
  }
}
3 changes: 3 additions & 0 deletions lib/pages/models/inference.dart
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import 'package:inference/pages/computer_vision/computer_vision.dart';
import 'package:inference/pages/text_generation/text_generation.dart';
import 'package:inference/pages/text_to_image/text_to_image_page.dart';
import 'package:inference/pages/transcription/transcription.dart';
import 'package:inference/pages/vlm/vlm_page.dart';
import 'package:inference/project.dart';

class InferencePage extends StatelessWidget {
Expand All @@ -24,6 +25,8 @@ class InferencePage extends StatelessWidget {
return TranscriptionPage(project);
case ProjectType.textToImage:
return TextToImagePage(project);
case ProjectType.vlm:
return VLMPage(project);
}
}

Expand Down
5 changes: 4 additions & 1 deletion lib/pages/text_generation/text_generation.dart
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,10 @@ class _TextGenerationPageState extends State<TextGenerationPage> {
displayMode: PaneDisplayMode.top,
items: [
PaneItem(
icon: const Icon(FluentIcons.game),
icon: SvgPicture.asset("images/playground.svg",
colorFilter: ColorFilter.mode(textColor, BlendMode.srcIn),
width: 15,
),
title: const Text("Playground"),
body: Playground(project: widget.project),
),
Expand Down
Loading
Loading