Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[misc] Add LoRA kernel micro benchmarks #11579

Merged
merged 17 commits into from
Jan 16, 2025
Prev Previous commit
Next Next commit
format
Signed-off-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
Varun Sundar Rabindranath committed Jan 16, 2025
commit 75ca94d5bf9e64fd6d40fb5839345646cfa38cf9
22 changes: 15 additions & 7 deletions benchmarks/kernels/benchmark_lora.py
Original file line number Diff line number Diff line change
@@ -537,7 +537,9 @@ def as_bgmv_expand_slice_kwargs(self, add_inputs: bool) -> Dict[str, Any]:
})
return {'kwargs_list': kwargs_list}

def bench_fn_kwargs(self, op_type: OpType, add_inputs: Optional[bool] = None) -> Dict[str, Any]:
def bench_fn_kwargs(self,
op_type: OpType,
add_inputs: Optional[bool] = None) -> Dict[str, Any]:
if op_type.is_shrink_fn():
assert add_inputs is None
else:
@@ -577,16 +579,21 @@ def bench_optype(ctx: BenchmarkContext,
bt.sanity_check()

# BenchmarkTensors -> Dict (kwargs)
kwargs_list = [bt.bench_fn_kwargs(op_type, add_inputs=expand_fn_add_inputs) for bt in bench_tensors]
kwargs_list = [
bt.bench_fn_kwargs(op_type, add_inputs=expand_fn_add_inputs)
for bt in bench_tensors
]

# Merge into a single kwargs and qualify arguments as ArgPool
kwargs = {k: ArgPool([]) for k in kwargs_list[0]}
for _kwargs in kwargs_list:
for k, v in _kwargs.items():
kwargs[k].values.append(v)

describe_args = f"add_inputs={expand_fn_add_inputs}" if expand_fn_add_inputs is not None else ""
description = f"{op_type.name}({describe_args}) ({bench_tensors[0].io_types()})"
describe_args = (f"add_inputs={expand_fn_add_inputs}"
if expand_fn_add_inputs is not None else "")
description = (
f"{op_type.name}({describe_args}) ({bench_tensors[0].io_types()})")
cuda_graph_params = CudaGraphBenchParams(
num_ops_in_cuda_graph=arg_pool_size) if with_cuda_graph else None
with Bench(cuda_graph_params,
@@ -666,12 +673,13 @@ def run(args: argparse.Namespace, bench_ctxs: List[BenchmarkContext]):
args.with_cuda_graph))

# Benchmark bench_op
expand_fn_add_inputs = [None] if bench_op.is_shrink_fn() else args.expand_fn_add_inputs
expand_fn_add_inputs = [
None
] if bench_op.is_shrink_fn() else args.expand_fn_add_inputs
for add_input_arg in expand_fn_add_inputs:
seq_len_timers.append(
bench_optype(_ctx, args.arg_pool_size, bench_op,
args.with_cuda_graph,
add_input_arg))
args.with_cuda_graph, add_input_arg))

print_timers(seq_len_timers)
timers.extend(seq_len_timers)