Lowbit #1070

Closed - wants to merge 8 commits
1 change: 1 addition & 0 deletions .pins/torchao-pin.txt
@@ -0,0 +1 @@
85d03de43160328eaf350e7ec3877d3d7b57da50
Contributor Author commented:

TODO: update to the final commit hash after D62394341 lands.

7 changes: 7 additions & 0 deletions generate.py
@@ -395,6 +395,13 @@ def decode_n_tokens(
            )
            input_pos += 1
            break
        if _i == 1:
            t0 = time.time()
        if _i == num_new_tokens - 2:
            t1 = time.time()
            print(f"\nTime to generate {num_new_tokens-2} tokens: {t1-t0}")
            print(f"\nTokens/sec to generate {num_new_tokens-2} tokens: {(num_new_tokens-2) / (t1-t0)}")

    if not encountered_eos:
        eos_token = torch.tensor(

Contributor Author commented on the timing lines above:

Remove this before final landing. It was added to get a more accurate tokens/sec measurement during development, especially for torch.compile.
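As a standalone illustration of that measurement pattern, here is a minimal sketch (a toy compiled function, not the PR's code): the first call is kept out of the timed region so torch.compile's one-time compilation cost does not distort the rate.

import time

import torch

@torch.compile
def step(x):
    return x * 2 + 1

x = torch.randn(8)
step(x)  # first call triggers compilation; keep it out of the timed region

t0 = time.time()
n = 100
for _ in range(n):
    step(x)
t1 = time.time()
print(f"iterations/sec with compile time excluded: {n / (t1 - t0):.1f}")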
48 changes: 45 additions & 3 deletions quantization/quantize.py
@@ -92,9 +92,21 @@ def quantize_model(

     try:
         # Easier to ask forgiveness than permission
-        quant_handler = ao_quantizer_class_dict[quantizer](
-            groupsize=q_kwargs["groupsize"], device=device, precision=precision
-        )
+        if quantizer == "linear:a8wlow":
+            quant_handler = ao_quantizer_class_dict[quantizer](
+                device=device,
+                precision=precision,
+                bitwidth=q_kwargs.get("bitwidth", 4),
+                groupsize=q_kwargs.get("groupsize", 128),
+                has_weight_zeros=q_kwargs.get("has_weight_zeros", False),
+                squeeze_unsqueeze_dim0=True,
+            )
+        else:
+            quant_handler = ao_quantizer_class_dict[quantizer](
+                groupsize=q_kwargs["groupsize"],
+                device=device,
+                precision=precision,
+            )
     except TypeError as e:
         if "unexpected keyword argument 'device'" in str(e):
             quant_handler = ao_quantizer_class_dict[quantizer](

Contributor commented on lines +97 to +102:

nit: We can probably curry Int8DynActLowbitWeightQuantizer to match the same args as the other AO quant classes.

Contributor Author replied:

I modeled the args after Int8DynActInt4WeightQuantizer, and used "groupsize" instead of "group_size" to match that experience. But Int8DynActInt4WeightQuantizer doesn't have bitwidth or has_weight_zeros as concepts; I'm happy to rename those to whatever you think best.
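A minimal sketch of the currying suggestion (assuming functools.partial over the quantizer class; the pre-bound values and the helper name are illustrative, not the PR's defaults):

from functools import partial

import torch

# Hypothetical: pre-bind the lowbit-specific kwargs so the call site can pass
# the same (groupsize, device, precision) arguments as the other AO quantizers.
make_a8wlow_quantizer = partial(
    Int8DynActLowbitWeightQuantizer,
    bitwidth=4,
    has_weight_zeros=False,
    squeeze_unsqueeze_dim0=True,
)

# Constructed exactly like Int8DynActInt4WeightQuantizer:
quant_handler = make_a8wlow_quantizer(groupsize=128, device="cpu", precision=torch.float32)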
@@ -581,3 +593,33 @@ def quantized_model(self) -> nn.Module:
    "linear:int4": Int4WeightOnlyQuantizer,
    "linear:a8w4dq": Int8DynActInt4WeightQuantizer,
}

try:
    import importlib.util
    import sys
    import os
    torchao_build_path = f"{os.getcwd()}/torchao-build"

    # Load quantizer
    torchao_experimental_spec = importlib.util.spec_from_file_location(
        "torchao_experimental",
        f"{torchao_build_path}/src/ao/torchao/experimental/kernels/cpu/linear/examples/torch_custom_op/torch_custom_op.py",
    )
    torchao_experimental = importlib.util.module_from_spec(torchao_experimental_spec)
    sys.modules["torchao_experimental"] = torchao_experimental
    torchao_experimental_spec.loader.exec_module(torchao_experimental)
    from torchao_experimental import Int8DynActLowbitWeightQuantizer
    ao_quantizer_class_dict["linear:a8wlow"] = Int8DynActLowbitWeightQuantizer

    # Try loading custom op
    try:
        import glob
        libs = glob.glob(f"{torchao_build_path}/cmake-out/liblowbit_op_aten.*")
        libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs))
        torch.ops.load_library(libs[0])
    except Exception as e:
        print("Failed to load custom ops: ", e)
        print("Slow fallback kernels will be used.")

except Exception as e:
    print(f"Failed to use torchao_experimental kernels: {e}")
3 changes: 3 additions & 0 deletions runner/aoti.cmake
@@ -28,3 +28,6 @@ if(Torch_FOUND)
  target_link_libraries(aoti_run "${TORCH_LIBRARIES}" m)
  set_property(TARGET aoti_run PROPERTY CXX_STANDARD 17)
endif()


target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/liblowbit_op_aten${CMAKE_SHARED_LIBRARY_SUFFIX}")
2 changes: 2 additions & 0 deletions runner/et.cmake
@@ -129,3 +129,5 @@ if(executorch_FOUND)
else()
  MESSAGE(WARNING "ExecuTorch package not found")
endif()

target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/liblowbit_op_executorch${CMAKE_SHARED_LIBRARY_SUFFIX}")
8 changes: 8 additions & 0 deletions scripts/build_native.sh
@@ -60,6 +60,10 @@ if [ -z "${ET_BUILD_DIR}" ]; then
  ET_BUILD_DIR="et-build"
fi

if [ -z "${TORCHAO_BUILD_DIR}" ]; then
  TORCHAO_BUILD_DIR="torchao-build"
fi

source "$TORCHCHAT_ROOT/scripts/install_utils.sh"

pushd ${TORCHCHAT_ROOT}
@@ -70,6 +74,10 @@ if [[ "$TARGET" == "et" ]]; then
  install_pip_dependencies
  clone_executorch
  install_executorch_libs false

  EXECUTORCH_INCLUDE_DIRS=${TORCHCHAT_ROOT}/et-build/src
  EXECUTORCH_LIBRARIES=${TORCHCHAT_ROOT}/et-build/install/lib/libexecutorch_no_prim_ops.a
  install_torchao_custom_executorch_ops
fi
popd
23 changes: 23 additions & 0 deletions scripts/build_torchao_custom_ops.sh
@@ -0,0 +1,23 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

if [ -z "${TORCHCHAT_ROOT}" ]; then
  # Get the absolute path of the current script
  SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
  # Get the absolute path of the parent directory
  TORCHCHAT_ROOT="$(dirname "$SCRIPT_PATH")"
fi

if [ -z "${TORCHAO_BUILD_DIR}" ]; then
  TORCHAO_BUILD_DIR="torchao-build"
fi

source "$TORCHCHAT_ROOT/scripts/install_utils.sh"

find_cmake_prefix_path
clone_torchao
install_torchao_custom_aten_ops
51 changes: 51 additions & 0 deletions scripts/install_utils.sh
@@ -124,3 +124,54 @@ install_executorch_libs() {

  install_executorch_python_libs $1
}

Contributor commented on the clone_torchao function:

We have a pin for 0.5.0 that you might be able to piggyback off of; it can save you some effort: #1136

Contributor Author replied:

This is cloning torchao from source, but the pin looks like it is from pip?

Contributor replied:

We can decide on it when your diff lands. The big thing here is that AO doesn't have Mac nightlies, so I'm fine with us moving back to direct clones for Mac.

clone_torchao() {
echo "Cloning torchao to ${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/src"
rm -rf ${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/src
mkdir -p ${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/src
pushd ${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/src
echo $pwd

cp -R /Users/scroy/fbsource/fbcode/pytorch/ao .
# git clone https://github.com/pytorch/ao.git

Contributor Author commented on the lines above:

TODO: uncomment after D62394341 lands and the commit hash is updated.

  # cd ao
  # git checkout $(cat ${TORCHCHAT_ROOT}/.pins/torchao-pin.txt)

  popd
}

install_torchao_custom_aten_ops() {
  echo "Installing custom torchao ops"
  pushd ${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/src/ao/torchao/experimental/kernels/cpu/linear/examples/torch_custom_op
  export TORCHAO_INCLUDE_DIRS=${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/src/ao

  if [ "${CMAKE_OUT_DIR}" == "" ]; then
    CMAKE_OUT_DIR="${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/cmake-out"
  fi

  cmake -DTORCHAO_INCLUDE_DIRS=${TORCHAO_INCLUDE_DIRS} \
    -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
    -DPLATFORM="ATEN" \
    -S . \
    -B ${CMAKE_OUT_DIR} -G Ninja
  cmake --build ${CMAKE_OUT_DIR}
}

install_torchao_custom_executorch_ops() {
  echo "Installing custom torchao ops"
  pushd ${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/src/ao/torchao/experimental/kernels/cpu/linear/examples/torch_custom_op
  export TORCHAO_INCLUDE_DIRS=${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/src/ao

  if [ "${CMAKE_OUT_DIR}" == "" ]; then
    CMAKE_OUT_DIR="${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/cmake-out"
  fi

  cmake -DTORCHAO_INCLUDE_DIRS=${TORCHAO_INCLUDE_DIRS} \
    -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
    -DEXECUTORCH_INCLUDE_DIRS=${EXECUTORCH_INCLUDE_DIRS} \
    -DEXECUTORCH_LIBRARIES=${EXECUTORCH_LIBRARIES} \
    -DPLATFORM="EXECUTORCH" \
    -S . \
    -B ${CMAKE_OUT_DIR} -G Ninja
  cmake --build ${CMAKE_OUT_DIR}
}