Skip to content

Commit

Permalink
Add tuning tool for permutation rank3
Browse files Browse the repository at this point in the history
  • Loading branch information
CongMa13 committed Nov 20, 2024
1 parent c97246a commit b16e9e3
Show file tree
Hide file tree
Showing 18 changed files with 709 additions and 126 deletions.
38 changes: 28 additions & 10 deletions tuning/permutation/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,35 @@
# Make the ck includes visible so we can build instances.
# Reads INTERFACE_INCLUDE_DIRECTORIES from composable_kernel::device_other_operations
# into composable_kernel_INCLUDES.
get_target_property(composable_kernel_INCLUDES composable_kernel::device_other_operations INTERFACE_INCLUDE_DIRECTORIES)

# NOTE(review): from here on the listing appears to interleave the previous
# single-target script with the new per-rank function (the target name is set
# twice and the target is configured twice) — confirm statement order against
# the actual CMakeLists.txt.

# Name of the permutation tuning executable target.
set(HIPTENSOR_PERMUTATION_TUNING "permutation_tuning")

# Sources compiled into the permutation tuning executable.
set(HIPTENSOR_PERMUTATION_TUNING_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/permutation_tuning.cpp
)
# add_permutation_tuning(RANK)
#
# Configures a permutation tuning executable for one RANK value. RANK is
# substituted into the target name and into every source file name below.
function(add_permutation_tuning RANK)

add_executable(${HIPTENSOR_PERMUTATION_TUNING} ${HIPTENSOR_PERMUTATION_TUNING_SOURCES})
# Per-RANK target name, e.g. permutation_tuning_3 for RANK=3.
set(HIPTENSOR_PERMUTATION_TUNING "permutation_tuning_${RANK}")

set_target_properties(${HIPTENSOR_PERMUTATION_TUNING} PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_compile_options(${HIPTENSOR_PERMUTATION_TUNING} PRIVATE ${CMAKE_CXX_FLAGS} ${CLANG_DRIVER_MODE})
target_link_options(${HIPTENSOR_PERMUTATION_TUNING} PRIVATE ${CLANG_DRIVER_MODE})
target_include_directories(${HIPTENSOR_PERMUTATION_TUNING} PRIVATE ${composable_kernel_INCLUDES})
target_link_libraries(${HIPTENSOR_PERMUTATION_TUNING} PRIVATE hip::device hip::host)
# Per-RANK source list: the aggregator (permutation_instance_${RANK}.cpp), one
# file per data type (F16/F32) and instance configuration, and the tuning driver
# (permutation_tuning_${RANK}.cpp).
# NOTE(review): no *_256_128_128_32_32.cpp entries are listed, although
# generators with that suffix are defined elsewhere in this commit — confirm
# whether that configuration is intentionally excluded from the build.
set(HIPTENSOR_PERMUTATION_TUNING_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/permutation_instance_${RANK}.cpp
${CMAKE_CURRENT_SOURCE_DIR}/permutation_instance_F16_${RANK}_256_64_64_4_4.cpp
${CMAKE_CURRENT_SOURCE_DIR}/permutation_instance_F32_${RANK}_256_64_64_4_4.cpp
${CMAKE_CURRENT_SOURCE_DIR}/permutation_instance_F16_${RANK}_256_64_64_16_16.cpp
${CMAKE_CURRENT_SOURCE_DIR}/permutation_instance_F32_${RANK}_256_64_64_16_16.cpp
${CMAKE_CURRENT_SOURCE_DIR}/permutation_instance_F16_${RANK}_256_128_128_8_8.cpp
${CMAKE_CURRENT_SOURCE_DIR}/permutation_instance_F32_${RANK}_256_128_128_8_8.cpp
${CMAKE_CURRENT_SOURCE_DIR}/permutation_instance_F16_${RANK}_256_128_128_16_16.cpp
${CMAKE_CURRENT_SOURCE_DIR}/permutation_instance_F32_${RANK}_256_128_128_16_16.cpp
${CMAKE_CURRENT_SOURCE_DIR}/permutation_instance_F16_${RANK}_miscellaneous.cpp
${CMAKE_CURRENT_SOURCE_DIR}/permutation_instance_F32_${RANK}_miscellaneous.cpp
${CMAKE_CURRENT_SOURCE_DIR}/permutation_tuning_${RANK}.cpp
)

# Create the executable target from the per-RANK source list.
add_executable(${HIPTENSOR_PERMUTATION_TUNING} ${HIPTENSOR_PERMUTATION_TUNING_SOURCES})

# Target-private build settings: PIC, compile/link options, include paths and
# the HIP device/host link targets.
set_target_properties(${HIPTENSOR_PERMUTATION_TUNING} PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_compile_options(${HIPTENSOR_PERMUTATION_TUNING} PRIVATE ${CMAKE_CXX_FLAGS} ${CLANG_DRIVER_MODE})
target_link_options(${HIPTENSOR_PERMUTATION_TUNING} PRIVATE ${CLANG_DRIVER_MODE})
target_include_directories(${HIPTENSOR_PERMUTATION_TUNING} PRIVATE ${composable_kernel_INCLUDES})
# Also expose library/src/include from the top-level source tree.
target_include_directories(${HIPTENSOR_PERMUTATION_TUNING} PRIVATE "${CMAKE_SOURCE_DIR}/library/src/include/" )
target_link_libraries(${HIPTENSOR_PERMUTATION_TUNING} PRIVATE hip::device hip::host)

endfunction()

# Instantiate the tuning target for RANK=3.
add_permutation_tuning(3)
247 changes: 247 additions & 0 deletions tuning/permutation/permutation_instance.hpp

Large diffs are not rendered by default.

84 changes: 84 additions & 0 deletions tuning/permutation/permutation_instance_3.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#include "permutation_instance.hpp"

namespace hiptensor
{
namespace tuning
{

namespace permutation
{

// Forward declarations of the per-configuration instance generators used by
// genInstances_F16_3() and genInstances_F32_3(). Each takes the list to fill by
// reference. They are listed per element type in the order the aggregators
// invoke them.

// ---- F16 ----
void genInstances_F16_3_256_64_64_4_4(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F16>, ck::Tuple<F16>, UnaryScaleSquare, 3>>>&
        instances);
void genInstances_F16_3_256_64_64_16_16(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F16>, ck::Tuple<F16>, UnaryScaleSquare, 3>>>&
        instances);
void genInstances_F16_3_256_128_128_8_8(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F16>, ck::Tuple<F16>, UnaryScaleSquare, 3>>>&
        instances);
void genInstances_F16_3_256_128_128_16_16(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F16>, ck::Tuple<F16>, UnaryScaleSquare, 3>>>&
        instances);
void genInstances_F16_3_miscellaneous(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F16>, ck::Tuple<F16>, UnaryScaleSquare, 3>>>&
        instances);

// ---- F32 ----
void genInstances_F32_3_256_64_64_4_4(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F32>, ck::Tuple<F32>, UnaryScaleSquare, 3>>>&
        instances);
void genInstances_F32_3_256_64_64_16_16(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F32>, ck::Tuple<F32>, UnaryScaleSquare, 3>>>&
        instances);
void genInstances_F32_3_256_128_128_8_8(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F32>, ck::Tuple<F32>, UnaryScaleSquare, 3>>>&
        instances);
void genInstances_F32_3_256_128_128_16_16(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F32>, ck::Tuple<F32>, UnaryScaleSquare, 3>>>&
        instances);
void genInstances_F32_3_miscellaneous(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F32>, ck::Tuple<F32>, UnaryScaleSquare, 3>>>&
        instances);

std::vector<std::unique_ptr<
DeviceElementwise<ck::Tuple<F16>, ck::Tuple<F16>, UnaryScaleSquare, 3>>>
genInstances_F16_3()
{
std::vector<std::unique_ptr<
DeviceElementwise<ck::Tuple<F16>, ck::Tuple<F16>, UnaryScaleSquare, 3>>>
instances;
genInstances_F16_3_256_64_64_4_4(instances);
genInstances_F16_3_256_64_64_16_16(instances);
genInstances_F16_3_256_128_128_8_8(instances);
genInstances_F16_3_256_128_128_16_16(instances);
genInstances_F16_3_miscellaneous(instances);
return instances;
}

std::vector<std::unique_ptr<
DeviceElementwise<ck::Tuple<F32>, ck::Tuple<F32>, UnaryScaleSquare, 3>>>
genInstances_F32_3()
{
std::vector<std::unique_ptr<
DeviceElementwise<ck::Tuple<F32>, ck::Tuple<F32>, UnaryScaleSquare, 3>>>
instances;
genInstances_F32_3_256_64_64_4_4(instances);
genInstances_F32_3_256_64_64_16_16(instances);
genInstances_F32_3_256_128_128_8_8(instances);
genInstances_F32_3_256_128_128_16_16(instances);
genInstances_F32_3_miscellaneous(instances);
return instances;
}

}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "permutation_instance.hpp"

namespace hiptensor::tuning::permutation
{

// Non-template entry point for the <F16, 3> instantiation of
// genInstances_256_128_128_16_16; passes `instances` straight through.
// Presumably kept in its own file so the instantiation compiles as a separate
// translation unit — confirm against the build setup.
void genInstances_F16_3_256_128_128_16_16(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F16>, ck::Tuple<F16>, UnaryScaleSquare, 3>>>&
        instances)
{
    genInstances_256_128_128_16_16<F16, 3>(instances);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "permutation_instance.hpp"

namespace hiptensor::tuning::permutation
{

// Non-template entry point for the <F16, 3> instantiation of
// genInstances_256_128_128_32_32; passes `instances` straight through.
// NOTE(review): this generator is not declared or called in
// permutation_instance_3.cpp, and no *_32_32.cpp file appears in the CMake
// source list — confirm whether it is intentionally unused.
void genInstances_F16_3_256_128_128_32_32(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F16>, ck::Tuple<F16>, UnaryScaleSquare, 3>>>&
        instances)
{
    genInstances_256_128_128_32_32<F16, 3>(instances);
}

}
13 changes: 13 additions & 0 deletions tuning/permutation/permutation_instance_F16_3_256_128_128_8_8.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "permutation_instance.hpp"

namespace hiptensor::tuning::permutation
{

// Non-template entry point for the <F16, 3> instantiation of
// genInstances_256_128_128_8_8; passes `instances` straight through.
// Presumably kept in its own file so the instantiation compiles as a separate
// translation unit — confirm against the build setup.
void genInstances_F16_3_256_128_128_8_8(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F16>, ck::Tuple<F16>, UnaryScaleSquare, 3>>>&
        instances)
{
    genInstances_256_128_128_8_8<F16, 3>(instances);
}

}
13 changes: 13 additions & 0 deletions tuning/permutation/permutation_instance_F16_3_256_64_64_16_16.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "permutation_instance.hpp"

namespace hiptensor::tuning::permutation
{

// Non-template entry point for the <F16, 3> instantiation of
// genInstances_256_64_64_16_16; passes `instances` straight through.
// Presumably kept in its own file so the instantiation compiles as a separate
// translation unit — confirm against the build setup.
void genInstances_F16_3_256_64_64_16_16(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F16>, ck::Tuple<F16>, UnaryScaleSquare, 3>>>&
        instances)
{
    genInstances_256_64_64_16_16<F16, 3>(instances);
}

}
13 changes: 13 additions & 0 deletions tuning/permutation/permutation_instance_F16_3_256_64_64_4_4.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "permutation_instance.hpp"

namespace hiptensor::tuning::permutation
{

// Non-template entry point for the <F16, 3> instantiation of
// genInstances_256_64_64_4_4; passes `instances` straight through.
// Presumably kept in its own file so the instantiation compiles as a separate
// translation unit — confirm against the build setup.
void genInstances_F16_3_256_64_64_4_4(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F16>, ck::Tuple<F16>, UnaryScaleSquare, 3>>>&
        instances)
{
    genInstances_256_64_64_4_4<F16, 3>(instances);
}

}
13 changes: 13 additions & 0 deletions tuning/permutation/permutation_instance_F16_3_miscellaneous.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "permutation_instance.hpp"

namespace hiptensor::tuning::permutation
{

// Non-template entry point for the <F16, 3> instantiation of
// genInstances_miscellaneous; passes `instances` straight through.
// Presumably kept in its own file so the instantiation compiles as a separate
// translation unit — confirm against the build setup.
void genInstances_F16_3_miscellaneous(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F16>, ck::Tuple<F16>, UnaryScaleSquare, 3>>>&
        instances)
{
    genInstances_miscellaneous<F16, 3>(instances);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "permutation_instance.hpp"

namespace hiptensor::tuning::permutation
{

// Non-template entry point for the <F32, 3> instantiation of
// genInstances_256_128_128_16_16; passes `instances` straight through.
// Presumably kept in its own file so the instantiation compiles as a separate
// translation unit — confirm against the build setup.
void genInstances_F32_3_256_128_128_16_16(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F32>, ck::Tuple<F32>, UnaryScaleSquare, 3>>>&
        instances)
{
    genInstances_256_128_128_16_16<F32, 3>(instances);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "permutation_instance.hpp"

namespace hiptensor::tuning::permutation
{

// Non-template entry point for the <F32, 3> instantiation of
// genInstances_256_128_128_32_32; passes `instances` straight through.
// NOTE(review): this generator is not declared or called in
// permutation_instance_3.cpp, and no *_32_32.cpp file appears in the CMake
// source list — confirm whether it is intentionally unused.
void genInstances_F32_3_256_128_128_32_32(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F32>, ck::Tuple<F32>, UnaryScaleSquare, 3>>>&
        instances)
{
    genInstances_256_128_128_32_32<F32, 3>(instances);
}

}
13 changes: 13 additions & 0 deletions tuning/permutation/permutation_instance_F32_3_256_128_128_8_8.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "permutation_instance.hpp"

namespace hiptensor::tuning::permutation
{

// Non-template entry point for the <F32, 3> instantiation of
// genInstances_256_128_128_8_8; passes `instances` straight through.
// Presumably kept in its own file so the instantiation compiles as a separate
// translation unit — confirm against the build setup.
void genInstances_F32_3_256_128_128_8_8(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F32>, ck::Tuple<F32>, UnaryScaleSquare, 3>>>&
        instances)
{
    genInstances_256_128_128_8_8<F32, 3>(instances);
}

}
13 changes: 13 additions & 0 deletions tuning/permutation/permutation_instance_F32_3_256_64_64_16_16.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "permutation_instance.hpp"

namespace hiptensor::tuning::permutation
{

// Non-template entry point for the <F32, 3> instantiation of
// genInstances_256_64_64_16_16; passes `instances` straight through.
// Presumably kept in its own file so the instantiation compiles as a separate
// translation unit — confirm against the build setup.
void genInstances_F32_3_256_64_64_16_16(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F32>, ck::Tuple<F32>, UnaryScaleSquare, 3>>>&
        instances)
{
    genInstances_256_64_64_16_16<F32, 3>(instances);
}

}
13 changes: 13 additions & 0 deletions tuning/permutation/permutation_instance_F32_3_256_64_64_4_4.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "permutation_instance.hpp"

namespace hiptensor::tuning::permutation
{

// Non-template entry point for the <F32, 3> instantiation of
// genInstances_256_64_64_4_4; passes `instances` straight through.
// Presumably kept in its own file so the instantiation compiles as a separate
// translation unit — confirm against the build setup.
void genInstances_F32_3_256_64_64_4_4(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F32>, ck::Tuple<F32>, UnaryScaleSquare, 3>>>&
        instances)
{
    genInstances_256_64_64_4_4<F32, 3>(instances);
}

}
13 changes: 13 additions & 0 deletions tuning/permutation/permutation_instance_F32_3_miscellaneous.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "permutation_instance.hpp"

namespace hiptensor::tuning::permutation
{

// Non-template entry point for the <F32, 3> instantiation of
// genInstances_miscellaneous; passes `instances` straight through.
// Presumably kept in its own file so the instantiation compiles as a separate
// translation unit — confirm against the build setup.
void genInstances_F32_3_miscellaneous(
    std::vector<
        std::unique_ptr<DeviceElementwise<ck::Tuple<F32>, ck::Tuple<F32>, UnaryScaleSquare, 3>>>&
        instances)
{
    genInstances_miscellaneous<F32, 3>(instances);
}

}
Loading

0 comments on commit b16e9e3

Please sign in to comment.