diff --git a/.github/workflows/flex.yml b/.github/workflows/flex.yml index c91e84d29b83..3dcc04a06099 100644 --- a/.github/workflows/flex.yml +++ b/.github/workflows/flex.yml @@ -22,11 +22,11 @@ concurrency: cancel-in-progress: true jobs: - test-hqps-engine: + build-flex: runs-on: ubuntu-20.04 if: ${{ github.repository == 'alibaba/GraphScope' }} container: - image: registry.cn-hongkong.aliyuncs.com/graphscope/hqps-server-base:v0.0.3 + image: registry.cn-hongkong.aliyuncs.com/graphscope/hqps-server-base:v0.0.4 steps: - uses: actions/checkout@v3 @@ -46,4 +46,13 @@ jobs: run: | cd ${GITHUB_WORKSPACE}/flex mkdir build && cd build - cmake .. && sudo make -j$(nproc) \ No newline at end of file + cmake .. && sudo make -j$(nproc) + + - name: GRIN on mutable csr test + run: | + git submodule update --init + cd flex/engines/graph_db/grin + mkdir build && cd build + cmake .. && sudo make -j$(nproc) + export FLEX_DATA_DIR=../../../../storages/rt_mutable_graph/modern_graph/ + ./run_grin_test diff --git a/.github/workflows/gaia.yml b/.github/workflows/gaia.yml index 9e0a6b6a9a88..16d1478aeb9f 100644 --- a/.github/workflows/gaia.yml +++ b/.github/workflows/gaia.yml @@ -94,7 +94,7 @@ jobs: run: | cd ${GITHUB_WORKSPACE} git clone -b master --single-branch --depth=1 https://github.com/7br/gstest.git /tmp/gstest - cd ${GITHUB_WORKSPACE}/interactive_engine/compiler && ./ir_exprimental_pattern_ci.sh + cd ${GITHUB_WORKSPACE}/interactive_engine/compiler && ./ir_exprimental_advanced_ci.sh - name: Ir Integration Test on Csr Store run: | diff --git a/.github/workflows/hqps-db-ci.yml b/.github/workflows/hqps-db-ci.yml new file mode 100644 index 000000000000..46cbb86860d3 --- /dev/null +++ b/.github/workflows/hqps-db-ci.yml @@ -0,0 +1,114 @@ +name: GraphScope GIE HighQPS DB CI + +on: + # Trigger the workflow on push or pull request, + # but only for the main branch + workflow_dispatch: + push: + branches: + - main + paths: + - 'flex/**' + - 'interactive_engine/**' + - 
'.github/workflows/hqps-db-ci.yml' + pull_request: + branches: + - main + paths: + - 'flex/**' + - 'interactive_engine/**' + - '.github/workflows/hqps-db-ci.yml' + +concurrency: + group: ${{ github.repository }}-${{ github.event.number || github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +jobs: + test-hqps-engine: + runs-on: ubuntu-20.04 + if: ${{ github.repository == 'alibaba/GraphScope' }} + container: + image: registry.cn-hongkong.aliyuncs.com/graphscope/hqps-server-base:v0.0.4 + steps: + - uses: actions/checkout@v3 + + - uses: actions/cache@v3 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - uses: actions/cache@v3 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + ~/.cache/sccache + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + + - name: Install latest libgrape-lite + if: false + run: | + git clone --single-branch https://github.com/alibaba/libgrape-lite.git /tmp/libgrape-lite + cd /tmp/libgrape-lite + mkdir -p build && cd build + cmake .. + make -j$(nproc) + make install + + - name: Setup tmate session + if: false + uses: mxschmitt/action-tmate@v3 + + - name: Build + env: + GIE_HOME: ${{ github.workspace }}/interactive_engine/ + HOME: /home/graphscope/ + run: | + cd ${GITHUB_WORKSPACE}/flex + mkdir build && cd build + cmake .. && sudo make -j$(nproc) + sudo make install + + # cargo + . /home/graphscope/.cargo/env + which cargo + + # build compiler + cd ${GIE_HOME}/compiler + make build + + - name: Run codegen test. 
+ env: + GS_TEST_DIR: ${{ github.workspace }}/gstest + GIE_HOME: ${{ github.workspace }}/interactive_engine/ + HOME : /home/graphscope/ + run: | + # download dataset + git clone -b master --single-branch --depth=1 https://github.com/GraphScope/gstest.git ${GS_TEST_DIR} + + #flex_test_dir=${GS_TEST_DIR}/flex + ## preprocess bulk_load.yaml to use gstest_dir + #sed -i "s|workspaces/gstest|$GS_TEST_DIR|" ${flex_test_dir}/ldbc-sf01-long-date/audit_bulk_load.yaml + + # create tmp ir.compiler.properties + touch /tmp/ir.compiler.properties + echo "engine.type: hiactor" >> /tmp/ir.compiler.properties + echo "graph.schema: ${GIE_HOME}/executor/ir/core/resource/modern_schema.json" >> /tmp/ir.compiler.properties + echo "graph.store: exp" >> /tmp/ir.compiler.properties + echo "graph.planner: {\"isOn\":true,\"opt\":\"RBO\",\"rules\":[\"FilterMatchRule\"]}" >> /tmp/ir.compiler.properties + + cd ${GITHUB_WORKSPACE}/flex/bin + + for i in 2 3 5 6 8 9 11 12; + do + cmd="./load_plan_and_gen.sh -e=hqps -i=../resources/queries/ic/adhoc/ic${i}_adhoc.cypher -w=/tmp/codgen/" + cmd=${cmd}" -o=/tmp/plugin --ir_conf=/tmp/ir.compiler.properties " + cmd=${cmd}" --graph_schema_path=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/ldbc_schema_csr_ic.json" + echo $cmd + eval ${cmd} + done + diff --git a/.gitmodules b/.gitmodules index f038d8b178ac..dabe493f60bd 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,7 @@ [submodule "learning_engine/graph-learn"] path = learning_engine/graph-learn url = https://github.com/alibaba/graph-learn.git + +[submodule "flex/grin"] + path = flex/grin + url = https://github.com/GraphScope/GRIN.git diff --git a/analytical_engine/CMakeLists.txt b/analytical_engine/CMakeLists.txt index 55425acd8d5b..2913b5473076 100644 --- a/analytical_engine/CMakeLists.txt +++ b/analytical_engine/CMakeLists.txt @@ -278,10 +278,10 @@ if(Arrow_FOUND AND Arrow_VERSION VERSION_GREATER 9.0.1) endif() # Generate proto -execute_process(COMMAND python3 python/graphscope/proto/proto_generator.py 
"${PROJECT_SOURCE_DIR}" --cpp - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/..) +execute_process(COMMAND python3 ../proto/proto_generator.py "${PROJECT_SOURCE_DIR}/proto" --cpp + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) -file(GLOB PROTO_CPP_FILES "graphscope/proto/*.cc") +file(GLOB PROTO_CPP_FILES "proto/*.cc") file(GLOB CORE_RPC_SRC_FILES "core/server/*.cc") # Add targets @@ -569,7 +569,7 @@ endif() install_gsa_headers("${PROJECT_SOURCE_DIR}/apps") install_gsa_headers("${PROJECT_SOURCE_DIR}/benchmarks") install_gsa_headers("${PROJECT_SOURCE_DIR}/core") -install_gsa_headers("${PROJECT_SOURCE_DIR}/graphscope") +install_gsa_headers("${PROJECT_SOURCE_DIR}/proto") install_gsa_app_frames("${PROJECT_SOURCE_DIR}/frame") install_gsa_dependency_modules("${PROJECT_SOURCE_DIR}/cmake") diff --git a/analytical_engine/core/app/app_invoker.h b/analytical_engine/core/app/app_invoker.h index be4c38ff5808..eb3cf8c21942 100644 --- a/analytical_engine/core/app/app_invoker.h +++ b/analytical_engine/core/app/app_invoker.h @@ -29,8 +29,8 @@ #endif #include "core/config.h" #include "core/error.h" -#include "graphscope/proto/data_types.pb.h" -#include "graphscope/proto/types.pb.h" +#include "proto/data_types.pb.h" +#include "proto/types.pb.h" namespace bl = boost::leaf; diff --git a/analytical_engine/core/context/vertex_data_context.h b/analytical_engine/core/context/vertex_data_context.h index 1ae381d38d3d..c1671853899c 100644 --- a/analytical_engine/core/context/vertex_data_context.h +++ b/analytical_engine/core/context/vertex_data_context.h @@ -50,7 +50,7 @@ #include "core/server/rpc_utils.h" #include "core/utils/mpi_utils.h" #include "core/utils/transform_utils.h" -#include "graphscope/proto/types.pb.h" +#include "proto/types.pb.h" #define CONTEXT_TYPE_VERTEX_DATA "vertex_data" #define CONTEXT_TYPE_LABELED_VERTEX_DATA "labeled_vertex_data" diff --git a/analytical_engine/core/error.h b/analytical_engine/core/error.h index e2bfb51dc48b..a5a1e0c6f60f 100644 --- 
a/analytical_engine/core/error.h +++ b/analytical_engine/core/error.h @@ -24,7 +24,7 @@ #include "vineyard/graph/utils/error.h" // IWYU pragma: export -#include "graphscope/proto/error_codes.pb.h" // IWYU pragma: export +#include "proto/error_codes.pb.h" // IWYU pragma: export namespace gs { diff --git a/analytical_engine/core/fragment/arrow_projected_fragment.h b/analytical_engine/core/fragment/arrow_projected_fragment.h index 8afeee239a7a..8a1e2f368110 100644 --- a/analytical_engine/core/fragment/arrow_projected_fragment.h +++ b/analytical_engine/core/fragment/arrow_projected_fragment.h @@ -41,7 +41,7 @@ #include "core/config.h" #include "core/fragment/arrow_projected_fragment_base.h" // IWYU pragma: export #include "core/vertex_map/arrow_projected_vertex_map.h" -#include "graphscope/proto/types.pb.h" +#include "proto/types.pb.h" namespace arrow { class Array; diff --git a/analytical_engine/core/fragment/dynamic_fragment.h b/analytical_engine/core/fragment/dynamic_fragment.h index fa9e1db8fae5..734e745e2227 100644 --- a/analytical_engine/core/fragment/dynamic_fragment.h +++ b/analytical_engine/core/fragment/dynamic_fragment.h @@ -42,7 +42,7 @@ #include "core/object/dynamic.h" #include "core/utils/convert_utils.h" #include "core/utils/partitioner.h" -#include "graphscope/proto/types.pb.h" +#include "proto/types.pb.h" namespace gs { diff --git a/analytical_engine/core/fragment/fragment_reporter.h b/analytical_engine/core/fragment/fragment_reporter.h index 1ac8d424020f..0cb332b4a721 100644 --- a/analytical_engine/core/fragment/fragment_reporter.h +++ b/analytical_engine/core/fragment/fragment_reporter.h @@ -39,7 +39,7 @@ #include "core/server/rpc_utils.h" #include "core/utils/convert_utils.h" #include "core/utils/msgpack_utils.h" -#include "graphscope/proto/types.pb.h" +#include "proto/types.pb.h" namespace bl = boost::leaf; diff --git a/analytical_engine/core/grape_instance.cc b/analytical_engine/core/grape_instance.cc index 97f701931ac5..c49357f73875 100644 --- 
a/analytical_engine/core/grape_instance.cc +++ b/analytical_engine/core/grape_instance.cc @@ -65,9 +65,9 @@ #include "core/server/command_detail.h" #include "core/server/rpc_utils.h" #include "core/utils/mpi_utils.h" -#include "graphscope/proto/attr_value.pb.h" -#include "graphscope/proto/graph_def.pb.h" -#include "graphscope/proto/types.pb.h" +#include "proto/attr_value.pb.h" +#include "proto/graph_def.pb.h" +#include "proto/types.pb.h" namespace bl = boost::leaf; diff --git a/analytical_engine/core/grape_instance.h b/analytical_engine/core/grape_instance.h index 460eaa8ff784..03e32fbb6fa8 100644 --- a/analytical_engine/core/grape_instance.h +++ b/analytical_engine/core/grape_instance.h @@ -38,7 +38,7 @@ #include "core/object/object_manager.h" #include "core/server/dispatcher.h" #include "core/server/rpc_utils.h" -#include "graphscope/proto/types.pb.h" +#include "proto/types.pb.h" namespace bl = boost::leaf; diff --git a/analytical_engine/core/io/property_parser.h b/analytical_engine/core/io/property_parser.h index a55e3201e29d..acf3e1da25ca 100644 --- a/analytical_engine/core/io/property_parser.h +++ b/analytical_engine/core/io/property_parser.h @@ -34,8 +34,8 @@ #include "vineyard/common/util/status.h" #include "core/server/rpc_utils.h" -#include "graphscope/proto/attr_value.pb.h" -#include "graphscope/proto/types.pb.h" +#include "proto/attr_value.pb.h" +#include "proto/types.pb.h" namespace bl = boost::leaf; diff --git a/analytical_engine/core/object/dynamic.h b/analytical_engine/core/object/dynamic.h index 35d7c6c48ef3..d1b6a30fb8f8 100644 --- a/analytical_engine/core/object/dynamic.h +++ b/analytical_engine/core/object/dynamic.h @@ -32,7 +32,7 @@ // IWYU pragma: end_exports #include "grape/serialization/in_archive.h" -#include "graphscope/proto/graph_def.pb.h" +#include "proto/graph_def.pb.h" namespace gs { diff --git a/analytical_engine/core/object/fragment_wrapper.h b/analytical_engine/core/object/fragment_wrapper.h index a178dc435e87..aded8aaaea68 100644 
--- a/analytical_engine/core/object/fragment_wrapper.h +++ b/analytical_engine/core/object/fragment_wrapper.h @@ -61,8 +61,8 @@ #include "core/object/i_fragment_wrapper.h" #include "core/server/rpc_utils.h" #include "core/utils/transform_utils.h" -#include "graphscope/proto/graph_def.pb.h" -#include "graphscope/proto/types.pb.h" +#include "proto/graph_def.pb.h" +#include "proto/types.pb.h" namespace bl = boost::leaf; diff --git a/analytical_engine/core/server/command_detail.cc b/analytical_engine/core/server/command_detail.cc index 47e50b51a06a..49a09539f3c1 100644 --- a/analytical_engine/core/server/command_detail.cc +++ b/analytical_engine/core/server/command_detail.cc @@ -21,8 +21,8 @@ #include "grape/serialization/in_archive.h" #include "grape/serialization/out_archive.h" -#include "graphscope/proto/attr_value.pb.h" -#include "graphscope/proto/types.pb.h" +#include "proto/attr_value.pb.h" +#include "proto/types.pb.h" namespace gs { diff --git a/analytical_engine/core/server/command_detail.h b/analytical_engine/core/server/command_detail.h index c33db5e77ae2..efc75ca65f25 100644 --- a/analytical_engine/core/server/command_detail.h +++ b/analytical_engine/core/server/command_detail.h @@ -19,8 +19,8 @@ #include #include -#include "graphscope/proto/attr_value.pb.h" -#include "graphscope/proto/types.pb.h" +#include "proto/attr_value.pb.h" +#include "proto/types.pb.h" namespace grape { class InArchive; diff --git a/analytical_engine/core/server/dispatcher.cc b/analytical_engine/core/server/dispatcher.cc index e10f4fd74d0b..efa3e535c7d3 100644 --- a/analytical_engine/core/server/dispatcher.cc +++ b/analytical_engine/core/server/dispatcher.cc @@ -37,8 +37,8 @@ #include "core/error.h" #include "core/io/property_parser.h" #include "core/server/command_detail.h" -#include "graphscope/proto/attr_value.pb.h" -#include "graphscope/proto/graph_def.pb.h" +#include "proto/attr_value.pb.h" +#include "proto/graph_def.pb.h" namespace bl = boost::leaf; diff --git 
a/analytical_engine/core/server/dispatcher.h b/analytical_engine/core/server/dispatcher.h index 1b2a7d30ce47..ecbffda33805 100644 --- a/analytical_engine/core/server/dispatcher.h +++ b/analytical_engine/core/server/dispatcher.h @@ -25,9 +25,9 @@ #include "grape/worker/comm_spec.h" #include "vineyard/common/util/blocking_queue.h" -#include "graphscope/proto/error_codes.pb.h" -#include "graphscope/proto/graph_def.pb.h" -#include "graphscope/proto/types.pb.h" +#include "proto/error_codes.pb.h" +#include "proto/graph_def.pb.h" +#include "proto/types.pb.h" namespace bl = boost::leaf; diff --git a/analytical_engine/core/server/graphscope_service.cc b/analytical_engine/core/server/graphscope_service.cc index 84dd07aa35c8..3e646a8ffae6 100644 --- a/analytical_engine/core/server/graphscope_service.cc +++ b/analytical_engine/core/server/graphscope_service.cc @@ -26,11 +26,11 @@ #include "grpcpp/support/sync_stream.h" #include "core/server/rpc_utils.h" -#include "graphscope/proto/attr_value.pb.h" -#include "graphscope/proto/error_codes.pb.h" -#include "graphscope/proto/graph_def.pb.h" -#include "graphscope/proto/message.pb.h" -#include "graphscope/proto/op_def.pb.h" +#include "proto/attr_value.pb.h" +#include "proto/error_codes.pb.h" +#include "proto/graph_def.pb.h" +#include "proto/message.pb.h" +#include "proto/op_def.pb.h" namespace gs { struct CommandDetail; diff --git a/analytical_engine/core/server/graphscope_service.h b/analytical_engine/core/server/graphscope_service.h index 435f3f87c74a..8e5850cab290 100644 --- a/analytical_engine/core/server/graphscope_service.h +++ b/analytical_engine/core/server/graphscope_service.h @@ -29,9 +29,9 @@ #include "grpcpp/support/sync_stream.h" #include "core/server/dispatcher.h" -#include "graphscope/proto/engine_service.grpc.pb.h" -#include "graphscope/proto/message.pb.h" -#include "graphscope/proto/op_def.pb.h" +#include "proto/engine_service.grpc.pb.h" +#include "proto/message.pb.h" +#include "proto/op_def.pb.h" namespace gs { 
namespace rpc { diff --git a/analytical_engine/core/server/rpc_utils.h b/analytical_engine/core/server/rpc_utils.h index bd7aa6b3b7fd..83314d84f46e 100644 --- a/analytical_engine/core/server/rpc_utils.h +++ b/analytical_engine/core/server/rpc_utils.h @@ -28,10 +28,10 @@ #include "core/config.h" #include "core/server/command_detail.h" -#include "graphscope/proto/attr_value.pb.h" -#include "graphscope/proto/graph_def.pb.h" -#include "graphscope/proto/op_def.pb.h" -#include "graphscope/proto/types.pb.h" +#include "proto/attr_value.pb.h" +#include "proto/graph_def.pb.h" +#include "proto/op_def.pb.h" +#include "proto/types.pb.h" namespace bl = boost::leaf; diff --git a/analytical_engine/frame/cython_app_frame.cc b/analytical_engine/frame/cython_app_frame.cc index 61f595bbc398..71914faff1ea 100644 --- a/analytical_engine/frame/cython_app_frame.cc +++ b/analytical_engine/frame/cython_app_frame.cc @@ -47,8 +47,8 @@ #include "core/app/pregel/pregel_property_app_base.h" #include "core/error.h" #include "frame/ctx_wrapper_builder.h" -#include "graphscope/proto/data_types.pb.h" -#include "graphscope/proto/types.pb.h" +#include "proto/data_types.pb.h" +#include "proto/types.pb.h" #include QUOTE(_APP_HEADER) diff --git a/analytical_engine/frame/cython_pie_app_frame.cc b/analytical_engine/frame/cython_pie_app_frame.cc index c8084a5b31fe..2771d98eeff5 100644 --- a/analytical_engine/frame/cython_pie_app_frame.cc +++ b/analytical_engine/frame/cython_pie_app_frame.cc @@ -45,8 +45,8 @@ #include "core/app/app_invoker.h" #include "core/error.h" #include "frame/ctx_wrapper_builder.h" -#include "graphscope/proto/data_types.pb.h" -#include "graphscope/proto/types.pb.h" +#include "proto/data_types.pb.h" +#include "proto/types.pb.h" #ifdef _APP_HEADER #include QUOTE(_APP_HEADER) diff --git a/analytical_engine/frame/project_frame.cc b/analytical_engine/frame/project_frame.cc index 112ce3743012..bf156240fff2 100644 --- a/analytical_engine/frame/project_frame.cc +++ 
b/analytical_engine/frame/project_frame.cc @@ -30,7 +30,7 @@ #include "core/object/fragment_wrapper.h" #include "core/server/rpc_utils.h" #include "core/utils/fragment_traits.h" -#include "graphscope/proto/attr_value.pb.h" +#include "proto/attr_value.pb.h" #if !defined(_PROJECTED_GRAPH_TYPE) #error "_PROJECTED_GRAPH_TYPE is undefined" diff --git a/analytical_engine/frame/property_graph_frame.cc b/analytical_engine/frame/property_graph_frame.cc index 9e0645e676c5..50ac6e9d89e7 100644 --- a/analytical_engine/frame/property_graph_frame.cc +++ b/analytical_engine/frame/property_graph_frame.cc @@ -33,8 +33,8 @@ #include "core/server/rpc_utils.h" #include "core/utils/fragment_traits.h" #include "core/vertex_map/arrow_projected_vertex_map.h" -#include "graphscope/proto/attr_value.pb.h" -#include "graphscope/proto/graph_def.pb.h" +#include "proto/attr_value.pb.h" +#include "proto/graph_def.pb.h" #if !defined(_GRAPH_TYPE) #error Missing _GRAPH_TYPE diff --git a/analytical_engine/test/run_java_app.cc b/analytical_engine/test/run_java_app.cc index 118dd5a8feb9..d5b75037a92a 100644 --- a/analytical_engine/test/run_java_app.cc +++ b/analytical_engine/test/run_java_app.cc @@ -49,9 +49,9 @@ #include "core/loader/arrow_fragment_loader.h" #include "core/object/fragment_wrapper.h" #include "core/utils/transform_utils.h" -#include "graphscope/proto/graph_def.pb.h" #include "java_pie/java_pie_projected_parallel_app.h" #include "java_pie/java_pie_property_parallel_app.h" +#include "proto/graph_def.pb.h" namespace bl = boost::leaf; diff --git a/coordinator/gscoordinator/kubernetes_launcher.py b/coordinator/gscoordinator/kubernetes_launcher.py index a83a18914dcf..ebf0a0d225b6 100644 --- a/coordinator/gscoordinator/kubernetes_launcher.py +++ b/coordinator/gscoordinator/kubernetes_launcher.py @@ -1148,7 +1148,7 @@ def _allocate_analytical_engine(self): def _distribute_analytical_process(self, pod_name_list, pod_ip_list): # generate and distribute hostfile - hosts = 
os.path.join(get_tempdir(), "kube_hosts") + hosts = os.path.join(get_tempdir(), "hosts_of_nodes") with open(hosts, "w") as f: for i, pod_ip in enumerate(pod_ip_list): f.write(f"{pod_ip} {pod_name_list[i]}\n") diff --git a/coordinator/gscoordinator/op_executor.py b/coordinator/gscoordinator/op_executor.py index 59bb3e3e3d88..4c5499ec78b3 100644 --- a/coordinator/gscoordinator/op_executor.py +++ b/coordinator/gscoordinator/op_executor.py @@ -355,7 +355,7 @@ def _create_analytical_grpc_stub(self): time.sleep(delay) delay *= 2 # back off raise RuntimeError( - "Failed to connect to engine in 60s, deployment may failed. Please check coordinator log for details" + "Failed to connect to engine in a reasonable time, deployment may failed. Please check coordinator log for details" ) @property diff --git a/coordinator/gscoordinator/template/CMakeLists.template b/coordinator/gscoordinator/template/CMakeLists.template index 2be48666c9ce..610d79472a16 100644 --- a/coordinator/gscoordinator/template/CMakeLists.template +++ b/coordinator/gscoordinator/template/CMakeLists.template @@ -263,7 +263,7 @@ endif() if(GRAPHSCOPE_ANALYTICAL_HOME) include_directories("${GRAPHSCOPE_ANALYTICAL_INCLUDE_DIRS}") include_directories("${GRAPHSCOPE_ANALYTICAL_HOME}/include/graphscope/apps") - include_directories("${GRAPHSCOPE_ANALYTICAL_HOME}/include/graphscope/proto") + include_directories("${GRAPHSCOPE_ANALYTICAL_HOME}/include/graphscope") # include vineyard---------------------------------------------------------- include_directories("${GRAPHSCOPE_ANALYTICAL_HOME}/include/vineyard") else() diff --git a/flex/.devcontainer.json b/flex/.devcontainer.json new file mode 100644 index 000000000000..da3df7ea09af --- /dev/null +++ b/flex/.devcontainer.json @@ -0,0 +1,32 @@ +// For format details, see https://aka.ms/devcontainer.json. 
For config options, see the +// README at: https://github.com/devcontainers/templates/tree/main/src/javascript-node +{ + "name": "GraphScope", + // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile + "image": "registry.cn-hongkong.aliyuncs.com/graphscope/hiactor-server:v0.0.1", + // Features to add to the dev container. More info: https://containers.dev/features. + "features": { + "ghcr.io/devcontainers/features/common-utils:2": { + "installZsh": "true", + "configureZshAsDefaultShell": "true", + "installOhMyZsh": true, + "upgradePackages": "false" + } + }, + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + "settings": {}, + "extensions": [ + "streetsidesoftware.code-spell-checker", + "eamodio.gitlens", + "github.copilot", + "github.copilot-labs" + ] + } + }, + // Set `remoteUser` to `root` to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. 
+ "remoteUser": "root", + "postCreateCommand": "sudo chown -R graphscope /workspaces && bash pre-commit/install-hook.sh && bash pre-commit/prepare-commit-msg" +} \ No newline at end of file diff --git a/flex/CMakeLists.txt b/flex/CMakeLists.txt index 029cebdfba82..5169aa6257af 100644 --- a/flex/CMakeLists.txt +++ b/flex/CMakeLists.txt @@ -9,13 +9,17 @@ project ( VERSION ${FLEX_VERSION} LANGUAGES CXX) + +option(BUILD_HQPS "Whether to build HighQPS Engine" ON) +option(BUILD_TEST "Whether to build test" ON) option(BUILD_DOC "Whether to build doc" ON) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../) set(DEFAULT_BUILD_TYPE "Release") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -mno-avx512f") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -mno-avx512f -fPIC") + add_compile_definitions(FLEX_VERSION="${FLEX_VERSION}") @@ -52,6 +56,11 @@ else () include_directories(SYSTEM ${GFLAGS_INCLUDE_DIRS}) endif () +#find boost---------------------------------------------------------------------- +find_package(Boost REQUIRED COMPONENTS system filesystem + # required by folly + context program_options regex thread) + # Find Doxygen if (BUILD_DOC) find_package(Doxygen) @@ -69,11 +78,32 @@ if (BUILD_DOC) endif() add_subdirectory(utils) +add_subdirectory(codegen) add_subdirectory(storages) add_subdirectory(engines) add_subdirectory(bin) +if (BUILD_TEST) + add_subdirectory(tests) +endif() +file(GLOB_RECURSE FILES_NEED_LINT + "engines/*.cc" + "engines/*.h" + "bin/*.cc" + "storages/*.h" + "storages/*.cc" + "tests/*.h" + "tests/*.cc" + "third_party/*.h" + "third_party/*.cc") +list(FILTER FILES_NEED_LINT EXCLUDE REGEX ".*\\.act\\.h$|.*\\.actg\\.h$|.*\\.autogen\\.h$|.*\\.autogen\\.cc$") +# flex_clformat: run clang-format in place over FILES_NEED_LINT (generated *.act.h/*.actg.h/*.autogen.* files are filtered out above) +add_custom_target(flex_clformat + COMMAND clang-format --style=file -i ${FILES_NEED_LINT} + COMMENT "Running clang-format, using clang-format-8 from https://github.com/muttleyxd/clang-tools-static-binaries/releases" + VERBATIM) + if (NOT 
DEFINED CPACK_PACKAGE_NAME) set(CPACK_PACKAGE_NAME "graphscope_flex") endif () @@ -88,4 +118,8 @@ set(CPACK_DEBIAN_FILE_NAME DEB-DEFAULT) set(CPACK_COMPONENTS_GROUPING ALL_COMPONENTS_IN_ONE) set(CPACK_DEB_COMPONENT_INSTALL YES) + +#install CMakeLists.txt.template to lib/flex/, where the installed load_plan_and_gen.sh looks for it +install(FILES resources/hqps/CMakeLists.txt.template DESTINATION lib/flex/) + include(CPack) diff --git a/flex/Dockerfile b/flex/Dockerfile index ba8b5a44b5b0..d047cab1e212 100644 --- a/flex/Dockerfile +++ b/flex/Dockerfile @@ -35,5 +35,22 @@ RUN git clone https://github.com/alibaba/hiactor.git -b v0.1.1 --single-branch & cmake -DHiactor_DEMOS=OFF -DHiactor_TESTING=OFF -DHiactor_DPDK=OFF -DHiactor_CXX_DIALECT=gnu++17 -DSeastar_CXX_FLAGS="-DSEASTAR_DEFAULT_ALLOCATOR -mno-avx512" .. && \ make -j && make install -# for hqps-engine -RUN apt-get install -y protobuf-compiler libprotobuf-dev \ No newline at end of file +#install protobuf +RUN apt-get install -y protobuf-compiler libprotobuf-dev + +RUN apt-get install -y sudo + +# Add graphscope user with user id 1001 +RUN useradd -m graphscope -u 1001 && \ + echo 'graphscope ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers + +# Change to graphscope user; the Rust toolchain below is installed into this user's home +USER graphscope +WORKDIR /home/graphscope + +RUN curl -sf -L https://static.rust-lang.org/rustup.sh | \ + sh -s -- -y --profile minimal && \ + chmod +x "$HOME/.cargo/env" && \ + echo "source $HOME/.cargo/env" >> ~/.bashrc && \ + . "$HOME/.cargo/env" && \ + bash -c "rustup component add rustfmt" diff --git a/flex/README.md b/flex/README.md index 7c091ab2196e..da1f3d059443 100644 --- a/flex/README.md +++ b/flex/README.md @@ -11,7 +11,7 @@ GraphScope Flex is an ongoing evolution of GraphScope. It champions a modular de ### Architecture
- GraphScope Flex architecture + GraphScope Flex architecture
The GraphScope Flex stack (as shown in the figure), consists of multiple components that users can combine like LEGO bricks to customize their graph computing deployments. The components are classified into three layers: @@ -44,7 +44,7 @@ Please use `flexbuild --help` to learn more. ### Case 1: For online BI analysis
- GraphScope Flex usecase-1 + GraphScope Flex usecase-1
BI analysis is for analysts who interactively analyze data in a WebUI. While high concurrency is unlikely, low latency for complex queries is crucial. @@ -60,7 +60,7 @@ To build the artifacts for this use case, run the following command: ### Case 2: For high QPS queries
- GraphScope Flex usecase-2 + GraphScope Flex usecase-2
In some service scenarios, e.g., recommendation or searching, the graph queries are coming at an extremely high rate and demands high throughput. In these scenarios, GraphScope Flex can be deployed with a different component set. The **compiler** generates an optimized query plan and **Hiactor Codegen** produces a physical plan tailored for **Hiactor**, a high-performance and concurrent actor framework for OLTP-like queries. @@ -76,7 +76,7 @@ Please note that we use the artifacts built by this command for LDBC SNB benchma ### Case 3: For offline graph analytics
- GraphScope Flex usecase-3 + GraphScope Flex usecase-3
GraphScope Flex is an efficient and user-friendly platform for performing graph analytics. It offers **built-in algorithms**, as well as **interfaces** for developing customized algorithms. The runtime, based on **GRAPE**, is fragment-centric and extensible, supporting multiple programming models like **FLASH**, **PIE**, and **Pregel**. Sequential algorithms can be easily parallelized or incrementalized using the **Ingress** component. To achieve high performance, an **in-memory graph store** is deployed in this stack. @@ -91,7 +91,7 @@ To build the artifacts for this use case, run the following command: ### Case 4: For graph learning tasks
- GraphScope Flex usecase-4 + GraphScope Flex usecase-4
GraphScope Flex's GNN framework supports billion-scale graphs in industrial scenarios. It provides GNN model development paradigms, **example models**, and the flexibility to choose between **TensorFlow** or **PyTorch** as the training backend. Furthermore, the framework employs decoupled sampling and training processes, which can be independently scaled for optimal end-to-end throughput, providing superior performance. diff --git a/flex/bin/CMakeLists.txt b/flex/bin/CMakeLists.txt index ddbf11feb27d..1a0b4e1381c7 100644 --- a/flex/bin/CMakeLists.txt +++ b/flex/bin/CMakeLists.txt @@ -1,13 +1,14 @@ -find_package (Hiactor) -if (Hiactor_FOUND) - add_executable(rt_server rt_server.cc) - target_link_libraries(rt_server flex_utils flex_rt_mutable_graph flex_graph_db flex_graph_db_server) - - install(TARGETS rt_server - RUNTIME DESTINATION bin - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib) -endif () +find_package(Hiactor) + +if(Hiactor_FOUND) + add_executable(rt_server rt_server.cc) + target_link_libraries(rt_server flex_utils flex_rt_mutable_graph flex_graph_db flex_server) + + install(TARGETS rt_server + RUNTIME DESTINATION bin + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib) +endif() add_executable(rt_admin rt_admin.cc) target_link_libraries(rt_admin flex_utils flex_rt_mutable_graph flex_graph_db) @@ -24,3 +25,17 @@ install(TARGETS flex_analytical_engine RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib) + +if(BUILD_HQPS) + if(Hiactor_FOUND) + add_executable(sync_server sync_server.cc) + target_link_libraries(sync_server flex_utils flex_graph_db flex_server hqps_plan_proto ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES}) + + install(TARGETS sync_server + RUNTIME DESTINATION bin + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib) + endif() + # install the script + install(PROGRAMS load_plan_and_gen.sh DESTINATION bin) +endif() \ No newline at end of file diff --git a/flex/bin/load_plan_and_gen.sh b/flex/bin/load_plan_and_gen.sh new file mode 
100755 index 000000000000..b1310fe593c7 --- /dev/null +++ b/flex/bin/load_plan_and_gen.sh @@ -0,0 +1,475 @@ +#!/bin/bash +# Copyright 2020 Alibaba Group Holding Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e + +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) +FLEX_HOME=${SCRIPT_DIR}/../ +echo "FLEX_HOME root = ${FLEX_HOME}" +FLEX_INCLUDE=${FLEX_HOME}/include/flex/ +echo "FLEX_INCLUDE directory = ${FLEX_INCLUDE}" + +# first try to resolve as if we are installed. then try to resolve locally. + +if [ ! -d ${FLEX_INCLUDE} ]; then + echo "try FLEX_HOME ${FLEX_INCLUDE} not exists." + echo "try to resolve locally" + # not installed: fall back to the source tree, where the build outputs live under ${FLEX_HOME}/build + FLEX_INCLUDE=${FLEX_HOME}/../ + echo "try to find flex with FLEX_INCLUDE directory = ${FLEX_INCLUDE}" + if [ ! -d ${FLEX_INCLUDE} ]; then + echo "FLEX_INCLUDE directory = ${FLEX_INCLUDE} not exists." + echo "Fail to find flex." 
+ exit 1 + fi + CODEGEN_RUNNER=${FLEX_HOME}/build/codegen/gen_code_from_plan + CMAKE_TEMPLATE_PATH=${FLEX_HOME}/resources/hqps/CMakeLists.txt.template + FLEX_LIB_DIR=${FLEX_HOME}/build/lib/ + PEGASUS_COMPILE_PATH=${FLEX_HOME}resources/pegasus/benchmark +else + echo "try FLEX_HOME ${FLEX_INCLUDE} exists." + CODEGEN_RUNNER=${FLEX_HOME}/bin/gen_code_from_plan + CMAKE_TEMPLATE_PATH=${FLEX_HOME}/lib/flex/CMakeLists.txt.template + FLEX_LIB_DIR=${FLEX_HOME}/lib/ + PEGASUS_COMPILE_PATH=${FLEX_HOME}resources/pegasus/benchmark +fi + +echo "Codegen runner = ${CODEGEN_RUNNER}" +echo "Cmake template path = ${CMAKE_TEMPLATE_PATH}" +#check these files exist +if [ ! -f ${CODEGEN_RUNNER} ]; then + echo "Codegen runner = ${CODEGEN_RUNNER} not exists." + echo "Fail to find codegen_runner." + exit 1 +fi + +if [ ! -f ${CMAKE_TEMPLATE_PATH} ]; then + echo "Cmake template path = ${CMAKE_TEMPLATE_PATH} not exists." + echo "Fail to find CMakeLists.txt.template." + exit 1 +fi + +#if [ ! -f ${PEGASUS_COMPILE_PATH} ]; then +# echo "Pegasus compile path = ${PEGASUS_COMPILE_PATH} not exists." +# exit 1 +#fi + +# translate the cypher query file $1 into a physical plan file $2 by running the GIE GraphPlanner with the properties file $3; exits 1 on any failure +cypher_to_plan() { + if [ $# -ne 4 ]; then + echo "Usage: $0 , but receive: "$# + exit 1 + fi + # check GIE_HOME set + if [ -z ${GIE_HOME} ]; then + echo "GIE_HOME not set." + exit 1 + fi + input_path=$1 + output_path=$2 + ir_compiler_properties=$3 + # find java executable + echo "IR compiler properties = ${ir_compiler_properties}" + #check file exists + if [ ! -f ${ir_compiler_properties} ]; then + echo "IR compiler properties = ${ir_compiler_properties} not exists." + echo "Fail to find IR compiler properties." + exit 1 + fi + JAVA_EXECUTABLE=$(which java || true) + if [ -z "${JAVA_EXECUTABLE}" ]; then + # try find from JAVA_HOME + if [ -z "${JAVA_HOME}" ]; then + echo "JAVA_HOME not set." 
+ exit 1 + else + JAVA_EXECUTABLE=${JAVA_HOME}/bin/java + fi + fi + echo "Java executable = ${JAVA_EXECUTABLE}" + echo "---------------------------" + echo "Find compiler exists" + # read from file ${input_path} + cypher_query=$(cat ${input_path}) + echo "Find cypher query:" + echo "---------------------------" + echo ${cypher_query} + echo "---------------------------" + + #get abs path of input_path + real_input_path=$(realpath ${input_path}) + real_output_path=$(realpath ${output_path}) + + compiler_jar=${GIE_HOME}/compiler/target/compiler-0.0.1-SNAPSHOT.jar + if [ ! -f ${compiler_jar} ]; then + echo "Compiler jar = ${compiler_jar} not exists." + echo "Fail to find compiler jar." + exit 1 + fi + # NOTE(review): graph_schema_path is not assigned in this function; it must already be set in the enclosing scope (compile_hqps_so assigns it as a global) - confirm for other callers + cmd="${JAVA_EXECUTABLE} -cp ${GIE_HOME}/compiler/target/libs/*:${compiler_jar}" + cmd="${cmd} -Dgraph.schema=${graph_schema_path}" + cmd="${cmd} -Djna.library.path=${GIE_HOME}/executor/ir/target/release/" + cmd="${cmd} com.alibaba.graphscope.common.ir.tools.GraphPlanner ${ir_compiler_properties} ${real_input_path} ${real_output_path}" + echo "running physical plan genration with "${cmd} + eval ${cmd} + + echo "---------------------------" + #check output + if [ ! -f ${output_path} ]; then + echo "Output file = ${output_path} not exists." + echo "Fail to generate physical plan." 
+ exit 1 + fi +} + +compile_hqps_so() { + #check input params size eq 2 or 3 + if [ $# -ne 4 ] && [ $# -ne 5 ]; then + echo "Usage: $0 [output_dir]" + exit 1 + fi + input_path=$1 + work_dir=$2 + ir_compiler_properties=$3 + graph_schema_path=$4 + if [ $# -eq 5 ]; then + output_dir=$5 + else + output_dir=${work_dir} + fi + echo "Input path = ${input_path}" + echo "Work dir = ${work_dir}" + echo "ir compiler properties = ${ir_compiler_properties}" + echo "graph schema path = ${graph_schema_path}" + echo "Output dir = ${output_dir}" + + last_file_name=$(basename ${input_path}) + + echo "last file name: ${last_file_name}" + # requiest last_file_name suffix is .pb + if [[ $last_file_name == *.pb ]]; then + query_name="${last_file_name%.pb}" + echo "File has .pb suffix." + elif [[ $last_file_name == *.cc ]]; then + echo "File havs .cc suffix." + query_name="${last_file_name%.cc}" + elif [[ $last_file_name == *.cypher ]]; then + echo "File has .cypher suffix." + query_name="${last_file_name%.cypher}" + else + echo "Expect a .pb or .cc file" + exit 1 + fi + cur_dir=${work_dir} + mkdir -p ${cur_dir} + output_cc_path="${cur_dir}/${query_name}.cc" + if [[ $(uname) == "Linux" ]]; then + output_so_path="${cur_dir}/lib${query_name}.so" + dst_so_path="${output_dir}/lib${query_name}.so" + elif [[ $(uname) == "Darwin" ]]; then + output_so_path="${cur_dir}/lib${query_name}.dylib" + dst_so_path="${output_dir}/lib${query_name}.dylib" + else + echo "Not support OS." + exit 1 + fi + + #only do codegen when receives a .pb file. 
+ if [[ $last_file_name == *.pb ]]; then + cmd="${CODEGEN_RUNNER} ${input_path} ${output_cc_path}" + echo "Codegen command = ${cmd}" + eval ${cmd} + echo "----------------------------" + elif [[ $last_file_name == *.cypher ]]; then + echo "Generating code from cypher query" + # first do .cypher to .pb + output_pb_path="${cur_dir}/${query_name}.pb" + cypher_to_plan ${input_path} ${output_pb_path} ${ir_compiler_properties} ${graph_schema_path} + echo "----------------------------" + echo "Codegen from cypher query done." + echo "----------------------------" + cmd="${CODEGEN_RUNNER} -e hqps -i ${output_pb_path} -o ${output_cc_path}" + echo "Codegen command = ${cmd}" + eval ${cmd} + # then. do .pb to .cc + elif [[ $last_file_name == *.cc ]]; then + cp $input_path ${output_cc_path} + fi + echo "Start running cmake and make" + #check output_cc_path exists + if [ ! -f ${output_cc_path} ]; then + echo "Codegen failed, ${output_cc_path} not exists." + exit 1 + fi + + # copy cmakelist.txt to output path. + cp ${CMAKE_TEMPLATE_PATH} ${cur_dir}/CMakeLists.txt + # run cmake and make in output path. + pushd ${cur_dir} + cmd="cmake . -DQUERY_NAME=${query_name} -DFLEX_INCLUDE_PREFIX=${FLEX_INCLUDE} -DFLEX_LIB_DIR=${FLEX_LIB_DIR}" + # if CMAKE_CXX_COMPILER is set, use it. + if [ ! -z ${CMAKE_CXX_COMPILER} ]; then + cmd="${cmd} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" + fi + # if CMAKE_C_COMPILER is set, use it. + if [ ! -z ${CMAKE_C_COMPILER} ]; then + cmd="${cmd} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" + fi + echo "Cmake command = ${cmd}" + echo "---------------------------" + eval ${cmd} + + ################### now build ########################## + # get number of cores + #n_cores=`nproc` + n_cores=1 + cmd="make -j"${n_cores} + echo "Make command = ${cmd}" + eval ${cmd} + echo "---------------------------" + #check if build success + if [ $? -ne 0 ]; then + echo "Build failed." + exit 1 + fi + # check output_so_name exists + if [ ! 
-f ${output_so_path} ]; then + echo "Build failed, ${output_so_path} not exists." + exit 1 + fi + echo "Finish building, output to ${output_so_path}" + popd + + ################### now copy ########################## + # if dst_so_path eq output_so_path, skip copying. + if [ ${dst_so_path} == ${output_so_path} ]; then + echo "Output dir is same as work dir, skip copying." + exit 0 + fi + # copy output to output_dir + if [ ! -z ${output_dir} ]; then + mkdir -p ${output_dir} + else + echo "Output dir not set, skip copying." + exit 0 + fi + # check output_dir doesn't contains output_so_name + if [ -f ${dst_so_path} ]; then + echo "Output dir ${output_dir} already contains ${query_name}.so" + echo "Please remove it first." + exit 1 + fi + cp ${output_so_path} ${output_dir} + #check dst_so_path exists + if [ ! -f ${dst_so_path} ]; then + echo "Copy failed, ${dst_so_path} not exists." + exit 1 + fi + echo "Finish copying, output to ${dst_so_path}" +} + +compile_pegasus_so() { + echo "Start compiling pegasus so" + #check input params size eq 2 or 3 + if [ $# -ne 4 ] && [ $# -ne 5 ]; then + echo "Usage: $0 [output_dir]" + exit 1 + fi + input_path=$1 + work_dir=$2 + ir_compiler_properties=$3 + graph_schema_path=$4 + if [ $# -eq 5 ]; then + output_dir=$5 + else + output_dir=${work_dir} + fi + echo "Input path = ${input_path}" + echo "Work dir = ${work_dir}" + echo "ir compiler properties = ${ir_compiler_properties}" + echo "graph schema path = ${graph_schema_path}" + echo "Output dir = ${output_dir}" + + last_file_name=$(basename ${input_path}) + + echo "last file name: ${last_file_name}" + # requiest last_file_name suffix is .pb + if [[ $last_file_name == *.pb ]]; then + query_name="${last_file_name%.pb}" + echo "File has .pb suffix." + elif [[ $last_file_name == *.rs ]]; then + echo "File has .rs suffix." + query_name="${last_file_name%.rs}" + elif [[ $last_file_name == *.cypher ]]; then + echo "File has .cypher suffix." 
+ query_name="${last_file_name%.cypher}" + elif [[ $last_file_name == *.json ]]; then + echo "File has .json suffix." + query_name="${last_file_name%.json}" + else + echo "Expect a .pb or .cc file" + exit 1 + fi + cur_dir=${work_dir} + mkdir -p ${cur_dir} + output_rs_path=${cur_dir}/${query_name}.rs + if [[ $(uname) == "Linux" ]]; then + output_so_path=${PEGASUS_COMPILE_PATH}/target/release/lib.so + dst_so_path=${output_dir}/lib${query_name}.so + elif [[ $(uname) == "Darwin" ]]; then + output_so_path=${PEGASUS_COMPILE_PATH}/target/release/lib.dylib + dst_so_path=${output_dir}/lib${query_name}.dylib + else + echo "Not support OS." + exit 1 + fi + + #only do codegen when receives a .pb file. + if [[ $last_file_name == *.json ]]; then + cmd="${CODEGEN_RUNNER} ${input_path} ${output_rs_path}" + echo "Codegen command = ${cmd}" + eval ${cmd} + echo "----------------------------" + elif [[ $last_file_name == *.rs ]]; then + cp $input_path ${output_rs_path} + fi + echo "Start running cmake and make" + #check output_cc_path exists + if [ ! -f ${output_rs_path} ]; then + echo "Codegen failed, ${output_rs_path} not exists." + exit 1 + fi + + # copy cmakelist.txt to output path. + rm ${PEGASUS_COMPILE_PATH}/query/src/queries/*.rs + cp ${output_rs_path} ${PEGASUS_COMPILE_PATH}/query/src/queries + >${PEGASUS_COMPILE_PATH}/query/src/queries/mod.rs + echo "pub mod ${query_name};" >> ${PEGASUS_COMPILE_PATH}/query/src/queries/mod.rs + # build dynamic lib + pushd ${PEGASUS_COMPILE_PATH}/query + cmd="cargo build --release" + eval ${cmd} + #check if build success + if [ $? -ne 0 ]; then + echo "Build failed." + exit 1 + fi + # check output_so_name exists + if [ ! -f ${output_so_path} ]; then + echo "Build failed, ${output_so_path} not exists." + exit 1 + fi + echo "Finish building, output to "${output_so_path} + popd + + ################### now copy ########################## + # copy output to output_dir + if [ ! 
-z ${output_dir} ]; then + mkdir -p ${output_dir} + else + echo "Output dir not set, skip copying." + exit 0 + fi + # check output_dir doesn't contains output_so_name + if [ -f ${dst_so_path} ]; then + echo "Output dir ${output_dir} already contains ${query_name}.so" + echo "Please remove it first." + exit 1 + fi + cp ${output_so_path} ${output_dir} + #check dst_so_path exists + if [ ! -f ${dst_so_path} ]; then + echo "Copy failed, ${dst_so_path} not exists." + exit 1 + fi + echo "Finish copying, output to ${dst_so_path}" +} + +# input path +# output dir +run() { + for i in "$@"; do + case $i in + -e=* | --engine_type=*) + ENGINE_TYPE="${i#*=}" + shift # past argument=value + ;; + -i=* | --input=*) + INPUT="${i#*=}" + shift # past argument=value + ;; + -w=* | --work_dir=*) + WORK_DIR="${i#*=}" + shift # past argument=value + ;; + -o=* | --output_dir=*) + OUTPUT_DIR="${i#*=}" + shift # past argument=value + ;; + --ir_conf=*) + IR_CONF="${i#*=}" + shift # past argument=value + ;; + --graph_schema_path=*) + GRAPH_SCHEMA_PATH="${i#*=}" + shift # past argument=value + ;; + -* | --*) + echo "Unknown option $i" + exit 1 + ;; + *) ;; + + esac + done + + echo "Input ="${INPUT} + echo "Work dir ="${WORK_DIR} + echo "Output path ="${OUTPUT_DIR} + echo "ir conf ="${IR_CONF} + echo "graph_schema_path ="${GRAPH_SCHEMA_PATH} + echo "Engine type ="${ENGINE_TYPE} + + # check input exist + if [ ! -f ${INPUT} ]; then + echo "Input file ${INPUT} not exists." + exit 1 + fi + + # if engine_type equals hqps + if [ ${ENGINE_TYPE} == "hqps" ]; then + echo "Engine type is hqps, generating dynamic library for hqps engine." + compile_hqps_so ${INPUT} ${WORK_DIR} ${IR_CONF} ${GRAPH_SCHEMA_PATH} ${OUTPUT_DIR} + + # else if engine_type equals pegasus + elif [ ${ENGINE_TYPE} == "pegasus" ]; then + echo "Engine type is pegasus, generating dynamic library for pegasus engine." 
+ compile_pegasus_so ${INPUT} ${WORK_DIR} ${IR_CONF} ${GRAPH_SCHEMA_PATH} ${OUTPUT_DIR} + else + echo "Unknown engine type "${ENGINE_TYPE} + exit 1 + fi + exit 0 +} + +if [ $# -lt 5 ]; then + echo "Usage: $0 input_file work_dir output_dir" + echo "Example: $0 -e=hqps/pegasus -i=../query/1.pb -o=/plugin/ --ir_conf=../conf/ir.conf --graph_schema_path=../conf/graph_schema.json -w=/tmp/codegen" + echo "your num args: "$# + exit 1 +fi + +run "$@" diff --git a/flex/bin/rt_server.cc b/flex/bin/rt_server.cc index c4f0b79e52a5..e9093537a70c 100644 --- a/flex/bin/rt_server.cc +++ b/flex/bin/rt_server.cc @@ -16,8 +16,8 @@ #include "grape/util.h" #include "flex/engines/graph_db/database/graph_db.h" -#include "flex/engines/graph_db/server/options.h" -#include "flex/engines/graph_db/server/service.h" +#include "flex/engines/http_server/graph_db_service.h" +#include "flex/engines/http_server/options.h" #include #include @@ -92,8 +92,8 @@ int main(int argc, char** argv) { // start service LOG(INFO) << "GraphScope http server start to listen on port " << http_port; - service::get().init(shard_num, http_port, enable_dpdk); - service::get().run_and_wait_for_exit(); + server::GraphDBService::get().init(shard_num, http_port, enable_dpdk); + server::GraphDBService::get().run_and_wait_for_exit(); return 0; } diff --git a/flex/bin/sync_server.cc b/flex/bin/sync_server.cc new file mode 100644 index 000000000000..660141b54d42 --- /dev/null +++ b/flex/bin/sync_server.cc @@ -0,0 +1,237 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <filesystem>
+#include <iostream>
+#include "stdlib.h"
+
+#include "flex/engines/http_server/hqps_service.h"
+
+#include "flex/engines/hqps_db/database/mutable_csr_interface.h"
+#include "flex/engines/http_server/codegen_proxy.h"
+#include "flex/engines/http_server/stored_procedure.h"
+
+#include <yaml-cpp/yaml.h>
+#include <boost/program_options.hpp>
+
+namespace bpo = boost::program_options;
+
+// Name of the codegen script; it is shipped as flex/bin/load_plan_and_gen.sh.
+static constexpr const char* CODEGEN_BIN = "load_plan_and_gen.sh";
+
+// Locate the codegen script: under $FLEX_HOME/bin when FLEX_HOME is set,
+// otherwise relative to this executable, trying the installed layout first and
+// the build tree second. Aborts through LOG(FATAL) when the script is missing.
+std::string find_codegen_bin() {
+  // first check whether flex_home env exists
+  const char* flex_home_char = getenv("FLEX_HOME");
+  if (flex_home_char != nullptr) {
+    std::string flex_home(flex_home_char);
+    LOG(INFO) << "flex_home env exists, flex_home: " << flex_home;
+    std::string codegen_bin = flex_home + "/bin/" + CODEGEN_BIN;
+    if (std::filesystem::exists(codegen_bin)) {
+      return codegen_bin;
+    }
+    LOG(FATAL) << "codegen bin not exists: " << codegen_bin;
+    return "";
+  }
+
+  // infer flex_home from current binary's directory
+  char* bin_path = realpath("/proc/self/exe", NULL);
+  if (bin_path == nullptr) {
+    LOG(FATAL) << "failed to resolve /proc/self/exe";
+    return "";
+  }
+  std::string bin_path_str(bin_path);
+  free(bin_path);  // realpath(path, NULL) returns a malloc'ed buffer
+  // flex home should be bin_path/../../
+  std::string flex_home_str =
+      bin_path_str.substr(0, bin_path_str.find_last_of("/"));
+  // usr/local/bin/
+  flex_home_str = flex_home_str.substr(0, flex_home_str.find_last_of("/"));
+  // usr/local/
+  LOG(INFO) << "infer flex_home as installed, flex_home: " << flex_home_str;
+  std::string codegen_bin = flex_home_str + "/bin/" + CODEGEN_BIN;
+  if (std::filesystem::exists(codegen_bin)) {
+    return codegen_bin;
+  }
+
+  // if not found, try as if it is in build directory
+  // flex/build/
+  flex_home_str = flex_home_str.substr(0, flex_home_str.find_last_of("/"));
+  // flex/
+  LOG(INFO) << "infer flex_home as build, flex_home: " << flex_home_str;
+  codegen_bin = flex_home_str + "/bin/" + CODEGEN_BIN;
+  if (std::filesystem::exists(codegen_bin)) {
+    return codegen_bin;
+  }
+  LOG(FATAL) << "codegen bin not exists: " << codegen_bin;
+  return "";
+}
+
+// Entry point: parse options and the optional server config, load the graph
+// and the stored procedures, then serve HQPS queries until exit.
+int main(int argc, char** argv) {
+  bpo::options_description desc("Usage:");
+  desc.add_options()("help,h", "Display help messages")(
+      "server-config,c", bpo::value<std::string>(),
+      "path to server config yaml")(
+      "codegen-dir,d",
+      bpo::value<std::string>()->default_value("/tmp/codegen/"),
+      "codegen working directory")("codegen-bin,b", bpo::value<std::string>(),
+      "codegen binary path")(
+      "db-home", bpo::value<std::string>(), "db home path")(
+      "graph-config,g", bpo::value<std::string>(), "graph schema config file")(
+      "data-path,a", bpo::value<std::string>(), "data directory path")(
+      "bulk-load,l", bpo::value<std::string>(), "bulk-load config file");
+
+  setenv("TZ", "Asia/Shanghai", 1);
+  tzset();
+
+  bpo::variables_map vm;
+  bpo::store(bpo::command_line_parser(argc, argv).options(desc).run(), vm);
+  bpo::notify(vm);
+
+  if (vm.count("help")) {
+    std::cout << desc << std::endl;
+    return 0;
+  }
+
+  uint32_t shard_num = 1;
+  uint16_t http_port = 10000;
+  std::string plugin_dir;
+  if (vm.count("server-config") != 0) {
+    std::string server_config_path = vm["server-config"].as<std::string>();
+    // check file exists
+    if (!std::filesystem::exists(server_config_path)) {
+      LOG(ERROR) << "server-config not exists: " << server_config_path;
+      return -1;
+    }
+    YAML::Node config = YAML::LoadFile(server_config_path);
+    auto dbms_node = config["dbms"];
+    if (dbms_node) {
+      auto server_node = dbms_node["server"];
+      if (!server_node) {
+        LOG(ERROR) << "dbms.server config not found";
+        return -1;
+      }
+      // NOTE(review): key is spelled "shared_num", not "shard_num" - confirm.
+      auto shard_num_node = server_node["shared_num"];
+      if (shard_num_node) {
+        shard_num = shard_num_node.as<uint32_t>();
+      } else {
+        LOG(INFO) << "shared_num not found, use default value 1";
+      }
+      auto http_port_node = server_node["port"];
+      if (http_port_node) {
+        http_port = http_port_node.as<uint16_t>();
+      } else {
+        LOG(INFO) << "http_port not found, use default value 10000";
+      }
+      auto plugin_dir_node = server_node["plugin_dir"];
+      if (plugin_dir_node) {
+        plugin_dir = plugin_dir_node.as<std::string>();
+      } else {
+        LOG(INFO) << "plugin_dir not found";
+      }
+    } else {
+      LOG(ERROR) << "dbms config not found";
+      return -1;
+    }
+  } else {
+    LOG(INFO) << "server-config is not specified, use default config";
+  }
+  LOG(INFO) << "shard_num: " << shard_num;
+  LOG(INFO) << "http_port: " << http_port;
+  LOG(INFO) << "plugin_dir: " << plugin_dir;
+
+  std::string codegen_dir = vm["codegen-dir"].as<std::string>();
+  LOG(INFO) << "codegen dir: " << codegen_dir;
+
+  std::string codegen_bin;
+  if (vm.count("codegen-bin") == 0) {
+    LOG(INFO) << "codegen-bin is not specified";
+    LOG(INFO) << "Try to find with relative path: ";
+    codegen_bin = find_codegen_bin();
+  } else {
+    LOG(INFO) << "codegen-bin is specified";
+    codegen_bin = vm["codegen-bin"].as<std::string>();
+  }
+
+  LOG(INFO) << "codegen bin: " << codegen_bin;
+
+  // check codegen bin exists
+  if (!std::filesystem::exists(codegen_bin)) {
+    LOG(ERROR) << "codegen bin not exists: " << codegen_bin;
+    return -1;
+  }
+
+  // start from an empty codegen dir: drop stale content, then (re)create it
+  if (std::filesystem::exists(codegen_dir)) {
+    LOG(INFO) << "codegen dir exists, clear directory";
+    std::filesystem::remove_all(codegen_dir);
+  } else {
+    LOG(INFO) << "codegen dir not exists, create directory";
+  }
+  std::filesystem::create_directories(codegen_dir);
+
+  // init graph
+  if (!vm.count("graph-config")) {
+    LOG(ERROR) << "graph-config is required";
+    return -1;
+  }
+  std::string graph_schema_path = vm["graph-config"].as<std::string>();
+  if (!vm.count("data-path")) {
+    LOG(ERROR) << "data-path is required";
+    return -1;
+  }
+  std::string data_path = vm["data-path"].as<std::string>();
+  std::string bulk_load_config_path;
+  if (vm.count("bulk-load")) {
+    bulk_load_config_path = vm["bulk-load"].as<std::string>();
+  }
+
+  double t0 = -grape::GetCurrentTime();
+  auto& db = gs::GraphDB::get();
+
+  auto ret = gs::Schema::LoadFromYaml(graph_schema_path, bulk_load_config_path);
+  db.Init(std::get<0>(ret), std::get<1>(ret), std::get<2>(ret),
+          std::get<3>(ret), data_path, shard_num);
+
+  t0 += grape::GetCurrentTime();
+  LOG(INFO) << "Finished loading graph, elapsed " << t0 << " s";
+
+  // loading plugin
+  if (!plugin_dir.empty()) {
+    LOG(INFO) << "Load plugins from dir: " << plugin_dir;
+    server::StoredProcedureManager::get().LoadFromPluginDir(plugin_dir, 0);
+  }
+
+  // db-home
+  std::string db_home;
+  if (vm.count("db-home") == 0) {
+    LOG(FATAL) << "db-home is not specified" << std::endl;
+  } else {
+    db_home = vm["db-home"].as<std::string>();
+    LOG(INFO) << "db-home: " << db_home;
+  }
+
+  server::CodegenProxy::get().Init(codegen_dir, codegen_bin, db_home);
+
+  server::HQPSService::get().init(shard_num, http_port, false);
+  server::HQPSService::get().run_and_wait_for_exit();
+
+  return 0;
+}
diff --git a/flex/codegen/CMakeLists.txt b/flex/codegen/CMakeLists.txt
new file mode 100644
index 000000000000..c414e8da22fe
--- /dev/null
+++ b/flex/codegen/CMakeLists.txt
@@ -0,0 +1,12 @@
+
+find_package(Boost REQUIRED COMPONENTS system filesystem
+             context program_options regex thread)
+include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
+
+add_executable(gen_code_from_plan gen_code_from_plan.cc)
+target_link_libraries(gen_code_from_plan hqps_plan_proto ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES} ${Boost_LIBRARIES})
+
+install(TARGETS gen_code_from_plan
+        RUNTIME DESTINATION bin
+        ARCHIVE DESTINATION lib
+        LIBRARY DESTINATION lib)
diff --git a/flex/codegen/README.md b/flex/codegen/README.md
new file mode 100644
index 000000000000..bb71703d7158
--- /dev/null
+++ b/flex/codegen/README.md
@@ -0,0 +1,47 @@
+# GIE Codegen
+
+This project contains the runtime code generation module for the `GraphScope` Interactive Engine.
+Given a cypher query, the GIE compiler compiles it into a physical plan consisting of GAIA IRs,
+and the codegen module is able to generate native code to run the physical plan on the GIE engine.
+
+The generated C++ code contains 4 parts
+- Headers
+- Expression classes
+- Query class
+- extern C APIs.
+```c++
+//0. headers
+#include "flex/engines/hqps_db/core/sync_engine.h"
+#include "flex/engines/apps/cypher_app_base.h"
+#include "flex/engines/hqps_db/database/mutable_csr_interface.h"
+
+namespace gs {
+
+//1. Expressions
+struct Expression0{
+};
+
+struct Expression1{
+};
+
+//2. Query class
+
+class Query0 : public HqpsAppBase {
+ public:
+  results::CollectiveResults Query(const MutableCSRInterface& graph,
+                                   int64_t time_stamp) const override {
+
+
+  }
+};
+} // namespace gs
+
+// 3. Create and delete handler for query
+extern "C" {
+void* CreateApp(gs::GraphStoreType store_type) {
+}
+void DeleteApp(void* app, gs::GraphStoreType store_type) {
+}
+}
+```
+
diff --git a/flex/codegen/gen_code_from_plan.cc b/flex/codegen/gen_code_from_plan.cc
new file mode 100644
index 000000000000..57756b1b81b0
--- /dev/null
+++ b/flex/codegen/gen_code_from_plan.cc
@@ -0,0 +1,161 @@
+/** Copyright 2020 Alibaba Group Holding Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <string>
+
+#include <boost/program_options.hpp>
+
+#include "glog/logging.h"
+#include "google/protobuf/message.h"
+#include "google/protobuf/text_format.h"
+#include "google/protobuf/util/json_util.h"
+
+#include "flex/codegen/src/hqps_generator.h"
+#include "flex/codegen/src/pegasus_generator.h"
+
+namespace bpo = boost::program_options;
+
+namespace gs {
+
+// Read the whole file at file_path as raw bytes; aborts (CHECK) if it cannot
+// be opened.
+std::string read_binary_str_from_path(const std::string& file_path) {
+  std::ifstream ifs(file_path, std::ios::binary);
+  CHECK(ifs.is_open()) << "Failed to open file: " << file_path;
+  std::string content_str((std::istreambuf_iterator<char>(ifs)),
+                          (std::istreambuf_iterator<char>()));
+  return content_str;
+}
+
+// Read the whole JSON text stored at file_path; aborts (CHECK) if the file
+// cannot be opened.
+std::string read_json_str_from_path(const std::string& file_path) {
+  std::ifstream in(file_path);
+  CHECK(in.is_open()) << "Failed to open file: " << file_path;
+  // operator>> stops at the first whitespace and would truncate any JSON that
+  // contains spaces or newlines, so read the complete stream instead.
+  std::string input_json((std::istreambuf_iterator<char>(in)),
+                         (std::istreambuf_iterator<char>()));
+  return input_json;
+}
+
+// Write code to output_file_path, replacing any existing content.
+void output_code_to_file(const std::string& code,
+                         const std::string& output_file_path) {
+  std::ofstream ofs(output_file_path);
+  CHECK(ofs.is_open()) << "Failed to open file: " << output_file_path;
+  ofs << code;
+  ofs.close();
+  LOG(INFO) << "Finish writing to: " << output_file_path;
+}
+
+// Parse a JSON encoded physical plan and write the generated pegasus query to
+// output_file_path.
+void deserialize_plan_and_gen_pegasus(const std::string& input_file_path,
+                                      const std::string& output_file_path) {
+  auto input_json = read_json_str_from_path(input_file_path);
+  physical::PhysicalPlan plan;
+  // a malformed plan must not silently produce code for an empty plan
+  auto status = google::protobuf::util::JsonStringToMessage(input_json, &plan);
+  CHECK(status.ok()) << "Failed to parse json plan from: " << input_file_path;
+  gs::BuildingContext ctx;
+  // parse query name from input_file_path (file name, extension included)
+  std::string query_name =
+      input_file_path.substr(input_file_path.find_last_of('/') + 1);
+  gs::PegasusGenerator pegasus_generator(ctx, query_name, plan);
+  std::string res = pegasus_generator.GenerateQuery();
+  LOG(INFO) << "Start writing to: " << output_file_path;
+  output_code_to_file(res, output_file_path);
+}
+
+// Parse a binary serialized physical plan and write the generated hqps query
+// to output_file_path.
+void deserialize_plan_and_gen_hqps(const std::string& input_file_path,
+                                   const std::string& output_file_path) {
+  LOG(INFO) << "Start deserializing from: " << input_file_path;
+  std::string content_str = read_binary_str_from_path(input_file_path);
+  LOG(INFO) << "Deserilized plan size : " << content_str.size() << ", from "
+            << input_file_path;
+  physical::PhysicalPlan plan_pb;
+  CHECK(plan_pb.ParseFromArray(content_str.data(), content_str.size()));
+  LOG(INFO) << "deserilized plan size : " << plan_pb.ByteSizeLong();
+  LOG(INFO) << "deserilized plan : " << plan_pb.DebugString();
+  BuildingContext context;
+  QueryGenerator query_generator(context, plan_pb);
+  auto res = query_generator.GenerateQuery();
+  LOG(INFO) << "Start writing to: " << output_file_path;
+  output_code_to_file(res, output_file_path);
+}
+}  // namespace gs
+
+// Command line driver: gen_code_from_plan -e ENGINE -i PLAN -o OUTPUT, where
+// ENGINE is "hqps" (binary plan) or "pegasus" (JSON plan).
+int main(int argc, char** argv) {
+  bpo::options_description desc("Usage:");
+  desc.add_options()("help", "Display help message")(
+      "engine,e", bpo::value<std::string>(), "engine type")(
+      "input,i", bpo::value<std::string>(), "input plan path")(
+      "output,o", bpo::value<std::string>(), "output file path");
+
+  bpo::variables_map vm;
+  bpo::store(bpo::command_line_parser(argc, argv).options(desc).run(), vm);
+  bpo::notify(vm);
+
+  if (vm.count("help")) {
+    std::cout << desc << std::endl;
+    return 0;
+  }
+
+  // vm[...].as() throws on an absent option, so report what is missing first
+  for (const char* required : {"engine", "input", "output"}) {
+    if (vm.count(required) == 0) {
+      LOG(ERROR) << "missing required option: --" << required;
+      std::cout << desc << std::endl;
+      return 1;
+    }
+  }
+
+  std::string input_path = vm["input"].as<std::string>();
+  std::string output_path = vm["output"].as<std::string>();
+  std::string engine_type = vm["engine"].as<std::string>();
+
+  if (!std::filesystem::exists(input_path)) {
+    LOG(ERROR) << "input file: [" << input_path << "] not found";
+    return 1;
+  }
+  if (std::filesystem::exists(output_path)) {
+    LOG(WARNING) << "output file: [" << output_path
+                 << "] exists, will overwrite";
+  }
+
+  if (engine_type == "pegasus") {
+    LOG(INFO) << "Start generating pegasus code";
+    gs::deserialize_plan_and_gen_pegasus(input_path, output_path);
+  } else if (engine_type == "hqps") {
+    LOG(INFO) << "Start generating hqps code";
+    gs::deserialize_plan_and_gen_hqps(input_path, output_path);
+  } else {
+    LOG(ERROR) << "Unknown engine type: " << engine_type
+               << ", valid engine types: "
+               << "pegasus, hqps";
+    return 1;
+  }
+
+  LOG(INFO) << "Successfully generated code to " << output_path;
+
+  return 0;
+}
diff --git a/flex/codegen/src/building_context.h b/flex/codegen/src/building_context.h new file mode 100644 index 000000000000..8f11b87ab572 --- /dev/null +++ b/flex/codegen/src/building_context.h @@ -0,0 +1,452 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef CODEGEN_SRC_BUILDING_CONTEXT_H_ +#define CODEGEN_SRC_BUILDING_CONTEXT_H_ + +#include +#include + +#include "flex/codegen/src/graph_types.h" +#include "flex/engines/hqps_db/core/utils/hqps_utils.h" +#include "glog/logging.h" + +namespace gs { +static constexpr const char* time_stamp = "time_stamp"; +static constexpr const char* graph_var = "graph"; +static constexpr const char* GRAPE_INTERFACE_CLASS = "gs::MutableCSRInterface"; +static constexpr const char* GRAPE_INTERFACE_HEADER = + "flex/engines/hqps_db/database/mutable_csr_interface.h"; +static constexpr const char* EDGE_EXPAND_OPT_NAME = "edge_expand_opt"; +static constexpr const char* SORT_OPT_NAME = "sort_opt"; +static constexpr const char* GET_V_OPT_NAME = "get_v_opt"; +static constexpr const char* EXPR_NAME = "expr"; +static constexpr const char* PROJECT_NAME = "project_opt"; +static constexpr const char* CONTEXT_NAME = "ctx"; +static constexpr const char* GROUP_KEY_NAME = "group_key"; +static constexpr const char* GROUP_OPT_NAME = "group_opt"; +static constexpr const char* AGG_FUNC_NAME = "agg_func"; +static constexpr const char* 
PATH_OPT_NAME = "path_opt"; +static constexpr const char* MAPPER_NAME = "mapper"; +static constexpr const char* APP_BASE_HEADER = + "flex/engines/apps/cypher_app_base.h"; +static constexpr const char* APP_BASE_CLASS_NAME = "HqpsAppBase"; +static constexpr const char* QUERY_FUNC_RETURN = "results::CollectiveResults"; + +enum class StorageBackend { + kGrape = 0, +}; + +std::string storage_backend_to_string(StorageBackend storage_backend) { + switch (storage_backend) { + case StorageBackend::kGrape: + return "gs::GraphStoreType::Grape"; + default: + throw std::runtime_error("unsupported storage backend"); + } +} + +// manages the mapping between tag ids and tag inds. +struct TagIndMapping { + int32_t GetTagInd(int32_t tag_id) const { + if (tag_id == -1) { + return -1; + } + print_debug_info(); + CHECK(tag_id < tag_id_2_tag_inds_.size()) + << "tag id: " << tag_id << " not found"; + return tag_id_2_tag_inds_[tag_id]; + } + + int32_t CreateOrGetTagInd(int32_t tag_id) { + if (tag_id == -1) { + return -1; + } + auto it = + std::find(tag_ind_2_tag_ids_.begin(), tag_ind_2_tag_ids_.end(), tag_id); + if (it == tag_ind_2_tag_ids_.end()) { + auto new_tag_ind = tag_ind_2_tag_ids_.size(); + tag_ind_2_tag_ids_.emplace_back(tag_id); + auto old_size = tag_id_2_tag_inds_.size(); + if (tag_id + 1 > old_size) { + tag_id_2_tag_inds_.resize(tag_id + 1); + for (auto i = old_size; i < tag_id; ++i) { + tag_id_2_tag_inds_[i] = -1; + } + } + + tag_id_2_tag_inds_[tag_id] = new_tag_ind; + check_variant(); + return new_tag_ind; + } else { + return tag_id_2_tag_inds_[tag_id]; + } + } + + void check_variant() const { + print_debug_info(); + auto max_ind = + *std::max_element(tag_id_2_tag_inds_.begin(), tag_id_2_tag_inds_.end()); + auto max_tag_id = + *std::max_element(tag_ind_2_tag_ids_.begin(), tag_ind_2_tag_ids_.end()); + CHECK(max_ind + 1 == tag_ind_2_tag_ids_.size()); + CHECK(max_tag_id + 1 == tag_id_2_tag_inds_.size()); + } + + void print_debug_info() const { + VLOG(10) << "tag id to inds : " 
<< gs::to_string(tag_id_2_tag_inds_); + VLOG(10) << "tag ind to tag ids: " << gs::to_string(tag_ind_2_tag_ids_); + } + + const std::vector& GetTagInd2TagIds() const { + return tag_ind_2_tag_ids_; + } + + // convert tag_ind (us) to tag ids + std::vector tag_ind_2_tag_ids_; + // convert tag ids(pb) to tag_inds + std::vector tag_id_2_tag_inds_; +}; + +class BuildingContext { + public: + BuildingContext(StorageBackend storage_type = StorageBackend::kGrape, + std::string query_name = "Query0", + std::string ctx_prefix = "") + : storage_backend_(storage_type), + app_base_header_(APP_BASE_HEADER), + ctx_id_(0), + var_id_(0), + query_name_(query_name), + expr_var_id_(0), + mapper_var_id_(0), + expr_id_(0), + ctx_prefix_(ctx_prefix), + alias_size_(0) { + if (storage_type == StorageBackend::kGrape) { + graph_header_ = GRAPE_INTERFACE_HEADER; + graph_interface_ = GRAPE_INTERFACE_CLASS; + } else { + LOG(FATAL) << "unsupported storage backend"; + } + } + + BuildingContext(std::string graph_interface, std::string graph_header, + StorageBackend storage_type = StorageBackend::kGrape, + std::string query_name = "Query0", + std::string ctx_prefix = "") + : storage_backend_(storage_type), + graph_interface_(graph_interface), + graph_header_(graph_header), + app_base_header_(APP_BASE_HEADER), + ctx_id_(0), + var_id_(0), + query_name_(query_name), + expr_var_id_(0), + mapper_var_id_(0), + expr_id_(0), + ctx_prefix_(ctx_prefix), + alias_size_(0) {} + + // int32_t GetCurrentCtxId() const { return ctx_id_; } + bool EmptyContext() const { return ctx_id_ == 0; } + + // return a pair indicate the direction of assigning, also increate cur ctx id + std::pair GetPrevAndNextCtxName() { + std::string ctx_name = ctx_prefix_ + CONTEXT_NAME + std::to_string(ctx_id_); + std::string ctx_name2 = + ctx_prefix_ + CONTEXT_NAME + std::to_string(ctx_id_ + 1); + ctx_id_ += 1; + return std::make_pair(ctx_name, ctx_name2); + } + + std::string GetCurCtxName() const { + return ctx_prefix_ + CONTEXT_NAME + 
std::to_string(ctx_id_); + } + + std::string GetNextCtxName() const { + return ctx_prefix_ + CONTEXT_NAME + std::to_string(ctx_id_ + 1); + } + + void IncCtxId() { ++ctx_id_; } + + // int32_t GetCurrentVarId() const { return var_id_; } + + // int32_t GetCurrentExprid() const { return expr_id_; } + std::string GetNextExprName() { + return ctx_prefix_ + EXPR_NAME + std::to_string(expr_id_++); + } + + std::string GetNextMapperName() { + return ctx_prefix_ + MAPPER_NAME + std::to_string(mapper_var_id_++); + } + + std::string GetNextExprVarName() { + return ctx_prefix_ + EXPR_NAME + std::to_string(expr_var_id_++); + } + + std::string GetNextEdgeOptName() { + return ctx_prefix_ + EDGE_EXPAND_OPT_NAME + std::to_string(var_id_++); + } + + std::string GetNextSortOptName() { + return ctx_prefix_ + SORT_OPT_NAME + std::to_string(var_id_++); + } + + std::string GetNextGetVOptName() { + return ctx_prefix_ + GET_V_OPT_NAME + std::to_string(var_id_++); + } + + std::string GetNextProjectOptName() { + return ctx_prefix_ + PROJECT_NAME + std::to_string(var_id_++); + } + + std::string GetNextGroupKeyName() { + return ctx_prefix_ + GROUP_KEY_NAME + std::to_string(var_id_++); + } + + std::string GetNextAggFuncName() { + return ctx_prefix_ + AGG_FUNC_NAME + std::to_string(var_id_++); + } + + std::string GetNextGroupOptName() { + return ctx_prefix_ + GROUP_OPT_NAME + std::to_string(var_id_++); + } + + std::string GetNextPathOptName() { + return ctx_prefix_ + PATH_OPT_NAME + std::to_string(var_id_++); + } + + std::string GetNextVarName() { return "var" + std::to_string(var_id_++); } + + std::string GetGraphInterface() const { return graph_interface_; } + + std::string GetGraphHeader() const { return graph_header_; } + + std::string GetAppBaseHeader() const { return app_base_header_; } + + std::string GetAppBaseClassName() const { return APP_BASE_CLASS_NAME; } + + std::string ContextPrefix() const { return ctx_prefix_; } + + BuildingContext CreateSubTaskContext(std::string sufix = "inner_") 
{ + BuildingContext ctx; + ctx.storage_backend_ = storage_backend_; + ctx.query_name_ = query_name_; + ctx.ctx_id_ = ctx_id_; + ctx.var_id_ = var_id_; + ctx.expr_id_ = expr_id_; + ctx.expr_var_id_ = expr_var_id_; + ctx.mapper_var_id_ = mapper_var_id_; + ctx.graph_interface_ = graph_interface_; + ctx.app_base_header_ = app_base_header_; + ctx.graph_header_ = graph_header_; + ctx.ctx_prefix_ = ctx_prefix_ + sufix; + ctx.tag_ind_mapping_ = tag_ind_mapping_; + + ctx.contain_head_ = contain_head_; + ctx.head_type_ = head_type_; + ctx.alias_size_ = alias_size_; + ctx.tag_index_ = tag_index_; + ctx.tag_type_ = tag_type_; + ctx.cur_outputs_ = cur_outputs_; + ctx.vertex_properties_set_ = vertex_properties_set_; + + return ctx; + } + + void MergeSubTaskContext(BuildingContext& ctx) { + contain_head_ = ctx.contain_head_; + } + + void AppendContextPrefix(const std::string sufix) { + ctx_prefix_ = ctx_prefix_ + sufix; + } + + // void IncCtxId() { ++ctx_id_; } + + // void IncVarId() { ++var_id_; } + + // void IncExprId() { ++expr_id_; } + + // void IncExprVarId() { ++expr_var_id_; } + + std::string TimeStampVar() const { return time_stamp; } + + std::string GraphVar() const { return graph_var; } + + void AddParameterVar(const codegen::ParamConst& var) { + parameter_vars_.emplace_back(var); + } + + void AddExprCode(const std::string& code) { expr_code_.emplace_back(code); } + + const std::vector& GetExprCode() const { return expr_code_; } + + std::string GetNextLambdaFuncName() { + int32_t ret = lambda_func_id_; + ++lambda_func_id_; + return std::string("lambda") + std::to_string(ret); + } + + const std::vector& GetParameterVars() const { + return parameter_vars_; + } + + std::string GetQueryClassName() const { return query_name_; } + + std::string GetQueryRet() const { return QUERY_FUNC_RETURN; } + + // get storage type + StorageBackend GetStorageType() const { return storage_backend_; } + + // for input tag_id, + // return -1 if tag_id == -1 + // return new asigned tag_ind if 
it doesn't appears before; + // return the found tag_ind if it appears before; + int32_t CreateOrGetTagInd(int tag_id) { + return tag_ind_mapping_.CreateOrGetTagInd(tag_id); + } + + int32_t GetTagInd(int32_t tag_id) const { + return tag_ind_mapping_.GetTagInd(tag_id); + } + + void UpdateTagIdAndIndMapping(const TagIndMapping& tag_ind_mapping) { + tag_ind_mapping_ = tag_ind_mapping; + } + + // get_tag_id_mapping + const TagIndMapping& GetTagIdAndIndMapping() const { + return tag_ind_mapping_; + } + + void SetHead(bool contain_head) { contain_head_ = contain_head; } + + bool ContainHead() const { return contain_head_; } + + void SetHeadType(int32_t data_type, std::vector label_list) { + head_type_ = std::make_pair(data_type, label_list); + }; + + const std::pair>& GetHeadType() const { + return head_type_; + } + + void SetAliasType(int32_t alias, int32_t type, + std::vector& label_list) { + auto index = tag_index_[alias]; + if (tag_type_.size() <= index) { + tag_type_.resize(index + 1); + } + tag_type_[index].first = type; + tag_type_[index].second = label_list; + } + + const std::pair>& GetAliasType( + int32_t alias) const { + int32_t index = tag_index_[alias]; + return tag_type_[index]; + } + + int32_t SetAlias(int32_t cur_alias) { + if (cur_alias >= tag_index_.size()) { + tag_index_.resize(cur_alias + 1, -1); + } + if (tag_index_[cur_alias] != -1) { + return tag_index_[cur_alias]; + } else { + tag_index_[cur_alias] = alias_size_; + alias_size_++; + return alias_size_ - 1; + } + } + + void ResetAlias() { + tag_index_.resize(0); + tag_type_.resize(0); + cur_outputs_.resize(0); + alias_size_ = 0; + } + + int32_t GetAliasIndex(int32_t alias) const { + if (contain_head_) { + return tag_index_[alias] + 1; + } else { + return tag_index_[alias]; + } + } + + int32_t AliasSize() const { return alias_size_; } + + int32_t InputSize() const { + if (contain_head_) { + return alias_size_ + 1; + } else { + return alias_size_; + } + } + + void SetOutput(int32_t index, std::vector& 
output) { + if (cur_outputs_.size() <= index) { + cur_outputs_.resize(index + 1); + } + cur_outputs_[index] = output; + } + + const std::vector>& GetOutput() { + return cur_outputs_; + } + + void AddVertexProperty(int32_t vertex_label, + const codegen::ParamConst& property) { + vertex_properties_set_[vertex_label].push_back(property); + } + + const std::unordered_map>& + GetVertexProperties() { + return vertex_properties_set_; + } + + private: + StorageBackend storage_backend_; + std::string query_name_; + int32_t ctx_id_; + int32_t var_id_; + int32_t expr_id_; + int32_t expr_var_id_; + int32_t mapper_var_id_; + int32_t lambda_func_id_; + std::string graph_interface_; + std::string graph_header_; + std::string app_base_header_; + std::string ctx_prefix_; + + std::vector parameter_vars_; + std::vector expr_code_; + TagIndMapping tag_ind_mapping_; + + bool contain_head_; + std::pair> head_type_; + int32_t alias_size_; + std::vector tag_index_; + std::vector>> tag_type_; + std::vector> cur_outputs_; + std::unordered_map> + vertex_properties_set_; +}; + +} // namespace gs + +#endif // CODEGEN_SRC_BUILDING_CONTEXT_H_ \ No newline at end of file diff --git a/flex/codegen/src/codegen_utils.h b/flex/codegen/src/codegen_utils.h new file mode 100644 index 000000000000..54c90c05b546 --- /dev/null +++ b/flex/codegen/src/codegen_utils.h @@ -0,0 +1,216 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#ifndef CODEGEN_SRC_CODEGEN_UTILS_H_ +#define CODEGEN_SRC_CODEGEN_UTILS_H_ + +#include +#include +#include + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/graph_types.h" +#include "flex/codegen/src/string_utils.h" +#include "flex/engines/hqps_db/core/utils/hqps_utils.h" +#include "glog/logging.h" + +#include "proto_generated_gie/physical.pb.h" + +namespace gs { + +// remove duplicates from vector +template +std::vector remove_duplicate(const std::vector& labels) { + std::vector res; + std::set label_set(labels.begin(), labels.end()); + res.assign(label_set.begin(), label_set.end()); + return res; +} + +std::string get_vertex_prop_column_name(std::string prop_name, + int32_t label_id) { + boost::format column_name_fmter("property_%1%_%2%"); + column_name_fmter % prop_name % label_id; + return column_name_fmter.str(); +} + +std::string get_edge_prop_column_name( + std::string prop_name, int32_t src_label, int32_t edge_label, + int32_t dst_label, physical::EdgeExpand::Direction direction) { + boost::format column_name_fmter("property_%1%_%2%_%3%_%4%_%5%"); + std::string edge_direction; + if (direction == physical::EdgeExpand_Direction::EdgeExpand_Direction_OUT) { + edge_direction = "out"; + } else if (direction == + physical::EdgeExpand_Direction::EdgeExpand_Direction_IN) { + edge_direction = "in"; + } else { + LOG(FATAL) << "Unexpect direction"; + } + column_name_fmter % prop_name % src_label % edge_label % dst_label % + edge_direction; + return column_name_fmter.str(); +} + +std::string get_subgraph_name(int32_t src_label, int32_t edge_label, + int32_t dst_label, + physical::EdgeExpand::Direction direction) { + boost::format subgraph_name_fmter("subgraph_%1%_%2%_%3%_%4%"); + std::string edge_direction; + if (direction == physical::EdgeExpand_Direction::EdgeExpand_Direction_OUT) { + edge_direction = "out"; + } else if (direction == + physical::EdgeExpand_Direction::EdgeExpand_Direction_IN) { + edge_direction = "in"; + } else {
LOG(FATAL) << "Unexpect direction"; + } + subgraph_name_fmter % src_label % edge_label % dst_label % edge_direction; + return subgraph_name_fmter.str(); +} + +std::string generate_arg_list(std::string arg_name, int32_t size) { + std::stringstream arg_ss; + if (size > 1) { + arg_ss << "("; + } + for (auto i = 0; i < size; i++) { + arg_ss << arg_name << i; + if (i < size - 1) { + arg_ss << ", "; + } + } + if (size > 1) { + arg_ss << ")"; + } + return arg_ss.str(); +} + +std::string generate_output_list(std::string input_name, int32_t input_size, + std::string result_name, int32_t alias_index, + bool contain_head) { + std::stringstream result_ss; + result_ss << "(" << result_name; + if (contain_head) { + for (auto i = 1; i < input_size; i++) { + if (i == alias_index) { + result_ss << ", " << result_name; + } else { + result_ss << ", " << input_name << i; + } + } + if (input_size == alias_index) { + result_ss << ", " << result_name; + } + } else { + for (auto i = 0; i < input_size; i++) { + if (i == alias_index) { + result_ss << ", " << result_name; + } else { + result_ss << ", " << input_name << i; + } + } + if (input_size == alias_index) { + result_ss << ", " << result_name; + } + } + result_ss << ")"; + return result_ss.str(); +} + +// check type consistent +bool data_type_consistent(const common::DataType& left, + const common::DataType& right) { + if (left == common::DataType::NONE || right == common::DataType::NONE) { + return true; + } + return left == right; +} + +std::tuple decode_param_from_decoder( + const codegen::ParamConst& param_const, int32_t ind, + const std::string& var_prefix, const std::string& decoder_name) { + std::stringstream ss; + std::string var_name = var_prefix + std::to_string(ind); + ss << _4_SPACES; + ss << data_type_2_string(param_const.type) << " " << var_name << " = "; + ss << decoder_name << "." 
<< decode_type_as_str(param_const.type) << ";"; + ss << std::endl; + return std::tuple{var_name, ss.str()}; +} + +template +void intersection(std::vector& v1, std::vector& v2) { + std::vector res; + for (auto num : v1) { + for (int i = 0; i < v2.size(); i++) { + if (num == v2[i]) { + res.push_back(num); + break; + } + } + } + res.swap(v1); +} + +static std::vector add_quotes( + const std::vector& strs) { + std::vector res; + for (auto& str : strs) { + res.emplace_back("\"" + str + "\""); + } + return res; +} + +static std::string with_quote(std::string res) { return "\"" + res + "\""; } + +static std::string make_named_property( + const std::vector& prop_names, + const std::vector& prop_types) { + std::stringstream ss; + auto quoted_prop_names = add_quotes(prop_names); + std::string prop_names_str = gs::to_string(quoted_prop_names); + std::string prop_types_str = gs::to_string(prop_types); + ss << NAMED_PROPERTY_CLASS_NAME << "<" << prop_types_str << ">"; + ss << "("; + // ss << "{" << prop_names_str << "}"; + ss << prop_names_str; + ss << ")"; + return ss.str(); +} + +static std::string make_inner_id_property(int tag_id, std::string prop_type) { + std::stringstream ss; + ss << INNER_ID_PROPERTY_NAME << "<" << tag_id << ">{}"; + return ss.str(); +} + +static codegen::ParamConst variable_to_param_const(const common::Variable& var, + BuildingContext& ctx) { + codegen::ParamConst param_const{}; // value-init so `id` is never read uninitialized + if (var.has_property()) { + param_const.var_name = var.property().key().name(); + param_const.type = + common_data_type_pb_2_data_type(var.node_type().data_type()); + } else { + param_const.var_name = ctx.GetNextVarName(); + param_const.type = codegen::DataType::kVertexId; + } + + return param_const; +} + +} // namespace gs + +#endif // CODEGEN_SRC_CODEGEN_UTILS_H_ \ No newline at end of file diff --git a/flex/codegen/src/graph_types.h b/flex/codegen/src/graph_types.h new file mode 100644 index 000000000000..feac73237183 --- /dev/null +++ b/flex/codegen/src/graph_types.h @@
-0,0 +1,232 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef CODEGEN_SRC_GRAPH_TYPES_H_ +#define CODEGEN_SRC_GRAPH_TYPES_H_ + +#include +#include + +#include "flex/codegen/src/string_utils.h" +#include "glog/logging.h" +#include "google/protobuf/any.h" +#include "proto_generated_gie/common.pb.h" + +namespace gs { + +namespace codegen { +using oid_t = int64_t; + +enum class DataType { + kInt32 = 0, + kInt64 = 1, + kFloat = 2, + kDouble = 3, + kString = 4, + kInt64Array = 5, + kInt32Array = 6, + kBoolean = 7, + kVertexId = 8, +}; + +// a parameter const, the real data will be feed at runtime. 
+struct ParamConst { + DataType type; + std::string var_name; + int32_t id; // unique id for each param const +}; + +// implement operator == for ParamConst +inline bool operator==(const ParamConst& lhs, const ParamConst& rhs) { + return lhs.type == rhs.type && lhs.var_name == rhs.var_name && + lhs.id == rhs.id; +} + +} // namespace codegen + +static codegen::DataType common_data_type_pb_2_data_type( + const common::DataType& data_type) { + switch (data_type) { + case common::DataType::INT32: + return codegen::DataType::kInt32; + case common::DataType::INT64: + return codegen::DataType::kInt64; + case common::DataType::DOUBLE: + return codegen::DataType::kDouble; + case common::DataType::STRING: + return codegen::DataType::kString; + case common::DataType::INT64_ARRAY: + return codegen::DataType::kInt64Array; + case common::DataType::INT32_ARRAY: + return codegen::DataType::kInt32Array; + case common::DataType::BOOLEAN: + return codegen::DataType::kBoolean; + default: + // LOG(FATAL) << "unknown data type"; + throw std::runtime_error("unknown data type" + + std::to_string(static_cast(data_type))); + } +} + +static std::string common_data_type_pb_2_str( + const common::DataType& data_type) { + switch (data_type) { + case common::DataType::BOOLEAN: + return "bool"; + case common::DataType::INT32: + return "int32_t"; + case common::DataType::INT64: + return "int64_t"; + case common::DataType::DOUBLE: + return "double"; + case common::DataType::STRING: + return "std::string_view"; + case common::DataType::INT64_ARRAY: + return "std::vector"; + case common::DataType::INT32_ARRAY: + return "std::vector"; + default: + // LOG(FATAL) << "unknown data type"; + // return ""; + throw std::runtime_error("unknown data type" + + std::to_string(static_cast(data_type))); + } +} + +static std::string arith_to_str(const common::Arithmetic& arith_type) { + switch (arith_type) { + case common::Arithmetic::ADD: + return "+"; + case common::Arithmetic::SUB: + return "-"; + case 
common::Arithmetic::MUL: + return "*"; + case common::Arithmetic::DIV: + return "/"; + default: + throw std::runtime_error("unknown arith type"); + } +} + +static codegen::ParamConst param_const_pb_to_param_const( + const common::DynamicParam& param_const_pb) { + auto data_type_pb = param_const_pb.data_type(); + CHECK(data_type_pb.type_case() == common::IrDataType::kDataType); + return codegen::ParamConst{ + common_data_type_pb_2_data_type(data_type_pb.data_type()), + param_const_pb.name(), param_const_pb.index()}; +} + +static codegen::ParamConst param_const_pb_to_param_const( + const common::DynamicParam& param_const_pb, + const common::IrDataType& ir_data_type) { + if (ir_data_type.type_case() == common::IrDataType::kDataType) { + auto primitive_type = ir_data_type.data_type(); + return codegen::ParamConst{common_data_type_pb_2_data_type(primitive_type), + param_const_pb.name(), param_const_pb.index()}; + } else { + throw std::runtime_error("Expect node type in ir_data_type"); + } +} + +static std::string data_type_2_string(const codegen::DataType& data_type) { + switch (data_type) { + case codegen::DataType::kInt32: + return "int32_t"; + case codegen::DataType::kInt64: + return "int64_t"; + case codegen::DataType::kDouble: + return "double"; + case codegen::DataType::kString: + return "std::string_view"; + case codegen::DataType::kInt64Array: + return "std::vector"; + case codegen::DataType::kInt32Array: + return "std::vector"; + case codegen::DataType::kBoolean: + return "bool"; + case codegen::DataType::kVertexId: + return VERTEX_ID_T; + default: + // LOG(FATAL) << "unknown data type" << static_cast(data_type); + throw std::runtime_error("unknown data type" + + std::to_string(static_cast(data_type))); + } +} + +// for different type, generate get_type() call +static std::string decode_type_as_str(const codegen::DataType& data_type) { + switch (data_type) { + case codegen::DataType::kInt32: + return "get_int()"; + case codegen::DataType::kInt64: + return
"get_long()"; + case codegen::DataType::kDouble: + return "get_double()"; + case codegen::DataType::kString: + return "get_string()"; + case codegen::DataType::kBoolean: + return "get_bool()"; + default: + // LOG(FATAL) << "unknown data type" << static_cast(data_type); + throw std::runtime_error("unknown data type" + + std::to_string(static_cast(data_type))); + } +} + +static std::string data_type_2_rust_string(const codegen::DataType& data_type) { + switch (data_type) { + case codegen::DataType::kInt32: + return "i32"; + case codegen::DataType::kInt64: + return "i64"; + case codegen::DataType::kDouble: + return "f64"; // Rust has no `double`; the 64-bit float type is f64 + case codegen::DataType::kString: + return "String"; + case codegen::DataType::kInt64Array: + return "Vector"; + case codegen::DataType::kInt32Array: + return "Vector"; + case codegen::DataType::kBoolean: + return "bool"; + case codegen::DataType::kVertexId: + return "ID"; + default: + LOG(FATAL) << "unknown data type" << static_cast(data_type); + } +} + +static void parse_param_const_from_pb( + const common::DynamicParam& param_const_pb, + const common::IrDataType& node_type, codegen::ParamConst& param_cost) { + auto data_type = param_const_pb.data_type(); + if (data_type.type_case() == common::IrDataType::kDataType) { + param_cost.type = common_data_type_pb_2_data_type(data_type.data_type()); + param_cost.var_name = param_const_pb.name(); + param_cost.id = param_const_pb.index(); + return; + } else if (node_type.type_case() == common::IrDataType::kDataType) { + param_cost.type = common_data_type_pb_2_data_type(node_type.data_type()); + param_cost.var_name = param_const_pb.name(); + param_cost.id = param_const_pb.index(); + return; + } else { + throw std::runtime_error("Fail to get data type from param const"); + } +} + +} // namespace gs + +#endif // CODEGEN_SRC_GRAPH_TYPES_H_ \ No newline at end of file diff --git a/flex/codegen/src/hqps/hqps_case_when_builder.h b/flex/codegen/src/hqps/hqps_case_when_builder.h new file mode 100644 index
000000000000..7e6cf2cd4f2f --- /dev/null +++ b/flex/codegen/src/hqps/hqps_case_when_builder.h @@ -0,0 +1,217 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef CODEGEN_SRC_HQPS_HQPS_CASE_WHEN_BUILDER_H_ +#define CODEGEN_SRC_HQPS_HQPS_CASE_WHEN_BUILDER_H_ + +#include +#include +#include + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "flex/codegen/src/hqps/hqps_expr_builder.h" +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/expr.pb.h" + +namespace gs { + +class CaseWhenBuilder : public ExprBuilder { + public: + using base_t = ExprBuilder; + using ret_t = std::tuple, + std::vector>, + std::string, common::DataType>; + CaseWhenBuilder(BuildingContext& ctx) + : base_t(ctx), + ret_type_(common::DataType::DataType_INT_MIN_SENTINEL_DO_NOT_USE_) { + VLOG(10) << "try to build: " << base_t::class_name_; + } + + CaseWhenBuilder& when_then_exprs( + const google::protobuf::RepeatedPtrField& + when_expr) { + VLOG(10) << "Got when then exprs of size: " << when_expr.size(); + + // Basiclly, each when_then is a if then. 
+ for (auto& when_then_expr : when_expr) { + auto& when_val = when_then_expr.when_expression(); + auto& the_result_expr = when_then_expr.then_result_expression(); + if (when_val.operators_size() == 0) { + throw std::runtime_error("when expression is empty"); + } + when_then_expr_impl_general(when_val, the_result_expr); + } + return *this; + } + + // Since we process else_expr at last, we can do this return. + CaseWhenBuilder& else_expr(const common::Expression& else_exr) { + if (else_exr.operators_size() == 0) { + throw std::runtime_error("else expression is empty"); + } + + { + auto& else_oprs = else_exr.operators(); + auto expr_code = build_sub_expr(else_oprs); + + std::stringstream ss; + ss << "return ("; + ss << expr_code; + ss << ");" << std::endl; + else_code_ = ss.str(); // assign to member. + } + VLOG(10) << "Finish else expr: " << else_code_; + return *this; + } + + CaseWhenBuilder& return_type(common::DataType ret_type) { + ret_type_ = ret_type; + return *this; + } + + ret_t Build() const override { + for (auto i = 0; i < construct_params_.size(); ++i) { + ctx_.AddParameterVar(construct_params_[i]); + } + + VLOG(10) << "Enter express building"; + std::string constructor_param_str, field_init_code_str, + func_call_template_typename_str, func_call_params_str, + func_call_impl_str, private_filed_str; + constructor_param_str = get_constructor_params_str(); + field_init_code_str = get_field_init_code_str(); + func_call_template_typename_str = get_func_call_typename_str(); + + func_call_params_str = get_func_call_params_str(); + // the func_call impl is overrided + func_call_impl_str = get_func_call_impl_str(); + private_filed_str = get_private_filed_str(); + + boost::format formater(EXPR_BUILDER_TEMPLATE_STR); + formater % class_name_ % constructor_param_str % field_init_code_str % + func_call_template_typename_str % func_call_params_str % + func_call_impl_str % private_filed_str; + + std::string str = formater.str(); + + return std::make_tuple( + class_name_, 
construct_params_, tag_selectors_, str, + common::DataType::DataType_INT_MIN_SENTINEL_DO_NOT_USE_); + } + + protected: + void when_then_expr_impl_general(const common::Expression& when_val, + const common::Expression& the_result_expr) { + if (when_val.operators_size() != 1) { + throw std::runtime_error("when expression can only one"); + } + // the single operator can only be a const or a dynamic param + auto& when_opr = when_val.operators(0); + if (when_opr.item_case() != common::ExprOpr::kConst && + when_opr.item_case() != common::ExprOpr::kParam) { + throw std::runtime_error("when expression can only be const or param"); + } + + std::string when_key; + { + if (when_opr.item_case() == common::ExprOpr::kConst) { + auto& const_val = when_opr.const_(); + when_key = value_pb_to_str(const_val); + } else { + auto& param = when_opr.param(); + auto param_node_type = when_opr.node_type(); + auto param_const = + param_const_pb_to_param_const(param, param_node_type); + VLOG(10) << "receive param const: " << param.DebugString(); + when_key = param_const.var_name + "_"; // TODO: fix hack + construct_params_.push_back(param_const); + } + } + + // then result expression + std::string then_result_expr_code; + { + auto& else_oprs = the_result_expr.operators(); + auto expr_code = build_sub_expr(else_oprs); + std::stringstream ss; + ss << "return ("; + ss << expr_code; + ss << ");" << std::endl; + then_result_expr_code = ss.str(); + } + + // concatenate the case-when into an if statement.
+ { + std::stringstream ss; + ss << "if (" << when_key << ") {" << std::endl; + ss << then_result_expr_code; + ss << "}" << std::endl; + auto tmp_res = ss.str(); + VLOG(10) << "WhenThen expr: " << tmp_res; + when_then_codes_.emplace_back(std::move(tmp_res)); + } + } + + std::string get_func_call_impl_str() const override { + std::stringstream ss; + for (int i = 0; i < when_then_codes_.size(); ++i) { + ss << when_then_codes_[i] << std::endl; + } + ss << else_code_ << std::endl; + return ss.str(); + } + + // For each when then expr, we need to build a sub expr. + std::string build_sub_expr( + const google::protobuf::RepeatedPtrField& oprs) { + ExprBuilder expr_builder(ctx_, cur_var_id_, true); + expr_builder.AddAllExprOpr(oprs); + auto& expr_nodes = expr_builder.GetExprNodes(); + auto& tag_props = expr_builder.GetTagSelectors(); + auto& func_call_vars = expr_builder.GetFuncCallVars(); + auto& param_consts = expr_builder.GetConstructParams(); + // save the tag props and param const to us. + for (auto tag_prop : tag_props) { + tag_selectors_.push_back(tag_prop); + } + for (auto param_const : param_consts) { + construct_params_.push_back(param_const); + } + for (auto func_call_var : func_call_vars) { + func_call_vars_.push_back(func_call_var); + } + VLOG(10) << "Inc var id from " << cur_var_id_ << " to " + << expr_builder.GetCurVarId(); + cur_var_id_ = expr_builder.GetCurVarId(); + + std::stringstream ss; + for (auto& expr_node : expr_nodes) { + ss << expr_node << " "; + } + return ss.str(); + } + + std::string input_expr_code_; + + std::vector when_then_codes_; + std::string else_code_; + common::DataType ret_type_; +}; + +} // namespace gs + +#endif // CODEGEN_SRC_HQPS_HQPS_CASE_WHEN_BUILDER_H_ \ No newline at end of file diff --git a/flex/codegen/src/hqps/hqps_dedup_builder.h b/flex/codegen/src/hqps/hqps_dedup_builder.h new file mode 100644 index 000000000000..b1fcf8353a69 --- /dev/null +++ b/flex/codegen/src/hqps/hqps_dedup_builder.h @@ -0,0 +1,101 @@ +/** Copyright 
2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef CODEGEN_SRC_HQPS_HQPS_DEDUP_BUILDER_H_ +#define CODEGEN_SRC_HQPS_HQPS_DEDUP_BUILDER_H_ + +#include +#include +#include + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "flex/codegen/src/hqps/hqps_expr_builder.h" +#include "flex/codegen/src/pb_parser/query_params_parser.h" +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/physical.pb.h" +namespace gs { + +enum DedupProp { + kInnerId = 0, + kProp = 1, +}; + +static constexpr const char* DEDUP_OP_TEMPLATE_STR = + "auto %1%= Engine::template Dedup<%2%>(std::move(%3%));"; + +class DedupOpBuilder { + public: + DedupOpBuilder(BuildingContext& ctx) : ctx_(ctx) {} + + // dedup on kId + DedupOpBuilder& dedup_on_inner_id(int32_t tag_id) { + // dedup_prop_ = "none"; + // dedup_prop_type_ = DedupProp::kInnerId; + int32_t real_tag_ind = ctx_.GetTagInd(tag_id); + dedup_tag_ids_.emplace_back(real_tag_ind); + return *this; + } + + std::string Build() const { + std::string prev_ctx_name, next_ctx_name; + std::tie(prev_ctx_name, next_ctx_name) = ctx_.GetPrevAndNextCtxName(); + std::string dedup_tag_ids_str; + { + std::stringstream ss; + for (auto i = 0; i < dedup_tag_ids_.size(); ++i) { + if (i + 1 == dedup_tag_ids_.size()) { + ss << dedup_tag_ids_[i]; + } else { + ss << dedup_tag_ids_[i] << ","; 
+ } + } + dedup_tag_ids_str = ss.str(); + } + boost::format formater(DEDUP_OP_TEMPLATE_STR); + formater % next_ctx_name % dedup_tag_ids_str % prev_ctx_name; + return formater.str(); + } + + private: + BuildingContext& ctx_; + std::vector dedup_tag_ids_; +}; + +static std::string BuildDedupOp( + BuildingContext& ctx, const algebra::Dedup& dedup, + const physical::PhysicalOpr::MetaData& meta_data) { + DedupOpBuilder dedup_builder(ctx); + auto keys = dedup.keys(); + CHECK(keys.size() > 0) << "Dedup keys size should be gt 0"; + + for (auto& key : keys) { + if (key.has_property()) { + LOG(FATAL) << "dedup on property" << key.property().DebugString() + << "not supported"; + // dedup_builder.dedup_prop(key.property()); + } else { + VLOG(10) << "dedup on innerid"; + dedup_builder.dedup_on_inner_id(key.tag().id()); + } + } + + return dedup_builder.Build(); +} + +} // namespace gs + +#endif // CODEGEN_SRC_HQPS_HQPS_DEDUP_BUILDER_H_ \ No newline at end of file diff --git a/flex/codegen/src/hqps/hqps_edge_expand_builder.h b/flex/codegen/src/hqps/hqps_edge_expand_builder.h new file mode 100644 index 000000000000..7f4bf254eccc --- /dev/null +++ b/flex/codegen/src/hqps/hqps_edge_expand_builder.h @@ -0,0 +1,415 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#ifndef CODEGEN_SRC_HQPS_HQPS_EDGE_EXPAND_BUILDER_H_ +#define CODEGEN_SRC_HQPS_HQPS_EDGE_EXPAND_BUILDER_H_ + +#include +#include + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/pb_parser/internal_struct.h" +#include "flex/codegen/src/pb_parser/ir_data_type_parser.h" +#include "flex/codegen/src/pb_parser/name_id_parser.h" +#include "flex/codegen/src/pb_parser/query_params_parser.h" +#include "flex/codegen/src/string_utils.h" + +// Expand to vertices with filter. +static constexpr const char* EDGE_EXPAND_V_OPT_FILTER_TEMPLATE_STR = + "auto %1% = gs::make_filter(%2%(%3%) %4%);\n" + "auto %5% = gs::make_edge_expandv_opt(%6%, %7%, %8%, std::move(%1%));\n"; + +// Expand to vertices with no filter. +static constexpr const char* EDGE_EXPAND_V_OPT_NO_FILTERTEMPLATE_STR = + "auto %1% = gs::make_edge_expandv_opt(%2%, %3%, %4%);\n"; + +// Expand to Edges with Filter. +// propNames, direction, edge_label, vertex_label, filter +static constexpr const char* EDGE_EXPAND_E_OPT_FILTER_TEMPLATE_STR = + "auto %1% = gs::make_filter(%2%(%3%) %4%);\n" + "auto %5% = gs::make_edge_expande_opt<%6%>(%7%, %8%, %9%, %10%, " + "std::move(%1%));\n"; +// Expand to Edges with no filter +static constexpr const char* EDGE_EXPAND_E_OPT_NO_FILTER_TEMPLATE_STR = + "auto %1% = gs::make_edge_expande_opt<%2%>(%3%, %4%, %5%, %6%);\n"; + +/* i.e. + auto filter = gs::make_filter(TruePredicate(), EmptySelector); + auto opt = + gs::make_edge_expande_opt(std::tuple{PropertySelector("creationDate")}, + Direction::In, knows_label, person_label, std::move(filter)); +*/ + +static constexpr const char* EDGE_EXPANDV_OP_TEMPLATE_STR = + "auto %1% = Engine::template EdgeExpandV<%2%, %3%>(%4%, %5%, %6%);\n"; + +static constexpr const char* EDGE_EXPANDE_OP_TEMPLATE_STR = + "auto %1% = Engine::template EdgeExpandE<%2%,%3%>(%4%, %5%, %6%);\n"; + +namespace gs { +// create edge expand opt +// the expression in query params are applied on edge. 
+// extract the edge property from ir_data_type +template +static std::pair BuildEdgeExpandOpt( + BuildingContext& ctx, const internal::Direction& direction, + const algebra::QueryParams& params, + const std::vector& dst_vertex_labels, + const physical::EdgeExpand::ExpandOpt& expand_opt, + const physical::PhysicalOpr::MetaData& meta_data) { + std::string expr_func_name; + std::string expr_var_name = ctx.GetNextExprVarName(); + std::string opt_var_name = ctx.GetNextEdgeOptName(); + std::string func_construct_params_str; + std::string property_selectors_str; + std::string edge_label_id_str; + std::string dst_label_ids_str; + std::string edge_prop_selectors_str; + std::string edge_expand_e_types_str; + { + std::vector> prop_names; + std::vector> prop_types; + auto& ir_data_type = meta_data.type(); + // do we nneed to extract prop name from data_type? + // for edge_expand_e, we need the type info, but for edge_expand_v, we + // don't cause grape_graph need the exact type info. + if (try_to_get_prop_names_and_types_from_ir_data_type( + ir_data_type, prop_names, prop_types)) { + CHECK(prop_names.size() == 1); + auto& cur_prop_names = prop_names[0]; + auto& cur_prop_types = prop_types[0]; + CHECK(cur_prop_names.size() == cur_prop_types.size()); + { + std::string type_names; + { + std::stringstream ss; + for (int i = 0; i < cur_prop_types.size(); ++i) { + ss << cur_prop_types[i]; + if (i != cur_prop_types.size() - 1) { + ss << ", "; + } + } + edge_expand_e_types_str = ss.str(); + } + { + std::stringstream ss; + for (int i = 0; i < cur_prop_names.size(); ++i) { + ss << add_quote(cur_prop_names[i]); + if (i != cur_prop_names.size() - 1) { + ss << ", "; + } + } + type_names = ss.str(); + } + boost::format formater(PROP_NAME_ARRAY); + formater % edge_expand_e_types_str % type_names; + edge_prop_selectors_str = formater.str(); + } + } else { + VLOG(10) << "No prop types found."; + } + } + + // first check whether expand_opt contains expression. 
+ if (params.has_predicate()) { + VLOG(10) << "Found expr in edge expand"; + auto& expr = params.predicate(); + auto expr_builder = ExprBuilder(ctx); + expr_builder.set_return_type(common::DataType::BOOLEAN); + expr_builder.AddAllExprOpr(expr.operators()); + std::string expr_code; + std::vector func_call_param_const; + std::vector> expr_tag_props; + common::DataType unused_expr_ret_type; + std::tie(expr_func_name, func_call_param_const, expr_tag_props, expr_code, + unused_expr_ret_type) = expr_builder.Build(); + VLOG(10) << "Found expr in edge_expand_opt: " << expr_func_name; + // generate code. + ctx.AddExprCode(expr_code); + + { + std::stringstream ss; + for (auto i = 0; i < func_call_param_const.size(); ++i) { + ss << func_call_param_const[i].var_name; + if (i != func_call_param_const.size() - 1) { + ss << ", "; + } + } + func_construct_params_str = ss.str(); + } + { + std::stringstream ss; + if (expr_tag_props.size() > 0) { + ss << ","; + } + for (int i = 0; i < expr_tag_props.size(); ++i) { + ss << expr_tag_props[i].second; + if (i != expr_tag_props.size() - 1) { + ss << ", "; + } + } + property_selectors_str = ss.str(); + } + } + + { + auto& edge_table = params.tables(); + CHECK(edge_table.size() == 1) << "edge table size should be 1"; + LabelT edge_label = + try_get_label_from_name_or_id(params.tables()[0]); + edge_label_id_str = ensure_label_id(edge_label); + } + + { + CHECK(dst_vertex_labels.size() > 0); + if (dst_vertex_labels.size() == 1) { + dst_label_ids_str = ensure_label_id(dst_vertex_labels[0]); + } else { + dst_label_ids_str = label_ids_to_array_str(dst_vertex_labels); + } + } + boost::format formater(""); + if (expand_opt == + physical::EdgeExpand::ExpandOpt::EdgeExpand_ExpandOpt_EDGE) { + if (params.has_predicate()) { + VLOG(10) << "Building EdgeExpandE with predicate"; + formater = boost::format(EDGE_EXPAND_E_OPT_FILTER_TEMPLATE_STR); + formater % expr_var_name % expr_func_name % func_construct_params_str % + property_selectors_str % 
opt_var_name % edge_expand_e_types_str % + edge_prop_selectors_str % gs::direction_pb_to_str(direction) % + edge_label_id_str % dst_label_ids_str; + } else { + VLOG(10) << "Building EdgeExpandE without predicate"; + formater = boost::format(EDGE_EXPAND_E_OPT_NO_FILTER_TEMPLATE_STR); + formater % opt_var_name % edge_expand_e_types_str % + edge_prop_selectors_str % gs::direction_pb_to_str(direction) % + edge_label_id_str % dst_label_ids_str; + } + } else { + if (params.has_predicate()) { + VLOG(10) << "Building EdgeExpanV with predicate"; + formater = boost::format(EDGE_EXPAND_V_OPT_FILTER_TEMPLATE_STR); + formater % expr_var_name % expr_func_name % func_construct_params_str % + property_selectors_str % opt_var_name % + gs::direction_pb_to_str(direction) % edge_label_id_str % + dst_label_ids_str; + } else { + VLOG(10) << "Buliding EdgeExpandV without predicate"; + formater = boost::format(EDGE_EXPAND_V_OPT_NO_FILTERTEMPLATE_STR); + formater % opt_var_name % gs::direction_pb_to_str(direction) % + edge_label_id_str % dst_label_ids_str; + } + } + + return std::make_pair(opt_var_name, formater.str()); +} + +template +class EdgeExpandOpBuilder { + public: + EdgeExpandOpBuilder(BuildingContext& ctx) + : ctx_(ctx), direction_(internal::Direction::kNotSet) {} + ~EdgeExpandOpBuilder() = default; + + EdgeExpandOpBuilder& resAlias(const int32_t res_alias) { + res_alias_ = ctx_.CreateOrGetTagInd(res_alias); + return *this; + } + + EdgeExpandOpBuilder& dstVertexLabels( + const std::vector& dst_vertex_labels) { + get_v_vertex_labels_ = dst_vertex_labels; + return *this; + } + + EdgeExpandOpBuilder& query_params(const algebra::QueryParams& query_params) { + query_params_ = query_params; + return *this; + } + + EdgeExpandOpBuilder& expand_opt(const physical::EdgeExpand::ExpandOpt& opt) { + expand_opt_ = opt; + return *this; + } + + EdgeExpandOpBuilder& direction(const physical::EdgeExpand::Direction& dir) { + switch (dir) { + case 
physical::EdgeExpand::Direction::EdgeExpand_Direction_OUT: + direction_ = internal::Direction::kOut; + break; + + case physical::EdgeExpand::Direction::EdgeExpand_Direction_IN: + direction_ = internal::Direction::kIn; + break; + + case physical::EdgeExpand::Direction::EdgeExpand_Direction_BOTH: + direction_ = internal::Direction::kBoth; + break; + default: + LOG(FATAL) << "Unknown direction"; + } + return *this; + } + + EdgeExpandOpBuilder& v_tag(const int32_t& v_tag) { + v_tag_ = ctx_.GetTagInd(v_tag); + return *this; + } + + EdgeExpandOpBuilder& meta_data( + const physical::PhysicalOpr::MetaData& meta_data) { + meta_data_ = meta_data; + // we can get the edge tuplet from meta_data, in case we fail to extract + // edge triplet from ir_data_type + { + auto& ir_data_type = meta_data_.type(); + VLOG(10) << "str: " << ir_data_type.DebugString(); + CHECK(ir_data_type.has_graph_type()); + auto& graph_ele_type = ir_data_type.graph_type(); + VLOG(10) << "debug string: " << graph_ele_type.DebugString(); + CHECK(graph_ele_type.element_opt() == + common::GraphDataType::GraphElementOpt:: + GraphDataType_GraphElementOpt_EDGE || + graph_ele_type.element_opt() == + common::GraphDataType::GraphElementOpt:: + GraphDataType_GraphElementOpt_VERTEX) + << "expect edge meta for edge builder"; + auto& graph_data_type = graph_ele_type.graph_data_type(); + CHECK(graph_data_type.size() > 0); + + CHECK(direction_ != internal::Direction::kNotSet); + for (auto ele_labe_type : graph_data_type) { + auto& triplet = ele_labe_type.label(); + auto& dst_label = triplet.dst_label(); + if (direction_ == internal::Direction::kOut) { + VLOG(10) << "got dst_label : " << dst_label.value(); + dst_vertex_labels_.emplace_back(dst_label.value()); + } else if (direction_ == internal::Direction::kIn) { + dst_vertex_labels_.emplace_back(triplet.src_label().value()); + } else { // kBoth + auto src = triplet.src_label().value(); + auto dst = triplet.dst_label().value(); + CHECK(src == dst) << "When expand with 
direction, both, src and dst " + "label should be the same"; + dst_vertex_labels_.emplace_back(src); + } + } + VLOG(10) << "before join: " << gs::to_string(dst_vertex_labels_); + VLOG(10) << "before join get_v: " << gs::to_string(get_v_vertex_labels_); + // only interset if get_v_vertex_labels specify any labels + if (get_v_vertex_labels_.size() > 0) { + intersection(dst_vertex_labels_, get_v_vertex_labels_); + } + { + std::unordered_set s(dst_vertex_labels_.begin(), + dst_vertex_labels_.end()); + dst_vertex_labels_.assign(s.begin(), s.end()); + } + VLOG(10) << "after join " << gs::to_string(dst_vertex_labels_); + VLOG(10) << "extract dst vertex label: " + << gs::to_string(dst_vertex_labels_) << ", from meta data"; + } + return *this; + } + + std::string Build() const { + std::string opt_name, opt_code; + std::tie(opt_name, opt_code) = + BuildEdgeExpandOpt(ctx_, direction_, query_params_, dst_vertex_labels_, + expand_opt_, meta_data_); + + std::string prev_ctx_name, next_ctx_name; + std::tie(prev_ctx_name, next_ctx_name) = ctx_.GetPrevAndNextCtxName(); + boost::format formater(""); + if (expand_opt_ == + physical::EdgeExpand::ExpandOpt::EdgeExpand_ExpandOpt_EDGE) { + formater = boost::format(EDGE_EXPANDE_OP_TEMPLATE_STR); + } else { + formater = boost::format(EDGE_EXPANDV_OP_TEMPLATE_STR); + } + + auto append_opt = res_alias_to_append_opt(res_alias_); + formater % next_ctx_name % append_opt % format_input_col(v_tag_) % + ctx_.GraphVar() % make_move(prev_ctx_name) % make_move(opt_name); + + return opt_code + formater.str(); + } + + private: + BuildingContext& ctx_; + int32_t res_alias_; + algebra::QueryParams query_params_; + physical::EdgeExpand::ExpandOpt expand_opt_; + internal::Direction direction_; + std::vector dst_vertex_labels_; + std::vector get_v_vertex_labels_; + int32_t v_tag_; + physical::PhysicalOpr::MetaData meta_data_; +}; + +template +static std::string BuildEdgeExpandOp( + BuildingContext& ctx, const physical::EdgeExpand& edge_expand, + const 
physical::PhysicalOpr::MetaData& meta_data) { + VLOG(10) << "Building Edge Expand Op: " << edge_expand.DebugString(); + EdgeExpandOpBuilder builder(ctx); + if (edge_expand.has_alias()) { + builder.resAlias(edge_expand.alias().value()); + } else { + builder.resAlias(-1); + } + builder.query_params(edge_expand.params()) + .expand_opt(edge_expand.expand_opt()) + .direction(edge_expand.direction()); + if (edge_expand.has_v_tag()) { + builder.v_tag(edge_expand.v_tag().value()); + } else { + builder.v_tag(-1); + } + builder.meta_data(meta_data); + return builder.Build(); +} + +// build edge expand op with dst vertex labels. +// the extra dst_vertex_labels are extracted from get_v, It can be a larger +// collection or a smaller collection. +template +static std::string BuildEdgeExpandOp( + BuildingContext& ctx, const physical::EdgeExpand& edge_expand, + const physical::PhysicalOpr::MetaData& meta_data, + std::vector dst_vertex_labels) { + VLOG(10) << "Building Edge Expand Op: " << edge_expand.DebugString(); + EdgeExpandOpBuilder builder(ctx); + if (edge_expand.has_alias()) { + builder.resAlias(edge_expand.alias().value()); + } else { + builder.resAlias(-1); + } + builder.dstVertexLabels(dst_vertex_labels) + .query_params(edge_expand.params()) + .expand_opt(edge_expand.expand_opt()) + .direction(edge_expand.direction()); + if (edge_expand.has_v_tag()) { + builder.v_tag(edge_expand.v_tag().value()); + } else { + builder.v_tag(-1); + } + builder.meta_data(meta_data); + return builder.Build(); +} + +} // namespace gs + +#endif // CODEGEN_SRC_HQPS_HQPS_EDGE_EXPAND_BUILDER_H_ \ No newline at end of file diff --git a/flex/codegen/src/hqps/hqps_expr_builder.h b/flex/codegen/src/hqps/hqps_expr_builder.h new file mode 100644 index 000000000000..6f98b1f494ff --- /dev/null +++ b/flex/codegen/src/hqps/hqps_expr_builder.h @@ -0,0 +1,459 @@ +/** Copyright 2020 Alibaba Group Holding Limited. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef CODEGEN_SRC_HQPS_HQPS_EXPR_BUILDER_H_ +#define CODEGEN_SRC_HQPS_HQPS_EXPR_BUILDER_H_ + +#include +#include +#include + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/expr.pb.h" + +#include + +namespace gs { + +// 1: expression class name +// 2: constructor params, as string +// 3: construction implement, concatenated as string +// 4: operator call params, as string +// 5: operator call implement, concatenated as string +// 6: private members, concatenated as string +static constexpr const char* EXPR_BUILDER_TEMPLATE_STR = + "struct %1% {\n" + " public: \n" + " using result_t = %2%;\n" + " %1%(%3%) %4% {}\n" + " %5%\n" + " inline %6% operator()(%7%) const {\n" + " %8%\n" + " }\n" + " private:\n" + " %9%\n" + "};\n"; + +// The input variable can have property or not, if property is not present, we +// take that as a IdKey +static std::pair variable_to_tag_id_property_selector( + BuildingContext& ctx, const common::Variable& var) { + int tag_id = -1; + if (var.has_tag()) { + tag_id = var.tag().id(); + } + int real_tag_ind = ctx.GetTagInd(tag_id); + if (var.has_property()) { + std::string prop_name = var.property().key().name(); + std::string prop_type = data_type_2_string( + 
common_data_type_pb_2_data_type(var.node_type().data_type())); + boost::format formater(PROPERTY_SELECTOR); + formater % prop_type % prop_name; + + return std::make_pair(real_tag_ind, formater.str()); + } else { + // if variable has no property, we assume it means get the innerIdProperty + // there are two cases: + // 0: vertex, but the node type is passed as all property and types. + // 1: collection, just take the value; + std::string prop_type; + if (var.node_type().type_case() == common::IrDataType::kDataType) { + prop_type = data_type_2_string( + common_data_type_pb_2_data_type(var.node_type().data_type())); + } else { + prop_type = GRAPE_EMPTY_TYPE; + } + boost::format formater(PROPERTY_SELECTOR); + formater % prop_type % "None"; + + return std::make_pair(real_tag_ind, formater.str()); + } +} + +static std::string logical_to_str(const common::Logical& logical) { + switch (logical) { + case common::Logical::AND: + return "&&"; + case common::Logical::OR: + return "||"; + case common::Logical::NOT: + return "!"; + case common::Logical::EQ: + return "=="; + case common::Logical::NE: + return "!="; + case common::Logical::GT: + return ">"; + case common::Logical::GE: + return ">="; + case common::Logical::LT: + return "<"; + case common::Logical::LE: + return "<="; + case common::Logical::WITHIN: + return "< WithIn > "; + default: + throw std::runtime_error("unknown logical"); + } +} + +std::string i64_array_pb_to_str(const common::I64Array& array) { + auto size = array.item_size(); + std::stringstream ss; + ss << "std::array{"; + for (int i = 0; i < size; ++i) { + ss << array.item(i); + if (i + 1 != size) { + ss << ","; + } + } + ss << "}"; + return ss.str(); +} + +// i32_array_pb_to_str +std::string i32_array_pb_to_str(const common::I32Array& array) { + auto size = array.item_size(); + std::stringstream ss; + ss << "std::array{"; + for (int i = 0; i < size; ++i) { + ss << array.item(i); + if (i + 1 != size) { + ss << ","; + } + } + ss << "}"; + return ss.str(); 
+} + +static std::string value_pb_to_str(const common::Value& value) { + switch (value.item_case()) { + case common::Value::kI32: + return std::to_string(value.i32()); + case common::Value::kI64: + return std::to_string(value.i64()); + case common::Value::kF64: + return std::to_string(value.f64()); + case common::Value::kStr: + return with_quote(value.str()); + case common::Value::kBoolean: + return value.boolean() ? "true" : "false"; + case common::Value::kI32Array: + return i32_array_pb_to_str(value.i32_array()); + case common::Value::kI64Array: + return i64_array_pb_to_str(value.i64_array()); + case common::Value::kNone: + return NONE_LITERAL; + default: + throw std::runtime_error("unknown value type" + value.DebugString()); + } +} + +bool constains_vertex_id(const std::vector& params) { + for (auto& param : params) { + if (param.type == codegen::DataType::kVertexId) { + return true; + } + } + return false; +} + +// Simlutate the calculation of expression, return the result data type. +// convert to prefix expression + +/*Build a expression struct from expression*/ +class ExprBuilder { + protected: + static constexpr const char* EXPR_OPERATOR_CALL_VAR_NAME = "var"; + + public: + ExprBuilder(BuildingContext& ctx, int var_id = 0, bool no_build = false) + : ctx_(ctx), + cur_var_id_(var_id), + res_data_type_( + common::DataType::DataType_INT_MIN_SENTINEL_DO_NOT_USE_) { + if (!no_build) { + // no build indicates whether we will use this builder as a helper. + // If set to true, we will not let queryClassName and next_expr_name + // increase. + class_name_ = ctx_.GetQueryClassName() + ctx_.GetNextExprName(); + } + } + + void set_return_type(common::DataType data_type) { + res_data_type_ = data_type; + } + + void AddAllExprOpr( + const google::protobuf::RepeatedPtrField& expr_ops) { + // we currently don't support filter with label keys! + // If we meet label keys just ignore. 
+ auto size = expr_ops.size(); + VLOG(10) << "Adding expr of size: " << size; + for (auto i = 0; i < size;) { + auto expr = expr_ops[i]; + if (expr.has_var() && expr.var().property().has_label()) { + VLOG(10) << "Found label in expr, skip this check"; + int j = i; + for (; j < size; ++j) { + if (expr_ops[j].item_case() == common::ExprOpr::kBrace && + expr_ops[j].brace() == + common::ExprOpr::Brace::ExprOpr_Brace_RIGHT_BRACE) { + VLOG(10) << "Found right brace at ind: " << j + << ", started at: " << i; + AddExprOpr(std::string("true")); + AddExprOpr(expr_ops[j]); + i = j + 1; + break; + } + } + if (j == size) { + LOG(WARNING) << "no right brace found" << j << "size: " << size; + i = j; + } + } else { + AddExprOpr(expr_ops[i]); + ++i; + } + } + } + + void AddExprOpr(const std::string expr_str) { + expr_nodes_.emplace_back(expr_str); + } + + // visit each expr opr. + void AddExprOpr(const common::ExprOpr& opr) { + switch (opr.item_case()) { + case common::ExprOpr::kBrace: { + auto brace = opr.brace(); + if (brace == common::ExprOpr::Brace::ExprOpr_Brace_LEFT_BRACE) { + expr_nodes_.emplace_back("("); + } else if (brace == common::ExprOpr::Brace::ExprOpr_Brace_RIGHT_BRACE) { + expr_nodes_.emplace_back(")"); + } else { + throw std::runtime_error("unknown brace"); + } + break; + } + + case common::ExprOpr::kConst: { + VLOG(10) << "Got const: " << opr.const_().DebugString(); + auto str = value_pb_to_str(opr.const_()); + VLOG(10) << "Got const: " << str; + expr_nodes_.emplace_back(std::move(str)); + break; + } + + case common::ExprOpr::kVar: { + auto& var = opr.var(); + auto param_const = variable_to_param_const(var, ctx_); + // for each variable, we need add the variable to func_call_vars_. + // and also set a expr node for it. which is unique. 
+ func_call_vars_.push_back(param_const); + expr_nodes_.emplace_back(std::string(EXPR_OPERATOR_CALL_VAR_NAME) + + std::to_string(cur_var_id_++)); + + // expr_nodes_.emplace_back(param_const.var_name); + // convert a variable to a tag property, + // gs::NamedProperty{"prop1"}, saved for later use. + tag_selectors_.emplace_back( + variable_to_tag_id_property_selector(ctx_, var)); + VLOG(10) << "Got var: " << var.DebugString(); + break; + } + + case common::ExprOpr::kLogical: { + auto logical = opr.logical(); + auto str = logical_to_str(logical); + VLOG(10) << "Got expr opt logical: " << str; + expr_nodes_.emplace_back(std::move(str)); + break; + } + + // todo: use dynamic param + case common::ExprOpr::kParam: { + auto param_const_pb = opr.param(); + auto param_node_type = opr.node_type(); + auto param_const = + param_const_pb_to_param_const(param_const_pb, param_node_type); + VLOG(10) << "receive param const: " << param_const_pb.DebugString(); + construct_params_.push_back(param_const); + expr_nodes_.emplace_back(param_const.var_name + "_"); + break; + } + + case common::ExprOpr::kArith: { + auto arith = opr.arith(); + auto str = arith_to_str(arith); + VLOG(10) << "Got expr opt arith: " << str; + expr_nodes_.emplace_back(std::move(str)); + break; + } + + default: + LOG(WARNING) << "not recognized expr opr: " << opr.DebugString(); + throw std::runtime_error("not recognized expr opr"); + } + } + + // get expr nodes + const std::vector& GetExprNodes() const { return expr_nodes_; } + + // get func call vars + const std::vector& GetFuncCallVars() const { + return func_call_vars_; + } + + // get tag property strs + const std::vector>& GetTagSelectors() const { + return tag_selectors_; + } + + // get construct params + const std::vector& GetConstructParams() const { + return construct_params_; + } + + int32_t GetCurVarId() const { return cur_var_id_; } + + // 0: function name + // 1: function call params, + // 2: tag_property + // 3. function code + // 4. 
return type + virtual std::tuple, + std::vector>, std::string, + common::DataType> + Build() const { + // Insert param vars to context. + for (auto i = 0; i < construct_params_.size(); ++i) { + ctx_.AddParameterVar(construct_params_[i]); + } + + std::string constructor_param_str, field_init_code_str, + func_call_template_typename_str, func_call_params_str, + func_call_impl_str, private_filed_str; + constructor_param_str = get_constructor_params_str(); + field_init_code_str = get_field_init_code_str(); + func_call_template_typename_str = get_func_call_typename_str(); + func_call_params_str = get_func_call_params_str(); + func_call_impl_str = get_func_call_impl_str(); + private_filed_str = get_private_filed_str(); + VLOG(10) << "Finish preparing code blocks"; + + boost::format formater(EXPR_BUILDER_TEMPLATE_STR); + formater % class_name_ % common_data_type_pb_2_str(res_data_type_) % + constructor_param_str % field_init_code_str % + func_call_template_typename_str % "auto" % func_call_params_str % + func_call_impl_str % private_filed_str; + + std::string str = formater.str(); + + return std::make_tuple(class_name_, construct_params_, tag_selectors_, str, + res_data_type_); + } + + bool empty() const { return expr_nodes_.empty(); } + + protected: + // return the concatenated string of constructor's input params + std::string get_constructor_params_str() const { + std::stringstream ss; + if (!construct_params_.empty()) { + ss << ":"; + } + for (int i = 0; i < construct_params_.size(); ++i) { + ss << data_type_2_string(construct_params_[i].type) << " " + << construct_params_[i].var_name; + if (i != construct_params_.size() - 1) { + ss << ","; + } + } + return ss.str(); + } + + std::string get_field_init_code_str() const { + std::stringstream ss; + for (int i = 0; i < construct_params_.size(); ++i) { + ss << construct_params_[i].var_name << "_" + << "(" << construct_params_[i].var_name << ")"; + if (i != construct_params_.size() - 1) { + ss << ","; + } + } + return 
ss.str(); + } + + std::string get_func_call_typename_str() const { + std::string typename_template = ""; + if (constains_vertex_id(func_call_vars_)) { + typename_template = "template "; + } + return typename_template; + } + + std::string get_func_call_params_str() const { + std::stringstream ss; + for (int i = 0; i < func_call_vars_.size(); ++i) { + ss << data_type_2_string(func_call_vars_[i].type) << " " + << EXPR_OPERATOR_CALL_VAR_NAME << i; + if (i != func_call_vars_.size() - 1) { + ss << ","; + } + } + return ss.str(); + } + + virtual std::string get_func_call_impl_str() const { + std::stringstream ss; + ss << "return "; + for (auto i = 0; i < expr_nodes_.size(); ++i) { + ss << expr_nodes_[i] << " "; + } + ss << ";"; + return ss.str(); + } + + std::string get_private_filed_str() const { + std::stringstream ss; + for (auto i = 0; i < construct_params_.size(); ++i) { + ss << data_type_2_string(construct_params_[i].type) << " " + << construct_params_[i].var_name << "_;" << std::endl; + } + return ss.str(); + } + + // this corresponding to the input params. + std::vector construct_params_; + // input var list of function call + std::vector func_call_vars_; + // we shall also keep the private member too, use {var}; + std::vector> + tag_selectors_; // gs::NamedProperty({"creationDate"}) + // component of expression + std::vector expr_nodes_; + BuildingContext& ctx_; + int cur_var_id_; + common::DataType res_data_type_; + + std::string class_name_; +}; +} // namespace gs + +#endif // CODEGEN_SRC_HQPS_HQPS_EXPR_BUILDER_H_ \ No newline at end of file diff --git a/flex/codegen/src/hqps/hqps_fold_builder.h b/flex/codegen/src/hqps/hqps_fold_builder.h new file mode 100644 index 000000000000..7d085afdf4d1 --- /dev/null +++ b/flex/codegen/src/hqps/hqps_fold_builder.h @@ -0,0 +1,173 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef CODEGEN_SRC_HQPS_HQPS_FOLD_BUILDER_H_ +#define CODEGEN_SRC_HQPS_HQPS_FOLD_BUILDER_H_ + +#include +#include +#include + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "flex/codegen/src/hqps/hqps_expr_builder.h" +#include "flex/codegen/src/hqps/hqps_group_by_builder.h" +#include "flex/codegen/src/pb_parser/query_params_parser.h" +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/physical.pb.h" + +namespace gs { + +static constexpr const char* AGG_FUNC_TEMPLATE_STR = + "auto %1% = gs::make_aggregate_prop<%2%>(std::tuple{{%3%}, " + "std::integer_sequence{});\n"; + +static constexpr const char* FOLD_OP_TEMPLATE_STR = + "auto %1% = gs::make_fold_opt(%2%);\n" + "auto %3% = Engine::GroupByWithoutKey(%4%, std::move(%5%), %1%);\n"; + +std::pair gen_agg_var_and_code_for_fold( + BuildingContext& ctx, const physical::GroupBy::AggFunc& agg_func) { + auto agg_func_name = agg_func_pb_2_str(agg_func.aggregate()); + auto cur_var_name = ctx.GetNextAggFuncName(); + std::vector in_tags; + std::vector in_prop_names; + std::vector in_prop_types; + int32_t res_alias = agg_func.alias().value(); + auto real_res_alias = ctx.CreateOrGetTagInd(res_alias); + auto& vars = agg_func.vars(); + for (auto i = 0; i < vars.size(); ++i) { + auto& var = vars[i]; + auto raw_tag_id = var.tag().id(); + in_tags.push_back(ctx.GetTagInd(raw_tag_id)); + VLOG(10) << "var " << i << " tag id " << raw_tag_id << " real tag id " + << in_tags[i]; 
+ if (var.has_property()) { + auto var_prop = var.property(); + if (var_prop.item_case() == common::Property::kId) { + // IdKey + VLOG(10) << "aggregate on internal id"; + in_prop_names.push_back("None"); + in_prop_types.push_back(EMPTY_TYPE); + } else { + VLOG(10) << "aggregate on property " << var_prop.key().name(); + in_prop_names.push_back(var.property().key().name()); + in_prop_types.push_back( + common_data_type_pb_2_str(var.node_type().data_type())); + } + } else { + // var has no property, which means internal id. + VLOG(10) << "aggregate on internal id"; + in_prop_names.push_back("None"); + in_prop_types.push_back(EMPTY_TYPE); + } + } + CHECK(in_prop_names.size() > 0); + std::string selectors_str, in_tags_str; + { + std::stringstream ss; + for (auto i = 0; i < in_prop_types.size(); ++i) { + boost::format selector_formater(PROPERTY_SELECTOR); + selector_formater % in_prop_types[i] % in_prop_names[i]; + ss << selector_formater.str(); + if (i != in_prop_types.size() - 1) { + ss << ", "; + } + } + selectors_str = ss.str(); + } + { + std::stringstream ss; + for (auto i = 0; i < in_tags.size(); ++i) { + ss << in_tags[i]; + if (i != in_tags.size() - 1) { + ss << ", "; + } + } + in_tags_str = ss.str(); + } + + boost::format formater(AGG_FUNC_TEMPLATE_STR); + formater % cur_var_name % agg_func_name % selectors_str % in_tags_str; + return std::make_pair(cur_var_name, formater.str()); +} + +// i.e. group without key. 
+class FoldOpBuilder { + public: + FoldOpBuilder(BuildingContext& ctx) : ctx_(ctx) {} + + FoldOpBuilder& AddAggFunc(const physical::GroupBy::AggFunc& agg_func) { + std::string agg_fun_var_name, agg_fun_code; + std::tie(agg_fun_var_name, agg_fun_code) = + gen_agg_var_and_code_for_fold(ctx_, agg_func); + agg_func_name_and_code.emplace_back(agg_fun_var_name, agg_fun_code); + return *this; + } + + std::string Build() const { + CHECK(agg_func_name_and_code.size() > 0); + + std::string fold_ops_code; + { + std::stringstream ss; + for (auto i = 0; i < agg_func_name_and_code.size(); ++i) { + ss << make_move(agg_func_name_and_code[i].first); + if (i != agg_func_name_and_code.size() - 1) { + ss << ", "; + } + } + fold_ops_code = ss.str(); + } + + std::string prev_ctx_name, next_ctx_name; + std::string fold_opt_var_name = ctx_.GetNextGroupOptName(); + std::tie(prev_ctx_name, next_ctx_name) = ctx_.GetPrevAndNextCtxName(); + + std::string agg_func_code_con; + { + std::stringstream ss; + for (auto i = 0; i < agg_func_name_and_code.size(); ++i) { + ss << agg_func_name_and_code[i].second << std::endl; + } + agg_func_code_con = ss.str(); + } + + boost::format formater(FOLD_OP_TEMPLATE_STR); + formater % fold_opt_var_name % fold_ops_code % next_ctx_name % + ctx_.GraphVar() % prev_ctx_name; + + return agg_func_code_con + formater.str(); + } + + private: + BuildingContext& ctx_; + std::vector> agg_func_name_and_code; +}; + +static std::string BuildGroupWithoutKeyOp( + BuildingContext& ctx, const physical::GroupBy& group_by_pb, + const physical::PhysicalOpr::MetaData& meta_data) { + CHECK(group_by_pb.mappings_size() == 0); + FoldOpBuilder fold_op_builder(ctx); + for (auto i = 0; i < group_by_pb.functions_size(); ++i) { + fold_op_builder.AddAggFunc(group_by_pb.functions(i)); + } + return fold_op_builder.Build(); +} +} // namespace gs + +#endif // CODEGEN_SRC_HQPS_HQPS_FOLD_BUILDER_H_ \ No newline at end of file diff --git a/flex/codegen/src/hqps/hqps_get_v_builder.h 
b/flex/codegen/src/hqps/hqps_get_v_builder.h new file mode 100644 index 000000000000..8777c4cd7ce4 --- /dev/null +++ b/flex/codegen/src/hqps/hqps_get_v_builder.h @@ -0,0 +1,244 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef CODEGEN_SRC_HQPS_HQPS_GET_V_BUILDER_H_ +#define CODEGEN_SRC_HQPS_HQPS_GET_V_BUILDER_H_ + +#include +#include +#include + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "flex/codegen/src/pb_parser/name_id_parser.h" +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/expr.pb.h" +#include "proto_generated_gie/physical.pb.h" + +#include + +namespace gs { + +static constexpr const char* GET_V_OPT_NO_FILTER_TEMPLATE_STR = + "auto %1% = make_getv_opt(%2%, %3%);\n"; + +static constexpr const char* GET_V_NO_FILTER_TEMPLATE_STR = + "auto %1% = make_getv_opt(%2%, %3%);\n" + "auto %4% = Engine::template GetV<%5%,%6%>(%7%, std::move(%8%), " + "std::move(%1%));\n"; +static constexpr const char* GET_V_FILTER_TEMPLATE_STR = + "auto %1% = gs::make_filter(%2%(%3%), %4%);\n" + "auto %5% = make_getv_opt(%6%, %7%, std::move(%1%));\n" + "auto %8% = Engine::template GetV<%9%,%10%>(%11%, std::move(%12%), " + "std::move(%5%));\n"; + +namespace internal { +enum class GetVType { + kStart = 0, + kEnd = 1, + kOther = 2, + kBoth = 3, + kItself = 4, +}; + +std::string 
get_v_type_2_str(GetVType get_v_type) {
+  switch (get_v_type) {
+  case GetVType::kStart:
+    return "gs::VOpt::Start";
+  case GetVType::kEnd:
+    return "gs::VOpt::End";
+  case GetVType::kOther:
+    return "gs::VOpt::Other";
+  case GetVType::kBoth:
+    return "gs::VOpt::Both";
+  case GetVType::kItself:
+    return "gs::VOpt::Itself";
+  default:
+    throw std::runtime_error("unknown get_v_type");
+  }
+}
+}  // namespace internal
+
+// NOTE(review): angle-bracket contents (template parameter lists and
+// template arguments) look stripped in this copy of the patch -- confirm
+// against upstream before compiling.
+
+// Builds a GetV option without a filter.
+// Returns (variable name, generated code): the code is
+// GET_V_OPT_NO_FILTER_TEMPLATE_STR filled with a fresh variable name from
+// `ctx`, the gs::VOpt literal for `get_v_type`, and the label array string.
+template
+std::pair make_getv_opt_call_code(
+    BuildingContext& ctx, const internal::GetVType& get_v_type,
+    const std::vector& vertex_labels) {
+  std::string var_name = ctx.GetNextGetVOptName();
+
+  boost::format formater(GET_V_OPT_NO_FILTER_TEMPLATE_STR);
+  formater % var_name % internal::get_v_type_2_str(get_v_type) %
+      label_ids_to_array_str(vertex_labels);
+  return std::make_pair(var_name, formater.str());
+}
+
+// Maps the protobuf VOpt enum onto internal::GetVType.
+// Throws std::runtime_error for an unrecognised value.
+// Declared inline because it is a non-template function defined in a header.
+inline internal::GetVType vopt_pb_to_internal(
+    const physical::GetV::VOpt& v_opt) {
+  switch (v_opt) {
+  case physical::GetV_VOpt_START:
+    return internal::GetVType::kStart;
+  case physical::GetV_VOpt_END:
+    return internal::GetVType::kEnd;
+  case physical::GetV_VOpt_OTHER:
+    return internal::GetVType::kOther;
+  case physical::GetV_VOpt_BOTH:
+    return internal::GetVType::kBoth;
+  case physical::GetV_VOpt_ITSELF:
+    return internal::GetVType::kItself;
+  default:
+    throw std::runtime_error("unknown vopt");
+  }
+}
+
+// Fluent builder that collects the pieces of a GetV operator (vertex option,
+// input/output tags, vertex labels, optional filter expression) and renders
+// the operator's source code in Build().
+template
+class GetVOpBuilder {
+ public:
+  GetVOpBuilder(BuildingContext& ctx) : ctx_(ctx) {}
+
+  // Records which vertex the operator should fetch.
+  GetVOpBuilder& v_opt(const physical::GetV::VOpt& v_opt) {
+    v_opt_ = vopt_pb_to_internal(v_opt);
+    return *this;
+  }
+
+  // Resolves the input tag id to its index through the context.
+  GetVOpBuilder& in_tag(int32_t in_tag_id) {
+    in_tag_id_ = ctx_.GetTagInd(in_tag_id);
+    return *this;
+  }
+
+  // Resolves (creating if needed) the index of the output tag.
+  GetVOpBuilder& out_tag(int32_t out_tag_id) {
+    out_tag_id_ = ctx_.CreateOrGetTagInd(out_tag_id);
+    return *this;
+  }
+
+  // Appends one vertex label; duplicates are removed later in Build().
+  GetVOpBuilder& add_vertex_label(const common::NameOrId& vertex_label) {
+    vertex_labels_.push_back(
+        try_get_label_from_name_or_id(vertex_label));
+    return *this;
+  }
+
+  // Compiles the filter predicate, if any. When the expression has operators
+  // and the expression builder is non-empty, the generated expression code is
+  // registered with the context and its name/params/selectors are kept for
+  // Build(). Otherwise the builder stays in the "no filter" state.
+  GetVOpBuilder& filter(const common::Expression& expr) {
+    auto size = expr.operators().size();
+    if (size > 0) {
+      ExprBuilder expr_builder(ctx_);
+
+      auto& expr_oprs = expr.operators();
+      expr_builder.AddAllExprOpr(expr_oprs);
+      expr_builder.set_return_type(common::DataType::BOOLEAN);
+      common::DataType unused_expr_ret_type;
+      if (!expr_builder.empty()) {
+        std::tie(expr_name_, expr_call_param_, tag_propertys_, expr_code_,
+                 unused_expr_ret_type) = expr_builder.Build();
+        ctx_.AddExprCode(expr_code_);
+      } else {
+        VLOG(10) << "No valid expression in getv filter";
+      }
+    } else {
+      VLOG(10) << "no expression in getv";
+    }
+    VLOG(10) << "Finish build getv filter";
+    return *this;
+  }
+
+  // Renders the GetV operator code. Uses GET_V_NO_FILTER_TEMPLATE_STR when no
+  // filter expression was built and GET_V_FILTER_TEMPLATE_STR otherwise.
+  std::string Build() const {
+    std::string get_v_opt_var = ctx_.GetNextGetVOptName();
+    std::string get_v_code;
+    std::string prev_ctx_name, next_ctx_name;
+    std::tie(prev_ctx_name, next_ctx_name) = ctx_.GetPrevAndNextCtxName();
+    auto append_opt = res_alias_to_append_opt(out_tag_id_);
+    auto input_col_str = format_input_col(in_tag_id_);
+    std::vector tmp = remove_duplicate(vertex_labels_);
+    VLOG(10) << "Before deduplicate: " << gs::to_string(vertex_labels_)
+             << ", after dedup: " << gs::to_string(tmp);
+    if (expr_name_.empty()) {
+      // no filter
+      boost::format formater(GET_V_NO_FILTER_TEMPLATE_STR);
+      formater % get_v_opt_var % internal::get_v_type_2_str(v_opt_) %
+          label_ids_to_array_str(tmp) % next_ctx_name % append_opt %
+          input_col_str % ctx_.GraphVar() % prev_ctx_name;
+      get_v_code = formater.str();
+    } else {
+      // with filter
+      boost::format formater(GET_V_FILTER_TEMPLATE_STR);
+      std::string expr_var_name = ctx_.GetNextExprVarName();
+      std::string expr_call_str;
+      std::string selectors_str;
+      {
+        // comma-separated constructor arguments of the expression
+        std::stringstream ss;
+        for (size_t i = 0; i < expr_call_param_.size(); ++i) {
+          ss << expr_call_param_[i].var_name;
+          if (i != expr_call_param_.size() - 1) {
+            ss << ", ";
+          }
+        }
+        expr_call_str = ss.str();
+      }
+      {
+        // comma-separated property selectors of the expression
+        std::stringstream ss;
+        for (size_t i = 0; i < tag_propertys_.size(); ++i) {
+          ss << tag_propertys_[i].second;
+          if (i != tag_propertys_.size() - 1) {
+            ss << ", ";
+          }
+        }
+        selectors_str = ss.str();
+      }
+      formater % expr_var_name % expr_name_ % expr_call_str % selectors_str %
+          get_v_opt_var % internal::get_v_type_2_str(v_opt_) %
+          label_ids_to_array_str(tmp) % next_ctx_name % append_opt %
+          input_col_str % ctx_.GraphVar() % prev_ctx_name;
+      get_v_code = formater.str();
+    }
+    VLOG(10) << "Finish building getv code";
+
+    return get_v_code;
+  }
+
+ private:
+  BuildingContext& ctx_;
+  internal::GetVType v_opt_;
+  int32_t in_tag_id_, out_tag_id_;
+  std::vector vertex_labels_;
+  std::vector expr_call_param_;
+  std::vector> tag_propertys_;
+  std::string expr_name_, expr_code_;  // empty expr_name_ means "no filter"
+};
+
+// Translates a physical::GetV message into GetV operator code.
+// A missing tag or alias is passed to the builder as -1. `meta_data` is not
+// used by this function.
+template
+static std::string BuildGetVOp(
+    BuildingContext& ctx, const physical::GetV& get_v_pb,
+    const physical::PhysicalOpr::MetaData& meta_data) {
+  GetVOpBuilder builder(ctx);
+  builder.v_opt(get_v_pb.opt());
+  if (get_v_pb.has_tag()) {
+    builder.in_tag(get_v_pb.tag().value());
+  } else {
+    builder.in_tag(-1);
+  }
+
+  if (get_v_pb.has_alias()) {
+    builder.out_tag(get_v_pb.alias().value());
+  } else {
+    builder.out_tag(-1);
+  }
+  auto& vertex_labels_pb = get_v_pb.params().tables();
+  // iterate by const reference to avoid copying each label message
+  for (const auto& vertex_label_pb : vertex_labels_pb) {
+    builder.add_vertex_label(vertex_label_pb);
+  }
+
+  return builder.filter(get_v_pb.params().predicate()).Build();
+}
+}  // namespace gs
+
+#endif  // CODEGEN_SRC_HQPS_HQPS_GET_V_BUILDER_H_
\ No newline at end of file
diff --git a/flex/codegen/src/hqps/hqps_group_by_builder.h b/flex/codegen/src/hqps/hqps_group_by_builder.h
new file mode 100644
index 000000000000..1fcbc4aac5b7
--- /dev/null
+++ b/flex/codegen/src/hqps/hqps_group_by_builder.h
@@ -0,0 +1,281 @@
+/** Copyright 2020 Alibaba Group Holding Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#ifndef CODEGEN_SRC_HQPS_HQPS_GROUP_BY_BUILDER_H_
+#define CODEGEN_SRC_HQPS_HQPS_GROUP_BY_BUILDER_H_
+
+#include
+#include
+#include
+
+#include "flex/codegen/src/building_context.h"
+#include "flex/codegen/src/codegen_utils.h"
+#include "flex/codegen/src/graph_types.h"
+#include "flex/codegen/src/hqps/hqps_expr_builder.h"
+#include "flex/codegen/src/pb_parser/query_params_parser.h"
+#include "proto_generated_gie/algebra.pb.h"
+#include "proto_generated_gie/common.pb.h"
+#include "proto_generated_gie/physical.pb.h"
+
+namespace gs {
+
+// NOTE(review): angle-bracket contents (include targets, template arguments)
+// look stripped in this copy of the patch -- confirm against upstream.
+
+static constexpr const char* GROUP_KEY_TEMPLATE_STR =
+    "GroupKey<%1%, %2%> %3%(%4%);\n";
+
+static constexpr const char* GROUP_AGG_TEMPLATE_STR =
+    "auto %1% = gs::make_aggregate_prop<%2%>(std::tuple{%3%}, "
+    "std::integer_sequence{});\n";
+
+static constexpr const char* GROUP_BY_OP_TEMPLATE_STR =
+    "%1%\n"
+    "%2%\n"
+    "auto %3% = Engine::GroupBy(%4%, std::move(%5%), std::tuple{%6%}, "
+    "std::tuple{%7%});\n";
+
+// Maps a protobuf aggregate enum to the gs::AggFunc literal emitted into the
+// generated code. Unsupported values terminate via LOG(FATAL).
+// Declared inline because it is a non-template function defined in a header.
+inline std::string agg_func_pb_2_str(
+    const physical::GroupBy::AggFunc::Aggregate& agg_func) {
+  switch (agg_func) {
+  case physical::GroupBy::AggFunc::Aggregate::GroupBy_AggFunc_Aggregate_SUM:
+    return "gs::AggFunc::SUM";
+  case physical::GroupBy::AggFunc::Aggregate::GroupBy_AggFunc_Aggregate_AVG:
+    return "gs::AggFunc::AVG";
+  case physical::GroupBy::AggFunc::Aggregate::GroupBy_AggFunc_Aggregate_MAX:
+    return "gs::AggFunc::MAX";
+  case physical::GroupBy::AggFunc::Aggregate::GroupBy_AggFunc_Aggregate_MIN:
+    return "gs::AggFunc::MIN";
+  case physical::GroupBy::AggFunc::Aggregate::GroupBy_AggFunc_Aggregate_COUNT:
+    return "gs::AggFunc::COUNT";
+  case physical::GroupBy::AggFunc::Aggregate::
+      GroupBy_AggFunc_Aggregate_COUNT_DISTINCT:
+    return "gs::AggFunc::COUNT_DISTINCT";
+  case physical::GroupBy::AggFunc::Aggregate::GroupBy_AggFunc_Aggregate_TO_LIST:
+    return "gs::AggFunc::TO_LIST";
+  case physical::GroupBy::AggFunc::Aggregate::GroupBy_AggFunc_Aggregate_TO_SET:
+    return "gs::AggFunc::TO_SET";
+  default:
+    LOG(FATAL) << "Unsupported aggregate function";
+  }
+}
+
+// Generates the declaration of one aggregate function.
+// Returns (variable name, generated code). The result alias is registered in
+// `new_mapping`; input tags are resolved through `ctx`. A variable without a
+// property, or with an id property, aggregates on the internal id
+// (selector "None" with EMPTY_TYPE).
+// Declared inline because it is a non-template function defined in a header.
+inline std::pair gen_agg_var_and_code(
+    BuildingContext& ctx, TagIndMapping& new_mapping,
+    const physical::GroupBy::AggFunc& agg_func) {
+  auto agg_func_name = agg_func_pb_2_str(agg_func.aggregate());
+  auto cur_var_name = ctx.GetNextAggFuncName();
+  std::vector in_tags;
+  std::vector in_prop_names;
+  std::vector in_prop_types;
+  int32_t res_alias = agg_func.alias().value();
+  // Called for its effect on new_mapping; the returned index is not used here.
+  new_mapping.CreateOrGetTagInd(res_alias);
+  auto& vars = agg_func.vars();
+  for (auto i = 0; i < vars.size(); ++i) {
+    auto& var = vars[i];
+    auto raw_tag_id = var.tag().id();
+    in_tags.push_back(ctx.GetTagInd(raw_tag_id));
+    if (var.has_property()) {
+      const auto& var_prop = var.property();
+      if (var_prop.item_case() == common::Property::kId) {
+        // IdKey
+        VLOG(10) << "aggregate on internal id";
+        in_prop_names.push_back("None");
+        in_prop_types.push_back(EMPTY_TYPE);
+      } else {
+        VLOG(10) << "aggregate on property " << var_prop.key().name();
+        in_prop_names.push_back(var.property().key().name());
+        in_prop_types.push_back(
+            common_data_type_pb_2_str(var.node_type().data_type()));
+      }
+    } else {
+      // var has no property, which means internal id.
+      VLOG(10) << "aggregate on internal id";
+      in_prop_names.push_back("None");
+      in_prop_types.push_back(EMPTY_TYPE);
+    }
+  }
+
+  std::string selectors_str, in_tags_str;
+  {
+    // comma-separated property selectors, one per input variable
+    std::stringstream ss;
+    for (size_t i = 0; i < in_prop_types.size(); ++i) {
+      boost::format selector_formater(PROPERTY_SELECTOR);
+      selector_formater % in_prop_types[i] % in_prop_names[i];
+      ss << selector_formater.str();
+      if (i != in_prop_types.size() - 1) {
+        ss << ", ";
+      }
+    }
+    selectors_str = ss.str();
+  }
+  {
+    // comma-separated input tag indices
+    std::stringstream ss;
+    for (size_t i = 0; i < in_tags.size(); ++i) {
+      ss << in_tags[i];
+      if (i != in_tags.size() - 1) {
+        ss << ", ";
+      }
+    }
+    in_tags_str = ss.str();
+  }
+  boost::format agg_formater(GROUP_AGG_TEMPLATE_STR);
+  agg_formater % cur_var_name % agg_func_name % selectors_str % in_tags_str;
+  return std::make_pair(cur_var_name, agg_formater.str());
+}
+
+// Collects group keys and aggregate functions and renders the GroupBy
+// operator code. Tag indices for the outputs are tracked in a fresh
+// TagIndMapping that replaces the context's mapping at the end of Build().
+class GroupByOpBuilder {
+ public:
+  GroupByOpBuilder(BuildingContext& ctx) : ctx_(ctx) {}
+
+  // add group key
+  // after groupby, we will clear the previous context, thus we will restart the
+  // counting of tag_ind
+  GroupByOpBuilder& AddKeyAlias(const physical::GroupBy::KeyAlias& key_alias) {
+    CHECK(key_alias.has_alias());
+    CHECK(key_alias.key().has_tag());
+    std::string prop_name = "None";
+    std::string prop_type;
+    auto group_key_var_name = ctx_.GetNextGroupKeyName();
+
+    // first in tag id, then alias id
+    int32_t input_tag_id = ctx_.GetTagInd(key_alias.key().tag().id());
+    int32_t output_tag_id =
+        new_tag_id_mapping.CreateOrGetTagInd(key_alias.alias().value());
+    // output_col_id should equal to current key's length
+    CHECK(output_tag_id == key_alias_name_and_code.size());
+    // we currently assume group key is always on internal id or graph ele
+    const auto& key_alias_key = key_alias.key();
+    if (key_alias_key.has_property()) {
+      auto& prop = key_alias.key().property();
+      if (prop.item_case() == common::Property::kId) {
+        VLOG(10) << "Group on " << key_alias.key().tag().id()
+                 << ", inner id id";
+        prop_type = EMPTY_TYPE;
+      } else if (prop.item_case() == common::Property::kKey) {
+        auto& prop_key = prop.key();
+        prop_name = prop_key.name();
+        prop_type =
+            common_data_type_pb_2_str(key_alias_key.node_type().data_type());
+      } else {
+        LOG(FATAL)
+            << "Current only support key_alias on internal id or property";
+      }
+    } else {
+      VLOG(10) << "Apply internal id since no property provided";
+      prop_type = EMPTY_TYPE;
+    }
+
+    std::string property_selector_str;
+    {
+      boost::format property_selector_fmt(PROPERTY_SELECTOR);
+      property_selector_fmt % prop_type % prop_name;
+      property_selector_str = property_selector_fmt.str();
+    }
+
+    boost::format formater(GROUP_KEY_TEMPLATE_STR);
+    formater % input_tag_id % prop_type % group_key_var_name %
+        property_selector_str;
+
+    key_alias_name_and_code.emplace_back(group_key_var_name, formater.str());
+    return *this;
+  }
+
+  // add aggregation function
+  GroupByOpBuilder& AddAggFunc(const physical::GroupBy::AggFunc& agg_func) {
+    // agg function can apply on multiple tag's prop
+    std::string agg_fun_var_name, agg_fun_code;
+    std::tie(agg_fun_var_name, agg_fun_code) =
+        gen_agg_var_and_code(ctx_, new_tag_id_mapping, agg_func);
+    agg_func_name_and_code.emplace_back(agg_fun_var_name, agg_fun_code);
+    return *this;
+  }
+
+  // Requires at least one key and at least one agg func (CHECKed);
+  // returns the operator code.
+  std::string Build() const {
+    CHECK(key_alias_name_and_code.size() > 0);
+    CHECK(agg_func_name_and_code.size() > 0);
+
+    std::string prev_ctx_name, next_ctx_name;
+    std::tie(prev_ctx_name, next_ctx_name) = ctx_.GetPrevAndNextCtxName();
+
+    std::string key_alias_con_str;
+    std::string agg_func_con_str;
+    std::string group_by_keys_vars_str;
+    std::string group_by_agg_vars_str;
+    {
+      // declarations of all group keys, one per line
+      std::stringstream ss;
+      for (size_t i = 0; i < key_alias_name_and_code.size(); ++i) {
+        ss << key_alias_name_and_code[i].second << std::endl;
+      }
+      key_alias_con_str = ss.str();
+    }
+    {
+      // declarations of all aggregate functions, one per line
+      std::stringstream ss;
+      for (size_t i = 0; i < agg_func_name_and_code.size(); ++i) {
+        ss << agg_func_name_and_code[i].second << std::endl;
+      }
+      agg_func_con_str = ss.str();
+    }
+    for (size_t i = 0; i < key_alias_name_and_code.size(); ++i) {
+      group_by_keys_vars_str += key_alias_name_and_code[i].first;
+      if (i != key_alias_name_and_code.size() - 1) {
+        group_by_keys_vars_str += ", ";
+      }
+    }
+    for (size_t i = 0; i < agg_func_name_and_code.size(); ++i) {
+      group_by_agg_vars_str += agg_func_name_and_code[i].first;
+      if (i != agg_func_name_and_code.size() - 1) {
+        group_by_agg_vars_str += ", ";
+      }
+    }
+    boost::format formater(GROUP_BY_OP_TEMPLATE_STR);
+    formater % key_alias_con_str % agg_func_con_str % next_ctx_name %
+        ctx_.GraphVar() % prev_ctx_name % group_by_keys_vars_str %
+        group_by_agg_vars_str;
+
+    // it is safe to update tag_id_mapping here
+    ctx_.UpdateTagIdAndIndMapping(new_tag_id_mapping);
+    return formater.str();
+  }
+
+ private:
+  BuildingContext& ctx_;
+  std::vector> key_alias_name_and_code;
+  std::vector> agg_func_name_and_code;
+  TagIndMapping new_tag_id_mapping;  // only update when build is over.
+};
+
+// Translates a physical::GroupBy message into GroupBy operator code.
+// CHECKs that there is exactly one key mapping and at least one function.
+// `meta_data` is not used by this function.
+static std::string BuildGroupByOp(
+    BuildingContext& ctx, const physical::GroupBy& group_by_pb,
+    const physical::PhysicalOpr::MetaData& meta_data) {
+  GroupByOpBuilder builder(ctx);
+  CHECK(group_by_pb.mappings_size() == 1)
+      << "Currently we only support one key";
+  auto& key_aliases = group_by_pb.mappings();
+
+  CHECK(group_by_pb.functions_size() >= 1);
+  auto& functions = group_by_pb.functions();
+  for (auto i = 0; i < key_aliases.size(); ++i) {
+    auto& key_alias = key_aliases[i];
+    builder.AddKeyAlias(key_alias);
+  }
+
+  for (auto i = 0; i < functions.size(); ++i) {
+    auto& func = functions[i];
+    builder.AddAggFunc(func);
+  }
+  return builder.Build();
+}
+}  // namespace gs
+
+#endif  // CODEGEN_SRC_HQPS_HQPS_GROUP_BY_BUILDER_H_
\ No newline at end of file
diff --git a/flex/codegen/src/hqps/hqps_join_utils.h b/flex/codegen/src/hqps/hqps_join_utils.h
new file mode 100644
index 000000000000..0edef28221f7
--- /dev/null
+++ b/flex/codegen/src/hqps/hqps_join_utils.h
@@ -0,0 +1,64 @@
+/** Copyright 2020 Alibaba Group Holding Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#ifndef CODEGEN_SRC_HQPS_HQPS_JOIN_UTILS_H_
+#define CODEGEN_SRC_HQPS_HQPS_JOIN_UTILS_H_
+
+#include
+#include
+
+#include "flex/codegen/src/building_context.h"
+#include "flex/codegen/src/codegen_utils.h"
+#include "flex/codegen/src/graph_types.h"
+#include "flex/codegen/src/pb_parser/name_id_parser.h"
+#include "proto_generated_gie/algebra.pb.h"
+#include "proto_generated_gie/common.pb.h"
+#include "proto_generated_gie/expr.pb.h"
+#include "proto_generated_gie/physical.pb.h"
+namespace gs {
+
+// NOTE(review): the targets of the two bare #include lines above look
+// stripped in this copy of the patch -- confirm against upstream.
+
+namespace internal {
+// Join kinds the code generator can emit.
+enum class JoinKind { kInnerJoin, kAntiJoin, kLeftOuterJoin };
+}
+
+// Maps the protobuf join kind onto internal::JoinKind.
+// Throws std::runtime_error for any kind other than INNER, ANTI, LEFT_OUTER.
+// Declared inline because it is a non-template function defined in a header.
+inline internal::JoinKind join_kind_pb_to_internal(
+    const physical::Join::JoinKind& join_kind_pb) {
+  switch (join_kind_pb) {
+  case physical::Join::JoinKind::Join_JoinKind_INNER:
+    return internal::JoinKind::kInnerJoin;
+  case physical::Join::JoinKind::Join_JoinKind_ANTI:
+    return internal::JoinKind::kAntiJoin;
+  case physical::Join::JoinKind::Join_JoinKind_LEFT_OUTER:
+    return internal::JoinKind::kLeftOuterJoin;
+  default:
+    throw std::runtime_error("unknown join_kind_pb");
+  }
+}
+
+// Returns the gs::JoinKind literal written into generated code.
+// Throws std::runtime_error for a value outside the enum.
+// Declared inline because it is a non-template function defined in a header.
+inline std::string join_kind_to_str(const internal::JoinKind& join_kind) {
+  switch (join_kind) {
+  case internal::JoinKind::kInnerJoin:
+    return "gs::JoinKind::InnerJoin";
+  case internal::JoinKind::kAntiJoin:
+    return "gs::JoinKind::AntiJoin";
+  case internal::JoinKind::kLeftOuterJoin:
+    return "gs::JoinKind::LeftOuterJoin";
+  default:
+    throw std::runtime_error("unknown join_kind");
+  }
+}
+
+}  // namespace gs
+
+#endif  // CODEGEN_SRC_HQPS_HQPS_JOIN_UTILS_H_
\ No newline at end of file
diff --git a/flex/codegen/src/hqps/hqps_path_expand_builder.h b/flex/codegen/src/hqps/hqps_path_expand_builder.h
new file mode 100644
index 000000000000..af13d562f0e6
--- /dev/null
+++ b/flex/codegen/src/hqps/hqps_path_expand_builder.h
@@ -0,0 +1,344 @@
+/** Copyright 2020 Alibaba Group Holding Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#ifndef CODEGEN_SRC_HQPS_HQPS_PATH_EXPAND_BUILDER_H_
+#define CODEGEN_SRC_HQPS_HQPS_PATH_EXPAND_BUILDER_H_
+
+#include
+#include
+
+#include "flex/codegen/src/building_context.h"
+#include "flex/codegen/src/codegen_utils.h"
+#include "flex/codegen/src/graph_types.h"
+#include "flex/codegen/src/hqps/hqps_get_v_builder.h"
+#include "flex/codegen/src/pb_parser/expand_parser.h"
+#include "proto_generated_gie/algebra.pb.h"
+#include "proto_generated_gie/common.pb.h"
+#include "proto_generated_gie/expr.pb.h"
+
+// #ifndef HOP_RANGE_PARAM
+// #define HOP_RANGE_PARAM
+// #endif
+
+namespace gs {
+
+// NOTE(review): angle-bracket contents (include targets, template parameter
+// lists and arguments) look stripped in this copy of the patch -- confirm
+// against upstream before compiling.
+
+static constexpr const char* PATH_EXPAND_OP_TEMPLATE_STR =
+    "%1%\n"
+    "%2%\n"
+    "auto %3% = gs::make_path_expand_opt(std::move(%4%), std::move(%5%), "
+    "gs::Range(%6%, %7%));\n"
+    "auto %8% = Engine::PathExpandV<%9%, %10%>(%11%, std::move(%12%), "
+    "std::move(%13%));\n";
+
+// Returns the gs::PathOpt literal for a protobuf path option.
+// Throws std::runtime_error for any value other than ARBITRARY or SIMPLE.
+// Declared inline because it is a non-template function defined in a header.
+inline std::string path_opt_pb_2_str(
+    const physical::PathExpand::PathOpt& path_opt_pb) {
+  switch (path_opt_pb) {
+  case physical::PathExpand::PathOpt::PathExpand_PathOpt_ARBITRARY:
+    return "gs::PathOpt::Arbitrary";
+  case physical::PathExpand::PathOpt::PathExpand_PathOpt_SIMPLE:
+    return "gs::PathOpt::Simple";
+  default:
+    throw std::runtime_error("unknown path_opt_pb");
+  }
+}
+
+// Returns the gs::ResultOpt literal for a protobuf result option.
+// Throws std::runtime_error for any value other than END_V or ALL_V.
+// Declared inline because it is a non-template function defined in a header.
+inline std::string result_opt_pb_2_str(
+    const physical::PathExpand::ResultOpt& result_opt_pb) {
+  switch (result_opt_pb) {
+  case physical::PathExpand::ResultOpt::PathExpand_ResultOpt_END_V:
+    return "gs::ResultOpt::EndV";
+  case physical::PathExpand::ResultOpt::PathExpand_ResultOpt_ALL_V:
+    return "gs::ResultOpt::AllV";
+  default:
+    throw std::runtime_error("unknown result_opt_pb");
+  }
+}
+
+// Fluent builder for the PathExpandV operator: gathers tags, the edge-expand
+// and get-v options, the hop range and the path/result options, then renders
+// the operator code in Build().
+template
+class PathExpandOpBuilder {
+ public:
+  PathExpandOpBuilder(BuildingContext& ctx) : ctx_(ctx) {}
+
+  // Resolves the input tag id to its index through the context.
+  PathExpandOpBuilder& in_tag(int32_t in_tag_id) {
+    in_tag_id_ = ctx_.GetTagInd(in_tag_id);
+    return *this;
+  }
+
+  // Resolves (creating if needed) the index of the output tag.
+  PathExpandOpBuilder& out_tag(int32_t out_tag_id) {
+    out_tag_id_ = ctx_.CreateOrGetTagInd(out_tag_id);
+    VLOG(10) << "out_tag_id: " << out_tag_id
+             << ", out_tag_ind_: " << out_tag_id_;
+    return *this;
+  }
+
+  // No-op; the edge-expand option is built inside path_expand_opt().
+  PathExpandOpBuilder& edge_expand_opt() { return *this; }
+
+  // Builds the get-v option and the edge-expand option.
+  // Destination labels come from the edge triplet in `meta_data_pb` when one
+  // is present, otherwise from the tables listed in `get_v_pb`.
+  // Throws std::runtime_error on unexpected meta data; CHECK-fails when the
+  // get-v has a predicate or when no destination label is found.
+  PathExpandOpBuilder& path_expand_opt(
+      const physical::EdgeExpand& edge_expand_pb,
+      const physical::GetV& get_v_pb,
+      const google::protobuf::RepeatedPtrField&
+          meta_data_pb) {
+    direction_ =
+        edge_expand_pb_2_internal_direction(edge_expand_pb.direction());
+
+    if (meta_data_pb.size() >= 1) {
+      CHECK(meta_data_pb.size() == 1) << "currently only support one meta_data";
+      auto& graph_type = meta_data_pb[0].type();
+      if (graph_type.type_case() == common::IrDataType::kDataType) {
+        // throw runtime error
+        throw std::runtime_error("Expect graphDataType in ir_data_type");
+      } else if (graph_type.type_case() == common::IrDataType::kGraphType) {
+        VLOG(10) << "Parse edge triplet from meta_data";
+        auto& act_graph_type = graph_type.graph_type();
+        if (act_graph_type.element_opt() !=
+            common::GraphDataType::GraphElementOpt::
+                GraphDataType_GraphElementOpt_EDGE) {
+          throw std::runtime_error("Expect edge graph type");
+        }
+        auto& edge_type = act_graph_type.graph_data_type();
+        if (edge_type.size() != 1) {
+          throw std::runtime_error("Expect only one edge type");
+        }
+        auto& edge_type0 = edge_type[0];
+        auto& edge_labels = edge_type0.label();
+        auto src_label = edge_labels.src_label().value();
+        auto dst_label = edge_labels.dst_label().value();
+
+        // if find edge triplets, we clear current
+        VLOG(10) << "Clear current dst labels:"
+                 << gs::to_string(dst_vertex_labels_);
+        dst_vertex_labels_.clear();
+
+        if (direction_ == internal::Direction::kBoth) {
+          CHECK(src_label == dst_label);
+          dst_vertex_labels_.emplace_back(src_label);
+        } else if (direction_ == internal::Direction::kOut) {
+          dst_vertex_labels_.emplace_back(dst_label);
+        } else if (direction_ == internal::Direction::kIn) {
+          dst_vertex_labels_.emplace_back(src_label);
+        } else {
+          throw std::runtime_error("Unknown direction");
+        }
+      } else {
+        throw std::runtime_error("Expect graphDataType in ir_data_type");
+      }
+    } else {
+      VLOG(10) << "No meta_data found";
+    }
+
+    {
+      // build get_v
+      auto v_opt = vopt_pb_to_internal(get_v_pb.opt());
+      auto& v_labels_pb = get_v_pb.params().tables();
+
+      if (dst_vertex_labels_.empty()) {
+        for (auto i = 0; i < v_labels_pb.size(); ++i) {
+          dst_vertex_labels_.push_back(
+              try_get_label_from_name_or_id(v_labels_pb[i]));
+        }
+      }
+      VLOG(10) << "get vertex labels:" << gs::to_string(dst_vertex_labels_);
+      CHECK(!get_v_pb.params().has_predicate()) << "currently don't suppport "
+                                                   "getv with condition";
+      // std::tie(get_v_expr_call_code, get_v_opt, getv_opt_) =
+      //     BuildGetVOpt(ctx_, get_v_pb);
+      std::tie(getv_opt_name_, getv_opt_code_) =
+          make_getv_opt_call_code(ctx_, v_opt, dst_vertex_labels_);
+      VLOG(10) << "Got getv_opt_name_: " << getv_opt_name_;
+      VLOG(10) << "Got getv_opt_code_: " << getv_opt_code_;
+    }
+
+    {
+      // build edge_expand_opt
+      auto& params = edge_expand_pb.params();
+      auto expand_opt = edge_expand_pb.expand_opt();
+      CHECK(dst_vertex_labels_.size() > 0) << "no dst lables found";
+
+      physical::PhysicalOpr::MetaData meta_data;
+      // pass an empty meta_data, since we need no meta_data for
+      // edge_expand_opt.
+      std::tie(edge_expand_opt_name_, edge_expand_opt_) = BuildEdgeExpandOpt(
+          ctx_, direction_, params, dst_vertex_labels_, expand_opt, meta_data);
+      VLOG(10) << "edge_expand_opt_name_: " << edge_expand_opt_name_;
+      VLOG(10) << "edge_expand_opt_: " << edge_expand_opt_;
+    }
+
+    return *this;
+  }
+
+  // Records the lower and upper hop bounds. When HOP_RANGE_PARAM is defined
+  // a bound may be a query parameter instead of a literal, in which case the
+  // matching range_*_ optional stays empty and range_*_param_ is set.
+  PathExpandOpBuilder& hop_range(const algebra::Range& hop_range_pb) {
+#ifdef HOP_RANGE_PARAM
+    if (hop_range_pb.has_lower()) {
+      auto& lower = hop_range_pb.lower();
+      if (lower.has_value()) {
+        range_lower_ = lower.value();
+      } else if (lower.has_param()) {
+        auto& param = lower.param();
+        range_lower_param_ = param_const_pb_to_param_const(lower.param());
+      } else {
+        LOG(WARNING) << "hop_range_pb has no lower";
+        range_lower_ = 0;
+      }
+    } else {
+      LOG(WARNING) << "hop_range_pb has no lower";
+      range_lower_ = 0;
+    }
+#else
+    range_lower_ = hop_range_pb.lower();
+#endif
+
+#ifdef HOP_RANGE_PARAM
+    if (hop_range_pb.has_upper()) {
+      auto& upper = hop_range_pb.upper();
+      if (upper.has_value()) {
+        range_upper_ = upper.value();
+      } else if (upper.has_param()) {
+        auto& param = upper.param();
+        range_upper_param_ = param_const_pb_to_param_const(upper.param());
+      } else {
+        LOG(WARNING) << "hop_range_pb has no upper";
+        range_upper_ = std::numeric_limits::max();
+      }
+    } else {
+      LOG(WARNING) << "hop_range_pb has no upper";
+      range_upper_ = std::numeric_limits::max();
+    }
+#else
+    range_upper_ = hop_range_pb.upper();
+#endif
+
+    // Only dereference the optionals when both hold a literal: with
+    // HOP_RANGE_PARAM a bound given as a parameter leaves its optional empty,
+    // and optional::value() would throw.
+    if (range_lower_.has_value() && range_upper_.has_value()) {
+      VLOG(10) << "got range: " << range_lower_.value() << " "
+               << range_upper_.value();
+    }
+    if (range_lower_param_) {
+      VLOG(10) << "got range_lower_param_: "
+               << range_lower_param_.value().var_name;
+    }
+    if (range_upper_param_) {
+      VLOG(10) << "got range_upper_param_: "
+               << range_upper_param_.value().var_name;
+    }
+    return *this;
+  }
+
+  // Stores the gs::PathOpt literal for the given option.
+  PathExpandOpBuilder& path_opt(
+      const physical::PathExpand::PathOpt& path_opt_pb) {
+    path_opt_str_ = path_opt_pb_2_str(path_opt_pb);
+    VLOG(10) << "got path_opt: " << path_opt_str_;
+    return *this;
+  }
+
+  // Stores the gs::ResultOpt literal for the given option.
+  PathExpandOpBuilder& result_opt(
+      const physical::PathExpand::ResultOpt& result_opt_pb) {
+    result_opt_str_ = result_opt_pb_2_str(result_opt_pb);
+    VLOG(10) << "got result_opt: " << result_opt_str_;
+    return *this;
+  }
+
+  // Conditions on path expand are not supported: the argument is ignored
+  // and only a warning is logged.
+  PathExpandOpBuilder& condition(const common::Expression& condition_pb) {
+    LOG(WARNING) << "Skiped for path expand with condition";
+    return *this;
+  }
+
+  // Renders the operator code from PATH_EXPAND_OP_TEMPLATE_STR. Parameter
+  // bounds, if any, are first registered with the context. LOG(FATAL)s when
+  // a bound has neither a literal nor a parameter.
+  std::string Build() const {
+    {
+      // first put the possible param vars into context
+      if (range_lower_param_.has_value()) {
+        ctx_.AddParameterVar(range_lower_param_.value());
+      }
+      if (range_upper_param_.has_value()) {
+        ctx_.AddParameterVar(range_upper_param_.value());
+      }
+    }
+    std::string prev_ctx_name, next_ctx_name;
+    std::tie(prev_ctx_name, next_ctx_name) = ctx_.GetPrevAndNextCtxName();
+    std::string path_expand_opt_var = ctx_.GetNextPathOptName();
+    std::string range_lower_value, range_upper_value;
+    if (range_lower_.has_value()) {
+      range_lower_value = std::to_string(range_lower_.value());
+    } else if (range_lower_param_.has_value()) {
+      range_lower_value = range_lower_param_.value().var_name;
+    } else {
+      LOG(FATAL) << "no id nor param found";
+    }
+    if (range_upper_.has_value()) {
+      range_upper_value = std::to_string(range_upper_.value());
+    } else if (range_upper_param_.has_value()) {
+      range_upper_value = range_upper_param_.value().var_name;
+    } else {
+      LOG(FATAL) << "no id nor param found";
+    }
+
+    auto append_opt = res_alias_to_append_opt(out_tag_id_);
+    auto input_col_str = format_input_col(in_tag_id_);
+    boost::format formater(PATH_EXPAND_OP_TEMPLATE_STR);
+    formater % edge_expand_opt_ % getv_opt_code_ % path_expand_opt_var %
+        edge_expand_opt_name_ % getv_opt_name_ % range_lower_value %
+        range_upper_value % next_ctx_name % append_opt % input_col_str %
+        ctx_.GraphVar() % prev_ctx_name % path_expand_opt_var;
+
+    return formater.str();
+  }
+
+ private:
+  BuildingContext& ctx_;
+  int32_t in_tag_id_, out_tag_id_;
+  std::string edge_expand_opt_name_, edge_expand_opt_;
+  std::string getv_opt_name_, getv_opt_code_;
+  std::optional range_lower_, range_upper_;
+  std::optional range_lower_param_, range_upper_param_;
+  std::string path_opt_str_, result_opt_str_;
+  std::vector dst_vertex_labels_;
+  internal::Direction direction_;
+};
+
+// edge_expand_opt
+// get_v_opt
+// path_expand_opt
+// op_code.
+// NOTE: we currently only support path expand v, the in_tag can be fetched
+// from path_expand_pb itself, while the res_alias shall be fetched from the
+// later get_v
+template
+static std::string BuildPathExpandOp(
+    BuildingContext& ctx, const physical::PathExpand& path_expand_pb,
+    const google::protobuf::RepeatedPtrField&
+        meta_data,
+    int32_t out_tag_id) {
+  PathExpandOpBuilder builder(ctx);
+  if (path_expand_pb.has_start_tag()) {
+    builder.in_tag(path_expand_pb.start_tag().value());
+  } else {
+    builder.in_tag(-1);
+  }
+
+  // CHECK(!path_expand_pb.has_alias());
+  builder.out_tag(out_tag_id);
+
+  return builder
+      .path_expand_opt(path_expand_pb.base().edge_expand(),
+                       path_expand_pb.base().get_v(),
+                       meta_data)  // get_v_opt must be called first to
+                                   // provide dst_label ids.
+      .hop_range(path_expand_pb.hop_range())
+      .path_opt(path_expand_pb.path_opt())
+      .result_opt(path_expand_pb.result_opt())
+      .condition(path_expand_pb.condition())
+
+      .Build();
+}
+}  // namespace gs
+
+#endif  // CODEGEN_SRC_HQPS_HQPS_PATH_EXPAND_BUILDER_H_
\ No newline at end of file
diff --git a/flex/codegen/src/hqps/hqps_project_builder.h b/flex/codegen/src/hqps/hqps_project_builder.h
new file mode 100644
index 000000000000..b4b84feee443
--- /dev/null
+++ b/flex/codegen/src/hqps/hqps_project_builder.h
@@ -0,0 +1,375 @@
+/** Copyright 2020 Alibaba Group Holding Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef CODEGEN_SRC_HQPS_HQPS_PROJECT_BUILDER_H_ +#define CODEGEN_SRC_HQPS_HQPS_PROJECT_BUILDER_H_ + +#include +#include +#include + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "flex/codegen/src/hqps/hqps_case_when_builder.h" +#include "flex/codegen/src/hqps/hqps_expr_builder.h" +#include "flex/codegen/src/pb_parser/query_params_parser.h" +#include "flex/codegen/src/string_utils.h" +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/physical.pb.h" + +namespace gs { +static constexpr const char* PROJECT_MAPPER_VAR_TEMPLATE_STR = + "gs::make_mapper_with_variable(%2%)"; + +static constexpr const char* PROJECT_MAPPER_EXPR_TEMPLATE_STR = + "gs::make_mapper_with_expr<%1%>(%2%(%3%) %4%)"; + +static constexpr const char* PROJECT_OP_TEMPLATE_STR = + "auto %1% = Engine::Project<%2%>(%3%, std::move(%4%), std::tuple{%5%});\n"; + +// to check the output type of case when is the same. 
+// NOTE(review): angle-bracket contents (template arguments) look stripped in
+// this copy of the patch -- confirm against upstream before compiling.
+//
+// Placeholder for the case-when output type check: currently accepts every
+// input. Declared inline because it is defined in a header.
+inline bool sanity_check(const common::Case& expr_case) {
+  // TODO: implement this check
+  return true;
+}
+
+// Joins the pieces produced by an expression builder into three strings:
+//  - the input column ids (first element of each selector), comma-separated;
+//  - the constructor parameters, each prefixed so the list starts with ", ";
+//  - the selector strings (second element), likewise starting with ", ".
+// An empty input yields an empty string for the corresponding piece.
+// `ctx` is not used by this function.
+// Declared inline because it is a non-template function defined in a header.
+inline std::tuple concatenate_expr_built_result(
+    BuildingContext& ctx,
+    const std::vector& func_construct_param_const,
+    const std::vector>& expr_selectors) {
+  std::string in_col_ids, expr_constructor_param_str, expr_selector_str;
+  {
+    std::stringstream ss;
+    for (size_t i = 0; i < expr_selectors.size(); ++i) {
+      ss << expr_selectors[i].first;
+      if (i != expr_selectors.size() - 1) {
+        ss << ", ";
+      }
+    }
+    in_col_ids = ss.str();
+  }
+  {
+    std::stringstream ss;
+    if (func_construct_param_const.size() > 0) {
+      ss << ", ";
+    }
+    for (size_t i = 0; i < func_construct_param_const.size(); ++i) {
+      ss << func_construct_param_const[i].var_name;
+      if (i != func_construct_param_const.size() - 1) {
+        ss << ", ";
+      }
+    }
+    expr_constructor_param_str = ss.str();
+  }
+  {
+    std::stringstream ss;
+    if (expr_selectors.size() > 0) {
+      ss << ", ";
+    }
+    for (size_t i = 0; i < expr_selectors.size(); ++i) {
+      ss << expr_selectors[i].second;
+      if (i != expr_selectors.size() - 1) {
+        ss << ", ";
+      }
+    }
+    expr_selector_str = ss.str();
+  }
+  return std::make_tuple(in_col_ids, expr_constructor_param_str,
+                         expr_selector_str);
+}
+
+// There can be expression in project's mappings
+// 0. project common expression
+// 1. project case when.
+// NOTE: the return type of all case-when and else should be all the
+// same(excluding null)
+//
+// Builds the mapper code for a case-when projection: the generated expression
+// code and any parameter constants are registered with `ctx`, and the
+// returned string is PROJECT_MAPPER_EXPR_TEMPLATE_STR filled in.
+// `out_alias_tag` is not used by this function.
+// Throws std::runtime_error if sanity_check() rejects the expression.
+// Declared inline because it is a non-template function defined in a header.
+inline std::string project_case_when_from_project_mapping(
+    BuildingContext& ctx, const common::Case& expr_case,
+    common::DataType data_type, int32_t out_alias_tag) {
+  // the common::case contains three expression, input-expr, when-expr,
+  // then-expr the input-expr can be null. if it is null, it means we just
+  // evaluate when-expr, then-expr like case when.
+
+  // if input-expr is not null, we need to evaluate input-expr, and then
+  // evaluate when-expr, then-expr, and else_expr.
+
+  // if input-expr is null, we need to evaluate when-expr, then-expr, and
+  // else_expr.
+
+  // check the return type is the same.
+  if (!sanity_check(expr_case)) {
+    throw std::runtime_error("case when sanity check failed");
+  }
+  CaseWhenBuilder builder(ctx);
+  builder.return_type(data_type)
+      .when_then_exprs(expr_case.when_then_expressions())
+      .else_expr(expr_case.else_result_expression());
+
+  std::string expr_func_name, expr_code;
+  std::vector func_construct_param_const;
+  std::vector> expr_selectors;
+  common::DataType ret_data_type;  // returned data type for case when building
+  std::tie(expr_func_name, func_construct_param_const, expr_selectors,
+           expr_code, ret_data_type) = builder.Build();
+
+  ctx.AddExprCode(expr_code);
+  if (func_construct_param_const.size() > 0) {
+    for (auto& param_const : func_construct_param_const) {
+      ctx.AddParameterVar(param_const);
+    }
+  }
+
+  // make_project_with_expr
+  std::string in_col_ids, expr_constructor_param_str, expr_selector_str;
+  std::tie(in_col_ids, expr_constructor_param_str, expr_selector_str) =
+      concatenate_expr_built_result(ctx, func_construct_param_const,
+                                    expr_selectors);
+  boost::format formater(PROJECT_MAPPER_EXPR_TEMPLATE_STR);
+  formater % in_col_ids % expr_func_name % expr_constructor_param_str %
+      expr_selector_str;
+  return formater.str();
+}
+
+// Builds the mapper code for a projected expression. Only three-operator
+// (binary) expressions are accepted (CHECKed); the return type is taken from
+// the node type of the middle operator. Generated expression code and
+// parameter constants are registered with `ctx`.
+// `out_alias_tag` is not used by this function.
+// Declared inline because it is a non-template function defined in a header.
+inline std::string project_expression_from_project_mapping(
+    BuildingContext& ctx, const common::Expression& expr,
+    int32_t out_alias_tag) {
+  auto expr_builder = ExprBuilder(ctx);
+  CHECK(expr.operators_size() == 3)
+      << "Current only support binary expression for project";
+  CHECK(expr.operators(1).has_node_type());
+  // NOTE(review): the result is unused; the call is kept in case the
+  // conversion rejects unsupported types -- confirm.
+  auto data_type_name =
+      common_data_type_pb_2_str(expr.operators(1).node_type().data_type());
+  expr_builder.set_return_type(expr.operators(1).node_type().data_type());
+  expr_builder.AddAllExprOpr(expr.operators());
+  std::string expr_func_name, expr_code;
+  std::vector func_construct_param_const;
+  std::vector> expr_selectors;
+  common::DataType unused_expr_ret_type;
+  std::tie(expr_func_name, func_construct_param_const, expr_selectors,
+           expr_code, unused_expr_ret_type) = expr_builder.Build();
+
+  ctx.AddExprCode(expr_code);
+  // make_project_with_expr
+  if (func_construct_param_const.size() > 0) {
+    for (auto& param_const : func_construct_param_const) {
+      ctx.AddParameterVar(param_const);
+    }
+  }
+
+  // make_project_with_expr
+  std::string in_col_ids, expr_constructor_param_str, expr_selector_str;
+  std::tie(in_col_ids, expr_constructor_param_str, expr_selector_str) =
+      concatenate_expr_built_result(ctx, func_construct_param_const,
+                                    expr_selectors);
+  boost::format formater(PROJECT_MAPPER_EXPR_TEMPLATE_STR);
+  formater % in_col_ids % expr_func_name % expr_constructor_param_str %
+      expr_selector_str;
+  return formater.str();
+}
+
+std::string project_variable_mapping_to_string(BuildingContext& ctx,
+                                               const common::ExprOpr& expr_op,
+                                               int32_t real_res_col_id) {
+  int32_t in_tag_id = -2;
+  std::vector prop_names;
+  std::vector data_types;
+  bool project_self = false;
+  switch (expr_op.item_case()) {
+  case common::ExprOpr::kCase: {
+    VLOG(10) << "Got case when in projecting";
+    auto case_when = expr_op.case_();
+    VLOG(10) << case_when.DebugString();
+    CHECK(expr_op.node_type().type_case() == common::IrDataType::kDataType);
+    return project_case_when_from_project_mapping(
+        ctx, case_when, expr_op.node_type().data_type(), real_res_col_id);
+  }
+  case common::ExprOpr::kVar: {
+    VLOG(10) << "Got var in projecting";
+    auto& var = expr_op.var();
+    in_tag_id = var.tag().id();
+    if (var.has_property()) {
+      auto& prop = var.property();
+      if (prop.item_case() == common::Property::kId) {
+        project_self = true;
+      } else if (prop.item_case() == common::Property::kKey) {
+        prop_names.push_back(prop.key().name());
+        data_types.push_back(
+
common_data_type_pb_2_data_type(var.node_type().data_type())); + } else { + LOG(FATAL) << "Unknown property type" << prop.DebugString(); + } + } else { + VLOG(10) << "receives no property, project itself"; + project_self = true; + } + break; + } + case common::ExprOpr::kVarMap: { + VLOG(10) << "Got variable map in projecting"; + LOG(WARNING) << "CURRENTLY we flat the var map to a list of variables"; + } + + case common::ExprOpr::kVars: { + VLOG(10) << "Got variable keys in projecting"; + // project properties to a list. + auto& vars = + expr_op.has_vars() ? expr_op.vars().keys() : expr_op.var_map().keys(); + for (auto i = 0; i < vars.size(); ++i) { + auto& var = vars[i]; + if (in_tag_id == -2) { + in_tag_id = var.tag().id(); + } else { + CHECK(in_tag_id == var.tag().id()) << "can only support one tag"; + } + + auto& prop = var.property(); + // if (prop.has_id()) { + if (prop.item_case() == common::Property::kId) { + LOG(FATAL) << "Not support project id in projecting with vars"; + // } else if (prop.has_key()) { + } else if (prop.item_case() == common::Property::kKey) { + prop_names.push_back(prop.key().name()); + data_types.push_back( + common_data_type_pb_2_data_type(var.node_type().data_type())); + } else { + LOG(FATAL) << "Unknown property type" << prop.DebugString(); + } + } + break; + } + default: + LOG(FATAL) << "Unknown variable type"; + } + auto real_in_col_id = ctx.GetTagInd(in_tag_id); + VLOG(10) << "real_in_tag_id: " << real_in_col_id + << " in_tag_id: " << in_tag_id; + + if (project_self) { + VLOG(10) << "Projecting self"; + CHECK(prop_names.size() == 0 && data_types.size() == 0); + std::string selector_str; + { + boost::format select_formater(PROPERTY_SELECTOR); + select_formater % EMPTY_TYPE % ""; + selector_str = select_formater.str(); + } + boost::format formater(PROJECT_MAPPER_VAR_TEMPLATE_STR); + + formater % real_in_col_id % selector_str; + return formater.str(); + } else { + VLOG(10) << "Projecting properties" << gs::to_string(prop_names); + 
CHECK(prop_names.size() == data_types.size()); + CHECK(prop_names.size() == 1); + std::string selector_str; + { + boost::format select_formater(PROPERTY_SELECTOR); + select_formater % data_type_2_string(data_types[0]) % prop_names[0]; + selector_str = select_formater.str(); + } + boost::format formater(PROJECT_MAPPER_VAR_TEMPLATE_STR); + formater % real_in_col_id % selector_str; + return formater.str(); + } +} + +std::string project_mapping_to_string( + BuildingContext& ctx, const physical::Project::ExprAlias& mapping, + TagIndMapping& new_tag_ind_map) { + int32_t res_alias = mapping.alias().value(); + // TODO: Currenly we assume each expr_alias contains only property for that + // input tag + + auto real_res_alias = new_tag_ind_map.CreateOrGetTagInd(res_alias); + auto& expr = mapping.expr(); + // CHECK(expr.operators_size() == 1) << "can only support one variable"; + if (expr.operators_size() > 1) { + return project_expression_from_project_mapping(ctx, expr, real_res_alias); + } else if (expr.operators_size() == 1) { + auto& expr_op = expr.operators(0); + return project_variable_mapping_to_string(ctx, expr_op, real_res_alias); + } else { + LOG(FATAL) << "expect at least one expr opr"; + return ""; + } +} + +/** + * @brief Build project operator. + * Project op will create a brand new context, which means we should create a + * new tag_id tag_ind mapping. + * + */ +class ProjectOpBuilder { + public: + ProjectOpBuilder(BuildingContext& ctx) : ctx_(ctx) {} + + ProjectOpBuilder& is_append(bool is_append) { + is_append_ = is_append; + return *this; + } + + ProjectOpBuilder& add_mapping(const physical::Project::ExprAlias& mapping) { + mappings_.push_back(mapping); + return *this; + } + + // return make_project code and call project code. 
+ std::string Build() const { + TagIndMapping new_tag_id_mapping; + std::string project_cols_code; + std::string prev_ctx_name, next_ctx_name; + std::tie(prev_ctx_name, next_ctx_name) = ctx_.GetPrevAndNextCtxName(); + std::stringstream ss; + for (int i = 0; i < mappings_.size(); ++i) { + ss << project_mapping_to_string(ctx_, mappings_[i], new_tag_id_mapping); + if (i != mappings_.size() - 1) { + ss << ", "; + } + } + project_cols_code = ss.str(); + + boost::format formater(PROJECT_OP_TEMPLATE_STR); + formater % next_ctx_name % project_is_append_str(is_append_) % + ctx_.GraphVar() % prev_ctx_name % project_cols_code; + ctx_.UpdateTagIdAndIndMapping(new_tag_id_mapping); + + return formater.str(); + } + + private: + BuildingContext& ctx_; + bool is_append_; + std::vector mappings_; +}; + +static std::string BuildProjectOp( + BuildingContext& ctx, const physical::Project& project_pb, + const physical::PhysicalOpr::MetaData& meta_data) { + ProjectOpBuilder builder(ctx); + builder.is_append(project_pb.is_append()); + auto& mappings = project_pb.mappings(); + for (auto i = 0; i < mappings.size(); ++i) { + builder.add_mapping(mappings[i]); + } + return builder.Build(); +} +} // namespace gs + +#endif // CODEGEN_SRC_HQPS_HQPS_PROJECT_BUILDER_H_ \ No newline at end of file diff --git a/flex/codegen/src/hqps/hqps_scan_builder.h b/flex/codegen/src/hqps/hqps_scan_builder.h new file mode 100644 index 000000000000..416a44198ceb --- /dev/null +++ b/flex/codegen/src/hqps/hqps_scan_builder.h @@ -0,0 +1,247 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef CODEGEN_SRC_HQPS_HQPS_SCAN_BUILDER_H_ +#define CODEGEN_SRC_HQPS_HQPS_SCAN_BUILDER_H_ + +#include +#include +#include + +#include "boost/format.hpp" +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "flex/codegen/src/pb_parser/query_params_parser.h" +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/physical.pb.h" + +// #define FAST_SCAN + +namespace gs { + +/// Args +/// 1. expr_var_name +/// 2. expr_class_name(const_params) +/// 3. selectors,concatenated string +/// 4. res_ctx_name +/// 5. AppendOpt +/// 6. graph name +/// 7. vertex label +static constexpr const char* SCAN_OP_TEMPLATE_STR = + "auto %1% = gs::make_filter(%2%(%3%), %4%);\n" + "auto %5% = Engine::template ScanVertex<%6%>(%7%, %8%, std::move(%1%));\n"; + +/// Args +/// 1. res_ctx_name +/// 2. AppendOpt, +/// 3. graph name +/// 4. vertex label +/// 5. oid +static constexpr const char* SCAN_OP_WITH_OID_TEMPLATE_STR = + "auto %1% = Engine::template ScanVertex<%2%>(%3%, %4%, %5%));\n"; + +/** + * @brief When building scanOp, we ignore the data type provided in the pb. 
+ * + */ +class ScanOpBuilder { + public: + ScanOpBuilder(BuildingContext& ctx) : ctx_(ctx) {} + + ScanOpBuilder& scanOpt(const physical::Scan::ScanOpt& opt) { + if (opt != physical::Scan::ScanOpt::Scan_ScanOpt_VERTEX) { + throw std::runtime_error( + std::string("Currently only suppor from vertex")); + } + scan_opt_ = opt; + return *this; + } + + ScanOpBuilder& resAlias(const int32_t& res_alias) { + res_alias_ = ctx_.CreateOrGetTagInd(res_alias); + CHECK(res_alias_ == -1 || res_alias_ == 0); + return *this; + } + + // get required oid from query params + ScanOpBuilder& queryParams(const algebra::QueryParams& query_params) { + if (!query_params.has_predicate()) { + throw std::runtime_error(std::string("expect expr in params")); + } + query_params_ = query_params; + return *this; + } + + std::string Build() const { + std::string label_name; + std::vector labels_ids; + if (!try_to_get_label_name_from_query_params(query_params_, label_name)) { + LOG(WARNING) << "fail to label name from expr"; + if (!try_to_get_label_id_from_query_params(query_params_, labels_ids)) { + LOG(FATAL) << "fail to label id from expr"; + } + } + + // the user provide oid can be a const or a param const + auto& predicate = query_params_.predicate(); + VLOG(10) << "predicate: " << predicate.DebugString(); + // We first scan the predicate to find whether there is conditions on + // labels. 
+ std::vector expr_label_ids; + if (try_to_get_label_ids_from_expr(predicate, expr_label_ids)) { + // join expr_label_ids with table_lable_ids; + VLOG(10) << "Found label ids in expr: " << gs::to_string(expr_label_ids); + labels_ids = expr_label_ids; + } + // CHECK(labels_ids.size() == 1) << "only support one label in scan"; + +#ifdef FAST_SCAN + gs::codegen::oid_t oid; + gs::codegen::ParamConst oid_param; + if (try_to_get_oid_from_expr(predicate, oid)) { + VLOG(10) << "Parse oid: " << oid << "from expr"; + return scan_with_oid(label_name, label_id, oid); + } else if (try_to_get_oid_param_from_expr(predicate, oid_param)) { + VLOG(10) << "Parse oid param: " << oid_param.var_name << "from expr"; + return scan_with_oid(label_name, label_id, oid_param.var_name); + } else { + VLOG(10) << "Fail to parse oid from expr"; + { +#endif + auto expr_builder = ExprBuilder(ctx_); + expr_builder.set_return_type(common::DataType::BOOLEAN); + expr_builder.AddAllExprOpr(predicate.operators()); + + std::string expr_func_name, expr_code; + std::vector func_call_param_const; + std::vector> expr_tag_props; + common::DataType unused_expr_ret_type; + std::tie(expr_func_name, func_call_param_const, expr_tag_props, + expr_code, unused_expr_ret_type) = expr_builder.Build(); + VLOG(10) << "Found expr in edge_expand_opt: " << expr_func_name; + // generate code. 
+ ctx_.AddExprCode(expr_code); + std::string expr_var_name = ctx_.GetNextExprVarName(); + std::string expr_construct_params; // function construction params and + std::string selectors_str; // selectors str, concatenated + { + std::stringstream ss; + for (auto i = 0; i < func_call_param_const.size(); ++i) { + ss << func_call_param_const[i].var_name; + if (i != func_call_param_const.size() - 1) { + ss << ","; + } + } + expr_construct_params = ss.str(); + } + { + std::stringstream ss; + if (expr_tag_props.size() > 0) { + for (auto i = 0; i + 1 < expr_tag_props.size(); ++i) { + ss << expr_tag_props[i].second << ", "; + } + ss << expr_tag_props[expr_tag_props.size() - 1].second; + } + selectors_str = ss.str(); + } + + // use expression to filter. + return scan_with_expr(labels_ids, expr_var_name, expr_func_name, + expr_construct_params, selectors_str); + +#ifdef FAST_SCAN + } + } +#endif + } + + private: + std::string scan_with_oid(const std::string& label_name, + const int32_t& label_id, codegen::oid_t oid) const { + VLOG(10) << "Scan with fixed oid" << oid; + std::string next_ctx_name = ctx_.GetCurCtxName(); + auto append_opt = res_alias_to_append_opt(res_alias_); + + boost::format formater(SCAN_OP_WITH_OID_TEMPLATE_STR); + formater % next_ctx_name % append_opt % ctx_.GraphVar() % label_id % oid; + return formater.str(); + } + std::string scan_with_oid(const std::string& label_name, + const int32_t& label_id, + const std::string& oid) const { + VLOG(10) << "Scan with dynamic param oid"; + std::string next_ctx_name = ctx_.GetCurCtxName(); + auto append_opt = res_alias_to_append_opt(res_alias_); + + boost::format formater(SCAN_OP_WITH_OID_TEMPLATE_STR); + formater % next_ctx_name % append_opt % ctx_.GraphVar() % label_id % oid; + return formater.str(); + } + + std::string scan_with_expr(const std::vector& label_ids, + const std::string& expr_var_name, + const std::string& expr_func_name, + const std::string& expr_construct_params, + const std::string& selectors_str) 
const { + std::string next_ctx_name = ctx_.GetCurCtxName(); + std::string label_ids_str; + { + std::stringstream ss; + CHECK(label_ids.size() > 0); + if (label_ids.size() == 1) { + ss << label_ids[0]; + } else { + ss << "std::array {"; + for (auto i = 0; i + 1 < label_ids.size(); ++i) { + ss << std::to_string(label_ids[i]) << ", "; + } + ss << std::to_string(label_ids[label_ids.size() - 1]); + ss << "}"; + } + label_ids_str = ss.str(); + } + + boost::format formater(SCAN_OP_TEMPLATE_STR); + formater % expr_var_name % expr_func_name % expr_construct_params % + selectors_str % next_ctx_name % res_alias_to_append_opt(res_alias_) % + ctx_.GraphVar() % label_ids_str; + return formater.str(); + } + BuildingContext& ctx_; + physical::Scan::ScanOpt scan_opt_; + algebra::QueryParams query_params_; + int res_alias_; +}; + +static std::string BuildScanOp( + BuildingContext& ctx, const physical::Scan& scan_pb, + const physical::PhysicalOpr::MetaData& meta_data) { + if (!scan_pb.has_params()) { + throw std::runtime_error(std::string("expect scan pb has params")); + } + auto builder = ScanOpBuilder(ctx).scanOpt(scan_pb.scan_opt()); + if (scan_pb.has_alias()) { + VLOG(10) << "scan pb has alias" << scan_pb.alias().value(); + builder.resAlias(scan_pb.alias().value()); + } else { + builder.resAlias(-1); + } + return builder.queryParams(scan_pb.params()).Build(); +} + +} // namespace gs + +#endif // CODEGEN_SRC_HQPS_HQPS_SCAN_BUILDER_H_ \ No newline at end of file diff --git a/flex/codegen/src/hqps/hqps_select_builder.h b/flex/codegen/src/hqps/hqps_select_builder.h new file mode 100644 index 000000000000..1c14d1833250 --- /dev/null +++ b/flex/codegen/src/hqps/hqps_select_builder.h @@ -0,0 +1,134 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef CODEGEN_SRC_HQPS_HQPS_SELECT_BUILDER_H_ +#define CODEGEN_SRC_HQPS_HQPS_SELECT_BUILDER_H_ + +#include +#include +#include + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "flex/codegen/src/hqps/hqps_expr_builder.h" +#include "flex/codegen/src/pb_parser/query_params_parser.h" +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/physical.pb.h" + +namespace gs { + +static constexpr const char* SELECT_OP_TEMPLATE_STR = + "auto %1% = gs::make_filter(%2%(%3%), %4%);\n" + "auto %5% = Engine::template Select<%6%>(%7%, std::move(%8%), " + "std::move(%1%));\n"; + +class SelectOpBuilder { + public: + SelectOpBuilder(BuildingContext& ctx) : ctx_(ctx) {} + + SelectOpBuilder& expr(const common::Expression expr) { + ExprBuilder expr_builder(ctx_); + expr_builder.set_return_type(common::DataType::BOOLEAN); + expr_builder.AddAllExprOpr(expr.operators()); + + std::string func_code; + std::vector func_call_params; + std::vector> tag_props; + common::DataType unused_expr_ret_type; + std::tie(expr_name_, func_call_params, tag_props, func_code, + unused_expr_ret_type) = expr_builder.Build(); + + // add func_call_params to ctx's param const; + for (auto i = 0; i < func_call_params.size(); ++i) { + ctx_.AddParameterVar(func_call_params[i]); + } + + ctx_.AddExprCode(func_code); + + expr_var_name_ = ctx_.GetNextExprVarName(); + { + std::stringstream ss; + for (auto i = 0; i < func_call_params.size(); ++i) { + ss << 
func_call_params[i].var_name; + if (i != func_call_params.size() - 1) { + ss << ","; + } + } + func_call_param_str_ = ss.str(); + } + { + std::stringstream ss; + for (auto i = 0; i < tag_props.size(); ++i) { + ss << tag_props[i].second; + if (i != tag_props.size() - 1) { + ss << ","; + } + } + selectors_str_ = ss.str(); + } + { + std::stringstream ss; + for (auto i = 0; i < tag_props.size(); ++i) { + ss << format_input_col(tag_props[i].first); + if (i != tag_props.size() - 1) { + ss << ","; + } + } + in_col_ids_str_ = ss.str(); + } + + return *this; + } + + std::string Build() const { + std::string prev_ctx_name, next_ctx_name; + std::tie(prev_ctx_name, next_ctx_name) = ctx_.GetPrevAndNextCtxName(); + + boost::format formater(SELECT_OP_TEMPLATE_STR); + formater % expr_var_name_ % expr_name_ % func_call_param_str_ % + selectors_str_ % next_ctx_name % in_col_ids_str_ % ctx_.GraphVar() % + prev_ctx_name; + return formater.str(); + } + + private: + BuildingContext& ctx_; + std::string expr_name_; + std::string expr_call_code_; + std::string expr_var_name_; + std::string func_call_param_str_; + std::string selectors_str_; + std::string in_col_ids_str_; +}; + +// return expression code and select op code(including expr calling) +static std::string BuildSelectOp( + BuildingContext& ctx, const algebra::Select& select_pb, + const physical::PhysicalOpr::MetaData& meta_data) { + if (!select_pb.has_predicate()) { + throw std::runtime_error("Select expression is not set"); + } + + ////build select op code + SelectOpBuilder select_builder(ctx); + select_builder.expr(select_pb.predicate()); + std::string select_op_code = select_builder.Build(); + return select_op_code; +} + +} // namespace gs + +#endif // CODEGEN_SRC_HQPS_HQPS_SELECT_BUILDER_H_ \ No newline at end of file diff --git a/flex/codegen/src/hqps/hqps_sink_builder.h b/flex/codegen/src/hqps/hqps_sink_builder.h new file mode 100644 index 000000000000..99cacda42de6 --- /dev/null +++ 
b/flex/codegen/src/hqps/hqps_sink_builder.h @@ -0,0 +1,75 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef CODEGEBN_SRC_HQPS_HQPS_SINK_BUILDER_H_ +#define CODEGEBN_SRC_HQPS_HQPS_SINK_BUILDER_H_ + +#include +#include +#include + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "flex/codegen/src/hqps/hqps_expr_builder.h" +#include "flex/codegen/src/pb_parser/query_params_parser.h" +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/physical.pb.h" + +namespace gs { + +static constexpr const char* SINK_OP_TEMPLATE_STR = + "return Engine::Sink(%1%, std::array{%3%});"; +class SinkOpBuilder { + public: + SinkOpBuilder(BuildingContext& ctx) : ctx_(ctx) {} + + std::string Build() { + std::string prev_ctx_name = ctx_.GetCurCtxName(); + // We need to sink the result along with the alias_id, which is maintained + // in the context + auto& tag_ind_2_tag_ids = ctx_.GetTagIdAndIndMapping().GetTagInd2TagIds(); + CHECK(tag_ind_2_tag_ids.size() > 0); + + std::string tag_ids_str; + { + std::stringstream ss; + for (auto i = 0; i < tag_ind_2_tag_ids.size(); ++i) { + if (i == tag_ind_2_tag_ids.size() - 1) { + ss << tag_ind_2_tag_ids[i]; + } else { + ss << tag_ind_2_tag_ids[i] << ","; + } + } + tag_ids_str = ss.str(); + } + boost::format formater(SINK_OP_TEMPLATE_STR); + formater % 
prev_ctx_name % tag_ind_2_tag_ids.size() % tag_ids_str; + return formater.str(); + } + + private: + BuildingContext ctx_; +}; + +std::string BuildSinkOp(BuildingContext& ctx, const physical::Sink& sink_op_pb, + const physical::PhysicalOpr::MetaData& meta_data) { + SinkOpBuilder builder(ctx); + return builder.Build(); +} + +} // namespace gs + +#endif // CODEGEBN_SRC_HQPS_HQPS_SINK_BUILDER_H_ \ No newline at end of file diff --git a/flex/codegen/src/hqps/hqps_sort_builder.h b/flex/codegen/src/hqps/hqps_sort_builder.h new file mode 100644 index 000000000000..cabee24afca0 --- /dev/null +++ b/flex/codegen/src/hqps/hqps_sort_builder.h @@ -0,0 +1,166 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#ifndef CODEGEN_SRC_HQPS_HQPS_SORT_BUILDER_H_ +#define CODEGEN_SRC_HQPS_HQPS_SORT_BUILDER_H_ + +#include +#include +#include +#include + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "flex/codegen/src/hqps/hqps_expr_builder.h" +#include "flex/codegen/src/pb_parser/query_params_parser.h" +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/physical.pb.h" + +namespace gs { + +static constexpr const char* ORDERING_PAIR_TEMPLATE_STR = + "gs::OrderingPropPair<%1%, %2%, %3%>(\"%4%\")"; + +static constexpr const char* SORT_OP_TEMPLATE_STR = + "auto %1% = Engine::Sort(%2%, std::move(%3%), gs::Range(%4%, %5%), " + "std::tuple{%6%});"; + +std::string sort_pair_pb_to_order_pair( + const BuildingContext& ctx, const algebra::OrderBy::OrderingPair& pair) { + std::stringstream ss; + std::string sort_order_str, sort_prop_type, sort_prop_name; + if (pair.order() == + algebra::OrderBy::OrderingPair::Order::OrderBy_OrderingPair_Order_ASC) { + sort_order_str = "gs::SortOrder::ASC"; + } else if (pair.order() == algebra::OrderBy::OrderingPair::Order:: + OrderBy_OrderingPair_Order_DESC) { + sort_order_str = "gs::SortOrder::DESC"; + } else { + throw std::runtime_error("Unknown sort order: "); + } + auto real_key_tag_id = ctx.GetTagInd(pair.key().tag().id()); + CHECK(pair.key().node_type().type_case() == common::IrDataType::kDataType) + << "sort ordering pair only support primitive"; + sort_prop_type = + common_data_type_pb_2_str(pair.key().node_type().data_type()); + // the type of sorted property. 
+ sort_prop_name = pair.key().property().key().name(); + + boost::format formater(ORDERING_PAIR_TEMPLATE_STR); + formater % sort_order_str % real_key_tag_id % sort_prop_type % sort_prop_name; + return formater.str(); +} + +class SortOpBuilder { + public: + SortOpBuilder(BuildingContext& ctx) : ctx_(ctx) {} + + SortOpBuilder& add_sort_pair(const algebra::OrderBy::OrderingPair& pair) { + sort_pairs_.push_back(pair); + return *this; + } + + // the range size can also be specified at runtime + SortOpBuilder& range(const algebra::Range& limit) { + lower_ = limit.lower(); + upper_ = limit.upper(); + + VLOG(10) << "Sort Range: " << lower_.value() + << ", upper: " << upper_.value(); + if (lower_param_.has_value()) { + VLOG(10) << "lower param: " << lower_param_.value().var_name; + } + if (upper_param_.has_value()) { + VLOG(10) << "upper param: " << upper_param_.value().var_name; + } + return *this; + } + + // return the sort opt and sort code. + std::string Build() const { + // make sort opt; + std::string range_lower, range_upper; + std::string ordering_pairs_str; + std::string sort_code; + // if there are any param_const in the range, we need to insert it into + // context. 
+ { + if (lower_param_.has_value()) { + ctx_.AddParameterVar(lower_param_.value()); + } + if (lower_param_.has_value()) { + ctx_.AddParameterVar(upper_param_.value()); + } + } + + if (lower_.has_value()) { + range_lower = std::to_string(lower_.value()); + } else if (lower_param_.has_value()) { + range_lower = lower_param_.value().var_name; + } else { + LOG(FATAL) << "Lower param not set"; + } + + if (upper_.has_value()) { + range_upper = std::to_string(upper_.value()); + } else if (upper_param_.has_value()) { + range_upper = upper_param_.value().var_name; + } else { + LOG(FATAL) << "Upper param not set"; + } + { + std::stringstream ss; + for (int i = 0; i < sort_pairs_.size(); ++i) { + ss << sort_pair_pb_to_order_pair(ctx_, sort_pairs_[i]); + if (i != sort_pairs_.size() - 1) { + ss << ", "; + } + } + ordering_pairs_str = ss.str(); + } + + boost::format formater(SORT_OP_TEMPLATE_STR); + std::string prev_ctx_name, next_ctx_name; + std::tie(prev_ctx_name, next_ctx_name) = ctx_.GetPrevAndNextCtxName(); + formater % next_ctx_name % ctx_.GraphVar() % prev_ctx_name % range_lower % + range_upper % ordering_pairs_str; + + return formater.str(); + } + + private: + BuildingContext& ctx_; + std::vector sort_pairs_; + std::optional lower_, upper_; + std::optional lower_param_, upper_param_; +}; + +static std::string BuildSortOp( + BuildingContext& ctx, const algebra::OrderBy& order_pb, + const physical::PhysicalOpr::MetaData& meta_data) { + if (order_pb.pairs_size() <= 0) { + throw std::runtime_error("Sort has no pairs"); + } + SortOpBuilder sort_builder(ctx); + auto& sort_pairs = order_pb.pairs(); + for (auto i = 0; i < sort_pairs.size(); ++i) { + sort_builder.add_sort_pair(sort_pairs[i]); + } + return sort_builder.range(order_pb.limit()).Build(); +} +} // namespace gs + +#endif // CODEGEN_SRC_HQPS_HQPS_SORT_BUILDER_H_ \ No newline at end of file diff --git a/flex/codegen/src/hqps_generator.h b/flex/codegen/src/hqps_generator.h new file mode 100644 index 
000000000000..645d1770c56e
--- /dev/null
+++ b/flex/codegen/src/hqps_generator.h
@@ -0,0 +1,752 @@
+/** Copyright 2020 Alibaba Group Holding Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#ifndef CODEGEN_SRC_HQPS_HQPS_GENERATOR_H_
+#define CODEGEN_SRC_HQPS_HQPS_GENERATOR_H_
+
+#include
+#include
+#include
+
+#include "flex/codegen/src/building_context.h"
+#include "flex/codegen/src/codegen_utils.h"
+#include "flex/codegen/src/hqps/hqps_dedup_builder.h"
+#include "flex/codegen/src/hqps/hqps_edge_expand_builder.h"
+#include "flex/codegen/src/hqps/hqps_fold_builder.h"
+#include "flex/codegen/src/hqps/hqps_get_v_builder.h"
+#include "flex/codegen/src/hqps/hqps_join_utils.h"
+#include "flex/codegen/src/hqps/hqps_path_expand_builder.h"
+#include "flex/codegen/src/hqps/hqps_project_builder.h"
+#include "flex/codegen/src/hqps/hqps_scan_builder.h"
+#include "flex/codegen/src/hqps/hqps_select_builder.h"
+#include "flex/codegen/src/hqps/hqps_sink_builder.h"
+#include "flex/codegen/src/hqps/hqps_sort_builder.h"
+#include "proto_generated_gie/physical.pb.h"
+
+namespace gs {
+
+// Template of the whole generated source file: the expression classes, the
+// query class with its two Query overloads, and the extern "C"
+// CreateApp/DeleteApp entry points.
+// Args, in the order QueryGenerator::GenerateQuery() feeds them:
+//  1. graph header               (ctx.GetGraphHeader())
+//  2. expression class code
+//  3. query class name           (ctx.GetQueryClassName())
+//  4. graph interface type       (ctx.GetGraphInterface())
+//  5. return type of Query       (ctx.GetQueryRet())
+//  6. graph variable name        (ctx.GraphVar())
+//  7. extra parameter declarations of the real Query function
+//  8. query body
+//  9. code decoding the params from the decoder
+// 10. decoded params forwarded to the real Query function
+// 11. storage type               (storage_backend_to_string(...))
+static constexpr const char* QUERY_TEMPLATE_STR =
+    "// Generated by query_generator.h\n"
+    "// This file is generated by codegen/query_generator.h\n"
+    "// DO NOT EDIT\n"
+    "\n"
+    "#include \"flex/engines/hqps_db/core/sync_engine.h\"\n"
+    "#include \"flex/engines/hqps_db/app/hqps_app_base.h\"\n" // app_base_header.h
+    "#include \"%1%\"\n" // graph_interface_header.h
+    "\n"
+    "\n"
+    "namespace gs {\n"
+    "// Auto generated expression class definition\n"
+    "%2%\n"
+    "\n"
+    "// Auto generated query class definition\n"
+    "class %3% : public HqpsAppBase<%4%> {\n"
+    " public:\n"
+    " using Engine = SyncEngine<%4%>;\n"
+    " using label_id_t = typename %4%::label_id_t;\n"
+    " using vertex_id_t = typename %4%::vertex_id_t;\n"
+    "// Query function for query class\n"
+    " %5% Query(const %4%& %6% %7%) const{\n"
+    " %8%\n"
+    " }\n"
+    "// Wrapper query function for query class\n"
+    " %5% Query(const %4%& %6%, Decoder& decoder) const override {\n"
+    " //decoding params from decoder, and call real query func\n"
+    " %9%\n"
+    " return Query(%6% %10%);\n"
+    " }\n"
+    "};\n"
+    "} // namespace gs\n"
+    "\n"
+    "// extern c interfaces\n"
+    "extern \"C\" {\n"
+    "void* CreateApp(gs::GraphStoreType store_type) {\n"
+    " if (store_type == %11%) {\n"
+    " gs::%3%* app =\n"
+    " new gs::%3%();\n"
+    " return static_cast(app);\n"
+    " }\n"
+    " return nullptr;\n"
+    "}\n"
+    "void DeleteApp(void* app, gs::GraphStoreType store_type) {\n"
+    " if (store_type == %11%) {\n"
+    " gs::%3%* casted =\n"
+    " static_cast(app);\n"
+    " delete casted;\n"
+    " }\n"
+    "}\n"
+    "}\n";
+
+// Forward declarations only; the definitions are not in this part of the
+// file.
+// declare
+template
+static std::array BuildJoinOp(
+    BuildingContext& ctx, const physical::Join& join_op_pb,
+    const physical::PhysicalOpr::MetaData& meta_data);
+
+// declare
+template
+static std::string BuildApplyOp(
+    BuildingContext& ctx, const physical::Apply& apply_op_pb,
+    const physical::PhysicalOpr::MetaData& meta_data);
+
+// declare
+template
+static std::array BuildIntersectOp(
+    BuildingContext& ctx, const physical::Intersect& intersect_op);
+
+// get_v can contains labels and filters.
+// what ever it takes, we will always fuse label info into edge_expand,
+// but if get_v contains expression, we will not fuse it into edge_expand
+// Returns true when the get_v params carry no predicate. `inline` because
+// the function is defined in a header.
+inline bool simple_get_v(const physical::GetV& get_v_op) {
+  return !get_v_op.params().has_predicate();
+}
+
+// Returns true when the edge expand has no alias, or its alias is -1.
+// `inline` because the function is defined in a header.
+inline bool intermeidate_edge_op(const physical::EdgeExpand& expand_op) {
+  return !expand_op.has_alias() || expand_op.alias().value() == -1;
+}
+
+// Append the label of every table listed in the get_v params to
+// vertex_labels (existing entries are kept).
+template
+void extract_vertex_labels(const physical::GetV& get_v_op,
+                           std::vector& vertex_labels) {
+  // get vertex label id from get_
+  // iterate the repeated field in place, no copy of it or of its elements
+  const auto& get_v_tables = get_v_op.params().tables();
+  for (const auto& vertex_label_pb : get_v_tables) {
+    vertex_labels.emplace_back(
+        try_get_label_from_name_or_id(vertex_label_pb));
+  }
+  VLOG(10) << "Got vertex labels : " << gs::to_string(vertex_labels);
+}
+
+// Emit one fused edge-expand + get_v operator into ss. The edge expand pb is
+// modified in place: its expand opt is set to VERTEX and its alias is set to
+// the alias of the get_v. vertex_labels must not be empty.
+template
+void build_fused_edge_get_v(
+    BuildingContext& ctx, std::stringstream& ss,
+    physical::EdgeExpand& edge_expand_op,
+    const physical::PhysicalOpr::MetaData& edge_meta_data,
+    const physical::GetV& get_v_op, const std::vector& vertex_labels) {
+  // build edge expand
+
+  CHECK(vertex_labels.size() > 0);
+  edge_expand_op.set_expand_opt(
+      physical::EdgeExpand::ExpandOpt::EdgeExpand_ExpandOpt_VERTEX);
+  edge_expand_op.mutable_alias()->set_value(get_v_op.alias().value());
+  ss << _4_SPACES
+     << BuildEdgeExpandOp(ctx, edge_expand_op, edge_meta_data,
+                          vertex_labels)
+     << std::endl;
+}
+
+// Entrance for generating a parameterized query
+// The generated class will have two function
+// 1. Query(GraphInterface& graph, int64_t ts, Decoder& input) const override
+// 2. 
Query(GraphInterface& graph, int64_t ts, Params&...params) const
+// the first one overrides the base class function, and the second one will be
+// called by the first one, with some params(depends on the plan received)
+template
+class QueryGenerator {
+ public:
+  // if edge expand e is followed by a get_v, we can fuse them into one op
+  static constexpr bool FUSE_EDGE_GET_V = true;
+  static constexpr bool FUSE_PATH_EXPAND_V = true;
+
+  // Both arguments are stored by reference; the caller must keep them alive
+  // for as long as this generator is used.
+  QueryGenerator(BuildingContext& ctx, const physical::PhysicalPlan& plan)
+      : ctx_(ctx), plan_(plan) {}
+
+  // Generates the complete source text of the query class by filling
+  // QUERY_TEMPLATE_STR. The query body is generated first, because walking
+  // the plan is what registers expressions and parameters in ctx_.
+  std::string GenerateQuery() {
+    // While generating the query body we track the parameters
+    // and also generate the expressions that are needed.
+    std::string query_code = build_query_code();
+    std::string expr_code;
+    {
+      std::stringstream ss;
+      auto exprs = ctx_.GetExprCode();
+      for (auto& expr : exprs) {
+        ss << expr << std::endl;
+      }
+      ss << std::endl;
+      expr_code = ss.str();
+    }
+    std::string dynamic_vars_str = concat_param_vars(ctx_.GetParameterVars());
+    std::string decoding_params_code, decoded_params_str;
+    std::tie(decoding_params_code, decoded_params_str) =
+        decode_params_from_decoder(ctx_.GetParameterVars());
+    boost::format formater(QUERY_TEMPLATE_STR);
+    formater % ctx_.GetGraphHeader() % expr_code % ctx_.GetQueryClassName() %
+        ctx_.GetGraphInterface() % ctx_.GetQueryRet() % ctx_.GraphVar() %
+        dynamic_vars_str % query_code % decoding_params_code %
+        decoded_params_str % storage_backend_to_string(ctx_.GetStorageType());
+    return formater.str();
+  }
+
+  // Generate a subtask for a subplan
+  // 0: expr codes.
+  // 1. query codes.
+  std::pair, std::string> GenerateSubTask() const {
+    auto query_body = build_query_code();
+    return std::make_pair(ctx_.GetExprCode(), query_body);
+  }
+
+ private:
+  // Builds the extra parameter declarations of the generated
+  // `Query(const Graph& graph <here>)` function, e.g. ", T0 a, T1 b".
+  // Returns an empty string when there are no parameters.
+  // `param_vars` is taken by value because it is sorted by id here.
+  // Entries that share an id must compare equal and are emitted once.
+  std::string concat_param_vars(
+      std::vector param_vars) const {
+    std::stringstream ss;
+    if (param_vars.size() > 0) {
+      sort(param_vars.begin(), param_vars.end(),
+           [](const auto& a, const auto& b) { return a.id < b.id; });
+      // FIXME: ENable this line
+      // the dynamic params can be duplicate.
+      CHECK(param_vars[0].id == 0);
+      // Every entry brings its own leading ", ". QUERY_TEMPLATE_STR places
+      // this string directly after the graph argument, so no additional
+      // comma may be written in front of the list (that produced ",, T a").
+      for (auto i = 0; i < param_vars.size(); ++i) {
+        if (i > 0 && param_vars[i].id == param_vars[i - 1].id) {
+          // found duplicate
+          CHECK(param_vars[i] == param_vars[i - 1]);
+          continue;
+        } else {
+          ss << ", " << data_type_2_string(param_vars[i].type) << " "
+             << param_vars[i].var_name;
+        }
+      }
+    }
+    return ss.str();
+  }
+
+  // Builds the pieces needed by the generated wrapper
+  // `Query(graph, Decoder& decoder)`:
+  //   first  : one line of decoding code per distinct parameter,
+  //   second : the argument list forwarded to the real Query function,
+  //            with a leading "," when it is not empty (",a, b").
+  std::tuple decode_params_from_decoder(
+      std::vector param_vars) const {
+    std::vector param_names, param_decoding_codes;
+    // the param vars itself contains the index, which is the order of the param
+    sort(param_vars.begin(), param_vars.end(),
+         [](const auto& a, const auto& b) { return a.id < b.id; });
+    if (param_vars.size() > 0) {
+      CHECK(param_vars[0].id == 0);  // encoding start from 0
+    }
+
+    for (auto i = 0; i < param_vars.size(); ++i) {
+      if (i > 0 && param_vars[i].id == param_vars[i - 1].id) {
+        CHECK(param_vars[i] == param_vars[i - 1]);
+        continue;
+      } else {
+        auto& cur_param_var = param_vars[i];
+        // for each param_var, decode the param from decoder,and one line of
+        // code
+        std::string cur_param_name, cur_param_decoding_code;
+        std::tie(cur_param_name, cur_param_decoding_code) =
+            decode_param_from_decoder(cur_param_var, i, "var", "decoder");
+        param_names.push_back(cur_param_name);
+        param_decoding_codes.push_back(cur_param_decoding_code);
+      }
+    }
+    VLOG(10) << "Finish decoding params, size: " << param_names.size();
+    std::string param_vars_decoding, param_vars_concat_str;
+    {
+      std::stringstream ss;
+      if (param_names.size() > 0) {
+        ss << ",";
+      }
+      for (auto i = 0; i < param_names.size(); ++i) {
+        ss << param_names[i];
+        if (i != param_names.size() - 1) {
+          ss << ", ";
+        }
+      }
+      param_vars_concat_str = ss.str();
+    }
+    {
+      std::stringstream ss;
+      for (auto i = 0; i < param_decoding_codes.size(); ++i) {
+        ss << param_decoding_codes[i] << std::endl;
+      }
+      param_vars_decoding = ss.str();
+    }
+    return std::make_tuple(param_vars_decoding, param_vars_concat_str);
+  }
+
+  // Walks the physical plan operator by operator and concatenates the code
+  // generated for each of them. Some operators consume their successor:
+  //   - EdgeExpand + simple GetV may be fused (skips the GetV),
+  //   - PathExpand is always fused with the following GetV,
+  //   - Intersect must be followed by Unfold, which is skipped.
+  // Unsupported operator kinds abort via LOG(FATAL).
+  std::string build_query_code() const {
+    std::stringstream ss;
+    auto size = plan_.plan_size();
+
+    LOG(INFO) << "Found " << size << " operators in the plan";
+    for (auto i = 0; i < size; ++i) {
+      auto op = plan_.plan(i);
+      auto& meta_datas = op.meta_data();
+      // CHECK(meta_datas.size() == 1) << "meta data size: " <<
+      // meta_datas.size();
+      // physical::PhysicalOpr::MetaData meta_data; //fake meta
+      auto opr = op.opr();
+      switch (opr.op_kind_case()) {
+      case physical::PhysicalOpr::Operator::kScan: {  // scan
+        // TODO: meta_data is not found in scan
+        physical::PhysicalOpr::MetaData meta_data;
+
+        LOG(INFO) << "Found a scan operator";
+        auto& scan_op = opr.scan();
+
+        ss << BuildScanOp(ctx_, scan_op, meta_data) << std::endl;
+        break;
+      }
+      case physical::PhysicalOpr::Operator::kEdge: {  // edge expand
+        // Work on a copy: fusing rewrites the expand option and the alias.
+        physical::EdgeExpand real_edge_expand = opr.edge();
+        // try to use information from the later operator
+        std::vector dst_vertex_labels;
+        if (i + 1 < size) {
+          auto& get_v_op_opr = plan_.plan(i + 1).opr();
+          if (get_v_op_opr.op_kind_case() ==
+              physical::PhysicalOpr::Operator::kVertex) {
+            auto& get_v_op = get_v_op_opr.vertex();
+            extract_vertex_labels(get_v_op, dst_vertex_labels);
+
+            if (FUSE_EDGE_GET_V) {
+              if (simple_get_v(get_v_op) &&
+                  intermeidate_edge_op(real_edge_expand)) {
+                CHECK(dst_vertex_labels.size() > 0);
+                VLOG(10) << "When fuseing edge+get_v, get_v has labels: "
+                         << gs::to_string(dst_vertex_labels);
+                build_fused_edge_get_v(ctx_, ss, real_edge_expand,
+                                       meta_datas[0], get_v_op,
+                                       dst_vertex_labels);
+                LOG(INFO) << "Fuse edge expand and get_v since get_v is simple";
+                i += 1;  // the GetV has been consumed
+                break;
+              } else if (intermeidate_edge_op(real_edge_expand)) {
+                LOG(INFO) << "try to fuse edge expand with complex get_v, take "
+                             "take the get_v' vertex label";
+              } else {
+                // only fuse get_v label into edge expand
+                LOG(INFO)
+                    << "Skip fusing edge expand and get_v since simple get v";
+              }
+            }
+          } else {
+            LOG(INFO) << "Skip fusing edge expand and get_v since the next "
+                         "operator is not get_v";
+          }
+        } else {
+          LOG(INFO) << "EdgeExpand is the last operator";
+        }
+        // Not fused: emit a plain edge expand. The labels collected from a
+        // following GetV (if any) are still passed along.
+        auto& meta_data = meta_datas[0];
+        LOG(INFO) << "Found a edge expand operator";
+        ss << BuildEdgeExpandOp(ctx_, real_edge_expand, meta_data,
+                                dst_vertex_labels)
+           << std::endl;
+
+        break;
+      }
+
+      case physical::PhysicalOpr::Operator::kDedup: {  // dedup
+        // auto& meta_data = meta_datas[0];
+        physical::PhysicalOpr::MetaData meta_data;  // fake meta
+        LOG(INFO) << "Found a dedup operator";
+        auto& dedup_op = opr.dedup();
+        ss << BuildDedupOp(ctx_, dedup_op, meta_data) << std::endl;
+        break;
+      }
+
+      case physical::PhysicalOpr::Operator::kProject: {  // project
+        // project op can result into multiple meta data
+        // auto& meta_data = meta_datas[0];
+        physical::PhysicalOpr::MetaData meta_data;
+        LOG(INFO) << "Found a project operator";
+        auto& project_op = opr.project();
+        std::string call_project_code;
+        call_project_code = BuildProjectOp(ctx_, project_op, meta_data);
+        ss << call_project_code;
+        break;
+      }
+
+      case physical::PhysicalOpr::Operator::kSelect: {
+        // auto& meta_data = meta_datas[0];
+        physical::PhysicalOpr::MetaData meta_data;
+        LOG(INFO) << "Found a select operator";
+        auto& select_op = opr.select();
+        std::string select_code = BuildSelectOp(ctx_, select_op, meta_data);
+        ss << select_code << std::endl;
+        break;
+      }
+
+      case physical::PhysicalOpr::Operator::kVertex: {
+        physical::PhysicalOpr::MetaData meta_data;
+        LOG(INFO) << "Found a get v operator";
+        auto& get_v_op = opr.vertex();
+        auto get_v_code = BuildGetVOp(ctx_, get_v_op, meta_data);
+        // first output code can be empty, just ignore
+        ss << get_v_code;
+        break;
+      }
+
+      case physical::PhysicalOpr::Operator::kGroupBy: {
+        // auto& meta_data = meta_datas[0];
+        // meta_data is currently not used in groupby.
+        physical::PhysicalOpr::MetaData meta_data;
+        auto& group_by_op = opr.group_by();
+        if (group_by_op.mappings_size() > 0) {
+          LOG(INFO) << "Found a group by operator";
+          auto code_lines = BuildGroupByOp(ctx_, group_by_op, meta_data);
+          ss << code_lines;
+        } else {
+          LOG(INFO) << "Found a group by operator with no group by keys";
+          auto code_lines =
+              BuildGroupWithoutKeyOp(ctx_, group_by_op, meta_data);
+          ss << code_lines;
+        }
+        LOG(INFO) << "Finish groupby operator gen";
+        break;
+      }
+
+      // Path Expand + GetV shall be always fused.
+      case physical::PhysicalOpr::Operator::kPath: {
+        physical::PhysicalOpr::MetaData meta_data;
+        LOG(INFO) << "Found a path operator";
+        if (FUSE_PATH_EXPAND_V) {
+          if (i + 1 < size) {
+            auto& path_op = opr.path();
+            auto& next_op = plan_.plan(i + 1).opr();
+            CHECK(next_op.op_kind_case() ==
+                  physical::PhysicalOpr::Operator::kVertex)
+                << "PathExpand must be followed by GetV";
+            auto& get_v_op = next_op.vertex();
+            int32_t get_v_res_alias = -1;
+            if (get_v_op.has_alias()) {
+              get_v_res_alias = get_v_op.alias().value();
+            }
+
+            auto res = BuildPathExpandOp(ctx_, path_op, meta_datas,
+                                         get_v_res_alias);
+            ss << res;
+            i += 1;  // jump one step
+            break;
+          } else {
+            LOG(FATAL) << "PathExpand is the last operator";
+          }
+        } else {
+          LOG(FATAL) << "Currently not supported: PathExpand without Getv";
+        }
+        // Only reached if LOG(FATAL) ever returns; never fall through into
+        // the Apply case below.
+        break;
+      }
+
+      case physical::PhysicalOpr::Operator::kApply: {
+        auto& meta_data = meta_datas[0];
+        LOG(INFO) << "Found a apply operator";
+        auto& apply_op = opr.apply();
+        std::string call_apply_code =
+            BuildApplyOp(ctx_, apply_op, meta_data);
+        ss << call_apply_code << std::endl;
+        break;
+      }
+
+      case physical::PhysicalOpr::Operator::kJoin: {
+        // auto& meta_data = meta_datas[0];
+        LOG(INFO) << "Found a join operator";
+        auto& join_op = opr.join();
+        auto join_opt_code = BuildJoinOp(ctx_, join_op);
+        for (auto& line : join_opt_code) {
+          ss << line << std::endl;
+        }
+        break;
+      }
+
+      case physical::PhysicalOpr::Operator::kIntersect: {
+        LOG(INFO) << "Found a intersect operator";
+        // a intersect op must be followed by a unfold op
+        CHECK(i + 1 < size) << " intersect op must be followed by a unfold op";
+        auto& next_op = plan_.plan(i + 1).opr();
+        CHECK(next_op.op_kind_case() ==
+              physical::PhysicalOpr::Operator::kUnfold)
+            << "intersect op must be followed by a unfold op";
+        auto& intersect_op = opr.intersect();
+        auto intersect_opt_code = BuildIntersectOp(ctx_, intersect_op);
+        for (auto& line : intersect_opt_code) {
+          ss << line << std::endl;
+        }
+        i += 1;  // skip unfold
+        break;
+      }
+
+      case physical::PhysicalOpr::Operator::kOrderBy: {
+        physical::PhysicalOpr::MetaData meta_data;
+        LOG(INFO) << "Found a order by operator";
+        auto& order_by_op = opr.order_by();
+        std::string sort_code = BuildSortOp(ctx_, order_by_op, meta_data);
+        ss << sort_code << std::endl;
+        break;
+      }
+
+      case physical::PhysicalOpr::Operator::kSink: {
+        physical::PhysicalOpr::MetaData meta_data;
+        LOG(INFO) << "Found a sink operator";
+        auto& sink_op = opr.sink();
+        std::string call_sink_code = BuildSinkOp(ctx_, sink_op, meta_data);
+        ss << call_sink_code << std::endl;
+        break;
+      }
+
+      case physical::PhysicalOpr::Operator::kRepartition: {
+        LOG(INFO) << "Found a repartition operator, just ignore";
+        break;
+      }
+
+      default:
+        LOG(FATAL) << "Unsupported operator type: " << opr.op_kind_case();
+      }
+    }
+    LOG(INFO) << "Finish adding query";
+    return ss.str();
+  }
+
+  BuildingContext& ctx_;
+  const physical::PhysicalPlan& plan_;
+};
+
+// When building a join op, we need to consider the following cases:
+// 0.
tag_id to tag_ind mapping, two plan shoud keep different mappings
+// const physical::PhysicalOpr::MetaData& meta_data
+//
+// Generates the code for a join of two sub plans. The returned array holds,
+// in this order:
+//   [0] code that copy-constructs the start context of the right side
+//       (empty when `ctx` is an empty context, i.e. the join starts the plan),
+//   [1] code of the left sub plan (generated on `ctx` itself),
+//   [2] code of the right sub plan (generated on a "right_" sub context),
+//   [3] the `Engine::template Join<...>(...)` call itself.
+// Left and right keys must be pairwise equal tag ids; only one or two join
+// keys are supported, anything else aborts via LOG(FATAL).
+template
+static std::array BuildJoinOp(
+    BuildingContext& ctx, const physical::Join& join_op_pb) {
+  auto join_kind = join_kind_pb_to_internal(join_op_pb.join_kind());
+  CHECK(join_op_pb.left_keys_size() == join_op_pb.right_keys_size());
+  // these keys are tag_ids.
+  auto& left_keys = join_op_pb.left_keys();
+  auto& right_keys = join_op_pb.right_keys();
+  std::vector join_keys;  // tag ids shared by the left and right keys
+  for (auto i = 0; i < left_keys.size(); ++i) {
+    CHECK(left_keys[i].tag().id() == right_keys[i].tag().id());
+    join_keys.push_back(left_keys[i].tag().id());
+  }
+
+  VLOG(10) << "Join tag: " << gs::to_string(join_keys);
+  std::string copy_context_code, left_plan_code, right_plan_code, join_code;
+  std::string left_res_ctx_name, right_res_ctx_name;
+  // NOTE(review): left_start_ctx_name is assigned below but never read.
+  std::string left_start_ctx_name, right_start_ctx_name;
+  // the derived context should preserve the tag_id to tag_inds mappings we
+  // already have.
+  // The sub context is created before `ctx` is touched by the left plan.
+  auto right_context = ctx.CreateSubTaskContext("right_");
+  // if join op is the start node, the copy_context_code is empty
+  if (ctx.EmptyContext()) {
+    // the prefix of left context should be appended.
+    // can append fix this problem?
+    ctx.AppendContextPrefix("left_");
+  } else {
+    // copy the context.
+    // always copy for right context.
+    std::stringstream cur_ss;
+    right_start_ctx_name = right_context.GetCurCtxName();
+    left_start_ctx_name = ctx.GetCurCtxName();
+    cur_ss << "auto " << right_start_ctx_name << "(" << ctx.GetCurCtxName()
+           << ");" << std::endl;
+    copy_context_code = cur_ss.str();
+  }
+  {
+    // left code.
+    // before enter left, we need to rename the context with left.
+    auto left_task_generator =
+        QueryGenerator(ctx, join_op_pb.left_plan());
+    std::vector left_exprs;
+    // the generate left exprs are already contained in ctx;
+    std::tie(left_exprs, left_plan_code) =
+        left_task_generator.GenerateSubTask();
+    left_res_ctx_name = ctx.GetCurCtxName();
+  }
+
+  {
+    // right code
+    auto right_task_generator =
+        QueryGenerator(right_context, join_op_pb.right_plan());
+    std::vector right_exprs;
+    std::tie(right_exprs, right_plan_code) =
+        right_task_generator.GenerateSubTask();
+    right_res_ctx_name = right_context.GetCurCtxName();
+    // Expressions and parameters collected on the right sub context are
+    // copied into the main context.
+    for (auto expr : right_exprs) {
+      ctx.AddExprCode(expr);
+    }
+    auto right_param_vars = right_context.GetParameterVars();
+    for (auto right_param_var : right_param_vars) {
+      ctx.AddParameterVar(right_param_var);
+    }
+  }
+
+  // join code.
+  {
+    // we need to extract distinct inds for two side join key
+    std::stringstream cur_ss;
+    std::string cur_ctx_name, prev_ctx_name;
+    std::tie(prev_ctx_name, cur_ctx_name) = ctx.GetPrevAndNextCtxName();
+    // The join consumes the context the left plan ended with.
+    CHECK(prev_ctx_name == left_res_ctx_name)
+        << prev_ctx_name << ", " << left_res_ctx_name;
+    cur_ss << "auto " << cur_ctx_name << _ASSIGN_STR_;
+    if (join_keys.size() == 1) {
+      // template args: left tag ind, right tag ind, join kind
+      cur_ss << " Engine::template Join";
+      cur_ss << "<";
+      {
+        cur_ss << ctx.GetTagInd(join_keys[0]) << ", "
+               << right_context.GetTagInd(join_keys[0]) << ",";
+        cur_ss << join_kind_to_str(join_kind);
+      }
+    } else if (join_keys.size() == 2) {
+      // template args: both left tag inds, both right tag inds, join kind
+      cur_ss << " Engine::template Join";
+      cur_ss << "<";
+      {
+        cur_ss << ctx.GetTagInd(join_keys[0]) << ", "
+               << ctx.GetTagInd(join_keys[1]) << ","
+               << right_context.GetTagInd(join_keys[0]) << ","
+               << right_context.GetTagInd(join_keys[1]) << ",";
+        cur_ss << join_kind_to_str(join_kind);
+      }
+    } else {
+      LOG(FATAL) << "Join on more than two key is not supported yet.";
+    }
+
+    cur_ss << ">";
+    cur_ss << "(";
+    {
+      cur_ss << "std::move(" << left_res_ctx_name << "),";
+      cur_ss << "std::move(" << right_res_ctx_name << ")";
+    }
+    cur_ss << ");";
+    join_code = cur_ss.str();
+  }
+  return std::array{copy_context_code, left_plan_code,
+                                    right_plan_code, join_code};
+}
+
+// Generates the code for an Apply operator: the sub plan is wrapped into a
+// generic lambda that takes a context and returns the last context of the
+// sub plan, followed by an `Engine::template Apply<alias, join_kind>(...)`
+// call that moves the previous context and the lambda into the engine.
+// `meta_data` is not used in this function.
+template
+static std::string BuildApplyOp(
+    BuildingContext& ctx, const physical::Apply& apply_op_pb,
+    const physical::PhysicalOpr::MetaData& meta_data) {
+  auto join_kind = join_kind_pb_to_internal(apply_op_pb.join_kind());
+  auto res_alias = apply_op_pb.alias().value();
+  auto& sub_plan = apply_op_pb.sub_plan();
+  std::string lambda_func_name, lambda_func_code;
+  {
+    auto new_building_ctx = ctx.CreateSubTaskContext();
+    auto sub_task_generator =
+        QueryGenerator(new_building_ctx, sub_plan);
+    // QueryGenrator sub_task_generator(new_building_ctx, sub_plan_);
+    // gen a lambda function.
+    lambda_func_name = ctx.GetNextLambdaFuncName();
+    std::stringstream inner_ss;
+    // header
+    // The lambda parameter is named after the sub context's current name
+    // *before* the sub plan is generated.
+    inner_ss << "auto " << lambda_func_name << " = [&]";
+    inner_ss << "(auto&& " << new_building_ctx.GetCurCtxName() << ") {"
+             << std::endl;
+
+    // body
+    std::vector exprs;
+    std::string query_code;
+    std::tie(exprs, query_code) = sub_task_generator.GenerateSubTask();
+    inner_ss << query_code;
+    for (auto expr : exprs) {
+      ctx.AddExprCode(expr);
+    }
+    // end
+    // return last context;
+    inner_ss << " return " << new_building_ctx.GetCurCtxName() << ";"
+             << std::endl;
+    inner_ss << "};" << std::endl;
+
+    lambda_func_code = inner_ss.str();
+  }
+
+  std::stringstream inner_ss;
+  std::string prev_ctx_name, next_ctx_name;
+  std::tie(prev_ctx_name, next_ctx_name) = ctx.GetPrevAndNextCtxName();
+  inner_ss << lambda_func_code << std::endl;
+  inner_ss << "auto " << next_ctx_name << " = Engine::template";
+  inner_ss << " Apply<" << res_alias << "," << join_kind_to_str(join_kind)
+           << ">";
+  inner_ss << "(std::move(" << prev_ctx_name << "),";
+  inner_ss << "std::move(" << lambda_func_name << "));" << std::endl;
+  return inner_ss.str();
+}
+
+// Generates the code for an Intersect of exactly two sub plans on the tag
+// `intersect_op.key()`. The returned array holds, in this order: the code
+// copying the current context for the right side, the left plan code, the
+// right plan code and the `Engine::template Intersect<l, r>(...)` call.
+// Aborts (CHECK) when there are not exactly two sub plans or when `ctx` is
+// an empty context.
+// NOTE(review): BuildJoinOp also copies the right context's parameter vars
+// into `ctx`; this function only copies the expressions - confirm intended.
+template
+static std::array BuildIntersectOp(
+    BuildingContext& ctx, const physical::Intersect& intersect_op) {
+  auto& sub_plans = intersect_op.sub_plans();
+  CHECK(sub_plans.size() == 2) << "Only support two sub plans intersect now.";
+  auto& left_plan = sub_plans[0];
+  auto& right_plan = sub_plans[1];
+  auto join_key = intersect_op.key();
+  VLOG(10) << "join on key: " << join_key;
+
+  std::string copy_context_code;
+  std::string left_res_ctx_name, right_res_ctx_name;
+  std::string left_plan_code, right_plan_code;
+  std::string intersect_code;
+
+  auto right_context = ctx.CreateSubTaskContext("right_");
+  CHECK(!ctx.EmptyContext());
+
+  {
+    // The right side starts from a copy of the current (left) context.
+    std::stringstream cur_ss;
+    auto right_start_ctx_name = right_context.GetCurCtxName();
+    auto left_start_ctx_name = ctx.GetCurCtxName();
+    cur_ss << "auto " << right_start_ctx_name << "(" << left_start_ctx_name
+           << ");" << std::endl;
+    copy_context_code = cur_ss.str();
+  }
+
+  {
+    // left code;
+    auto left_task_generator = QueryGenerator(ctx, left_plan);
+    std::vector left_exprs;
+    // the generate left exprs are already contained in ctx;
+    std::tie(left_exprs, left_plan_code) =
+        left_task_generator.GenerateSubTask();
+    left_res_ctx_name = ctx.GetCurCtxName();
+  }
+  {
+    // right code
+    auto right_task_generator =
+        QueryGenerator(right_context, right_plan);
+    std::vector right_exprs;
+    std::tie(right_exprs, right_plan_code) =
+        right_task_generator.GenerateSubTask();
+    right_res_ctx_name = right_context.GetCurCtxName();
+    for (auto expr : right_exprs) {
+      ctx.AddExprCode(expr);
+    }
+  }
+  // intersect code;
+  {
+    std::stringstream cur_ss;
+    std::string cur_ctx_name, prev_ctx_name;
+    std::tie(prev_ctx_name, cur_ctx_name) = ctx.GetPrevAndNextCtxName();
+    CHECK(prev_ctx_name == left_res_ctx_name)
+        << prev_ctx_name << ", " << left_res_ctx_name;
+
+    // The same tag id may map to different tag inds on the two sides.
+    auto right_tag_ind = right_context.GetTagInd(join_key);
+    auto left_tag_ind = ctx.GetTagInd(join_key);
+    VLOG(10) << "Intersect on tag ind: " << left_tag_ind << ", "
+             << right_tag_ind;
+
+    cur_ss << "auto " << cur_ctx_name << _ASSIGN_STR_;
+    cur_ss << " Engine::template Intersect";
+    cur_ss << "<" << left_tag_ind << "," << right_tag_ind << ">";
+    cur_ss << "(std::move(" << left_res_ctx_name << "),std::move("
+           << right_res_ctx_name << "));";
+    intersect_code = cur_ss.str();
+  }
+  return std::array{copy_context_code, left_plan_code,
+                                    right_plan_code, intersect_code};
+}
+
+}  // namespace gs
+
+#endif  // CODEGEN_SRC_HQPS_HQPS_GENERATOR_H_
diff --git a/flex/codegen/src/pb_parser/expand_parser.h b/flex/codegen/src/pb_parser/expand_parser.h
new file mode 100644
index 000000000000..ad476bafee8d
--- /dev/null
+++ b/flex/codegen/src/pb_parser/expand_parser.h
@@ -0,0 +1,40 @@
+/** Copyright 2020 Alibaba Group Holding Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ +#ifndef CODEGEN_SRC_PB_PARSER_EXPAND_PARSER_H_ +#define CODEGEN_SRC_PB_PARSER_EXPAND_PARSER_H_ + +#include "flex/codegen/src/pb_parser/internal_struct.h" +#include "proto_generated_gie/physical.pb.h" + +namespace gs { + +// a static function convert physical::EdgeExpand::Direction to +// internal::Direction +internal::Direction edge_expand_pb_2_internal_direction( + const physical::EdgeExpand::Direction& direction_pb) { + switch (direction_pb) { + case physical::EdgeExpand::Direction::EdgeExpand_Direction_OUT: + return internal::Direction::kOut; + case physical::EdgeExpand::Direction::EdgeExpand_Direction_IN: + return internal::Direction::kIn; + case physical::EdgeExpand::Direction::EdgeExpand_Direction_BOTH: + return internal::Direction::kBoth; + default: + throw std::runtime_error("unknown direction_pb"); + } +} +} // namespace gs + +#endif // CODEGEN_SRC_PB_PARSER_EXPAND_PARSER_H_ \ No newline at end of file diff --git a/flex/codegen/src/pb_parser/internal_struct.h b/flex/codegen/src/pb_parser/internal_struct.h new file mode 100644 index 000000000000..e168fb67191f --- /dev/null +++ b/flex/codegen/src/pb_parser/internal_struct.h @@ -0,0 +1,24 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#ifndef CODEGEN_SRC_PB_PARSER_INTERNAL_STRUCT_H_ +#define CODEGEN_SRC_PB_PARSER_INTERNAL_STRUCT_H_ + +namespace gs { +namespace internal { +enum class Direction { kOut, kIn, kBoth, kNotSet }; +}; +} // namespace gs + +#endif // CODEGEN_SRC_PB_PARSER_INTERNAL_STRUCT_H_ \ No newline at end of file diff --git a/flex/codegen/src/pb_parser/ir_data_type_parser.h b/flex/codegen/src/pb_parser/ir_data_type_parser.h new file mode 100644 index 000000000000..3621349773e6 --- /dev/null +++ b/flex/codegen/src/pb_parser/ir_data_type_parser.h @@ -0,0 +1,102 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef CODEGEN_SRC_PB_PARSER_IR_DATA_TYPE_PARSER_H_ +#define CODEGEN_SRC_PB_PARSER_IR_DATA_TYPE_PARSER_H_ + +#include + +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "flex/codegen/src/pb_parser/name_id_parser.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/physical.pb.h" + +namespace gs { + +// There can be multiple labels. +// for each label, we have multiple properties. 
+// deduplicate the property names and types if two edge labels are the same
+// and only differ in the src-dst pair
+//
+// Extracts the property names and type strings of a graph element type.
+// Only the first entry of `graph_data_type` is inspected. Its distinct
+// (name, type) pairs are appended as ONE inner vector to `prop_names` and,
+// index-aligned, to `prop_types`.
+// Returns true when `prop_names` is non-empty after parsing (this includes
+// entries that were already present on entry). Returns false for primitive
+// data types, for graph types without any label entry and for unsupported
+// type cases.
+// Aborts (CHECK) when a property is identified by id instead of by name.
+static bool try_to_get_prop_names_and_types_from_ir_data_type(
+    const common::IrDataType& ir_data_type,
+    std::vector>& prop_names,
+    std::vector>& prop_types) {
+  switch (ir_data_type.type_case()) {
+  case common::IrDataType::TypeCase::kDataType: {
+    VLOG(10) << "Primitive type is not supported yet.";
+    return false;
+  }
+  case common::IrDataType::TypeCase::kGraphType: {
+    auto& graph_ele_type = ir_data_type.graph_type();
+    auto ele = graph_ele_type.element_opt();
+    // The element kind only affects logging.
+    if (ele == common::GraphDataType::GraphElementOpt::
+                   GraphDataType_GraphElementOpt_VERTEX) {
+      VLOG(10) << "Get property for vertex element: ";
+    } else {
+      VLOG(10) << "Get property for edge element: ";
+    }
+    auto& graph_data_type = graph_ele_type.graph_data_type();
+    VLOG(10) << "Element label types size: " << graph_data_type.size();
+    // these graph_data_type should be the same.
+    if (graph_data_type.size() > 0) {
+      auto& cur_ele_prop_types = graph_data_type[0].props();
+      // one label can have multiple properties
+      if (cur_ele_prop_types.size() > 0) {
+        // A set removes duplicated (name, type) pairs. It is unordered, so
+        // the order of the resulting properties is unspecified.
+        std::unordered_set,
+                           boost::hash>>
+            prop_set;
+        for (auto j = 0; j < cur_ele_prop_types.size(); ++j) {
+          std::string prop_name;
+          CHECK(get_name_from_name_or_id(cur_ele_prop_types[j].prop_id(),
+                                         prop_name));
+          auto prop_type = data_type_2_string(
+              common_data_type_pb_2_data_type(cur_ele_prop_types[j].type()));
+          prop_set.insert(std::make_pair(prop_name, prop_type));
+        }
+        std::vector cur_prop_names;
+        std::vector cur_prop_types;
+        // Names and types are filled in the same iteration, so index k of
+        // both vectors belongs to the same property.
+        for (auto iter : prop_set) {
+          cur_prop_names.emplace_back(iter.first);
+          cur_prop_types.emplace_back(iter.second);
+        }
+        prop_names.emplace_back(cur_prop_names);
+        prop_types.emplace_back(cur_prop_types);
+      } else {
+        // no property for this label
+        VLOG(10) << "No property type found for GraphElementType";
+      }
+
+      VLOG(10) << "Finish parsing property names and types.";
+      if (prop_names.size() > 0) {
+        VLOG(10) << "Property names: " << gs::to_string(prop_names);
+        VLOG(10) << "Property types: " << gs::to_string(prop_types);
+        return true;
+      } else {
+        VLOG(10) << "No property names and types found in the graph element.";
+        return false;
+      }
+    } else {
+      return false;
+    }
+  }
+  default: {
+    VLOG(10) << "Unsupported data type: " << ir_data_type.DebugString();
+    return false;
+  }
+  }
+}
+}  // namespace gs
+
+#endif  // CODEGEN_SRC_PB_PARSER_IR_DATA_TYPE_PARSER_H_
\ No newline at end of file
diff --git a/flex/codegen/src/pb_parser/name_id_parser.h b/flex/codegen/src/pb_parser/name_id_parser.h
new file mode 100644
index 000000000000..516646a5d348
--- /dev/null
+++ b/flex/codegen/src/pb_parser/name_id_parser.h
@@ -0,0 +1,57 @@
+/** Copyright 2020 Alibaba Group Holding Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ +#ifndef CODEGEN_SRC_PB_PARSER_NAME_ID_PARSER_H_ +#define CODEGEN_SRC_PB_PARSER_NAME_ID_PARSER_H_ + +#include +#include "proto_generated_gie/common.pb.h" + +namespace gs { + +bool get_name_from_name_or_id(const common::NameOrId& name_or_id, + std::string& name) { + if (name_or_id.item_case() == common::NameOrId::kName) { + name = name_or_id.name(); + return true; + } else { + return false; + } +} + +template < + typename LabelT, + typename std::enable_if>::type* = nullptr> +static LabelT try_get_label_from_name_or_id( + const common::NameOrId& name_or_id) { + if (name_or_id.item_case() != common::NameOrId::kId) { + LOG(FATAL) << "no id is found"; + } + return name_or_id.id(); +} + +template >::type* = + nullptr> +static LabelT try_get_label_from_name_or_id( + const common::NameOrId& name_or_id) { + if (name_or_id.item_case() != common::NameOrId::kName) { + LOG(FATAL) << "no name is found"; + } + return name_or_id.name(); +} + +} // namespace gs + +#endif // CODEGEN_SRC_PB_PARSER_NAME_ID_PARSER_H_ \ No newline at end of file diff --git a/flex/codegen/src/pb_parser/query_params_parser.h b/flex/codegen/src/pb_parser/query_params_parser.h new file mode 100644 index 000000000000..ccbcf6a2f589 --- /dev/null +++ b/flex/codegen/src/pb_parser/query_params_parser.h @@ -0,0 +1,227 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#ifndef CODEGEN_SRC_PB_PARSER_QUERY_PARAMS_PARSER_H_ +#define CODEGEN_SRC_PB_PARSER_QUERY_PARAMS_PARSER_H_ + +#include + +#include "flex/codegen/src/graph_types.h" +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" + +namespace gs { + +bool try_to_get_label_name_from_query_params( + const algebra::QueryParams& params, std::vector& label_names) { + CHECK(params.tables_size() != 0) << "At least one label is required"; + for (auto i = 0; i < params.tables_size(); i++) { + auto name_or_id = params.tables(i); + if (name_or_id.item_case() == common::NameOrId::kName) { + label_names.push_back(name_or_id.name()); + } + } + if (label_names.size() == 0) { + return false; + } + return true; +} + +bool try_to_get_label_name_from_query_params(const algebra::QueryParams& params, + std::string& label_name) { + if (params.tables(0).item_case() != common::NameOrId::kName) { + return false; + } + label_name = params.tables(0).name(); + return true; +} + +bool try_to_get_label_id_from_query_params(const algebra::QueryParams& params, + int32_t& label_id) { + if (params.tables(0).item_case() != common::NameOrId::kId) { + return false; + } + label_id = params.tables(0).id(); + return true; +} + +bool try_to_get_label_id_from_query_params(const algebra::QueryParams& params, + std::vector& label_ids) { + if (params.tables_size() > 0) { + LOG(WARNING) << "params has more than 1 labels"; + } + for (auto i = 0; i < params.tables_size(); i++) { + if (params.tables(i).item_case() != common::NameOrId::kId) { + return false; + } + label_ids.push_back(params.tables(i).id()); + } + return true; +} + +bool try_to_get_label_ids_from_expr(const common::Expression& expression, + std::vector& label_ids) { + auto opr_size = expression.operators_size(); + for (auto i = 0; i < opr_size; ++i) { + auto opr = expression.operators(i); + if (opr.has_var() && opr.var().property().has_label()) { + CHECK(i + 2 < opr_size) << "expr is not valid"; + auto mid = 
expression.operators(i + 1); + auto right = expression.operators(i + 2); + if (mid.item_case() == common::ExprOpr::kLogical && + mid.logical() == common::Logical::EQ && + right.item_case() == common::ExprOpr::kConst) { + auto const_ = right.const_(); + if (const_.item_case() == common::Value::kI32) { + label_ids.push_back(right.const_().i32()); + } else if (const_.item_case() == common::Value::kI64) { + label_ids.push_back(right.const_().i64()); + } else { + LOG(FATAL) << "expect i32 or i64 for label id"; + return false; + } + return true; + } else if (mid.item_case() == common::ExprOpr::kLogical && + mid.logical() == common::Logical::WITHIN && + right.item_case() == common::ExprOpr::kConst) { + auto const_ = right.const_(); + if (const_.item_case() == common::Value::kI32Array) { + auto array = const_.i32_array(); + CHECK(array.item_size() == 1); + label_ids.push_back(array.item(0)); + // } else if (const_.has_i64_array()) { + } else if (const_.item_case() == common::Value::kI64Array) { + auto array = const_.i64_array(); + for (auto i = 0; i < array.item_size(); ++i) { + label_ids.push_back(array.item(i)); + } + } else { + LOG(FATAL) << "expect i32 or i64 for label id"; + return false; + } + return true; + } + } + } + return false; +} + +bool try_to_get_oid_from_expr_impl(const common::Expression& expression, + int64_t& oid) { + VLOG(10) << "try get oid from expression"; + if (expression.operators_size() != 3) { + VLOG(10) << "operator size gt 3, return false"; + return false; + } + auto& left = expression.operators(0); + auto& mid = expression.operators(1); + auto& right = expression.operators(2); + if (!left.has_var()) { + VLOG(10) << "First item is not var"; + return false; + } + if (mid.item_case() != common::ExprOpr::kLogical || + mid.logical() != common::Logical::EQ) { + VLOG(10) << "mid item is not eq"; + return false; + } + + if (!right.has_const_()) { + VLOG(10) << "right item is not const"; + return false; + } + auto& con_val = right.const_(); + if 
(con_val.item_case() != common::Value::kI64 && + con_val.item_case() != common::Value::kI32) { + VLOG(10) << "right value is not int64 or int32"; + return false; + } + if (con_val.item_case() == common::Value::kI64) { + oid = con_val.i64(); + return true; + } + oid = (int64_t) con_val.i32(); + return true; +} + +// entry for parse oid from expression, expression can contains 3 ops or 6 ops. +bool try_to_get_oid_from_expr(const common::Expression& expression, + int64_t& oid) { + auto num_oprs = expression.operators_size(); + VLOG(10) << "try get oid from expression, size: " << num_oprs; + if (num_oprs != 3 && num_oprs != 11) { + VLOG(10) << "can only support 3 ops or 11 ops expression"; + return false; + } + if (num_oprs == 3) { + return try_to_get_oid_from_expr_impl(expression, oid); + } + // TODO: current hacks the implementaion. (label within 1) && (id == 8780) + common::Expression new_expr; + new_expr.add_operators()->CopyFrom(expression.operators(7)); + new_expr.add_operators()->CopyFrom(expression.operators(8)); + new_expr.add_operators()->CopyFrom(expression.operators(9)); + return try_to_get_oid_from_expr_impl(new_expr, oid); +} + +bool try_to_get_oid_param_from_expr_impl(const common::Expression& expression, + codegen::ParamConst& param_const) { + VLOG(10) << "try get oid param from expression"; + if (expression.operators_size() != 3) { + VLOG(10) << "operator size gt 3, return false"; + return false; + } + auto& left = expression.operators(0); + auto& mid = expression.operators(1); + auto& right = expression.operators(2); + if (!left.has_var()) { + VLOG(10) << "First item is not var"; + return false; + } + if (mid.item_case() != common::ExprOpr::kLogical || + mid.logical() != common::Logical::EQ) { + VLOG(10) << "mid item is not eq"; + return false; + } + + if (right.item_case() != common::ExprOpr::kParam) { + VLOG(10) << "right item is not param const"; + return false; + } + auto& con_val = right.param(); + parse_param_const_from_pb(con_val, 
right.node_type(), param_const); + return true; +} + +bool try_to_get_oid_param_from_expr(const common::Expression& expression, + codegen::ParamConst& param_const) { + auto num_oprs = expression.operators_size(); + VLOG(10) << "try get oid param from expression, size: " << num_oprs; + if (num_oprs != 3 && num_oprs != 11) { + VLOG(10) << "can only support 3 ops or 11 ops expression"; + return false; + } + if (num_oprs == 3) { + return try_to_get_oid_param_from_expr_impl(expression, param_const); + } + common::Expression new_expr; + new_expr.add_operators()->CopyFrom(expression.operators(7)); + new_expr.add_operators()->CopyFrom(expression.operators(8)); + new_expr.add_operators()->CopyFrom(expression.operators(9)); + return try_to_get_oid_param_from_expr_impl(new_expr, param_const); +} + +} // namespace gs + +#endif // CODEGEN_SRC_PB_PARSER_QUERY_PARAMS_PARSER_H_ diff --git a/flex/codegen/src/pegasus/pegasus_dedup_builder.h b/flex/codegen/src/pegasus/pegasus_dedup_builder.h new file mode 100644 index 000000000000..7f555dd76a68 --- /dev/null +++ b/flex/codegen/src/pegasus/pegasus_dedup_builder.h @@ -0,0 +1,124 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#ifndef CODEGEN_SRC_PEGASUS_PEGASUS_DEDUP_BUILDER_H_ +#define CODEGEN_SRC_PEGASUS_PEGASUS_DEDUP_BUILDER_H_ + +#include +#include +#include + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "flex/codegen/src/pegasus/pegasus_repartition_builder.h" +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/expr.pb.h" +#include "proto_generated_gie/physical.pb.h" + +namespace gs { +namespace pegasus { +class DedupOpBuilder { + public: + DedupOpBuilder(BuildingContext& ctx) : ctx_(ctx) {} + + DedupOpBuilder& operator_index(const int32_t operator_index) { + operator_index_ = operator_index; + return *this; + } + + DedupOpBuilder& add_in_tag(int32_t in_tag_id) { + in_tag_ids_.push_back(in_tag_id); + return *this; + } + + std::string Build() { + std::stringstream ss; + auto input_size = ctx_.InputSize(); + + // write key_by head + boost::format key_by_head_fmter( + "let stream_%1% = stream_%2%.key_by|%3%| {\n"); + std::string key_by_input = generate_arg_list("i", input_size); + key_by_head_fmter % operator_index_ % (operator_index_ - 1) % key_by_input; + + boost::format key_by_output_fmter("Ok((%1%, %2%))\n})?\n"); + std::stringstream key_ss; + key_ss << "("; + std::unordered_set key_sets; + for (auto i = 0; i < in_tag_ids_.size(); i++) { + int32_t input_index = 0; + if (in_tag_ids_[i] != -1) { + input_index = ctx_.GetAliasIndex(in_tag_ids_[i]); + } + key_sets.insert(input_index); + key_ss << "i" << input_index; + if (i != in_tag_ids_.size() - 1) { + key_ss << ", "; + } + } + key_ss << ")"; + std::string key_code = key_ss.str(); + + std::stringstream value_ss; + value_ss << "("; + std::vector value_list; + for (auto i = 0; i < input_size; i++) { + if (key_sets.find(i) == key_sets.end()) { + value_list.push_back(i); + } + } + for (auto i = 0; i < value_list.size(); i++) { + value_ss << "i" << value_list[i]; + if 
(i != value_list.size() - 1) { + value_ss << ", "; + } + } + value_ss << ")"; + std::string value_code = value_ss.str(); + + key_by_output_fmter % key_code % value_code; + std::string key_by_code = + key_by_head_fmter.str() + key_by_output_fmter.str(); + + boost::format dedup_fmter( + ".dedup()?\n" + ".map(|%1%| Ok(%2%))?;\n"); + std::string params = key_code + ", " + value_code; + std::string outputs = generate_arg_list("i", input_size); + dedup_fmter % params % outputs; + + return key_by_code + dedup_fmter.str(); + } + + private: + BuildingContext& ctx_; + int32_t operator_index_; + std::vector in_tag_ids_; +}; + +static std::string BuildDedupOp( + BuildingContext& ctx, int32_t operator_index, const algebra::Dedup& dedup, + const physical::PhysicalOpr::MetaData& meta_data) { + DedupOpBuilder builder(ctx); + auto tag_size = dedup.keys_size(); + for (auto i = 0; i < tag_size; i++) { + builder.add_in_tag(dedup.keys(i).tag().id()); + } + return builder.operator_index(operator_index).Build(); +} +} // namespace pegasus +} // namespace gs +#endif // CODEGEN_SRC_PEGASUS_PEGASUS_DEDUP_BUILDER_H_ diff --git a/flex/codegen/src/pegasus/pegasus_edge_expand_builder.h b/flex/codegen/src/pegasus/pegasus_edge_expand_builder.h new file mode 100644 index 000000000000..fe55f804f03a --- /dev/null +++ b/flex/codegen/src/pegasus/pegasus_edge_expand_builder.h @@ -0,0 +1,357 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#ifndef CODEGEN_SRC_PEGASUS_PEGASUS_EDGE_EXPAND_BUILDER_H_ +#define CODEGEN_SRC_PEGASUS_PEGASUS_EDGE_EXPAND_BUILDER_H_ + +#include + +#include + +#include "proto_generated_gie/physical.pb.h" + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/pb_parser/ir_data_type_parser.h" +#include "flex/codegen/src/pb_parser/name_id_parser.h" +#include "flex/codegen/src/pb_parser/query_params_parser.h" +#include "flex/codegen/src/pegasus/pegasus_expr_builder.h" +#include "flex/codegen/src/string_utils.h" + +#define NO_EXTRACT_PROP_FROM_IR_DATA_TYPE + +namespace gs { +namespace pegasus { + +template +class EdgeExpandOpBuilder { + public: + EdgeExpandOpBuilder(BuildingContext& ctx) : ctx_(ctx) {} + ~EdgeExpandOpBuilder() = default; + + EdgeExpandOpBuilder& operator_index(const int32_t operator_index) { + operator_index_ = operator_index; + return *this; + } + + EdgeExpandOpBuilder& resAlias(const int32_t res_alias) { + res_alias_ = res_alias; + return *this; + } + + EdgeExpandOpBuilder& query_params(const algebra::QueryParams& query_params) { + query_params_ = query_params; + if (query_params_.has_predicate()) { + ExprBuilder expr_builder(ctx_); + auto& expr_oprs = query_params_.predicate().operators(); + expr_builder.AddAllExprOpr(expr_oprs); + std::vector predicate_tag; + std::vector case_exprs; + std::tie(predicate_expr_, var_names_, predicate_tag, properties_, + case_exprs) = expr_builder.BuildRust(); + } + return *this; + } + + EdgeExpandOpBuilder& expand_opt(const physical::EdgeExpand::ExpandOpt& opt) { + expand_opt_ = opt; + return *this; + } + + EdgeExpandOpBuilder& direction(const physical::EdgeExpand::Direction& dir) { + direction_ = dir; + return *this; + } + + EdgeExpandOpBuilder& v_tag(const int32_t& v_tag) { + v_tag_ = v_tag; + return *this; + } + + EdgeExpandOpBuilder& meta_data( + const physical::PhysicalOpr::MetaData& meta_data) { + meta_data_ = meta_data; + // we can get the edge tuplet 
from meta_data, in case we fail to extract + // edge triplet from ir_data_type + { + auto& ir_data_type = meta_data_.type(); + VLOG(10) << "str: " << ir_data_type.DebugString(); + CHECK(ir_data_type.has_graph_type()); + auto& graph_type = ir_data_type.graph_type(); + VLOG(10) << "debug string: " << graph_type.DebugString(); + CHECK(graph_type.element_opt() == common::GraphDataType::GraphElementOpt:: + GraphDataType_GraphElementOpt_EDGE) + << "expect edge meta for edge builder"; + auto& ele_label_types = graph_type.graph_data_type(); + CHECK(ele_label_types.size() > 0); + for (auto ele_label_type : ele_label_types) { + auto& triplet = ele_label_type.label(); + auto& src_label = triplet.src_label(); + auto& dst_label = triplet.dst_label(); + src_vertex_labels_.emplace_back(src_label.value()); + dst_vertex_labels_.emplace_back(dst_label.value()); + } + VLOG(10) << "extract dst vertex label: " + << gs::to_string(dst_vertex_labels_) << ", from meta data"; + } + return *this; + } + + EdgeExpandOpBuilder& set_intersect(bool intersect) { + is_intersect_ = intersect; + return *this; + } + + std::string Build() const { + std::stringstream ss; + + VLOG(10) << "Start write head"; + std::string head_code = write_head(); + + int32_t input_index; + std::pair> in_type; + if (v_tag_ == -1) { + input_index = 0; + in_type = ctx_.GetHeadType(); + } else { + input_index = ctx_.GetAliasIndex(v_tag_); + in_type = ctx_.GetAliasType(v_tag_); + } + + VLOG(10) << "Start write edge expand"; + int32_t input_size = ctx_.InputSize(); + std::unordered_set start_labels_set; + std::vector start_labels, end_labels; + if (direction_ == physical::EdgeExpand_Direction::EdgeExpand_Direction_IN) { + for (auto i = 0; i < src_vertex_labels_.size(); i++) { + end_labels.push_back(src_vertex_labels_[i]); + } + for (auto i = 0; i < dst_vertex_labels_.size(); i++) { + start_labels_set.insert(dst_vertex_labels_[i]); + start_labels.push_back(dst_vertex_labels_[i]); + } + } else { + for (auto i = 0; i < 
dst_vertex_labels_.size(); i++) { + end_labels.push_back(dst_vertex_labels_[i]); + } + for (auto i = 0; i < src_vertex_labels_.size(); i++) { + start_labels_set.insert(src_vertex_labels_[i]); + start_labels.push_back(src_vertex_labels_[i]); + } + } + + int32_t edge_labels = query_params_.tables_size(); + std::stringstream expand_code_ss; + for (auto i = 0; i < edge_labels; i++) { + auto edge_label = query_params_.tables(i).id(); + if (start_labels_set.size() > 1) { + boost::format multi_labels_fmter( + "let vertex_label = LDBCVertexParser::::get_label_id(i%1% " + "as usize);\n" + "%2%"); + std::string labels_expand_code; + for (auto j = 0; j < src_vertex_labels_.size(); ++j) { + boost::format with_label_fmter( + "if vertex_label == %1% {\n" + "%2%" + "}\n"); + with_label_fmter % start_labels[j]; + std::string expand_code; + if (direction_ == + physical::EdgeExpand_Direction::EdgeExpand_Direction_IN) { + expand_code = write_edge_expand( + src_vertex_labels_[j], edge_label, dst_vertex_labels_[j], + physical::EdgeExpand_Direction::EdgeExpand_Direction_IN); + } else if (direction_ == + physical::EdgeExpand_Direction::EdgeExpand_Direction_OUT) { + expand_code = write_edge_expand( + src_vertex_labels_[j], edge_label, dst_vertex_labels_[j], + physical::EdgeExpand_Direction::EdgeExpand_Direction_OUT); + } else if (direction_ == physical::EdgeExpand_Direction:: + EdgeExpand_Direction_BOTH) { + expand_code = write_edge_expand( + src_vertex_labels_[j], edge_label, dst_vertex_labels_[j], + physical::EdgeExpand_Direction::EdgeExpand_Direction_IN); + expand_code += write_edge_expand( + src_vertex_labels_[j], edge_label, dst_vertex_labels_[j], + physical::EdgeExpand_Direction::EdgeExpand_Direction_OUT); + } + with_label_fmter % expand_code; + labels_expand_code += with_label_fmter.str(); + } + multi_labels_fmter % input_index % labels_expand_code; + expand_code_ss << multi_labels_fmter.str(); + } else { + for (auto j = 0; j < src_vertex_labels_.size(); ++j) { + if (direction_ == 
+ physical::EdgeExpand_Direction::EdgeExpand_Direction_IN) { + expand_code_ss << write_edge_expand( + src_vertex_labels_[j], edge_label, dst_vertex_labels_[j], + physical::EdgeExpand_Direction::EdgeExpand_Direction_IN); + } else if (direction_ == + physical::EdgeExpand_Direction::EdgeExpand_Direction_OUT) { + expand_code_ss << write_edge_expand( + src_vertex_labels_[j], edge_label, dst_vertex_labels_[j], + physical::EdgeExpand_Direction::EdgeExpand_Direction_OUT); + } else if (direction_ == physical::EdgeExpand_Direction:: + EdgeExpand_Direction_BOTH) { + expand_code_ss << write_edge_expand( + src_vertex_labels_[j], edge_label, dst_vertex_labels_[j], + physical::EdgeExpand_Direction::EdgeExpand_Direction_IN); + expand_code_ss << write_edge_expand( + src_vertex_labels_[j], edge_label, dst_vertex_labels_[j], + physical::EdgeExpand_Direction::EdgeExpand_Direction_OUT); + } + } + } + } + VLOG(10) << "Start write body"; + boost::format edge_expand_body_fmter( + "let vertex_id = graph.get_internal_id(i%1% as usize);\n" + "%2%"); + edge_expand_body_fmter % input_index % expand_code_ss.str(); + + std::vector output; + output.push_back(codegen::DataType::kInt64); + auto outputs = ctx_.GetOutput(); + + boost::format edge_expand_output_fmter( + "Ok(result.into_iter().map(move |res| %1%))\n" + "})?;\n"); + + int32_t alias_index = -1; + if (res_alias_ != -1) { + ctx_.SetAlias(res_alias_); + ctx_.SetAliasType(res_alias_, 0, end_labels); + alias_index = ctx_.GetAliasIndex(res_alias_); + } + ctx_.SetHead(true); + ctx_.SetHeadType(0, end_labels); + + std::string output_params = generate_output_list( + "i", input_size, "res", alias_index, ctx_.ContainHead()); + edge_expand_output_fmter % output_params; + + return head_code + edge_expand_body_fmter.str() + + edge_expand_output_fmter.str(); + } + + private: + std::string write_head() const { + int32_t input_size = ctx_.InputSize(); + boost::format head_fmter( + "let stream_%1% = stream_%2%.flat_map(move |%3%| {\n" + "let mut result = 
vec![];\n"); + std::string input_params = generate_arg_list("i", input_size); + head_fmter % operator_index_ % (operator_index_ - 1) % input_params; + return head_fmter.str(); + } + + std::string write_edge_expand( + LabelT src_label, int32_t edge_label, LabelT dst_label, + physical::EdgeExpand::Direction direction) const { + std::string predicate_code; + boost::format edge_expand_fmter( + "if let Some(edges) = %1%.get_adj_list(vertex_id) {\n" + "for e in edges{\n" + "%2%" // predicate & get global_id + "}\n" + "}\n"); + std::string subgraph_name = + get_subgraph_name(src_label, edge_label, dst_label, direction); + + int32_t adj_label; + if (direction_ == physical::EdgeExpand_Direction::EdgeExpand_Direction_IN) { + adj_label = src_label; + } else if (direction_ == + physical::EdgeExpand_Direction::EdgeExpand_Direction_OUT) { + adj_label = dst_label; + } else { + LOG(FATAL) << "Unexpected direction"; + } + std::string edge_traverse_code; + if (query_params_.has_predicate()) { + boost::format predicate_fmter( + "%1%" + "if %2% {\n" + "result.push(graph.get_global_id(e.neighbor, %3%).unwrap() as u64);\n" + "}\n"); + std::stringstream vars_stream; + for (auto i = 0; i < var_names_.size(); i++) { + boost::format var_fmter("let %1% = %2%[e.neighbor];\n"); + var_fmter % var_names_[i] % + get_edge_prop_column_name(properties_[i].var_name, src_label, + edge_label, dst_label, direction); + vars_stream << var_fmter.str(); + } + predicate_fmter % vars_stream.str() % predicate_expr_ % adj_label; + edge_traverse_code = predicate_fmter.str(); + } else { + boost::format no_predicate_fmter( + "result.push(graph.get_global_id(e.neighbor, %1%).unwrap() as " + "u64);\n"); + no_predicate_fmter % adj_label; + edge_traverse_code = no_predicate_fmter.str(); + } + + edge_expand_fmter % subgraph_name % edge_traverse_code; + + return edge_expand_fmter.str(); + } + + BuildingContext& ctx_; + int32_t operator_index_; + int32_t res_alias_; + algebra::QueryParams query_params_; + 
physical::EdgeExpand::ExpandOpt expand_opt_; + physical::EdgeExpand::Direction direction_; + std::vector src_vertex_labels_; + std::vector dst_vertex_labels_; + int32_t v_tag_; + physical::PhysicalOpr::MetaData meta_data_; + std::string predicate_expr_; + std::vector var_names_; + std::vector properties_; + bool is_intersect_ = false; +}; + +template +static std::string BuildEdgeExpandOp( + BuildingContext& ctx, int32_t operator_index, + const physical::EdgeExpand& edge_expand, + const physical::PhysicalOpr::MetaData& meta_data, + bool is_intersect = false) { + EdgeExpandOpBuilder builder(ctx); + if (edge_expand.has_alias()) { + builder.resAlias(edge_expand.alias().value()); + } else { + builder.resAlias(-1); + } + builder.query_params(edge_expand.params()) + .expand_opt(edge_expand.expand_opt()) + .direction(edge_expand.direction()) + .meta_data(meta_data); + if (edge_expand.has_v_tag()) { + builder.v_tag(edge_expand.v_tag().value()); + } else { + builder.v_tag(-1); + } + builder.set_intersect(is_intersect); + return builder.operator_index(operator_index).Build(); +} + +} // namespace pegasus +} // namespace gs + +#endif // CODEGEN_SRC_PEGASUS_PEGASUS_EDGE_EXPAND_BUILDER_H_ \ No newline at end of file diff --git a/flex/codegen/src/pegasus/pegasus_expr_builder.h b/flex/codegen/src/pegasus/pegasus_expr_builder.h new file mode 100644 index 000000000000..f6294760fd9f --- /dev/null +++ b/flex/codegen/src/pegasus/pegasus_expr_builder.h @@ -0,0 +1,578 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef CODEGEN_SRC_PEGASUS_PEGASUS_EXPR_BUILDER_H_ +#define CODEGEN_SRC_PEGASUS_PEGASUS_EXPR_BUILDER_H_ + +#include +#include +#include +#include + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/expr.pb.h" + +namespace gs { +namespace pegasus { +static std::string logical_to_str(const common::Logical& logical) { + switch (logical) { + case common::Logical::AND: + return "&&"; + case common::Logical::OR: + return "||"; + case common::Logical::NOT: + return "!"; + case common::Logical::EQ: + return "=="; + case common::Logical::NE: + return "!="; + case common::Logical::GT: + return ">"; + case common::Logical::GE: + return ">="; + case common::Logical::LT: + return "<"; + case common::Logical::LE: + return "<="; + case common::Logical::WITHIN: + return "< WithIn > "; + default: + throw std::runtime_error("unknown logical"); + } +} + +std::string i64_array_pb_to_str(const common::I64Array& array) { + auto size = array.item_size(); + std::stringstream ss; + ss << "std::array{"; + for (int i = 0; i < size; ++i) { + ss << array.item(i); + if (i + 1 != size) { + ss << ","; + } + } + ss << "}"; + return ss.str(); +} + +// i32_array_pb_to_str +std::string i32_array_pb_to_str(const common::I32Array& array) { + auto size = array.item_size(); + std::stringstream ss; + ss << "std::array{"; + for (int i = 0; i < size; ++i) { + ss << array.item(i); + if (i + 1 != size) { + ss << ","; + } + } + ss << "}"; + return ss.str(); +} + +static std::string value_pb_to_str(const common::Value& value) { + switch (value.item_case()) { + case common::Value::kI32: + return std::to_string(value.i32()); + case common::Value::kI64: + return std::to_string(value.i64()); + case 
common::Value::kF64: + return std::to_string(value.f64()); + case common::Value::kStr: + return with_quote(value.str()); + case common::Value::kBoolean: + return value.boolean() ? "true" : "false"; + case common::Value::kI32Array: + return i32_array_pb_to_str(value.i32_array()); + case common::Value::kI64Array: + return i64_array_pb_to_str(value.i64_array()); + case common::Value::kNone: + return NONE_LITERAL; + default: + throw std::runtime_error("unknown value type" + value.DebugString()); + } +} + +bool constains_vertex_id(const std::vector& params) { + for (auto& param : params) { + if (param.type == codegen::DataType::kVertexId) { + return true; + } + } + return false; +} + +/*Build a expression struct from expression*/ +class ExprBuilder { + protected: + static constexpr const char* EXPR_OPERATOR_CALL_VAR_NAME = "var"; + + public: + ExprBuilder(BuildingContext& ctx, int var_id = 0, bool no_build = false, + int cur_var_start = 0) + : ctx_(ctx), + cur_var_start_(cur_var_start), + cur_var_id_(var_id), + cur_case_id_(0) { + if (no_build) { + // no build indicates whether we will use this builder as a helper. + // If set to true, we will not let queryClassName and next_expr_name + // increase. + class_name_ = ctx_.GetQueryClassName() + ctx_.GetNextExprName(); + } + } + + void AddAllExprOpr( + const google::protobuf::RepeatedPtrField& expr_ops) { + // we currently don't support filter with label keys! + // If we meet label keys just ignore. 
+ auto size = expr_ops.size(); + VLOG(10) << "Adding expr of size: " << size; + for (auto i = 0; i < size;) { + auto expr = expr_ops[i]; + if (expr.has_var() && expr.var().property().has_label()) { + VLOG(10) << "Found label in expr, skip this check"; + // try to find next right brace + int j = i; + for (; j < size; ++j) { + if (expr_ops[j].item_case() == common::ExprOpr::kBrace && + expr_ops[j].brace() == + common::ExprOpr::Brace::ExprOpr_Brace_RIGHT_BRACE) { + VLOG(10) << "Found right brace at ind: " << j + << ", started at: " << i; + AddExprOpr(std::string("true")); + AddExprOpr(expr_ops[j]); + i = j + 1; + break; + } + } + if (j == size) { + LOG(WARNING) << "no right brace found" << j << "size: " << size; + i = j; + } + } else { + AddExprOpr(expr_ops[i]); + ++i; + } + } + VLOG(10) << "Added expr of size: " << size; + } + + void AddExprOpr(const std::string expr_str) { + expr_nodes_.emplace_back(expr_str); + } + + // visit each expr opr. + void AddExprOpr(const common::ExprOpr& opr) { + switch (opr.item_case()) { + case common::ExprOpr::kBrace: { + auto brace = opr.brace(); + if (brace == common::ExprOpr::Brace::ExprOpr_Brace_LEFT_BRACE) { + VLOG(10) << "left brace"; + expr_nodes_.emplace_back("("); + } else if (brace == common::ExprOpr::Brace::ExprOpr_Brace_RIGHT_BRACE) { + VLOG(10) << "right brace"; + expr_nodes_.emplace_back(")"); + } else { + throw std::runtime_error("unknown brace"); + } + break; + } + + case common::ExprOpr::kConst: { + VLOG(10) << "Got const: " << opr.const_().DebugString(); + auto str = value_pb_to_str(opr.const_()); + VLOG(10) << "Got const: " << str; + expr_nodes_.emplace_back(std::move(str)); + break; + } + + case common::ExprOpr::kVar: { + VLOG(10) << "Got var"; + auto& var = opr.var(); + auto param_const = variable_to_param_const(var, ctx_); + // for each variable, we need add the variable to func_call_vars_. + // and also set a expr node for it. which is unique. 
+ func_call_vars_.push_back(param_const); + if (!var.has_tag()) { + func_call_tags_.push_back(-1); + } else { + func_call_tags_.push_back(var.tag().id()); + } + expr_nodes_.emplace_back(std::string(EXPR_OPERATOR_CALL_VAR_NAME) + + std::to_string(cur_var_id_++)); + + // expr_nodes_.emplace_back(param_const.var_name); + // convert a variable to a tag property, + // gs::NamedProperty{"prop1"}, saved for later use. + // tag_prop_strs_.emplace_back(variable_to_named_property(ctx_, var)); + VLOG(10) << "Got var: " << var.DebugString(); + break; + } + + case common::ExprOpr::kLogical: { + auto logical = opr.logical(); + auto str = logical_to_str(logical); + VLOG(10) << "Got expr opt logical: " << str; + expr_nodes_.emplace_back(std::move(str)); + break; + } + + // todo: use dynamic param + case common::ExprOpr::kParam: { + auto param_const_pb = opr.param(); + auto param_node_type = opr.node_type(); + auto param_const = + param_const_pb_to_param_const(param_const_pb, param_node_type); + VLOG(10) << "receive param const: " << param_const_pb.DebugString(); + ctx_.AddParameterVar(param_const); + expr_nodes_.emplace_back(param_const.var_name); + break; + } + + case common::ExprOpr::kArith: { + auto arith = opr.arith(); + auto str = arith_to_str(arith); + VLOG(10) << "Got expr opt arith: " << str; + expr_nodes_.emplace_back(std::move(str)); + break; + } + + case common::ExprOpr::kCase: { + int32_t when_then_size = opr.case_().when_then_expressions_size(); + std::stringstream case_ss; + std::stringstream var_ss, when_then_ss; + case_ss << "{\n"; + int32_t cur_var_num = 0; + std::unordered_set tag_used; + for (auto i = 0; i < when_then_size; i++) { + auto when_then = opr.case_().when_then_expressions(i); + auto when_expr = when_then.when_expression(); + auto then_expr = when_then.then_result_expression(); + { + auto when_builder = ExprBuilder(ctx_, cur_var_num); + when_builder.AddAllExprOpr(when_expr.operators()); + std::string predicate_expr; + std::vector var_names; + 
std::vector var_tags; + std::vector properties; + std::vector case_exprs; + std::tie(predicate_expr, var_names, var_tags, properties, + case_exprs) = when_builder.BuildRust(); + cur_var_num += var_names.size(); + if (i != 0) { + when_then_ss << "} else "; + } + when_then_ss << "if " << predicate_expr << "{\n"; + write_var_expr(var_ss, var_names, var_tags, properties, tag_used); + } + + { + auto then_builder = ExprBuilder(ctx_, cur_var_num); + then_builder.AddAllExprOpr(then_expr.operators()); + std::string predicate_expr; + std::vector var_names; + std::vector var_tags; + std::vector properties; + std::vector case_exprs; + std::tie(predicate_expr, var_names, var_tags, properties, + case_exprs) = then_builder.BuildRust(); + when_then_ss << predicate_expr << "\n"; + write_var_expr(var_ss, var_names, var_tags, properties, tag_used); + } + } + when_then_ss << "}"; + auto else_expr = opr.case_().else_result_expression(); + { + auto else_builder = ExprBuilder(ctx_, cur_var_num); + else_builder.AddAllExprOpr(else_expr.operators()); + std::string predicate_expr; + std::vector var_names; + std::vector var_tags; + std::vector properties; + std::vector case_exprs; + std::tie(predicate_expr, var_names, var_tags, properties, case_exprs) = + else_builder.BuildRust(); + when_then_ss << " else {\n" << predicate_expr << "\n}\n"; + write_var_expr(var_ss, var_names, var_tags, properties, tag_used); + when_then_ss << "};\n"; + } + std::string case_name = "case_" + std::to_string(cur_case_id_); + case_exprs_.push_back("let " + case_name + " = {\n" + var_ss.str() + + when_then_ss.str()); + expr_nodes_.emplace_back(case_name); + cur_case_id_++; + break; + } + + default: + LOG(FATAL) << "not recognized expr opr: " << opr.DebugString(); + break; + } + } + + // get expr nodes + const std::vector& GetExprNodes() const { return expr_nodes_; } + + // get func call vars + const std::vector& GetFuncCallVars() const { + return func_call_vars_; + } + + // get tag property strs + const std::vector& 
GetTagPropertyStrs() const {
+    return tag_prop_strs_;
+  }
+
+  // Returns the constructor parameters collected for the generated
+  // expression struct.
+  const std::vector& GetConstructParams() const {
+    return construct_params_;
+  }
+
+  // Returns the current variable-id counter.
+  int32_t GetCurVarId() const { return cur_var_id_; }
+
+  // True when no expression node has been added.
+  bool empty() const { return expr_nodes_.empty(); }
+
+  // Assembles the pieces needed to emit the expression:
+  //   - the predicate text: every expression node followed by one space;
+  //   - one generated variable name per function-call variable, numbered
+  //     from cur_var_start_;
+  //   - the tags of the function-call variables;
+  //   - one property per variable, where a variable whose name starts with
+  //     "var" is replaced by a placeholder property named "none";
+  //   - the collected case expressions.
+  std::tuple, std::vector,
+             std::vector, std::vector>
+  BuildRust() const {
+    std::stringstream expr_ss;
+    for (const auto& node : expr_nodes_) {
+      expr_ss << node << " ";
+    }
+    std::string predicate_expr = expr_ss.str();
+
+    std::vector var_names;
+    std::vector properties;
+    for (size_t i = 0; i < func_call_vars_.size(); ++i) {
+      // Keep the arithmetic in int so a negative cur_var_start_ does not
+      // wrap around through the unsigned index.
+      var_names.push_back(std::string(EXPR_OPERATOR_CALL_VAR_NAME) +
+                          std::to_string(cur_var_start_ + static_cast<int>(i)));
+      if (func_call_vars_[i].var_name.find("var") == 0) {
+        codegen::ParamConst empty;
+        empty.var_name = "none";
+        properties.push_back(empty);
+      } else {
+        properties.push_back(func_call_vars_[i]);
+      }
+    }
+
+    return std::make_tuple(predicate_expr, var_names, func_call_tags_,
+                           properties, case_exprs_);
+  }
+
+ protected:
+  // Writes the template header, the struct opening and the tag_prop_t alias
+  // with one TAG_PROP_<i> type parameter per tag property.
+  // Requires at least one tag property: the last parameter is written
+  // without a trailing separator, and size() - 1 would wrap around on an
+  // empty list.
+  void start_class(std::stringstream& ss) const {
+    CHECK(tag_prop_strs_.size() > 0);
+    const size_t last = tag_prop_strs_.size() - 1;
+    ss << "template <";
+    for (size_t i = 0; i < last; ++i) {
+      ss << "typename TAG_PROP_" << i << ", ";
+    }
+    ss << "typename TAG_PROP_" << last << ">";
+    ss << std::endl;
+    ss << "struct " << class_name_ << " {" << std::endl;
+    ss << " using tag_prop_t = std::tuple<";
+    for (size_t i = 0; i < last; ++i) {
+      ss << "TAG_PROP_" << i << ", ";
+    }
+    ss << "TAG_PROP_" << last << ">;" << std::endl;
+  }
+
+  // Closes the generated struct.
+  void end_class(std::stringstream& ss) const { ss << "};"; }
+
+  // Writes the constructor: the construct params followed by one
+  // TAG_PROP_<i>&& per tag property, then a member-initializer list that
+  // copies the params and moves the properties, and an empty body.
+  void add_constructor(std::stringstream& ss) const {
+    CHECK(tag_prop_strs_.size() > 0);
+    const size_t last_prop = tag_prop_strs_.size() - 1;
+
+    ss << _4_SPACES << class_name_ << "(";
+    {
+      // params
+      for (size_t i = 0; i < construct_params_.size(); ++i) {
+        ss << data_type_2_string(construct_params_[i].type) << " "
+           << construct_params_[i].var_name << ", ";
+      }
+
+      // tag_props
+      for (size_t i = 0; i < last_prop; ++i) {
+        ss << "TAG_PROP_" << i << "&& prop_" << i << ", ";
+      }
+      ss << "TAG_PROP_" << last_prop << "&& prop_" << last_prop;
+      ss << ")";
+    }
+    {
+      // constructor's code.
+      ss << " : ";
+      if (construct_params_.size() >= 1) {
+        for (size_t i = 0; i < construct_params_.size() - 1; ++i) {
+          ss << construct_params_[i].var_name << "_"
+             << "(" << construct_params_[i].var_name << "), ";
+        }
+        ss << construct_params_.back().var_name << "_"
+           << "(" << construct_params_.back().var_name << ")";
+        ss << ",";
+      }
+
+      for (size_t i = 0; i < last_prop; ++i) {
+        ss << "prop_" << i << "_(std::move(prop_" << i << ")),";
+      }
+      ss << "prop_" << last_prop << "_("
+         << "std::move(prop_" << last_prop << "))";
+    }
+
+    ss << "{}" << std::endl;
+  }
+
+  // Writes operator(): one argument per function-call variable and a body
+  // that returns the expression nodes joined by spaces.
+  void add_func_call(std::stringstream& ss) const {
+    // for function call, there can be vertex_id_t as input param, which
+    // depends on vertex_id type. so we need to template typename.
+    if (constains_vertex_id(func_call_vars_)) {
+      ss << _4_SPACES << "template " << std::endl;
+    }
+    ss << _4_SPACES << "inline auto operator()";
+    ss << "(";
+    if (func_call_vars_.size() > 0) {
+      const size_t last = func_call_vars_.size() - 1;
+      for (size_t i = 0; i < last; ++i) {
+        ss << data_type_2_string(func_call_vars_[i].type) << " "
+           << EXPR_OPERATOR_CALL_VAR_NAME << i << ",";
+      }
+      ss << data_type_2_string(func_call_vars_.back().type) << " "
+         << EXPR_OPERATOR_CALL_VAR_NAME << last;
+    }
+    ss << ") const {" << std::endl;
+    ss << _8_SPACES << "return ";
+    for (const auto& node : expr_nodes_) {
+      ss << node << " ";
+    }
+    ss << ";" << std::endl;
+    ss << _4_SPACES << "}" << std::endl;
+  }
+
+  // Writes Properties(), which returns a tuple of all prop_<i>_ members.
+  // Requires at least one tag property, for the same reason as
+  // start_class().
+  void add_tag_prop_getter(std::stringstream& ss) const {
+    CHECK(tag_prop_strs_.size() > 0);
+    const size_t last = tag_prop_strs_.size() - 1;
+    ss << _4_SPACES << "inline auto Properties() const {" << std::endl;
+    ss << _8_SPACES << "return std::make_tuple(";
+
+    for (size_t i = 0; i < last; ++i) {
+      ss << "prop_" << i << "_"
+         << ",";
+    }
+    ss << "prop_" << last << "_"
+       << ");" << std::endl;
+    ss << _4_SPACES << "}";
+    ss << std::endl;
+  }
+
+  // Writes the private section: one member per construct param and one
+  // prop_<i>_ member per tag property.
+  void add_private_member(std::stringstream& ss) const {
+    ss << _4_SPACES << "private:" << std::endl;
+    for (size_t i = 0; i < construct_params_.size(); ++i) {
+      ss << _8_SPACES << data_type_2_string(construct_params_[i].type) << " "
+         << construct_params_[i].var_name << "_;";
+      ss << std::endl;
+    }
+    for (size_t i = 0; i < tag_prop_strs_.size(); ++i) {
+      ss << _8_SPACES << "TAG_PROP_" << i << " prop_" << i << "_;";
+      ss << std::endl;
+    }
+  }
+
+  // Emits the `let` statements that bind the variables used by a predicate.
+  // First, for every tag in var_tags that is not already in tag_used, emits
+  // `vertex_id<tag+1>` (and `vertex_label<tag+1>` when the input is a vertex
+  // with more than one label). Then, for every variable, emits
+  // `let <name> = <property>_<label>[vertex_id<tag+1>]`, wrapped in an
+  // if/else chain over the labels when there is more than one.
+  // NOTE(review): the single-label branch reads input_type.second[0]
+  // without checking that the label list is non-empty - confirm callers
+  // guarantee that.
+  void write_var_expr(std::stringstream& ss,
+                      std::vector& var_names,
+                      std::vector& var_tags,
+                      std::vector& properties,
+                      std::unordered_set& tag_used) {
+    std::unordered_set tags_set;
+    for (auto var_tag : var_tags) {
+      if (tag_used.find(var_tag) == tag_used.end()) {
+        tags_set.insert(var_tag);
+      }
+    }
+    for (auto tag : tags_set) {
+      int32_t var_index = 0;
+      std::pair> input_type;
+      // Tag -1 refers to the head column, which keeps index 0.
+      if (tag != -1) {
+        var_index = ctx_.GetAliasIndex(tag);
+        input_type = ctx_.GetAliasType(tag);
+      } else {
+        input_type = ctx_.GetHeadType();
+      }
+      ss << "let vertex_id" << tag + 1 << " = CSR.get_internal_id(i"
+         << var_index << " as usize);\n";
+      VLOG(10) << "Get input alias type, index " << var_index << " label size "
+               << input_type.second.size();
+      if (input_type.first == 0 && input_type.second.size() > 1) {
+        ss << "let vertex_label" << tag + 1
+           << " = LDBCVertexParser::::get_label_id(i" << var_index
+           << " as usize);\n";
+      }
+    }
+
+    for (size_t i = 0; i < var_names.size(); i++) {
+      int32_t var_tag = var_tags[i];
+      std::pair> input_type;
+      if (var_tag != -1) {
+        input_type = ctx_.GetAliasType(var_tag);
+      } else {
+        input_type = ctx_.GetHeadType();
+      }
+      ss << "let " << var_names[i] << " = ";
+      if (input_type.first == 0 && input_type.second.size() > 1) {
+        const size_t label_num = input_type.second.size();
+        for (size_t j = 0; j < label_num; j++) {
+          if (j != 0) {
+            ss << "} else ";
+          }
+          // The last label becomes the unconditional else branch.
+          if (j != label_num - 1) {
+            ss << "if vertex_label" << var_tag + 1
+               << " == " << input_type.second[j] << "{\n";
+          } else {
+            ss << "{\n";
+          }
+          ss << properties[i].var_name << "_" << input_type.second[j]
+             << "[vertex_id" << var_tag + 1 << "]\n";
+        }
+        ss << "};\n";
+      } else {
+        ss << properties[i].var_name << "_" << input_type.second[0]
+           << "[vertex_id" << var_tag + 1 << "];\n";
+      }
+    }
+  }
+
+  // this corresponding to the input params.
+  std::vector construct_params_;
+  // input var list of function call
+  std::vector func_call_vars_;
+  // tag used in input var list
+  std::vector func_call_tags_;
+  // we shall also keep the private member too, use {var};
+  std::vector
+      tag_prop_strs_;  // gs::NamedProperty({"creationDate"})
+  std::vector case_exprs_;
+  // component of expression
+  std::vector expr_nodes_;
+  BuildingContext& ctx_;
+  int cur_var_start_;
+  int cur_var_id_;
+  int cur_case_id_;
+
+  std::string class_name_;
+};
+}  // namespace pegasus
+}  // namespace gs
+
+#endif  // CODEGEN_SRC_PEGASUS_PEGASUS_EXPR_BUILDER_H_
\ No newline at end of file
diff --git a/flex/codegen/src/pegasus/pegasus_get_v_builder.h b/flex/codegen/src/pegasus/pegasus_get_v_builder.h
new file mode 100644
index 000000000000..4ca8e923f5c6
--- /dev/null
+++ b/flex/codegen/src/pegasus/pegasus_get_v_builder.h
@@ -0,0 +1,409 @@
+/** Copyright 2020 Alibaba Group Holding Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#ifndef CODEGEN_SRC_PEGASUS_PEGASUS_GET_V_BUILDER_H_
+#define CODEGEN_SRC_PEGASUS_PEGASUS_GET_V_BUILDER_H_
+
+#include
+#include
+#include
+
+#include "proto_generated_gie/algebra.pb.h"
+#include "proto_generated_gie/common.pb.h"
+#include "proto_generated_gie/physical.pb.h"
+
+#include "flex/codegen/src/building_context.h"
+#include "flex/codegen/src/codegen_utils.h"
+#include "flex/codegen/src/graph_types.h"
+#include "flex/codegen/src/pb_parser/query_params_parser.h"
+#include "flex/codegen/src/pegasus/pegasus_expr_builder.h"
+
+namespace gs {
+namespace pegasus {
+// Internal mirror of physical::GetV::VOpt.
+enum class GetVType {
+  kStart = 0,
+  kEnd = 1,
+  kOther = 2,
+  kBoth = 3,
+  kItself = 4,
+};
+
+// Maps the protobuf vertex option onto GetVType.
+// Throws std::runtime_error for a value without a mapping.
+// Declared inline because this header defines it at namespace scope; without
+// it, including the header from two translation units violates the ODR.
+inline GetVType vopt_pb_to_internal(const physical::GetV::VOpt& v_opt) {
+  switch (v_opt) {
+  case physical::GetV_VOpt_START:
+    return GetVType::kStart;
+  case physical::GetV_VOpt_END:
+    return GetVType::kEnd;
+  case physical::GetV_VOpt_OTHER:
+    return GetVType::kOther;
+  case physical::GetV_VOpt_BOTH:
+    return GetVType::kBoth;
+  case physical::GetV_VOpt_ITSELF:
+    return GetVType::kItself;
+  default:
+    throw std::runtime_error("unknown vopt");
+  }
+}
+
+// Generates the Rust source for a GetV operator: a `flat_map` stage over the
+// previous stream that pushes the selected vertex ids into `result` and maps
+// them onto the output columns.
+//
+// Configure through the chained setters, then call Build(). The constructor
+// only stores the context, so Build() relies on every setter having been
+// called first, as BuildGetVOp() does.
+template
+class GetVOpBuilder {
+ public:
+  GetVOpBuilder(BuildingContext& ctx) : ctx_(ctx) {}
+
+  // Index of this operator; names the `stream_<index>` binding.
+  GetVOpBuilder& operator_index(const int32_t operator_index) {
+    operator_index_ = operator_index;
+    return *this;
+  }
+
+  // Which vertex to take (start/end/other/both/itself).
+  GetVOpBuilder& v_opt(const physical::GetV::VOpt& v_opt) {
+    v_opt_ = vopt_pb_to_internal(v_opt);
+    return *this;
+  }
+
+  // Input tag; -1 selects the head column.
+  GetVOpBuilder& in_tag(int32_t in_tag_id) {
+    in_tag_id_ = in_tag_id;
+    return *this;
+  }
+
+  // Output alias; -1 means the result is not aliased.
+  GetVOpBuilder& out_tag(int32_t out_tag_id) {
+    out_tag_id_ = out_tag_id;
+    return *this;
+  }
+
+  // Adds one accepted vertex label.
+  GetVOpBuilder& add_vertex_label(const common::NameOrId& vertex_label) {
+    vertex_labels_.push_back(
+        try_get_label_from_name_or_id(vertex_label));
+    return *this;
+  }
+
+  // Compiles the predicate with ExprBuilder when the expression has at least
+  // one operator; otherwise records that there is no predicate.
+  GetVOpBuilder& filter(const common::Expression& expr) {
+    auto size = expr.operators().size();
+    if (size > 0) {
+      ExprBuilder expr_builder(ctx_);
+
+      auto& expr_oprs = expr.operators();
+      expr_builder.AddAllExprOpr(expr_oprs);
+      std::vector predicate_tag;
+      std::vector case_exprs;
+      std::tie(predicate_expr_, var_names_, predicate_tag, properties_,
+               case_exprs) = expr_builder.BuildRust();
+      has_predicate_ = true;
+    } else {
+      has_predicate_ = false;
+      VLOG(10) << "no expression in getv";
+    }
+    return *this;
+  }
+
+  // Emits the Rust code for this GetV operator and updates the building
+  // context (head type, output alias and its type).
+  std::string Build() const {
+    std::pair> input_type;
+    int32_t input_index = 0;
+
+    if (in_tag_id_ == -1) {
+      input_type = ctx_.GetHeadType();
+    } else {
+      input_type = ctx_.GetAliasType(in_tag_id_);
+      input_index = ctx_.GetAliasIndex(in_tag_id_);
+      if (input_type.first != 0) {
+        VLOG(10) << "Unexpected input type " << input_type.first;
+      }
+    }
+    CHECK(input_type.first == 0);
+
+    // A runtime label check is needed as soon as one input label is not in
+    // the accepted label list.
+    bool filter_label = false;
+    if (!vertex_labels_.empty()) {
+      for (size_t i = 0; i < input_type.second.size(); ++i) {
+        if (std::find(vertex_labels_.begin(), vertex_labels_.end(),
+                      input_type.second[i]) == vertex_labels_.end()) {
+          VLOG(10) << "Can not find label " << input_type.second.size();
+          filter_label = true;
+          break;
+        }
+      }
+    }
+    VLOG(10) << "Labels size " << input_type.second.size();
+    VLOG(10) << "Labels type " << input_type.first;
+
+    auto latest_outputs = ctx_.GetOutput();
+    std::string getv_head_code = write_head();
+
+    std::string getv_body_code;
+    auto in_data_type = latest_outputs[input_index];
+
+    // Rust expression for the vertex id read from the input column. For an
+    // int64-array column `array_accessor` is appended; nullptr marks array
+    // input as unsupported for the calling case.
+    auto vertex_id_expr = [&](const char* array_accessor) -> std::string {
+      boost::format get_id_fmter("i%1%%2%");
+      if (in_data_type.size() == 1) {
+        if (in_data_type[0] == codegen::DataType::kInt64Array) {
+          if (array_accessor == nullptr) {
+            LOG(FATAL) << "Unsupported data type in kOther";
+          } else {
+            get_id_fmter % input_index % array_accessor;
+          }
+        } else {
+          get_id_fmter % input_index % "";
+        }
+      } else {
+        LOG(FATAL) << "Unsupported type";
+      }
+      return get_id_fmter.str();
+    };
+
+    // Rust statements that push `id_expr` into `result`, guarded by a label
+    // membership test when filter_label is set.
+    // NOTE(review): the emitted `vertex_id = ...` has no `let`, and
+    // `label_list.contains(vertex_label)` passes a value where Rust's
+    // Vec::contains takes a reference - confirm against the Rust template
+    // this code is pasted into.
+    auto push_vertex_code = [&](const std::string& id_expr) -> std::string {
+      if (filter_label) {
+        boost::format filter_fmter(
+            "vertex_id = %1%;\n"
+            "let vertex_label = "
+            "LDBCVertexParser::::get_label_id(vertex_id as usize);\n"
+            "let label_list = vec![%2%];\n"
+            "if label_list.contains(vertex_label) {\n"
+            "result.push(vertex_id);\n"
+            "}\n");
+
+        std::string label_string = generate_label_string();
+        filter_fmter % id_expr % label_string;
+        return filter_fmter.str();
+      }
+      boost::format no_filter_fmter(
+          "vertex_id = %1%;\n"
+          "result.push(vertex_id);\n");
+      no_filter_fmter % id_expr;
+      return no_filter_fmter.str();
+    };
+
+    switch (v_opt_) {
+    case GetVType::kItself: {
+      if (filter_label) {
+        // NOTE(review): no body is generated here, so `result` stays empty
+        // and every input row is dropped - confirm this is intended.
+      } else {
+        if (has_predicate_) {
+          if (input_type.second.size() == 1) {
+            int32_t label_id = input_type.second[0];
+            getv_body_code = filter_by_predicate(input_index, label_id);
+          } else {
+            boost::format multi_labels_fmter(
+                "let vertex_label = "
+                "LDBCVertexParser::::get_label_id(i%1% as usize);\n"
+                "%2%");
+            std::string getv_code;
+            for (auto i : input_type.second) {
+              // Both directives must be closed as %N%; an unterminated
+              // "%2" is not a valid boost::format directive.
+              boost::format getv_fmter(
+                  "if vertex_label == %1% {\n"
+                  "%2%"
+                  "}\n");
+              getv_fmter % i % filter_by_predicate(input_index, i);
+              getv_code += getv_fmter.str();
+            }
+            multi_labels_fmter % input_index % getv_code;
+            getv_body_code = multi_labels_fmter.str();
+          }
+        } else {
+          // return stream and do nothing: rebind the previous stream under
+          // this operator's name. Streams are named `stream_<index>`
+          // everywhere else, and the statement must be terminated so the
+          // next operator's `let` parses.
+          // NOTE(review): this early return skips the context updates done
+          // at the end of Build() (head type, output alias) - confirm.
+          boost::format empty_fmter("let stream_%1% = stream_%2%;");
+          empty_fmter % operator_index_ % (operator_index_ - 1);
+          return empty_fmter.str();
+        }
+      }
+      break;
+    }
+    case GetVType::kEnd: {
+      getv_body_code = push_vertex_code(vertex_id_expr(".last()"));
+      break;
+    }
+    case GetVType::kStart: {
+      getv_body_code = push_vertex_code(vertex_id_expr(".start()"));
+      break;
+    }
+    case GetVType::kOther: {
+      getv_body_code = push_vertex_code(vertex_id_expr(nullptr));
+      break;
+    }
+    case GetVType::kBoth:
+      LOG(FATAL) << "Unsupported getv type";
+    }
+
+    std::vector output;
+    output.push_back(codegen::DataType::kInt64);
+    // NOTE(review): latest_outputs is a local declared with `auto`, so this
+    // assignment is not written back to ctx_ unless out_tag_id_ is set
+    // below - confirm that is intended.
+    latest_outputs[0] = output;
+    ctx_.SetHeadType(input_type.first, input_type.second);
+
+    int32_t input_size = ctx_.InputSize();
+    boost::format edge_expand_output_fmter(
+        "Ok(result.into_iter().map(move |res| %1%))\n"
+        "})?;");
+
+    int32_t output_index = -1;
+    if (out_tag_id_ != -1) {
+      ctx_.SetAlias(out_tag_id_);
+      ctx_.SetAliasType(out_tag_id_, input_type.first, input_type.second);
+      output_index = ctx_.GetAliasIndex(out_tag_id_);
+      ctx_.SetOutput(output_index, output);
+    }
+
+    std::string output_params = generate_output_list(
+        "i", input_size, "res", output_index, ctx_.ContainHead());
+    edge_expand_output_fmter % output_params;
+    ctx_.SetHead(true);
+    return getv_head_code + getv_body_code + edge_expand_output_fmter.str();
+  }
+
+ private:
+  // Opens the `flat_map` closure over the previous stream and declares the
+  // `result` vector the body pushes into.
+  std::string write_head() const {
+    int32_t input_size = ctx_.InputSize();
+    boost::format head_fmter(
+        "let stream_%1% = stream_%2%\n"
+        ".flat_map(move |%3%| {\n"
+        "let mut result = vec![];");
+    std::string input_params = generate_arg_list("i", input_size);
+    head_fmter % operator_index_ % (operator_index_ - 1) % input_params;
+    return head_fmter.str();
+  }
+
+  // Comma-separated list of the accepted vertex labels.
+  std::string generate_label_string() const {
+    std::stringstream labels_ss;
+    for (size_t i = 0; i < vertex_labels_.size(); i++) {
+      if (i != 0) {
+        labels_ss << ",";
+      }
+      labels_ss << vertex_labels_[i];
+    }
+    return labels_ss.str();
+  }
+
+  // Rust statements that push input column `index` into `result` when the
+  // predicate holds for a vertex of label `label_id`. Without a predicate
+  // the column is pushed unconditionally.
+  // NOTE(review): the emitted `CSR.get_global_id(i, ...)` refers to a bare
+  // `i`, while the closure arguments come from generate_arg_list("i", ...);
+  // the bound `vertex_global_id` is also never used - confirm.
+  std::string filter_by_predicate(int32_t index,
+                                  const int32_t& label_id) const {
+    if (predicate_expr_.empty()) {
+      boost::format no_predicate_fmter("result.push(i%1%);\n");
+      no_predicate_fmter % index;
+      return no_predicate_fmter.str();
+    }
+    boost::format predicate_fmter(
+        "let vertex_id = CSR.get_internal_id(i%1% as usize);\n"
+        "%2%"
+        "if %3% {\n"
+        "let vertex_global_id = CSR.get_global_id(i, %4%).unwrap() as u64;\n"
+        "result.push(i%1%);\n"
+        "}\n");
+    std::string vars_code;
+    for (size_t i = 0; i < var_names_.size(); ++i) {
+      boost::format var_fmter("let %1% = %2%[vertex_id];\n");
+      std::string prop_name =
+          get_vertex_prop_column_name(properties_[i].var_name, label_id);
+      var_fmter % var_names_[i] % prop_name;
+      vars_code += var_fmter.str();
+    }
+    predicate_fmter % index % vars_code % predicate_expr_ % label_id;
+    return predicate_fmter.str();
+  }
+
+  BuildingContext& ctx_;
+  int32_t operator_index_;
+  GetVType v_opt_;
+  int32_t in_tag_id_, out_tag_id_;
+  std::vector vertex_labels_;
+  bool has_predicate_;
+  std::string predicate_expr_;
+  std::vector var_names_;
+  std::vector properties_;
+};
+
+// Configures a GetVOpBuilder from the protobuf GetV operator and returns the
+// generated code. A missing tag or alias is passed on as -1. `meta_data` is
+// not used.
+template
+static std::string BuildGetVOp(
+    BuildingContext& ctx, int32_t operator_index,
+    const physical::GetV& get_v_pb,
+    const physical::PhysicalOpr::MetaData& meta_data) {
+  GetVOpBuilder builder(ctx);
+  builder.v_opt(get_v_pb.opt());
+  if (get_v_pb.has_tag()) {
+    builder.in_tag(get_v_pb.tag().value());
+  } else {
+    builder.in_tag(-1);
+  }
+
+  if (get_v_pb.has_alias()) {
+    builder.out_tag(get_v_pb.alias().value());
+  } else {
+    builder.out_tag(-1);
+  }
+  auto& vertex_labels_pb = get_v_pb.params().tables();
+  for (const auto& vertex_label_pb : vertex_labels_pb) {
+    builder.add_vertex_label(vertex_label_pb);
+  }
+
+  return builder.operator_index(operator_index)
+      .filter(get_v_pb.params().predicate())
+      .Build();
+}
+}  // namespace pegasus
+}  // namespace gs
+
+#endif  // CODEGEN_SRC_PEGASUS_PEGASUS_GET_V_BUILDER_H_
diff --git a/flex/codegen/src/pegasus/pegasus_group_by_builder.h b/flex/codegen/src/pegasus/pegasus_group_by_builder.h
new file mode 100644
index 000000000000..9a8f3110ea1c
--- /dev/null
+++ b/flex/codegen/src/pegasus/pegasus_group_by_builder.h
@@ -0,0 +1,365 @@
+/** Copyright 2020 Alibaba Group Holding Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#ifndef CODEGEN_SRC_PEGASUS_PEGASUS_GROUP_BY_BUILDER_H_
+#define CODEGEN_SRC_PEGASUS_PEGASUS_GROUP_BY_BUILDER_H_
+
+#include "flex/codegen/src/building_context.h"
+
+namespace gs {
+namespace pegasus {
+// Generates the Rust source for a GroupBy operator as three chained stages:
+// `key_by` (split each row into key and value columns), `fold_by_key`
+// (aggregate the values per key) and `unfold` (flatten the grouped map back
+// into rows). Build() also resets the aliases in the building context and
+// registers the key and aggregate output columns.
+class GroupByOpBuilder {
+ public:
+  GroupByOpBuilder(BuildingContext& ctx) : ctx_(ctx) {}
+
+  // Index of this operator; names the `stream_<index>` binding.
+  GroupByOpBuilder& operator_index(const int32_t operator_index) {
+    operator_index_ = operator_index;
+    return *this;
+  }
+
+  // add group key: records its input tag, output alias and, when the key is
+  // a property, the property name and data type.
+  GroupByOpBuilder& AddKeyAlias(const physical::GroupBy::KeyAlias& key_alias) {
+    int32_t input_tag = key_alias.key().tag().id();
+    int32_t output_tag = key_alias.alias().value();
+    codegen::ParamConst param_const;
+    if (key_alias.key().has_property()) {
+      param_const.var_name = key_alias.key().property().key().name();
+      param_const.type = common_data_type_pb_2_data_type(
+          key_alias.key().node_type().data_type());
+    } else {
+      param_const.var_name = "";
+    }
+    key_input_tag_.emplace_back(input_tag);
+    key_output_tag_.emplace_back(output_tag);
+    key_input_type_.emplace_back(param_const);
+    return *this;
+  }
+
+  // add aggregation function: records the aggregate kind, its input
+  // variables and its output alias.
+  GroupByOpBuilder& AddAggFunc(const physical::GroupBy::AggFunc& agg_func) {
+    // agg function can apply on multiple tag's prop
+    agg_func_list_.emplace_back(agg_func.aggregate());
+    std::vector var_tags;
+    for (auto i = 0; i < agg_func.vars_size(); i++) {
+      var_tags.emplace_back(agg_func.vars(i));
+    }
+    group_input_vars_.emplace_back(var_tags);
+    group_output_tag_.emplace_back(agg_func.alias().value());
+    return *this;
+  }
+
+  // Column metadata, indexed by Build() as all keys first and then all
+  // aggregate outputs.
+  GroupByOpBuilder& MetaDatas(
+      const std::vector meta_datas) {
+    meta_datas_ = meta_datas;
+    return *this;
+  }
+
+  // return at least one key, at least one agg func
+  // and the operator code.
+  std::string Build() const {
+    VLOG(10) << "[GroupBy Builder] Start build groupby operator";
+    VLOG(10) << "[GroupBy Builder] Start build key_by operator";
+    std::string key_by_code = write_key_by_operator();
+
+    VLOG(10) << "[GroupBy Builder] Start build fold_by_key operator";
+    std::string fold_by_key_code = write_fold_by_operator();
+
+    VLOG(10) << "[GroupBy Builder] Start build unfold operator";
+    std::string unfold_code = write_unfold_operator();
+
+    // update output info
+    VLOG(10) << "[GroupBy Builder] Start update output info";
+    VLOG(10) << "key size " << key_output_tag_.size() << ", meta size "
+             << meta_datas_.size();
+    ctx_.SetHead(false);
+    ctx_.ResetAlias();
+
+    // Registers one output column: allocates the alias, then records its
+    // type (0 = vertex, 1 = other graph element, 2 = plain data) and its
+    // output data type. Keys and aggregates share this so that both take the
+    // plain-data branch on IrDataType::kDataType; the key loop previously
+    // tested kGraphType there, which can never hold once has_graph_type()
+    // is false.
+    auto register_output = [this](int32_t output_tag,
+                                  const auto& column_meta) {
+      ctx_.SetAlias(output_tag);
+      int32_t output_index = ctx_.GetAliasIndex(output_tag);
+      std::vector output_type;
+      std::vector labels;
+      if (column_meta.type().has_graph_type()) {
+        for (auto j = 0;
+             j < column_meta.type().graph_type().graph_data_type_size(); j++) {
+          labels.push_back(column_meta.type()
+                               .graph_type()
+                               .graph_data_type(j)
+                               .label()
+                               .label());
+        }
+        if (column_meta.type().graph_type().element_opt() ==
+            common::GraphDataType_GraphElementOpt::
+                GraphDataType_GraphElementOpt_VERTEX) {
+          ctx_.SetAliasType(output_tag, 0, labels);
+        } else {
+          ctx_.SetAliasType(output_tag, 1, labels);
+        }
+        output_type.push_back(codegen::DataType::kInt64);
+      } else if (column_meta.type().type_case() ==
+                 common::IrDataType::kDataType) {
+        ctx_.SetAliasType(output_tag, 2, labels);
+        output_type.push_back(
+            common_data_type_pb_2_data_type(column_meta.type().data_type()));
+      }
+      ctx_.SetOutput(output_index, output_type);
+    };
+
+    // NOTE(review): meta_datas_ is indexed without a bounds check; it is
+    // presumably at least as long as keys + aggregates - confirm at caller.
+    for (size_t i = 0; i < key_output_tag_.size(); ++i) {
+      register_output(key_output_tag_[i], meta_datas_[i]);
+    }
+    for (size_t i = 0; i < group_output_tag_.size(); ++i) {
+      register_output(group_output_tag_[i],
+                      meta_datas_[i + key_output_tag_.size()]);
+    }
+
+    return key_by_code + fold_by_key_code + unfold_code;
+  }
+
+ private:
+  // Emits the `key_by` stage: binds key<i> and value<i> to the input columns
+  // of the key tags and the aggregate inputs, and returns them as a
+  // (keys, values) pair.
+  std::string write_key_by_operator() const {
+    // codegen for key_by
+    boost::format key_by_head_fmter(
+        "let stream_%1% = stream_%2%.key_by(|%3%| {\n");
+    auto input_size = ctx_.InputSize();
+    std::string key_by_input = generate_arg_list("i", input_size);
+    key_by_head_fmter % operator_index_ % (operator_index_ - 1) % key_by_input;
+
+    VLOG(10) << "[GroupBy Builder] Key input size is " << key_input_tag_.size();
+    std::string key_by_key_code;
+    for (size_t i = 0; i < key_input_tag_.size(); ++i) {
+      auto in_tag = key_input_tag_[i];
+      auto input_index = ctx_.GetAliasIndex(in_tag);
+      boost::format key_fmter("let key%1% = i%2%;\n");
+      key_fmter % i % input_index;
+      key_by_key_code += key_fmter.str();
+    }
+    VLOG(10) << "[GroupBy Builder] Finished write key";
+
+    std::string key_by_value_code;
+    for (size_t i = 0; i < group_input_vars_.size(); ++i) {
+      // Only support value with one column
+      CHECK(group_input_vars_[i].size() == 1);
+      auto in_tag = group_input_vars_[i][0].tag().id();
+      auto input_index = ctx_.GetAliasIndex(in_tag);
+      boost::format value_fmter("let value%1% = i%2%;\n");
+      value_fmter % i % input_index;
+      key_by_value_code += value_fmter.str();
+    }
+    VLOG(10) << "[GroupBy Builder] Finished write value";
+
+    boost::format key_by_end_fmter("Ok((%1%, %2%))\n})?\n");
+    std::string key_list = generate_arg_list("key", key_input_tag_.size());
+    std::string value_list =
+        generate_arg_list("value", group_input_vars_.size());
+    key_by_end_fmter % key_list % value_list;
+
+    return key_by_head_fmter.str() + key_by_key_code + key_by_value_code +
+           key_by_end_fmter.str();
+  }
+
+  // Emits the `fold_by_key` stage: the initial accumulator value(s), one
+  // update statement per aggregate, and the returned accumulator(s).
+  // NOTE(review): SUM/AVG/MAX/MIN/COUNT all start from 0, so MIN (and MAX
+  // over negative values) is bounded by that seed, and AVG is accumulated
+  // like SUM - confirm the intended semantics.
+  std::string write_fold_by_operator() const {
+    boost::format fold_by_head_fmter(".fold_by_key(%1%, || |%2%, %3%|{\n");
+    std::stringstream fold_by_init_ss;
+    // Several accumulators are grouped into a tuple.
+    if (agg_func_list_.size() > 1) {
+      fold_by_init_ss << "(";
+    }
+    for (size_t i = 0; i < agg_func_list_.size(); ++i) {
+      switch (agg_func_list_[i]) {
+      case physical::GroupBy::AggFunc::Aggregate::GroupBy_AggFunc_Aggregate_SUM:
+      case physical::GroupBy::AggFunc::Aggregate::GroupBy_AggFunc_Aggregate_AVG:
+      case physical::GroupBy::AggFunc::Aggregate::GroupBy_AggFunc_Aggregate_MAX:
+      case physical::GroupBy::AggFunc::Aggregate::GroupBy_AggFunc_Aggregate_MIN:
+      case physical::GroupBy::AggFunc::Aggregate::
+          GroupBy_AggFunc_Aggregate_COUNT: {
+        fold_by_init_ss << "0";
+        break;
+      }
+      case physical::GroupBy::AggFunc::Aggregate::
+          GroupBy_AggFunc_Aggregate_COUNT_DISTINCT: {
+        fold_by_init_ss << "HashSet::new()";
+        break;
+      }
+      case physical::GroupBy::AggFunc::Aggregate::
+          GroupBy_AggFunc_Aggregate_TO_LIST: {
+        fold_by_init_ss << "vec![]";
+        break;
+      }
+      default:
+        LOG(FATAL) << "Unsupported aggregate function";
+      }
+      if (i + 1 < agg_func_list_.size()) {
+        fold_by_init_ss << ", ";
+      }
+    }
+    if (agg_func_list_.size() > 1) {
+      fold_by_init_ss << ")";
+    }
+    std::string fold_by_init = fold_by_init_ss.str();
+    std::string agg_params =
+        generate_arg_list("mut agg", agg_func_list_.size());
+    std::string input_params = generate_arg_list("i", agg_func_list_.size());
+    fold_by_head_fmter % fold_by_init % agg_params % input_params;
+
+    std::stringstream agg_func_ss;
+    for (size_t i = 0; i < agg_func_list_.size(); ++i) {
+      switch (agg_func_list_[i]) {
+      case physical::GroupBy::AggFunc::Aggregate::GroupBy_AggFunc_Aggregate_SUM:
+      case physical::GroupBy::AggFunc::Aggregate::
+          GroupBy_AggFunc_Aggregate_AVG: {
+        boost::format sum_fmter("agg%1% += i%1%;\n");
+        sum_fmter % i;
+        agg_func_ss << sum_fmter.str();
+        break;
+      }
+      case physical::GroupBy::AggFunc::Aggregate::
+          GroupBy_AggFunc_Aggregate_MAX: {
+        boost::format max_fmter("agg%1% = max(agg%1%, i%1%);\n");
+        max_fmter % i;
+        agg_func_ss << max_fmter.str();
+        break;
+      }
+      case physical::GroupBy::AggFunc::Aggregate::
+          GroupBy_AggFunc_Aggregate_MIN: {
+        // MIN must keep the smaller value; this used to emit max(). The
+        // path is fully qualified so it resolves whether or not the Rust
+        // template imports `min`.
+        boost::format min_fmter("agg%1% = std::cmp::min(agg%1%, i%1%);\n");
+        min_fmter % i;
+        agg_func_ss << min_fmter.str();
+        break;
+      }
+      case physical::GroupBy::AggFunc::Aggregate::
+          GroupBy_AggFunc_Aggregate_COUNT: {
+        boost::format count_fmter("agg%1% += 1;\n");
+        count_fmter % i;
+        agg_func_ss << count_fmter.str();
+        break;
+      }
+      case physical::GroupBy::AggFunc::Aggregate::
+          GroupBy_AggFunc_Aggregate_COUNT_DISTINCT: {
+        boost::format count_distinct_fmter("agg%1%.insert(i%1%);\n");
+        count_distinct_fmter % i;
+        agg_func_ss << count_distinct_fmter.str();
+        break;
+      }
+      case physical::GroupBy::AggFunc::Aggregate::
+          GroupBy_AggFunc_Aggregate_TO_LIST: {
+        // NOTE(review): Rust's Vec::append takes `&mut Vec`; adding a
+        // single element is `push` - confirm what i<N> is here.
+        boost::format to_list_fmter("agg%1%.append(i%1%);\n");
+        to_list_fmter % i;
+        agg_func_ss << to_list_fmter.str();
+        break;
+      }
+      default:
+        LOG(FATAL) << "Unsupported aggregate function";
+      }
+    }
+    std::string agg_func_code = agg_func_ss.str();
+
+    boost::format fold_by_end_fmter("Ok(%1%)\n})?\n");
+    std::string fold_by_output =
+        generate_arg_list("agg", agg_func_list_.size());
+    fold_by_end_fmter % fold_by_output;
+
+    return fold_by_head_fmter.str() + agg_func_code + fold_by_end_fmter.str();
+  }
+
+  // Emits the `unfold` stage: flattens each (key, value) group into one
+  // output row. A single key or value is used directly; several are read as
+  // tuple fields (`key.0`, `value.1`, ...).
+  std::string write_unfold_operator() const {
+    boost::format unfold_fmter(
+        ".unfold(|group_map|{\n"
+        "Ok(group_map.into_iter().map(|(key, value)| (%1%%2%)))\n"
+        "})?;\n");
+    std::string key_outputs;
+    if (key_output_tag_.size() == 1) {
+      key_outputs = "key, ";
+    } else {
+      for (size_t i = 0; i < key_output_tag_.size(); i++) {
+        key_outputs = key_outputs + "key." + std::to_string(i) + ", ";
+      }
+    }
+    std::string value_outputs;
+    if (group_output_tag_.size() == 1) {
+      value_outputs = "value";
+    } else {
+      for (size_t i = 0; i < group_output_tag_.size(); i++) {
+        value_outputs = value_outputs + "value." + std::to_string(i);
+        if (i + 1 != group_output_tag_.size()) {
+          value_outputs += ", ";
+        }
+      }
+    }
+    unfold_fmter % key_outputs % value_outputs;
+    return unfold_fmter.str();
+  }
+
+  BuildingContext& ctx_;
+  int32_t operator_index_;
+  std::vector> key_alias_name_and_code;
+  std::vector key_input_tag_;
+  std::vector key_output_tag_;
+  std::vector key_input_type_;
+  std::vector agg_func_list_;
+  std::vector> group_input_vars_;
+  std::vector group_output_tag_;
+  std::vector meta_datas_;
+};
+
+// Configures a GroupByOpBuilder from the protobuf GroupBy operator (all key
+// mappings, then all aggregate functions) and returns the generated code.
+// Requires at least one aggregate function.
+static std::string BuildGroupByOp(
+    BuildingContext& ctx, int32_t operator_index,
+    const physical::GroupBy& group_by_pb,
+    const std::vector& meta_datas) {
+  GroupByOpBuilder builder(ctx);
+  auto& key_aliases = group_by_pb.mappings();
+
+  CHECK(group_by_pb.functions_size() >= 1);
+  auto& functions = group_by_pb.functions();
+  for (auto i = 0; i < key_aliases.size(); ++i) {
+    auto& key_alias = key_aliases[i];
+    builder.AddKeyAlias(key_alias);
+  }
+
+  for (auto i = 0; i < functions.size(); ++i) {
+    auto& func = functions[i];
+    builder.AddAggFunc(func);
+  }
+  return builder.operator_index(operator_index).MetaDatas(meta_datas).Build();
+}
+}  // namespace pegasus
+}  // namespace gs
+
+#endif  // CODEGEN_SRC_PEGASUS_PEGASUS_GROUP_BY_BUILDER_H_
diff --git a/flex/codegen/src/pegasus/pegasus_intersect_builder.h b/flex/codegen/src/pegasus/pegasus_intersect_builder.h
new file mode 100644
index 000000000000..83c71a91931f
--- /dev/null
+++ b/flex/codegen/src/pegasus/pegasus_intersect_builder.h
@@ -0,0 +1,119 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef CODEGEN_SRC_PEGASUS_PEGASUS_INTERSECT_BUILDER_H_ +#define CODEGEN_SRC_PEGASUS_PEGASUS_INTERSECT_BUILDER_H_ + +#include +#include + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/expr.pb.h" +#include "proto_generated_gie/physical.pb.h" + +namespace gs { +namespace pegasus { +class IntersectOpBuilder { + public: + IntersectOpBuilder(BuildingContext& ctx) : ctx_(ctx) {} + + IntersectOpBuilder& intersect_key(int32_t intersect_key) { + intersect_key_ = intersect_key; + return *this; + } + + IntersectOpBuilder& add_plan(const physical::PhysicalPlan& plan) { + sub_plans_.push_back(plan); + return *this; + } + + std::string Build() { + VLOG(10) << "Start Build intersect"; + std::stringstream ss; + for (auto i = 0; i < sub_plans_.size(); i++) { + auto operator_size = sub_plans_[i].plan_size(); + for (auto j = 0; j < operator_size; j++) { + VLOG(10) << "Get " << j << "th operator from sub plan " << i; + auto op = sub_plans_[i].plan(j); + auto& meta_datas = op.meta_data(); + // CHECK(meta_datas.size() == 1) << "meta data size: " << + // meta_datas.size(); + // physical::PhysicalOpr::MetaData meta_data; //fake meta + auto opr = op.opr(); + switch (opr.op_kind_case()) 
{ + case physical::PhysicalOpr::Operator::kRepartition: { + physical::PhysicalOpr::MetaData meta_data; + + VLOG(10) << "Found a repartition operator"; + auto& repartition_op = opr.repartition(); + auto repartition_codegen = pegasus::BuildRepartitionOp( + ctx_, i + 1, repartition_op, meta_data); + VLOG(10) << repartition_codegen; + ss << repartition_codegen; + break; + } + case physical::PhysicalOpr::Operator::kEdge: { // edge expand + auto& meta_data = meta_datas[0]; + VLOG(10) << "Found a edge expand operator"; + auto& edge_op = opr.edge(); + auto edge_expand_codegen = pegasus::BuildEdgeExpandOp( + ctx_, i + 1, edge_op, meta_data); + VLOG(10) << edge_expand_codegen; + ss << edge_expand_codegen; + break; + } + case physical::PhysicalOpr::Operator::kVertex: { + physical::PhysicalOpr::MetaData meta_data; + + VLOG(10) << "Found a get_v operator"; + auto& vertex_op = opr.vertex(); + auto vertex_codegen = + pegasus::BuildGetVOp(ctx_, i + 1, vertex_op, meta_data); + VLOG(10) << vertex_codegen; + ss << vertex_codegen; + + break; + } + default: + LOG(FATAL) << "Not supproted in intersect."; + } + } + } + VLOG(10) << "Finish Build intersect"; + return ss.str(); + } + + private: + BuildingContext ctx_; + int32_t intersect_key_; + std::vector sub_plans_; +}; + +static std::string BuildIntersectOp( + BuildingContext& ctx, const physical::Intersect& intersect_pb, + const physical::PhysicalOpr::MetaData& meta_data) { + IntersectOpBuilder builder(ctx); + for (auto i = 0; i < intersect_pb.sub_plans_size(); i++) { + builder.add_plan(intersect_pb.sub_plans(i)); + } + return builder.intersect_key(intersect_pb.key()).Build(); +} +} // namespace pegasus +} // namespace gs + +#endif // CODEGEN_SRC_PEGASUS_PEGASUS_INTERSECT_BUILDER_H_ diff --git a/flex/codegen/src/pegasus/pegasus_join_builder.h b/flex/codegen/src/pegasus/pegasus_join_builder.h new file mode 100644 index 000000000000..2c9c3a4eb995 --- /dev/null +++ b/flex/codegen/src/pegasus/pegasus_join_builder.h @@ -0,0 +1,247 @@ +/** 
Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef CODEGEN_SRC_PEGASUS_PEGASUS_JOIN_BUILDER_H_ +#define CODEGEN_SRC_PEGASUS_PEGASUS_JOIN_BUILDER_H_ + +#include +#include + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "flex/codegen/src/pegasus/pegasus_order_by_builder.h" +#include "flex/codegen/src/pegasus/pegasus_project_builder.h" +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/expr.pb.h" +#include "proto_generated_gie/physical.pb.h" + +namespace gs { +namespace pegasus { +class JoinOpBuilder { + public: + JoinOpBuilder(BuildingContext& ctx) : ctx_(ctx) {} + + JoinOpBuilder& operator_index(const int32_t operator_index) { + operator_index_ = operator_index; + return *this; + } + + JoinOpBuilder& add_plan(const physical::PhysicalPlan& left_plan, + const physical::PhysicalPlan& right_plan) { + left_plan_ = left_plan; + right_plan_ = right_plan; + return *this; + } + + JoinOpBuilder& set_join_kind(const physical::Join::JoinKind& join_kind) { + join_kind_ = join_kind; + return *this; + } + + JoinOpBuilder& set_join_key(const std::vector left_keys, + const std::vector right_keys) { + left_keys_ = left_keys; + right_keys_ = right_keys; + return *this; + } + + std::string Build() { + VLOG(10) << "Start build join"; + std::stringstream ss; + // codegen for stream copy + 
ss << "let (mut left_stream, mut right_stream) = stream.copied();\n"; + + boost::format join_code_fmter( + "let stream_%1% = {\n" + "let (mut left_stream, mut right_stream) = stream_%2%.copied();\n" + "left_stream = {\n" + "let stream_0 = left_stream;\n" + "%3%" + "};\n" + "right_stream = {\n" + "let stream_0 = right_stream;\n" + "%4%" + "};\n" + "%5%" + "};\n" // code for copied + ); + join_code_fmter % operator_index_ % (operator_index_ - 1); + // codegen for left & right plan + auto left_context = ctx_.CreateSubTaskContext("left_"); + auto right_context = ctx_.CreateSubTaskContext("right_"); + + join_code_fmter % write_sub_plan(left_context, left_plan_); + + join_code_fmter % write_sub_plan(right_context, right_plan_); + switch (join_kind_) { + case physical::Join::JoinKind::Join_JoinKind_INNER: { + join_code_fmter % "left_stream.inner_join(right_stream)?\n"; + break; + } + case physical::Join::JoinKind::Join_JoinKind_LEFT_OUTER: { + join_code_fmter % "left_stream.left_outer_join(right_stream)?\n"; + break; + } + case physical::Join::JoinKind::Join_JoinKind_RIGHT_OUTER: { + join_code_fmter % "left_stream.right_outer_join(right_stream)?\n"; + break; + } + case physical::Join::JoinKind::Join_JoinKind_SEMI: { + join_code_fmter % "left_stream.semi_join(right_stream)?\n"; + break; + } + case physical::Join::JoinKind::Join_JoinKind_ANTI: { + join_code_fmter % "left_stream.anti_join(right_stream)?\n"; + break; + } + default: + LOG(FATAL) << "Unsupported join type"; + } + + // combine building context after join + + // codegen for final join value combination + + return join_code_fmter.str(); + } + + private: + std::string write_sub_plan(BuildingContext& context, + physical::PhysicalPlan& plan) { + auto plan_size = plan.plan_size(); + std::stringstream sub_plan_code_ss; + for (auto i = 0; i < plan_size; i++) { + auto op = plan.plan(i); + auto& meta_datas = op.meta_data(); + + auto opr = op.opr(); + switch (opr.op_kind_case()) { + case 
physical::PhysicalOpr::Operator::kRepartition: { + physical::PhysicalOpr::MetaData meta_data; + + VLOG(10) << "Found a repartition operator"; + auto& repartition_op = opr.repartition(); + auto repartition_codegen = pegasus::BuildRepartitionOp( + context, i + 1, repartition_op, meta_data); + VLOG(10) << repartition_codegen; + sub_plan_code_ss << repartition_codegen; + break; + } + case physical::PhysicalOpr::Operator::kGroupBy: { + std::vector meta_datas; + for (auto i = 0; i < op.meta_data_size(); i++) { + meta_datas.push_back(op.meta_data(i)); + } + + VLOG(10) << "Found a groupby operator"; + auto& groupby_op = opr.group_by(); + + sub_plan_code_ss << pegasus::BuildGroupByOp(context, i + 1, groupby_op, + meta_datas); + break; + } + case physical::PhysicalOpr::Operator::kOrderBy: { + physical::PhysicalOpr::MetaData meta_data; + + VLOG(10) << "Found a order_by operator"; + auto& orderby_op = opr.order_by(); + + sub_plan_code_ss << pegasus::BuildOrderByOp(context, i + 1, orderby_op, + meta_data); + break; + } + case physical::PhysicalOpr::Operator::kProject: { + std::vector meta_data; + for (auto i = 0; i < op.meta_data_size(); i++) { + meta_data.push_back(op.meta_data(i)); + } + + VLOG(10) << "Found a project operator"; + auto& project_op = opr.project(); + + sub_plan_code_ss << pegasus::BuildProjectOp(context, i + 1, project_op, + meta_data); + break; + } + case physical::PhysicalOpr::Operator::kEdge: { // edge expand + auto& meta_data = meta_datas[0]; + VLOG(10) << "Found a edge expand operator"; + auto& edge_op = opr.edge(); + auto edge_codegen = pegasus::BuildEdgeExpandOp( + context, i + 1, edge_op, meta_data); + VLOG(10) << edge_codegen; + sub_plan_code_ss << edge_codegen; + break; + } + case physical::PhysicalOpr::Operator::kVertex: { + physical::PhysicalOpr::MetaData meta_data; + + VLOG(10) << "Found a get_v operator"; + auto& vertex_op = opr.vertex(); + auto vertex_codegen = + pegasus::BuildGetVOp(context, i + 1, vertex_op, meta_data); + VLOG(10) << 
vertex_codegen; + sub_plan_code_ss << vertex_codegen; + + break; + } + case physical::PhysicalOpr::Operator::kDedup: { + physical::PhysicalOpr::MetaData meta_data; + VLOG(10) << "Found a dedup operator"; + auto& dedup_op = opr.dedup(); + auto dedup_codegen = + pegasus::BuildDedupOp(context, i + 1, dedup_op, meta_data); + VLOG(10) << dedup_codegen; + sub_plan_code_ss << dedup_codegen; + break; + } + default: + LOG(FATAL) << "Not supproted in union."; + } + } + sub_plan_code_ss << "stream_" << plan_size; + return sub_plan_code_ss.str(); + } + + BuildingContext ctx_; + int32_t operator_index_; + physical::PhysicalPlan left_plan_; + physical::PhysicalPlan right_plan_; + std::vector left_keys_; + std::vector right_keys_; + physical::Join::JoinKind join_kind_; +}; + +static std::string BuildJoinOp( + BuildingContext& ctx, int32_t operator_index, const physical::Join& join_pb, + const physical::PhysicalOpr::MetaData& meta_data) { + JoinOpBuilder builder(ctx); + builder.set_join_kind(join_pb.join_kind()) + .add_plan(join_pb.left_plan(), join_pb.right_plan()); + std::vector left_keys, right_keys; + for (auto i = 0; i < join_pb.left_keys_size(); i++) { + left_keys.emplace_back(join_pb.left_keys(i)); + } + for (auto i = 0; i < join_pb.right_keys_size(); i++) { + right_keys.emplace_back(join_pb.right_keys(i)); + } + builder.set_join_key(left_keys, right_keys); + return builder.operator_index(operator_index).Build(); +} +} // namespace pegasus +} // namespace gs + +#endif // CODEGEN_SRC_PEGASUS_PEGASUS_JOIN_BUILDER_H_ diff --git a/flex/codegen/src/pegasus/pegasus_limit_builder.h b/flex/codegen/src/pegasus/pegasus_limit_builder.h new file mode 100644 index 000000000000..917d76b8ca78 --- /dev/null +++ b/flex/codegen/src/pegasus/pegasus_limit_builder.h @@ -0,0 +1,72 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef CODEGEN_SRC_PEGASUS_PEGASUS_LIMIT_BUILDER_H_ +#define CODEGEN_SRC_PEGASUS_PEGASUS_LIMIT_BUILDER_H_ + +#include +#include + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/expr.pb.h" +#include "proto_generated_gie/physical.pb.h" + +namespace gs { +namespace pegasus { + +class LimitOpBuilder { + public: + LimitOpBuilder(BuildingContext& ctx) : ctx_(ctx) {} + + LimitOpBuilder& operator_index(const int32_t operator_index) { + operator_index_ = operator_index; + return *this; + } + + LimitOpBuilder& limit(int32_t limit) { + limit_ = limit; + return *this; + } + + std::string Build() { + VLOG(10) << "Start build limit"; + + boost::format limit_fmter("let stream_%1% = stream_%2%.limit(%3%)?;"); + limit_fmter % operator_index_ % (operator_index_ - 1) % limit_; + return limit_fmter.str(); + } + + private: + BuildingContext ctx_; + int32_t operator_index_; + int32_t limit_; +}; + +static std::string BuildLimitOp( + BuildingContext& ctx, int32_t operator_index, + const algebra::Limit& limit_pb, + const physical::PhysicalOpr::MetaData& meta_data) { + LimitOpBuilder builder(ctx); + return builder.operator_index(operator_index) + .limit(limit_pb.range().upper()) + .Build(); +} +} // namespace pegasus +} // namespace gs + +#endif // CODEGEN_SRC_PEGASUS_PEGASUS_LIMIT_BUILDER_H_ diff --git a/flex/codegen/src/pegasus/pegasus_order_by_builder.h 
b/flex/codegen/src/pegasus/pegasus_order_by_builder.h new file mode 100644 index 000000000000..ae00f17e340e --- /dev/null +++ b/flex/codegen/src/pegasus/pegasus_order_by_builder.h @@ -0,0 +1,148 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef CODEGEN_SRC_PEGASUS_PEGASUS_ORDER_BY_BUILDER_H_ +#define CODEGEN_SRC_PEGASUS_PEGASUS_ORDER_BY_BUILDER_H_ + +#include +#include +#include + +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/physical.pb.h" + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "flex/codegen/src/pb_parser/query_params_parser.h" + +namespace gs { +namespace pegasus { +class OrderByOpBuilder { + public: + OrderByOpBuilder(BuildingContext& ctx) : ctx_(ctx), limit_(-1) {} + + OrderByOpBuilder& operator_index(const int32_t operator_index) { + operator_index_ = operator_index; + return *this; + } + + OrderByOpBuilder& AddOrderingPair( + const algebra::OrderBy_OrderingPair& order_pair) { + ordering_pair_.emplace_back(order_pair); + return *this; + } + + OrderByOpBuilder& SetLimit(const int32_t limit) { + limit_ = limit; + return *this; + } + + std::string Build() { + std::stringstream ss; + boost::format order_by_fmt( + "%1%" + "%2%" + "})?;\n"); + + std::string head_code = write_head(); + + std::string order_body_code; + VLOG(10) << "Ordering pair 
size is " << ordering_pair_.size(); + for (auto i = 0; i < ordering_pair_.size(); ++i) { + boost::format cmp_fmter("x.%1%.%2%(&y.%1%)%3%"); + + int32_t input_tag = ordering_pair_[i].key().tag().id(); + auto data_type = ordering_pair_[i].key().node_type().data_type(); + int32_t tag_index = ctx_.GetAliasIndex(input_tag); + + if (i > 0) { + ss << ".then("; + } + std::string cmp_type; + switch (data_type) { + case common::DataType::BOOLEAN: + case common::DataType::INT32: + case common::DataType::INT64: + case common::DataType::STRING: { + cmp_type = "cmp"; + break; + } + case common::DataType::DOUBLE: { + cmp_type = "partial_cmp"; + break; + } + default: + LOG(FATAL) << "Unsupported type " << data_type; + } + std::string reverse_str; + if (ordering_pair_[i].order() == algebra::OrderBy_OrderingPair_Order:: + OrderBy_OrderingPair_Order_DESC) { + reverse_str = ".reverse()"; + } + cmp_fmter % tag_index % cmp_type % reverse_str; + if (i > 0) { + order_body_code = order_body_code + ".then(" + cmp_fmter.str() + ")\n"; + } else { + order_body_code += cmp_fmter.str(); + } + } + order_by_fmt % head_code % order_body_code; + return order_by_fmt.str(); + } + + private: + std::string write_head() const { + boost::format head_fmter("let stream_%1% = stream_%2%.%3%(%4% |x, y| {\n"); + std::string operator_name; + std::string limit_code; + if (limit_ < 0) { + operator_name = "sort_by"; + } else { + operator_name = "sort_limit_by"; + limit_code = std::to_string(limit_) + ", "; + } + head_fmter % operator_index_ % (operator_index_ - 1) % operator_name % + limit_code; + return head_fmter.str(); + } + + BuildingContext& ctx_; + int32_t operator_index_; + std::vector ordering_pair_; + int32_t limit_; +}; + +static std::string BuildOrderByOp( + BuildingContext& ctx, int32_t operator_index, + const algebra::OrderBy& order_by_pb, + const physical::PhysicalOpr::MetaData& meta_data) { + OrderByOpBuilder builder(ctx); + + CHECK(order_by_pb.pairs_size() >= 1); + for (auto i = 0; i < 
order_by_pb.pairs_size(); i++) { + builder.AddOrderingPair(order_by_pb.pairs(i)); + } + if (order_by_pb.has_limit()) { + builder.SetLimit(order_by_pb.limit().upper()); + } + + return builder.operator_index(operator_index).Build(); +} + +} // namespace pegasus +} // namespace gs + +#endif // CODEGEN_SRC_PEGASUS_PEGASUS_ORDER_BY_BUILDER_H_ diff --git a/flex/codegen/src/pegasus/pegasus_path_expand_builder.h b/flex/codegen/src/pegasus/pegasus_path_expand_builder.h new file mode 100644 index 000000000000..024aef93d7fa --- /dev/null +++ b/flex/codegen/src/pegasus/pegasus_path_expand_builder.h @@ -0,0 +1,364 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+#ifndef CODEGEN_SRC_PEGASUS_PEGASUS_PATH_EXPAND_BUILDER_H_
+#define CODEGEN_SRC_PEGASUS_PEGASUS_PATH_EXPAND_BUILDER_H_
+
+#include
+#include
+
+#include "flex/codegen/src/building_context.h"
+#include "flex/codegen/src/codegen_utils.h"
+#include "flex/codegen/src/graph_types.h"
+#include "flex/codegen/src/pegasus/pegasus_repartition_builder.h"
+#include "proto_generated_gie/algebra.pb.h"
+#include "proto_generated_gie/common.pb.h"
+#include "proto_generated_gie/expr.pb.h"
+
+namespace gs {
+namespace pegasus {
+// Generates the code text for a PathExpand operator: an optional `.map` that
+// moves the start column to the head, then an `iterate_emit_until` loop whose
+// body repartitions and expands edges once per hop.
+//
+// NOTE(review): the template parameter list and the element types of the
+// std::vector declarations are missing in this copy of the patch (LabelT is
+// used as a type below); restore them from the upstream header.
+template
+class PathExpandOpBuilder {
+ public:
+  PathExpandOpBuilder(BuildingContext& ctx) : ctx_(ctx) {}
+
+  // Tag of the column to start from; Build() treats -1 as "no start tag".
+  PathExpandOpBuilder& in_tag(int32_t in_tag_id) {
+    in_tag_id_ = in_tag_id;
+    return *this;
+  }
+
+  // Stores the output tag. Nothing in this class reads it.
+  PathExpandOpBuilder& out_tag(int32_t out_tag_id) {
+    out_tag_id_ = out_tag_id;
+    return *this;
+  }
+
+  // Stores the meta data and derives the source/destination vertex labels of
+  // every edge triplet in it. Aborts (CHECK) unless the meta data is a
+  // non-empty edge graph type.
+  // This reads edge_expand_.direction(), so edge_expand_opt() has to be
+  // called before meta_data().
+  PathExpandOpBuilder& meta_data(
+      const physical::PhysicalOpr::MetaData& meta_data) {
+    meta_data_ = meta_data;
+    // we can get the edge tuplet from meta_data, in case we fail to extract
+    // edge triplet from ir_data_type
+    {
+      auto& ir_data_type = meta_data_.type();
+      VLOG(10) << "str: " << ir_data_type.DebugString();
+      CHECK(ir_data_type.has_graph_type());
+      auto& graph_type = ir_data_type.graph_type();
+      VLOG(10) << "debug string: " << graph_type.DebugString();
+      CHECK(graph_type.element_opt() == common::GraphDataType::GraphElementOpt::
+                                            GraphDataType_GraphElementOpt_EDGE)
+          << "expect edge meta for edge builder";
+      auto& ele_label_types = graph_type.graph_data_type();
+      CHECK(ele_label_types.size() > 0);
+      for (auto ele_label_type : ele_label_types) {
+        auto& triplet = ele_label_type.label();
+        auto& src_label = triplet.src_label();
+        auto& dst_label = triplet.dst_label();
+        // For OUT the triplet is used as is; for every other direction the
+        // two ends are swapped.
+        if (edge_expand_.direction() ==
+            physical::EdgeExpand_Direction::EdgeExpand_Direction_OUT) {
+          src_vertex_labels_.emplace_back(src_label.value());
+          dst_vertex_labels_.emplace_back(dst_label.value());
+        } else {
+          src_vertex_labels_.emplace_back(dst_label.value());
+          dst_vertex_labels_.emplace_back(src_label.value());
+        }
+      }
+      VLOG(10) << "extract dst vertex label: "
+               << gs::to_string(dst_vertex_labels_) << ", from meta data";
+    }
+    return *this;
+  }
+
+  // Stores a copy of the edge expand description (direction, edge tables).
+  PathExpandOpBuilder& edge_expand_opt(
+      const physical::EdgeExpand& edge_expand_pb) {
+    edge_expand_ = edge_expand_pb;
+    return *this;
+  }
+
+  // Stores a copy of the GetV description (vertex label tables).
+  PathExpandOpBuilder& getv_opt(const physical::GetV& get_v_pb) {
+    get_v_ = get_v_pb;
+    return *this;
+  }
+
+  // Stores both hop bounds. Build() only uses the upper bound.
+  PathExpandOpBuilder& hop_range(const algebra::Range& hop_range_pb) {
+    range_lower_ = hop_range_pb.lower();
+    range_upper_ = hop_range_pb.upper();
+    VLOG(10) << "got range: " << range_lower_ << " " << range_upper_;
+    return *this;
+  }
+
+  PathExpandOpBuilder& path_opt(
+      const physical::PathExpand::PathOpt& path_opt_pb) {
+    path_opt_ = path_opt_pb;
+    return *this;
+  }
+
+  PathExpandOpBuilder& result_opt(
+      const physical::PathExpand::ResultOpt& result_opt_pb) {
+    result_opt_ = result_opt_pb;
+    return *this;
+  }
+
+  // The condition is ignored; only a warning is logged.
+  PathExpandOpBuilder& condition(const common::Expression& condition_pb) {
+    LOG(WARNING) << "Skiped for path expand with condition";
+    return *this;
+  }
+
+  // Returns the generated code and records the destination labels as the
+  // head type of the context (ctx_.SetHeadType).
+  std::string Build() const {
+    std::stringstream ss;
+    int32_t input_size = ctx_.InputSize();
+    VLOG(10) << "path_expand start";
+    // Section 1 (optional): a `.map` that puts the start column, or a vec
+    // holding it for SIMPLE / ALL_V, in front of the tuple.
+    if (in_tag_id_ != -1 ||
+        path_opt_ == physical::PathExpand::PathOpt::PathExpand_PathOpt_SIMPLE ||
+        result_opt_ ==
+            physical::PathExpand::ResultOpt::PathExpand_ResultOpt_ALL_V) {
+      // move data to head
+      ss << ".map(move |";
+      write_arg_list(ss, "i", input_size);
+      ss << "| {\n";
+      int32_t input_index = 0;
+      if (in_tag_id_ != -1) {
+        input_index = ctx_.GetAliasIndex(in_tag_id_);
+      }
+      if (path_opt_ ==
+              physical::PathExpand::PathOpt::PathExpand_PathOpt_SIMPLE ||
+          result_opt_ ==
+              physical::PathExpand::ResultOpt::PathExpand_ResultOpt_ALL_V) {
+        ss << "let result = vec![i" << input_index << "];\n";
+        write_result(ss, "result", -1);
+      } else {
+        std::string result_name = "i" + std::to_string(input_index);
+        write_result(ss, result_name, -1);
+      }
+      ss << "})?\n";
+    }
+    VLOG(10) << "path_expand finished write head operator";
+
+    // Section 2: open the iteration, bounded by the upper hop count.
+    // start iterate operator
+    ss << ".iterate_emit_until(IterCondition::max_iters(" << range_upper_
+       << "), EmitKind::Before, |start| {\nstart\n";
+    // repartition for path_expand
+    ss << ".repartition(move |input| {\n";
+
+    ss << "Ok(get_partition(&input.0, workers as usize, "
+          "pegasus::get_servers_len()))\n";
+    ss << "})\n";
+    VLOG(10) << "path_expand finished write iterate_emit & repartition";
+
+    // Section 3: a `.flat_map` that pushes the neighbours of column i0 into
+    // `result`, once per edge label table.
+    // edge_expand for path_expand
+    ss << ".flat_map(move |";
+    write_arg_list(ss, "i", input_size);
+    ss << "| {\n";
+    ss << "let mut result = vec![];\n";
+    ss << "let vertex_id = CSR.get_internal_id(i0 as usize);\n";
+    int32_t edge_labels = edge_expand_.params().tables_size();
+    for (auto i = 0; i < edge_labels; i++) {
+      auto edge_label = edge_expand_.params().tables(i).id();
+      VLOG(10) << "src labels size: " << src_vertex_labels_.size();
+      VLOG(10) << "dst labels size: " << dst_vertex_labels_.size();
+      if (src_vertex_labels_.size() > 1) {
+        // Several source labels: branch on the label of i0 at runtime and
+        // pair src label j with dst label j.
+        ss << "let vertex_label = LDBCVertexParser::::get_label_id(i0 "
+              "as usize);\n";
+        for (auto j = 0; j < src_vertex_labels_.size(); ++j) {
+          VLOG(10) << "get vertex_label " << src_vertex_labels_[j];
+          if (j == 0) {
+            ss << "if vertex_label == " << src_vertex_labels_[j] << " {\n";
+          } else {
+            ss << "else if vertex_label == " << src_vertex_labels_[j] << " {\n";
+          }
+          write_edge_expand(ss, src_vertex_labels_[j], edge_label,
+                            dst_vertex_labels_[j]);
+          ss << "}";
+        }
+      } else {
+        // At most one source label: expand from the first source label to
+        // every destination label.
+        auto src_label = src_vertex_labels_[0];
+        for (auto j = 0; j < dst_vertex_labels_.size(); ++j) {
+          write_edge_expand(ss, src_label, edge_label, dst_vertex_labels_[j]);
+        }
+      }
+      ss << "\n";
+    }
+    // Each neighbour replaces column 0; the other columns are carried over.
+    ss << "Ok(result.into_iter().map(|res| (res";
+    for (auto i = 1; i < input_size; i++) {
+      ss << ", i" << i;
+    }
+    ss << ")))\n";
+    ss << "})?\n";
+    VLOG(10) << "path_expand finished write edge_expand";
+
+    // Section 4: label filtering is needed when GetV names vertex tables
+    // and some destination label is not among them.
+    // NOTE(review): in that case only the opening of a `.flat_map` is
+    // appended - no body is written before the closing "})?" below; this
+    // looks unfinished, confirm against the generated Rust.
+    // get_v for path_expand
+    std::stringstream getv_ss;
+    getv_ss << ".flat_map(move |";
+    write_arg_list(getv_ss, "i", input_size);
+    getv_ss << "| {\n";
+    bool filter_label = false;
+    std::vector vertex_labels;
+    for (auto vertex_label_pb : get_v_.params().tables()) {
+      vertex_labels.push_back(vertex_label_pb.id());
+    }
+    if (!vertex_labels.empty()) {
+      for (auto i = 0; i < dst_vertex_labels_.size(); ++i) {
+        if (std::find(vertex_labels.begin(), vertex_labels.end(),
+                      dst_vertex_labels_[i]) == vertex_labels.end()) {
+          filter_label = true;
+          break;
+        }
+      }
+    }
+    if (filter_label) {
+      ss << getv_ss.str();
+    }
+    // Written unconditionally; presumably closes the iterate_emit_until
+    // closure opened in section 2 - TODO confirm.
+    ss << "})?\n";
+
+    std::vector labels;
+    for (auto label : dst_vertex_labels_) {
+      labels.push_back(label);
+    }
+    ctx_.SetHeadType(0, labels);
+    return ss.str();
+  }
+
+ private:
+  // Writes "<name>0, <name>1, ..." for `size` arguments, wrapped in
+  // parentheses when there is more than one.
+  void write_arg_list(std::stringstream& ss, std::string arg_name,
+                      int32_t size) const {
+    if (size > 1) {
+      ss << "(";
+    }
+    for (auto i = 0; i < size; ++i) {
+      ss << arg_name << i;
+      if (i < size - 1) {
+        ss << ", ";
+      }
+    }
+    if (size > 1) {
+      ss << ")";
+    }
+  }
+
+  // Writes "Ok((<result_name>, ...))": `result_name` first, then the input
+  // columns (from i1 when the context already contains a head, else from
+  // i0). The column at `alias_index` is replaced by `result_name`, which is
+  // appended instead when alias_index equals the input size.
+  void write_result(std::stringstream& ss, std::string result_name,
+                    int32_t alias_index) const {
+    int32_t input_size = ctx_.InputSize();
+    ss << "Ok((" << result_name;
+    if (ctx_.ContainHead()) {
+      for (auto i = 1; i < input_size; i++) {
+        if (i == alias_index) {
+          ss << ", " << result_name;
+        } else {
+          ss << ", i" << i;
+        }
+      }
+      if (input_size == alias_index) {
+        ss << ", " << result_name;
+      }
+    } else {
+      for (auto i = 0; i < input_size; i++) {
+        if (i == alias_index) {
+          ss << ", " << result_name;
+        } else {
+          ss << ", i" << i;
+        }
+      }
+      if (input_size == alias_index) {
+        ss << ", " << result_name;
+      }
+    }
+    ss << "))";
+  }
+
+  // Writes the loop(s) that push the global ids of the neighbours of
+  // `vertex_id` into `result`, reading the adjacency list named
+  // EDGE_<src>_<edge>_<dst>_IN and/or _OUT according to the direction.
+  // Aborts through LOG(FATAL) on any other direction.
+  void write_edge_expand(std::stringstream& ss, LabelT src_label,
+                         int32_t edge_label, LabelT dst_label) const {
+    if (edge_expand_.direction() ==
+        physical::EdgeExpand_Direction::EdgeExpand_Direction_IN) {
+      ss << " if let Some(edges) = EDGE_" << src_label << "_" << edge_label
+         << "_" << dst_label << "_IN.get_adj_list(vertex_id) {\n";
+      ss << "for e in edges {\n";
+      ss << "result.push(CSR.get_global_id(e.neighbor).unwrap() as u64)";
+      ss << "}\n";
+      ss << "}\n";
+    } else if (edge_expand_.direction() ==
+               physical::EdgeExpand_Direction::EdgeExpand_Direction_OUT) {
+      ss << " if let Some(edges) = EDGE_" << src_label << "_" << edge_label
+         << "_" << dst_label << "_OUT.get_adj_list(vertex_id) {\n";
+      ss << "for e in edges {\n";
+      ss << "result.push(CSR.get_global_id(e.neighbor).unwrap() as u64)";
+      ss << "}\n";
+      ss << "}\n";
+    } else if (edge_expand_.direction() ==
+               physical::EdgeExpand_Direction::EdgeExpand_Direction_BOTH) {
+      ss << " if let Some(edges) = EDGE_" << src_label << "_" << edge_label
+         << "_" << dst_label << "_IN.get_adj_list(vertex_id) {\n";
+      ss << "for e in edges {\n";
+      ss << "result.push(CSR.get_global_id(e.neighbor).unwrap() as u64)";
+      ss << "}\n";
+      ss << "}\n";
+      ss << " if let Some(edges) = EDGE_" << src_label << "_" << edge_label
+         << "_" << dst_label << "_OUT.get_adj_list(vertex_id) {\n";
+      ss << "for e in edges {\n";
+      ss << "result.push(CSR.get_global_id(e.neighbor).unwrap() as u64)";
+      ss << "}\n";
+      ss << "}\n";
+    } else {
+      LOG(FATAL) << "Unsupported direction";
+    }
+  }
+
+  BuildingContext& ctx_;
+  int32_t in_tag_id_, out_tag_id_;
+  physical::EdgeExpand edge_expand_;
+  physical::GetV get_v_;
+  int32_t range_lower_, range_upper_;
+  physical::PathExpand::PathOpt path_opt_;
+  physical::PathExpand::ResultOpt result_opt_;
+  // Parallel lists filled by meta_data(): entry k of both vectors comes from
+  // the same edge triplet.
+  std::vector src_vertex_labels_;
+  std::vector dst_vertex_labels_;
+  physical::PhysicalOpr::MetaData meta_data_;
+};
+
+// edge_expand_opt
+// get_v_opt
+// path_expand_opt
+// op_code.
+template +static std::string BuildPathExpandOp( + BuildingContext& ctx, const physical::PathExpand& path_expand_pb, + const physical::PhysicalOpr::MetaData& meta_data) { + PathExpandOpBuilder builder(ctx); + if (path_expand_pb.has_start_tag()) { + builder.in_tag(path_expand_pb.start_tag().value()); + } else { + builder.in_tag(-1); + } + + if (path_expand_pb.has_alias()) { + builder.out_tag(path_expand_pb.alias().value()); + } else { + builder.out_tag(-1); + } + + return builder + .getv_opt( + path_expand_pb.base().get_v()) // get_v_opt must be called first to + // provide dst_label ids. + .edge_expand_opt(path_expand_pb.base().edge_expand()) + .hop_range(path_expand_pb.hop_range()) + .path_opt(path_expand_pb.path_opt()) + .result_opt(path_expand_pb.result_opt()) + .condition(path_expand_pb.condition()) + .meta_data(meta_data) + .Build(); +} +} // namespace pegasus +} // namespace gs + +#endif // CODEGEN_SRC_PEGASUS_PEGASUS_PATH_EXPAND_BUILDER_H_ \ No newline at end of file diff --git a/flex/codegen/src/pegasus/pegasus_project_builder.h b/flex/codegen/src/pegasus/pegasus_project_builder.h new file mode 100644 index 000000000000..09b3a840e6c6 --- /dev/null +++ b/flex/codegen/src/pegasus/pegasus_project_builder.h @@ -0,0 +1,335 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#ifndef CODEGEN_SRC_PEGASUS_PEGASUS_PROJECT_BUILDER_H_ +#define CODEGEN_SRC_PEGASUS_PEGASUS_PROJECT_BUILDER_H_ + +#include +#include +#include + +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/physical.pb.h" + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "flex/codegen/src/pb_parser/query_params_parser.h" +#include "flex/codegen/src/pegasus/pegasus_expr_builder.h" + +namespace gs { +namespace pegasus { +std::string project_mapping_to_string( + const physical::Project::ExprAlias& mapping) { + std::stringstream ss; + int32_t res_alias = mapping.alias().value(); + int32_t in_tag_id = -2; + std::vector prop_names; + std::vector data_types; + bool project_self = false; + auto& expr = mapping.expr(); + CHECK(expr.operators_size() == 1) << "can only support one variable"; + auto& expr_op = expr.operators(0); + switch (expr_op.item_case()) { + case common::ExprOpr::kVar: { + VLOG(10) << "Got var in projecting"; + auto& var = expr_op.var(); + in_tag_id = var.tag().id(); + if (var.has_property()) { + auto& prop = var.property(); + if (prop.has_id()) { + // project itself. + project_self = true; + } else if (prop.has_key()) { + prop_names.push_back(prop.key().name()); + data_types.push_back( + common_data_type_pb_2_data_type(var.node_type().data_type())); + } else { + LOG(FATAL) << "Unknown property type" << prop.DebugString(); + } + } else { + VLOG(10) << "receives no property, project itself"; + project_self = true; + } + break; + } + case common::ExprOpr::kVarMap: { + VLOG(10) << "Got variable map in projecting"; + LOG(WARNING) << "CURRENTLY we flat the var map to a list of variables"; + } + + case common::ExprOpr::kVars: { + VLOG(10) << "Got variable keys in projecting"; + // project properties to a list. + auto& vars = + expr_op.has_vars() ? 
expr_op.vars().keys() : expr_op.var_map().keys(); + for (auto i = 0; i < vars.size(); ++i) { + auto& var = vars[i]; + if (in_tag_id == -2) { + in_tag_id = var.tag().id(); + } else { + CHECK(in_tag_id == var.tag().id()) << "can only support one tag"; + } + + auto& prop = var.property(); + if (prop.has_id()) { + LOG(FATAL) << "Not support project id in projecting with vars"; + } else if (prop.has_key()) { + prop_names.push_back(prop.key().name()); + data_types.push_back( + common_data_type_pb_2_data_type(var.node_type().data_type())); + } else { + LOG(FATAL) << "Unknown property type" << prop.DebugString(); + } + } + break; + } + + default: + LOG(FATAL) << "Unknown variable type"; + } + + if (project_self) { + VLOG(10) << "Projecting self"; + CHECK(prop_names.size() == 0 && data_types.size() == 0); + ss << PROJECT_SELF_STR << "<" << in_tag_id << ", " << res_alias << ">()"; + } else { + VLOG(10) << "Projecting properties" << gs::to_string(prop_names); + CHECK(prop_names.size() == data_types.size()); + CHECK(prop_names.size() > 0); + ss << PROJECT_PROPS_STR << "<" << in_tag_id << ", " << res_alias; + for (auto i = 0; i < data_types.size(); ++i) { + ss << "," << data_type_2_string(data_types[i]); + } + ss << ">({"; + for (auto i = 0; i < prop_names.size() - 1; ++i) { + ss << "\"" << prop_names[i] << "\", "; + } + ss << "\"" << prop_names[prop_names.size() - 1] << "\""; + ss << "})"; + } + + return ss.str(); +} + +class ProjectOpBuilder { + public: + ProjectOpBuilder(BuildingContext& ctx) : ctx_(ctx) {} + + ProjectOpBuilder& operator_index(const int32_t operator_index) { + operator_index_ = operator_index; + return *this; + } + + ProjectOpBuilder& is_append(bool is_append) { + is_append_ = is_append; + return *this; + } + + ProjectOpBuilder& add_mapping(const physical::Project::ExprAlias& mapping) { + mappings_.push_back(mapping); + return *this; + } + + ProjectOpBuilder& meta_data( + const std::vector& meta_data) { + meta_data_ = meta_data; + return *this; + } + + // 
return make_project code and call project code. + std::string Build() const { + std::stringstream ss; + std::string head_code = write_head(); + + std::string project_body_code; + for (auto i = 0; i < mappings_.size(); ++i) { + project_body_code += project_map_to_code(i); + } + if (is_append_) { + LOG(FATAL) << "Unsupported type"; + } else { + ctx_.SetHead(false); + ctx_.ResetAlias(); + for (auto i = 0; i < mappings_.size(); i++) { + int32_t output_alias = mappings_[i].alias().value(); + ctx_.SetAlias(output_alias); + VLOG(10) << "Set alias " << output_alias << ", index " + << ctx_.GetAliasIndex(output_alias); + auto column_meta = meta_data_[i]; + VLOG(10) << "Get meta"; + if (column_meta.type().has_graph_type()) { + std::vector data_types; + data_types.push_back(codegen::DataType::kInt64); + ctx_.SetOutput(i, data_types); + } else if (column_meta.type().type_case() == + common::IrDataType::kDataType) { + switch (column_meta.type().data_type()) { + case common::DataType::INT64: { + std::vector data_types; + data_types.push_back(codegen::DataType::kInt64); + ctx_.SetOutput(i, data_types); + break; + } + case common::DataType::STRING: { + std::vector data_types; + data_types.push_back(codegen::DataType::kString); + ctx_.SetOutput(i, data_types); + break; + } + default: + std::vector data_types; + data_types.push_back(codegen::DataType::kString); + ctx_.SetOutput(i, data_types); + // LOG(FATAL) << "Unsupported type"; + } + } + } + } + VLOG(10) << "Project done"; + std::string end_code = write_end(); + return head_code + project_body_code + end_code; + } + + private: + std::string write_head() const { + boost::format head_fmter("let stream_%1% = stream_%2%.map(move |%3%| {\n"); + int32_t input_size = ctx_.InputSize(); + std::string input_params = generate_arg_list("i", input_size); + head_fmter % operator_index_ % (operator_index_ - 1) % input_params; + return head_fmter.str(); + } + + std::string project_map_to_code(int32_t index) const { + std::vector prop_names; + 
std::vector data_types; + auto& expr = mappings_[index].expr(); + auto expr_builder = ExprBuilder(ctx_); + VLOG(10) << "operators size is: " << expr.operators().size() << "\n"; + expr_builder.AddAllExprOpr(expr.operators()); + + std::string expression; + std::vector var_names; + std::vector var_tags; + std::vector properties; + std::vector case_exprs; + std::tie(expression, var_names, var_tags, properties, case_exprs) = + expr_builder.BuildRust(); + VLOG(10) << "Start build expr"; + + std::string vars_code; + for (auto i : case_exprs) { + vars_code += i; + } + for (auto i = 0; i < var_names.size(); i++) { + int32_t input_index; + std::pair> input_type; + VLOG(10) << "Input tag is " << var_tags[i]; + if (var_tags[i] == -1) { + input_index = 0; + input_type = ctx_.GetHeadType(); + } else { + input_index = ctx_.GetAliasIndex(var_tags[i]); + input_type = ctx_.GetAliasType(var_tags[i]); + } + VLOG(10) << "Property is " << properties[i].var_name << ", var name is " + << var_names[i]; + + boost::format itself_fmter("let %1% = i%2%;\n"); + if (properties[i].var_name == "none") { + itself_fmter % var_names[i] % input_index; + vars_code += itself_fmter.str(); + } else { + CHECK(input_type.first == 0); + if (input_type.second.size() == 1) { + boost::format property_fmter( + "let vertex_id = CSR.get_internal_id(i%1% as usize);\n" + "let %1% = %2%[vertex_id];"); + int32_t label_id = input_type.second[0]; + std::string property_name = + get_vertex_prop_column_name(properties[i].var_name, label_id); + property_fmter % var_names[i] % property_name; + vars_code += property_fmter.str(); + } else { + boost::format properties_fmter( + "let vertex_id = CSR.get_internal_id(i%1% as usize);\n" + "let vertex_label = LDBCVertexParser::::get_label_id(i%1% " + "as usize);\n" + "let %2% = \n" + "%3%" // get property for differen labels + "else {\n" + "panic!(\"Unexpected label: {}\", vertex_label)" + "};\n"); + + std::string condition_code; + for (auto j = 0; j < input_type.second.size(); j++) 
{ + boost::format condition_fmter( + "if vertex_label == %1% {\n" + "%2%[vertex_id]\n" + "}\n"); + std::string property_name = get_vertex_prop_column_name( + properties[i].var_name, input_type.second[j]); + int32_t label_id = input_type.second[j]; + condition_fmter % label_id % property_name; + if (j > 0) { + condition_code += "else "; + } + condition_code += condition_fmter.str(); + } + + properties_fmter % input_index % var_names[i] % condition_code; + vars_code += properties_fmter.str(); + } + } + } + + boost::format map_fmter( + "%1%\n" + "let output%2% = %3%;\n"); + map_fmter % vars_code % index % expression; + VLOG(10) << "Finished build mapping"; + return map_fmter.str(); + } + + std::string write_end() const { + boost::format end_fmter("Ok(%1%)\n})?;\n"); + std::string output_params = generate_arg_list("output", mappings_.size()); + end_fmter % output_params; + return end_fmter.str(); + } + + BuildingContext& ctx_; + int32_t operator_index_; + bool is_append_; + std::vector mappings_; + std::vector meta_data_; +}; + +static std::string BuildProjectOp( + BuildingContext& ctx, int32_t operator_index, + const physical::Project& project_pb, + const std::vector& meta_data) { + ProjectOpBuilder builder(ctx); + builder.is_append(project_pb.is_append()); + auto& mappings = project_pb.mappings(); + for (auto i = 0; i < mappings.size(); ++i) { + builder.add_mapping(mappings[i]); + } + return builder.operator_index(operator_index).meta_data(meta_data).Build(); +} +} // namespace pegasus +} // namespace gs + +#endif // CODEGEN_SRC_PEGASUS_PEGASUS_PROJECT_BUILDER_H_ diff --git a/flex/codegen/src/pegasus/pegasus_repartition_builder.h b/flex/codegen/src/pegasus/pegasus_repartition_builder.h new file mode 100644 index 000000000000..e2d6ef2954a7 --- /dev/null +++ b/flex/codegen/src/pegasus/pegasus_repartition_builder.h @@ -0,0 +1,84 @@ +/** Copyright 2020 Alibaba Group Holding Limited. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef CODEGEN_SRC_PEGASUS_PEGASUS_REPARTITION_BUILDER_H_ +#define CODEGEN_SRC_PEGASUS_PEGASUS_REPARTITION_BUILDER_H_ + +#include +#include +#include + +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/physical.pb.h" + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "flex/codegen/src/pb_parser/query_params_parser.h" +#include "flex/codegen/src/pegasus/pegasus_expr_builder.h" + +namespace gs { +namespace pegasus { +class PepartitionOpBuilder { + public: + PepartitionOpBuilder(BuildingContext& ctx) : ctx_(ctx) {} + + PepartitionOpBuilder& operator_index(const int32_t operator_index) { + operator_index_ = operator_index; + return *this; + } + + PepartitionOpBuilder& input_tag(const int32_t input_tag) { + in_tag_ = input_tag; + return *this; + } + + // return make_project code and call project code. 
+ std::string Build() const { + boost::format repartition_fmter( + "let stream_%1% = stream_%2%.repartition(move |input| {\n" + "Ok(get_partition(&input.%3%, workers as usize, " + "pegasus::get_servers_len()))\n" + "});\n"); + int32_t index; + if (in_tag_ == -1) { + index = 0; + } else { + index = ctx_.GetAliasIndex(in_tag_); + } + repartition_fmter % operator_index_ % (operator_index_ - 1) % index; + return repartition_fmter.str(); + } + + private: + BuildingContext& ctx_; + int32_t operator_index_; + int32_t in_tag_ = -1; +}; + +static std::string BuildRepartitionOp( + BuildingContext& ctx, int32_t operator_index, + const physical::Repartition& repartition_pb, + const physical::PhysicalOpr::MetaData& meta_data) { + PepartitionOpBuilder builder(ctx); + if (repartition_pb.to_another().has_shuffle_key()) { + builder.input_tag(repartition_pb.to_another().shuffle_key().value()); + } + return builder.operator_index(operator_index).Build(); +} +} // namespace pegasus +} // namespace gs + +#endif // CODEGEN_SRC_PEGASUS_PEGASUS_REPARTITION_BUILDER_H_ diff --git a/flex/codegen/src/pegasus/pegasus_scan_builder.h b/flex/codegen/src/pegasus/pegasus_scan_builder.h new file mode 100644 index 000000000000..0eca8a352f14 --- /dev/null +++ b/flex/codegen/src/pegasus/pegasus_scan_builder.h @@ -0,0 +1,238 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#ifndef CODEGEN_SRC_PEGASUS_PEGASUS_SCAN_BUILDER_H_ +#define CODEGEN_SRC_PEGASUS_PEGASUS_SCAN_BUILDER_H_ + +#include +#include +#include + +#include + +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/physical.pb.h" + +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/physical.pb.h" + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "flex/codegen/src/pb_parser/query_params_parser.h" + +namespace gs { +namespace pegasus { + +class ScanOpBuilder { + public: + ScanOpBuilder(BuildingContext& ctx) : ctx_(ctx) {} + + ScanOpBuilder& operator_index(const int32_t operator_index) { + operator_index_ = operator_index; + return *this; + } + + ScanOpBuilder& scanOpt(const physical::Scan::ScanOpt& opt) { + if (opt != physical::Scan::ScanOpt::Scan_ScanOpt_VERTEX) { + throw std::runtime_error( + std::string("Currently only support from vertex")); + } + scan_opt_ = opt; + return *this; + } + + ScanOpBuilder& resAlias(const int32_t& res_alias) { + res_alias_ = res_alias; + return *this; + } + + // get required oid from query params + ScanOpBuilder& queryParams(const algebra::QueryParams& query_params) { + query_params_ = query_params; + return *this; + } + + std::string Build() const { + VLOG(10) << "[Scan Builder] Start build scan operator"; + + boost::format scan_fmter("%1%%2%%3%"); + + VLOG(10) << "[Scan Builder] Start write head"; + std::string head_code = write_head(); + + std::vector label_ids; + try_to_get_label_id_from_query_params(query_params_, label_ids); + int32_t label_nums = label_ids.size(); + + auto& predicate = query_params_.predicate(); + auto expr_builder = ExprBuilder(ctx_); + VLOG(10) << "operators size is: " << predicate.operators().size() << "\n"; + expr_builder.AddAllExprOpr(predicate.operators()); + + std::string predicate_expr; + std::vector var_names; + 
std::vector var_tags; + std::vector properties; + std::vector case_exprs; + std::tie(predicate_expr, var_names, var_tags, properties, case_exprs) = + expr_builder.BuildRust(); + + VLOG(10) << "[Scan Builder] Start write scan body"; + std::string scan_body_code; + for (auto i = 0; i < label_nums; i++) { + for (auto property : properties) { + ctx_.AddVertexProperty(label_ids[i], property); + } + scan_body_code += write_scan_body(i, label_ids[i], predicate_expr, + var_names, properties); + } + + VLOG(10) << "[Scan Builder] Start write end"; + std::string end_code = write_end(); + + VLOG(10) << "[Scan Builder] Set output"; + ctx_.SetHead(true); + ctx_.SetHeadType(0, label_ids); + if (res_alias_ != -1) { + ctx_.SetAliasType(res_alias_, 0, label_ids); + } + + std::vector output; + output.push_back(codegen::DataType::kInt64); + ctx_.SetOutput(0, output); + if (res_alias_ != -1) { + ctx_.SetOutput(1, output); + } + + scan_fmter % head_code % scan_body_code % end_code; + return scan_fmter.str(); + } + + private: + std::string write_head() const { + boost::format head_fmter( + "let stream_%1% = stream_%2%.flat_map(move |_| {\n" + "let mut result = vec![];\n"); + head_fmter % operator_index_ % (operator_index_ - 1); + return head_fmter.str(); + } + + std::string write_scan_body( + int32_t index, const int32_t& label_id, const std::string& predicate_expr, + const std::vector& var_names, + const std::vector& properties) const { + boost::format scan_body_fmter( + "let vertex_%1%_num = CSR.get_vertices_num(%2%);\n" + "let vertex_%1%_local_num = vertex_%1%_num / workers as usize +1;\n" + "let mut vertex_%1%_start = vertex_%1%_local_num * worker_id as " + "usize;\n" + "let mut vertex_%1%_end = vertex_%1%_local_num * (worker_id + 1) as " + "usize;\n" + "vertex_%1%_start = std::cmp::min(vertex_%1%_start, vertex_%1%_num);\n" + "vertex_%1%_end = std::cmp::min(vertex_%1%_end, vertex_%1%_num);\n" + "for i in vertex_%1%_start..vertex_%1%_end { \n" + "%3%" // Filter by prediction + "}\n"); 
+ + // Generate predicate code + std::string predicate_code; + if (query_params_.has_predicate()) { + predicate_code = + scan_with_expression(label_id, predicate_expr, var_names, properties); + } else { + predicate_code = scan_without_expression(label_id); + } + scan_body_fmter % index % label_id % predicate_code; + return scan_body_fmter.str(); + } + + std::string scan_with_expression( + const int32_t& label_id, const std::string& predicate_expr, + const std::vector& var_names, + const std::vector& properties) const { + boost::format scan_vertex_fmter( + "%1%" + "if %2% {\n" + "let vertex_global_id = CSR.get_global_id(i, %3%).unwrap() as u64;\n" + "result.push(vertex_global_id);\n" + "}\n"); + + std::string vars_code; + for (auto i = 0; i < var_names.size(); i++) { + std::string var_name = var_names[i]; + std::string prop_name = properties[i].var_name; + std::string prop_column_name = + get_vertex_prop_column_name(prop_name, label_id); + + boost::format var_fmter("let %1% = %2%[i];\n"); + var_fmter % var_name % prop_column_name; + vars_code += var_fmter.str(); + } + scan_vertex_fmter % vars_code % predicate_expr % label_id; + return scan_vertex_fmter.str(); + } + + std::string scan_without_expression(const int32_t& label_id) const { + boost::format scan_vertex_fmter( + "let vertex_global_id = CSR.get_global_id(i, %1%).unwrap() as u64;\n" + "result.push(vertex_global_id);\n"); + scan_vertex_fmter % label_id; + return scan_vertex_fmter.str(); + } + + std::string write_end() const { + boost::format end_fmter( + "Ok(result.into_iter()%1%)\n" + "})?;\n"); + + std::string map_code; + if (res_alias_ != -1) { + ctx_.SetAlias(res_alias_); + map_code = ".map(|res| (res, res))"; + } + end_fmter % map_code; + return end_fmter.str(); + } + + int32_t operator_index_; + BuildingContext& ctx_; + physical::Scan::ScanOpt scan_opt_; + algebra::QueryParams query_params_; + int res_alias_; +}; + +static std::string BuildScanOp( + BuildingContext& ctx, int32_t operator_index, const 
physical::Scan& scan_pb, + const physical::PhysicalOpr::MetaData& meta_data) { + if (!scan_pb.has_params()) { + throw std::runtime_error(std::string("expect scan pb has params")); + } + auto builder = ScanOpBuilder(ctx).scanOpt(scan_pb.scan_opt()); + if (scan_pb.has_alias()) { + VLOG(10) << "scan pb has alias" << scan_pb.alias().value(); + builder.resAlias(scan_pb.alias().value()); + } else { + builder.resAlias(-1); + } + return builder.operator_index(operator_index) + .queryParams(scan_pb.params()) + .Build(); +} + +} // namespace pegasus +} // namespace gs + +#endif // CODEGEN_SRC_PEGASUS_PEGASUS_SCAN_BUILDER_H_ diff --git a/flex/codegen/src/pegasus/pegasus_select_builder.h b/flex/codegen/src/pegasus/pegasus_select_builder.h new file mode 100644 index 000000000000..29935648c7b8 --- /dev/null +++ b/flex/codegen/src/pegasus/pegasus_select_builder.h @@ -0,0 +1,162 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#ifndef CODEGEN_SRC_PEGASUS_PEGASUS_SELECT_BUILDER_H_ +#define CODEGEN_SRC_PEGASUS_PEGASUS_SELECT_BUILDER_H_ + +#include +#include + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/expr.pb.h" +#include "proto_generated_gie/physical.pb.h" + +namespace gs { +namespace pegasus { +class SelectOpBuilder { + public: + SelectOpBuilder(BuildingContext& ctx) : ctx_(ctx) {} + + SelectOpBuilder& operator_index(const int32_t operator_index) { + operator_index_ = operator_index; + return *this; + } + + SelectOpBuilder& predicate(common::Expression expr) { + expr_ = expr; + return *this; + } + + std::string Build() { + VLOG(10) << "Start build select"; + + int32_t input_size = ctx_.InputSize(); + boost::format select_head_fmter( + "let stream_%1% = stream_%2%\n" + ".filter_map(move |%3%| {\n"); + std::string input_params = generate_arg_list("i", input_size); + select_head_fmter % operator_index_ % (operator_index_ - 1) % input_params; + + auto expr_builder = ExprBuilder(ctx_); + expr_builder.AddAllExprOpr(expr_.operators()); + std::string predicate_expr; + std::vector var_names; + std::vector var_tags; + std::vector properties; + std::vector case_exprs; + std::tie(predicate_expr, var_names, var_tags, properties, case_exprs) = + expr_builder.BuildRust(); + + // Codegen for property expression + std::string vars_code; + for (auto i : case_exprs) { + vars_code += i; + } + for (auto i = 0; i < var_names.size(); i++) { + int32_t input_index; + std::pair> input_type; + VLOG(10) << "Input tag is " << var_tags[i]; + if (var_tags[i] == -1) { + input_index = 0; + input_type = ctx_.GetHeadType(); + } else { + input_index = ctx_.GetAliasIndex(var_tags[i]); + input_type = ctx_.GetAliasType(var_tags[i]); + } + VLOG(10) << "Property is " << properties[i].var_name << ", var name is " + 
<< var_names[i]; + + boost::format itself_fmter("let %1% = i%2%;\n"); + boost::format property_fmter( + "let vertex_id = CSR.get_internal_id(i%1% as usize);\n" + "%2%"); + if (properties[i].var_name == "none") { + itself_fmter % var_names[i] % input_index; + vars_code += itself_fmter.str(); + } else { + CHECK(input_type.first == 0); + if (input_type.second.size() == 1) { + boost::format property_fmter("let %1% = %2%[vertex_id];\n"); + int32_t label_id = input_type.second[0]; + std::string property_name = + get_vertex_prop_column_name(properties[i].var_name, label_id); + property_fmter % var_names[i] % property_name; + vars_code += property_fmter.str(); + } else { + boost::format properties_fmter( + "let vertex_label = LDBCVertexParser::::get_label_id(i%1% " + "as usize);\n" + "let %2% = \n" + "%3%" // get property for differen labels + "else {\n" + "panic!(\"Unexpected label: {}\", vertex_label)" + "}\n"); + + std::string condition_code; + for (auto j = 0; j < input_type.second.size(); j++) { + boost::format condition_fmter( + "if vertex_label == %1% {\n" + "%2%[vertex_id]\n" + "}\n"); + std::string property_name = + get_vertex_prop_column_name(properties[i].var_name, j); + int32_t label_id = input_type.second[j]; + condition_fmter % label_id % property_name; + if (j > 0) { + condition_code += "else"; + } + condition_code += condition_fmter.str(); + } + + properties_fmter % input_index % var_names[i] % condition_code; + vars_code += properties_fmter.str(); + } + } + } + + boost::format select_result_fmter( + "%1%\n" + "if %2% {\n" + "Ok(Some(%3%))\n" + "} else {\n" + "Ok(None)\n" + "}\n" + "})?;\n"); + select_result_fmter % vars_code % predicate_expr % input_params; + + return select_head_fmter.str() + select_result_fmter.str(); + } + + private: + BuildingContext ctx_; + int32_t operator_index_; + common::Expression expr_; +}; + +static std::string BuildSelectOp( + BuildingContext& ctx, int32_t operator_index, + const algebra::Select& select_pb, + const 
physical::PhysicalOpr::MetaData& meta_data) { + SelectOpBuilder builder(ctx); + builder.predicate(select_pb.predicate()); + return builder.operator_index(operator_index).Build(); +} + +} // namespace pegasus +} // namespace gs +#endif // CODEGEN_SRC_PEGASUS_PEGASUS_SELECT_BUILDER_H_ diff --git a/flex/codegen/src/pegasus/pegasus_sink_builder.h b/flex/codegen/src/pegasus/pegasus_sink_builder.h new file mode 100644 index 000000000000..b2f4e20f5b43 --- /dev/null +++ b/flex/codegen/src/pegasus/pegasus_sink_builder.h @@ -0,0 +1,63 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#ifndef CODEGEN_SRC_PEGASUS_PEGASUS_SINK_BUILDER_H_ +#define CODEGEN_SRC_PEGASUS_PEGASUS_SINK_BUILDER_H_ + +#include +#include +#include + +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/physical.pb.h" + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "flex/codegen/src/pb_parser/query_params_parser.h" +#include "flex/codegen/src/pegasus/pegasus_expr_builder.h" + +namespace gs { +namespace pegasus { +class SinkOpBuilder { + public: + SinkOpBuilder(BuildingContext& ctx) : ctx_(ctx) {} + + SinkOpBuilder& operator_index(const int32_t operator_index) { + operator_index_ = operator_index; + return *this; + } + + std::string Build() { + boost::format sink_fmter("stream_%1%.sink_into(output)\n"); + sink_fmter % (operator_index_ - 1); + return sink_fmter.str(); + } + + private: + BuildingContext ctx_; + int32_t operator_index_; +}; + +std::string BuildSinkOp(BuildingContext& ctx, int32_t operator_index, + const physical::Sink& sink_op_pb, + const physical::PhysicalOpr::MetaData& meta_data) { + SinkOpBuilder builder(ctx); + return builder.operator_index(operator_index).Build(); +} +} // namespace pegasus +} // namespace gs + +#endif // CODEGEN_SRC_PEGASUS_PEGASUS_SINK_BUILDER_H_ diff --git a/flex/codegen/src/pegasus/pegasus_unfold_builder.h b/flex/codegen/src/pegasus/pegasus_unfold_builder.h new file mode 100644 index 000000000000..5774a51e82f1 --- /dev/null +++ b/flex/codegen/src/pegasus/pegasus_unfold_builder.h @@ -0,0 +1,90 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef CODEGEN_SRC_PEGASUS_PEGASUS_UNFOLD_BUILDER_H_ +#define CODEGEN_SRC_PEGASUS_PEGASUS_UNFOLD_BUILDER_H_ + +#include +#include + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "flex/codegen/src/pegasus/pegasus_repartition_builder.h" +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/expr.pb.h" + +namespace gs { +namespace pegasus { +class UnfoldOpBuilder { + public: + UnfoldOpBuilder(BuildingContext& ctx) : ctx_(ctx) {} + + UnfoldOpBuilder& operator_index(const int32_t operator_index) { + operator_index_ = operator_index; + return *this; + } + + UnfoldOpBuilder& add_tag(int32_t in_tag_id, int32_t out_tag_id) { + in_tag_id_ = in_tag_id; + out_tag_id_ = out_tag_id; + return *this; + } + + std::string Build() { + auto input_size = ctx_.InputSize(); + + boost::format unfold_fmter( + "let stream_%1% = stream_%2%\n" + ".flat_map(|%3%| {\n" + "Ok(i%4%.into_iter().map(|res| Ok(%5%)))\n" + "})?;\n"); + + std::string input_params = generate_arg_list("i", input_size); + + int32_t input_index = 0; + if (in_tag_id_ != -1) { + input_index = ctx_.GetAliasIndex(in_tag_id_); + } + ctx_.SetAlias(out_tag_id_); + auto outputs = ctx_.GetOutput(); + auto output_index = ctx_.GetAliasIndex(out_tag_id_); + ctx_.SetOutput(0, outputs[input_index]); + ctx_.SetOutput(output_index, outputs[input_index]); + std::string output_params = generate_output_list( + "i", input_size, "res", output_index, ctx_.ContainHead()); + + 
unfold_fmter % operator_index_ % (operator_index_ - 1) % input_params % + input_index % output_params; + return unfold_fmter.str(); + } + + private: + BuildingContext& ctx_; + int32_t operator_index_; + int32_t in_tag_id_, out_tag_id_; +}; + +static std::string BuildUnfoldOp( + BuildingContext& ctx, int32_t operator_index, + const physical::Unfold& unfold_pb, + const physical::PhysicalOpr::MetaData& meta_data) { + UnfoldOpBuilder builder(ctx); + builder.add_tag(unfold_pb.tag().value(), unfold_pb.alias().value()); + return builder.operator_index(operator_index).Build(); +} +} // namespace pegasus +} // namespace gs +#endif // CODEGEN_SRC_PEGASUS_PEGASUS_UNFOLD_BUILDER_H_ diff --git a/flex/codegen/src/pegasus/pegasus_union_builder.h b/flex/codegen/src/pegasus/pegasus_union_builder.h new file mode 100644 index 000000000000..2d65087bf74f --- /dev/null +++ b/flex/codegen/src/pegasus/pegasus_union_builder.h @@ -0,0 +1,227 @@ + +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#ifndef CODEGEN_SRC_PEGASUS_PEGASUS_UNION_BUILDER_H_ +#define CODEGEN_SRC_PEGASUS_PEGASUS_UNION_BUILDER_H_ + +#include +#include + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/codegen_utils.h" +#include "flex/codegen/src/graph_types.h" +#include "proto_generated_gie/algebra.pb.h" +#include "proto_generated_gie/common.pb.h" +#include "proto_generated_gie/expr.pb.h" +#include "proto_generated_gie/physical.pb.h" + +namespace gs { +namespace pegasus { +class UnionOpBuilder { + public: + UnionOpBuilder(BuildingContext& ctx) : ctx_(ctx) {} + + UnionOpBuilder& operator_index(const int32_t operator_index) { + operator_index_ = operator_index; + return *this; + } + + UnionOpBuilder& add_plan(const physical::PhysicalPlan& plan) { + sub_plans_.push_back(plan); + return *this; + } + + std::string Build() { + VLOG(10) << "Start build union"; + int32_t sub_plan_size = sub_plans_.size(); + + boost::format union_fmter( + "let stream_%1% = {\n" + "%2%" // stream copied + "%3%" // sub_plan_code + "%4%" // union stream + "}\n"); + + std::string copied_code = write_copied_code(); + + // codegen for sub plans + std::stringstream plan_ss; + for (auto i = 0; i < sub_plan_size; i++) { + auto sub_ctx = ctx_.CreateSubTaskContext(); + auto sub_plan = sub_plans_[i]; + plan_ss << generate_sub_plan(sub_ctx, sub_plan, i); + + // aggregate sub plan context + } + std::string merge_code = write_merge_code(); + + union_fmter % operator_index_ % copied_code % plan_ss.str() % merge_code; + return union_fmter.str(); + } + + private: + std::string write_copied_code() { + boost::format copied_code_fmter( + "let stream_%1%_0 = stream_%2%;\n" + "%3%" // code for copied + ); + std::stringstream copied_ss; + for (auto i = 0; i < sub_plans_.size() - 1; i++) { + boost::format copied_fmter( + "let (mut stream_%1%_%2%, mut stream_%1%_%3%) = " + "stream_%1%_%2%.copied();\n"); + copied_fmter % operator_index_ % i % (i + 1); + copied_ss << copied_fmter.str(); + } + 
copied_code_fmter % operator_index_ % (operator_index_ - 1) % + copied_ss.str(); + return copied_code_fmter.str(); + } + + std::string generate_sub_plan(BuildingContext& sub_plan_context, + physical::PhysicalPlan& sub_plan, + int32_t index) { + boost::format union_fmter( + "stream_%1%_%2% = {\n" + "let stream_0 = stream_%1%_%2%;\n" + "%3%" + "};\n"); + std::stringstream sub_plan_code_ss; + for (auto i = 0; i < sub_plan.plan_size(); i++) { + auto op = sub_plan.plan(i); + auto& meta_datas = op.meta_data(); + + auto opr = op.opr(); + switch (opr.op_kind_case()) { + case physical::PhysicalOpr::Operator::kRepartition: { + physical::PhysicalOpr::MetaData meta_data; + + VLOG(10) << "Found a repartition operator"; + auto& repartition_op = opr.repartition(); + auto repartition_codegen = pegasus::BuildRepartitionOp( + sub_plan_context, i + 1, repartition_op, meta_data); + VLOG(10) << repartition_codegen; + sub_plan_code_ss << repartition_codegen; + break; + } + case physical::PhysicalOpr::Operator::kGroupBy: { + std::vector meta_datas; + for (auto i = 0; i < op.meta_data_size(); i++) { + meta_datas.push_back(op.meta_data(i)); + } + + VLOG(10) << "Found a groupby operator"; + auto& groupby_op = opr.group_by(); + + sub_plan_code_ss << pegasus::BuildGroupByOp(sub_plan_context, i + 1, + groupby_op, meta_datas); + break; + } + case physical::PhysicalOpr::Operator::kOrderBy: { + physical::PhysicalOpr::MetaData meta_data; + + VLOG(10) << "Found a order_by operator"; + auto& orderby_op = opr.order_by(); + + sub_plan_code_ss << pegasus::BuildOrderByOp(sub_plan_context, i + 1, + orderby_op, meta_data); + break; + } + case physical::PhysicalOpr::Operator::kProject: { + std::vector meta_data; + for (auto i = 0; i < op.meta_data_size(); i++) { + meta_data.push_back(op.meta_data(i)); + } + + VLOG(10) << "Found a project operator"; + auto& project_op = opr.project(); + + sub_plan_code_ss << pegasus::BuildProjectOp(sub_plan_context, i + 1, + project_op, meta_data); + break; + } + case 
physical::PhysicalOpr::Operator::kEdge: { // edge expand + auto& meta_data = meta_datas[0]; + VLOG(10) << "Found a edge expand operator"; + auto& edge_op = opr.edge(); + auto edge_codegen = pegasus::BuildEdgeExpandOp( + sub_plan_context, i + 1, edge_op, meta_data); + VLOG(10) << edge_codegen; + sub_plan_code_ss << edge_codegen; + break; + } + case physical::PhysicalOpr::Operator::kVertex: { + physical::PhysicalOpr::MetaData meta_data; + + VLOG(10) << "Found a get_v operator"; + auto& vertex_op = opr.vertex(); + auto vertex_codegen = pegasus::BuildGetVOp( + sub_plan_context, i + 1, vertex_op, meta_data); + VLOG(10) << vertex_codegen; + sub_plan_code_ss << vertex_codegen; + + break; + } + case physical::PhysicalOpr::Operator::kDedup: { + physical::PhysicalOpr::MetaData meta_data; + VLOG(10) << "Found a dedup operator"; + auto& dedup_op = opr.dedup(); + auto dedup_codegen = + pegasus::BuildDedupOp(sub_plan_context, i + 1, dedup_op, meta_data); + VLOG(10) << dedup_codegen; + sub_plan_code_ss << dedup_codegen; + break; + } + default: + LOG(FATAL) << "Not supproted in union."; + } + } + union_fmter % operator_index_ % index % sub_plan_code_ss.str(); + return union_fmter.str(); + } + + std::string write_merge_code() { + boost::format merge_code_fmter( + "let result_stream = stream_%1%_0%2%;\n" + "result_stream"); + std::stringstream merge_ss; + for (auto i = 1; i < sub_plans_.size(); i++) { + boost::format merge_fmter(".merge(stream_%1%_%2%)?"); + merge_fmter % operator_index_ % i; + merge_ss << merge_fmter.str(); + } + merge_code_fmter % operator_index_ % merge_ss.str(); + return merge_code_fmter.str(); + } + + BuildingContext ctx_; + int32_t operator_index_; + std::vector sub_plans_; +}; + +static std::string BuildUnionOp( + BuildingContext& ctx, int32_t operator_index, + const physical::Union& union_pb, + const physical::PhysicalOpr::MetaData& meta_data) { + UnionOpBuilder builder(ctx); + for (auto i = 0; i < union_pb.sub_plans_size(); i++) { + 
builder.add_plan(union_pb.sub_plans(i)); + } + return builder.operator_index(operator_index).Build(); +} + +} // namespace pegasus +} // namespace gs +#endif // CODEGEN_SRC_PEGASUS_PEGASUS_UNION_BUILDER_H_ diff --git a/flex/codegen/src/pegasus_generator.h b/flex/codegen/src/pegasus_generator.h new file mode 100644 index 000000000000..c35b5720e937 --- /dev/null +++ b/flex/codegen/src/pegasus_generator.h @@ -0,0 +1,354 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#ifndef HQPS_CODEGEN_SRC_PEGASUS_GENERATOR_H_ +#define HQPS_CODEGEN_SRC_PEGASUS_GENERATOR_H_ + +#include +#include + +#include "google/protobuf/message.h" +#include "google/protobuf/text_format.h" +#include "google/protobuf/util/json_util.h" +#include "proto_generated_gie/physical.pb.h" + +#include "flex/codegen/src/building_context.h" +#include "flex/codegen/src/pegasus/pegasus_dedup_builder.h" +#include "flex/codegen/src/pegasus/pegasus_edge_expand_builder.h" +#include "flex/codegen/src/pegasus/pegasus_get_v_builder.h" +#include "flex/codegen/src/pegasus/pegasus_group_by_builder.h" +#include "flex/codegen/src/pegasus/pegasus_intersect_builder.h" +#include "flex/codegen/src/pegasus/pegasus_join_builder.h" +#include "flex/codegen/src/pegasus/pegasus_limit_builder.h" +#include "flex/codegen/src/pegasus/pegasus_order_by_builder.h" +#include "flex/codegen/src/pegasus/pegasus_path_expand_builder.h" +#include "flex/codegen/src/pegasus/pegasus_project_builder.h" +#include "flex/codegen/src/pegasus/pegasus_repartition_builder.h" +#include "flex/codegen/src/pegasus/pegasus_scan_builder.h" +#include "flex/codegen/src/pegasus/pegasus_select_builder.h" +#include "flex/codegen/src/pegasus/pegasus_sink_builder.h" +#include "flex/codegen/src/pegasus/pegasus_unfold_builder.h" +#include "flex/codegen/src/pegasus/pegasus_union_builder.h" + +namespace gs { + +// Entrance for generating a parameterized query +class PegasusGenerator { + public: + PegasusGenerator(BuildingContext& ctx, std::string query_name, + const physical::PhysicalPlan& plan) + : ctx_(ctx), query_name_(query_name), plan_(plan) {} + + std::string GenerateQuery() { + std::stringstream header, exprs, query_body; + addHeaders(header); + + addQueryBody(query_body); + + startQueryFunc( + exprs); // prepend function signature after visiting all operators + + addProperties(exprs); + + endQueryFunc( + query_body); // append function call after visiting all operators + return header.str() + exprs.str() + 
query_body.str(); + } + + private: + // add dependency for query function + void addHeaders(std::stringstream& ss) const { + ss << "use std::collections::{HashMap, HashSet};\n"; + ss << "use mcsr::columns::*;\n"; + ss << "use mcsr::graph_db_impl::{CsrDB, SingleSubGraph, SubGraph};\n"; + ss << "use mcsr::ldbc_parser::LDBCVertexParser;\n"; + ss << "use pegasus::api::*;\n"; + ss << "use pegasus::errors::BuildJobError;\n"; + ss << "use pegasus::result::ResultSink;\n"; + ss << "use pegasus::{get_servers_len, JobConf};\n"; + ss << "use crate::utils::*;\n"; + ss << std::endl; + LOG(INFO) << "Finish adding headers"; + } + + // add start part of query function + void startQueryFunc(std::stringstream& ss) const { + std::stringstream input_ss; + if (ctx_.GetParameterVars().size() > 0) { + auto vars = ctx_.GetParameterVars(); + sort(vars.begin(), vars.end(), + [](const auto& a, const auto& b) { return a.id < b.id; }); + // FIXME: ENable this line + // the dynamic params can be duplicate. + CHECK(vars[0].id == 0); + for (auto i = 0; i < vars.size(); ++i) { + if (i > 0 && vars[i].id == vars[i - 1].id) { + // found duplicate + CHECK(vars[i] == vars[i - 1]); + continue; + } else { + input_ss << ", " << vars[i].var_name << ":" + << data_type_2_rust_string(vars[i].type); + } + } + } + + ss << "#[no_mangle]\n"; + ss << "pub fn Query(conf: JobConf, graph: &'static CsrDB" + << ", input_params: Vec) -> Box, " + "ResultSink) -> Result<(), BuildJobError>> {\n"; + ss << "let workers = conf.workers;\n"; + } + + // add properties handler used in query + void addProperties(std::stringstream& ss) const { + for (auto kv : ctx_.GetVertexProperties()) { + int32_t vertex_label = kv.first; + std::vector& properties = kv.second; + for (auto property : properties) { + std::string property_name = + get_vertex_prop_column_name(property.var_name, vertex_label); + auto property_type = property.type; + ss << "let " << property_name << " = &graph.vertex_prop_table[" + << vertex_label << " as usize]\n"; + 
ss << ".get_column_by_name(\"" << property.var_name << "\")\n"; + ss << ".as_any()\n"; + switch (property_type) { + case codegen::DataType::kInt32: { + ss << ".downcast_ref::()\n"; + break; + } + case codegen::DataType::kString: { + ss << ".downcast_ref::()\n"; + break; + } + default: + ss << ".downcast_ref::()\n"; + break; + } + ss << ".unwrap()\n"; + ss << ".data;\n"; + } + } + } + + void endQueryFunc(std::stringstream& ss) const { + ss << "})\n"; + ss << "}\n"; + } + + void addQueryBody(std::stringstream& ss) const { + auto size = plan_.plan_size(); + + LOG(INFO) << "Found " << size << " operators in the plan"; + ss << "Box::new(move |input: &mut Source, output: ResultSink| " + "{\n"; + ss << "let worker_id = input.get_worker_index() % workers;\n"; + ss << "let stream_0 = input.input_from(vec![0])?;\n"; + + std::string plan_json; + google::protobuf::util::JsonOptions option; + option.always_print_primitive_fields = true; + google::protobuf::util::MessageToJsonString(plan_, &plan_json, option); + for (auto i = 0; i < size; ++i) { + auto op = plan_.plan(i); + LOG(INFO) << "Start codegen for operator " << i; + auto& meta_datas = op.meta_data(); + // CHECK(meta_datas.size() == 1) << "meta data size: " << + // meta_datas.size(); + // physical::PhysicalOpr::MetaData meta_data; //fake meta + auto opr = op.opr(); + LOG(INFO) << "Input size of current operator is " << ctx_.InputSize(); + switch (opr.op_kind_case()) { + case physical::PhysicalOpr::Operator::kScan: { // scan + physical::PhysicalOpr::MetaData meta_data; + + LOG(INFO) << "Found a scan operator"; + auto& scan_op = opr.scan(); + auto scan_codegen = + pegasus::BuildScanOp(ctx_, i + 1, scan_op, meta_data); + LOG(INFO) << scan_codegen; + ss << scan_codegen; + break; + } + case physical::PhysicalOpr::Operator::kRepartition: { + physical::PhysicalOpr::MetaData meta_data; + + LOG(INFO) << "Found a repartition operator"; + auto& repartition_op = opr.repartition(); + auto repartition_codegen = + 
pegasus::BuildRepartitionOp(ctx_, i + 1, repartition_op, meta_data); + LOG(INFO) << repartition_codegen; + ss << repartition_codegen; + break; + } + case physical::PhysicalOpr::Operator::kGroupBy: { + std::vector meta_datas; + for (auto i = 0; i < op.meta_data_size(); i++) { + meta_datas.push_back(op.meta_data(i)); + } + + LOG(INFO) << "Found a groupby operator"; + auto& groupby_op = opr.group_by(); + + ss << pegasus::BuildGroupByOp(ctx_, i + 1, groupby_op, meta_datas); + break; + } + case physical::PhysicalOpr::Operator::kOrderBy: { + physical::PhysicalOpr::MetaData meta_data; + + LOG(INFO) << "Found a order_by operator"; + auto& orderby_op = opr.order_by(); + + ss << pegasus::BuildOrderByOp(ctx_, i + 1, orderby_op, meta_data); + break; + } + case physical::PhysicalOpr::Operator::kProject: { + std::vector meta_data; + for (auto i = 0; i < op.meta_data_size(); i++) { + meta_data.push_back(op.meta_data(i)); + } + + LOG(INFO) << "Found a project operator"; + auto& project_op = opr.project(); + + ss << pegasus::BuildProjectOp(ctx_, i + 1, project_op, meta_data); + break; + } + case physical::PhysicalOpr::Operator::kEdge: { // edge expand + auto& meta_data = meta_datas[0]; + LOG(INFO) << "Found a edge expand operator"; + auto& edge_op = opr.edge(); + auto edge_codegen = pegasus::BuildEdgeExpandOp( + ctx_, i + 1, edge_op, meta_data); + LOG(INFO) << edge_codegen; + ss << edge_codegen; + break; + } + case physical::PhysicalOpr::Operator::kVertex: { + physical::PhysicalOpr::MetaData meta_data; + + LOG(INFO) << "Found a get_v operator"; + auto& vertex_op = opr.vertex(); + auto vertex_codegen = + pegasus::BuildGetVOp(ctx_, i + 1, vertex_op, meta_data); + LOG(INFO) << vertex_codegen; + ss << vertex_codegen; + + break; + } + case physical::PhysicalOpr::Operator::kSink: { + physical::PhysicalOpr::MetaData meta_data; + LOG(INFO) << "Found a sink operator"; + auto& sink_op = opr.sink(); + std::string call_sink_code = + pegasus::BuildSinkOp(ctx_, i + 1, sink_op, meta_data); + ss 
<< call_sink_code; + break; + } + case physical::PhysicalOpr::Operator::kPath: { + auto& meta_data = meta_datas[0]; + LOG(INFO) << "Found a path expand operator"; + auto& path_op = opr.path(); + auto path_expand_codegen = + pegasus::BuildPathExpandOp(ctx_, path_op, meta_data); + LOG(INFO) << path_expand_codegen; + ss << path_expand_codegen; + break; + } + case physical::PhysicalOpr::Operator::kIntersect: { + physical::PhysicalOpr::MetaData meta_data; + LOG(INFO) << "Found a intersect operator"; + auto& intersect_op = opr.intersect(); + auto intersect_codegen = + pegasus::BuildIntersectOp(ctx_, intersect_op, meta_data); + LOG(INFO) << intersect_codegen; + ss << intersect_codegen; + break; + } + case physical::PhysicalOpr::Operator::kUnfold: { + physical::PhysicalOpr::MetaData meta_data; + LOG(INFO) << "Found a unfold operator"; + auto& unfold_op = opr.unfold(); + auto unfold_codegen = + pegasus::BuildUnfoldOp(ctx_, i + 1, unfold_op, meta_data); + LOG(INFO) << unfold_codegen; + ss << unfold_codegen; + break; + } + case physical::PhysicalOpr::Operator::kDedup: { + physical::PhysicalOpr::MetaData meta_data; + LOG(INFO) << "Found a dedup operator"; + auto& dedup_op = opr.dedup(); + auto dedup_codegen = + pegasus::BuildDedupOp(ctx_, i + 1, dedup_op, meta_data); + LOG(INFO) << dedup_codegen; + ss << dedup_codegen; + break; + } + case physical::PhysicalOpr::Operator::kUnion: { + physical::PhysicalOpr::MetaData meta_data; + LOG(INFO) << "Found a union operator"; + auto& union_op = opr.union_(); + auto union_codegen = + pegasus::BuildUnionOp(ctx_, i + 1, union_op, meta_data); + LOG(INFO) << union_codegen; + ss << union_codegen; + break; + } + case physical::PhysicalOpr::Operator::kJoin: { + physical::PhysicalOpr::MetaData meta_data; + LOG(INFO) << "Found a join operator"; + auto& join_op = opr.join(); + auto join_codegen = + pegasus::BuildJoinOp(ctx_, i + 1, join_op, meta_data); + LOG(INFO) << join_codegen; + ss << join_codegen; + break; + } + case 
physical::PhysicalOpr::Operator::kSelect: { + physical::PhysicalOpr::MetaData meta_data; + LOG(INFO) << "Found a select operator"; + auto& select_op = opr.select(); + auto select_codegen = + pegasus::BuildSelectOp(ctx_, i + 1, select_op, meta_data); + LOG(INFO) << select_codegen; + ss << select_codegen; + break; + } + case physical::PhysicalOpr::Operator::kLimit: { + physical::PhysicalOpr::MetaData meta_data; + LOG(INFO) << "Found a limit operator"; // was "select": copy-paste from the case above made logs misreport the operator + auto& limit_pb = opr.limit(); + auto limit_codegen = + pegasus::BuildLimitOp(ctx_, i + 1, limit_pb, meta_data); + LOG(INFO) << limit_codegen; + ss << limit_codegen; + break; + } + default: + LOG(FATAL) << "Unsupported operator type: " << opr.op_kind_case(); + } + } + LOG(INFO) << "Finish adding query"; + } + + BuildingContext& ctx_; + std::string query_name_; + const physical::PhysicalPlan& plan_; +}; + +} // namespace gs + +#endif // HQPS_CODEGEN_SRC_PEGASUS_GENERATOR_H_ diff --git a/flex/codegen/src/string_utils.h b/flex/codegen/src/string_utils.h new file mode 100644 index 000000000000..b9d4c7e5ce29 --- /dev/null +++ b/flex/codegen/src/string_utils.h @@ -0,0 +1,135 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+*/ +#ifndef CODEGEN_SRC_STRING_UTILS_H_ +#define CODEGEN_SRC_STRING_UTILS_H_ + +#include +#include + +#include "flex/codegen/src/pb_parser/internal_struct.h" +#include "proto_generated_gie/physical.pb.h" + +#include "glog/logging.h" +namespace gs { + +static constexpr const char* _4_SPACES = "    "; +static constexpr const char* _8_SPACES = "        "; +static constexpr const char* _ASSIGN_STR_ = " = "; + +static constexpr const char* EDGE_EXPAND_V_METHOD_NAME = "EdgeExpandV"; +static constexpr const char* EDGE_EXPAND_E_METHOD_NAME = "EdgeExpandE"; +static constexpr const char* MAKE_GETV_OPT_NAME = "make_getv_opt"; + +static constexpr const char* NAMED_PROPERTY_CLASS_NAME = "gs::NamedProperty"; +static constexpr const char* SORT_PROPER_PAIR_NAME = "gs::OrderingPropPair"; +static constexpr const char* MAKE_PROJECT_OPT_NAME = "gs::make_project_opt"; +static constexpr const char* PROJECT_SELF_STR = "gs::ProjectSelf"; +static constexpr const char* PROJECT_PROPS_STR = "gs::AliasTagProp"; +static constexpr const char* LABEL_ID_T = "label_id_t"; +static constexpr const char* LABEL_ID_T_CASTER = "(label_id_t)"; +static constexpr const char* EMPTY_TYPE = "grape::EmptyType"; +static constexpr const char* INNER_ID_PROPERTY_NAME = "InnerIdProperty"; +static constexpr const char* VERTEX_ID_T = "vertex_id_t"; +static constexpr const char* MAKE_PROJECT_EXPR = "make_project_expr"; +static constexpr const char* APPEND_OPT_TEMP = "gs::AppendOpt::Temp"; +static constexpr const char* APPEND_OPT_PERSIST = "gs::AppendOpt::Persist"; +static constexpr const char* APPEND_OPT_REPLACE = "gs::AppendOpt::Replace"; +static constexpr const char* GRAPE_EMPTY_TYPE = "grape::EmptyType"; + +static constexpr const char* NONE_LITERAL = "gs::NONE"; +static constexpr const char* PROPERTY_SELECTOR = + "gs::PropertySelector<%1%>(\"%2%\")"; +static constexpr const char* PROP_NAME_ARRAY = "gs::PropNameArray<%1%>{%2%}"; + +static std::string project_is_append_str(bool is_append) { // static like the sibling helpers: a plain definition in a header is duplicated in every including translation unit + if (is_append) { + return
"PROJ_TO_APPEND"; + } else { + return "PROJ_TO_NEW"; + } +} + +static std::string res_alias_to_append_opt(int res_alias) { // -1 selects the Temp append option, any other alias selects Persist + return res_alias == -1 ? APPEND_OPT_TEMP : APPEND_OPT_PERSIST; +} + +template +static std::string ensure_label_id(LabelIdT label_id) { + return std::string(LABEL_ID_T_CASTER) + std::string(" ") + + std::to_string(label_id); +} + +static std::string make_move(int32_t i) { + return "std::move(" + std::to_string(i) + ")"; +} + +static std::string make_move(const std::string& param) { + return "std::move(" + param + ")"; +} + +static std::string format_input_col(const int32_t v_tag) { + return "INPUT_COL_ID(" + std::to_string(v_tag) + ")"; +} + +static std::string add_quote(const std::string& str) { + return "\"" + str + "\""; +} + +static std::string direction_pb_to_str( + const physical::EdgeExpand::Direction& direction) { + switch (direction) { + case physical::EdgeExpand::Direction::EdgeExpand_Direction_IN: + return "gs::Direction::In"; + case physical::EdgeExpand::Direction::EdgeExpand_Direction_OUT: + return "gs::Direction::Out"; + case physical::EdgeExpand::Direction::EdgeExpand_Direction_BOTH: + return "gs::Direction::Both"; + default: + // LOG(FATAL) << "Unknown direction: " << direction; + throw std::runtime_error("Unknown direction: "); + } +} + +static std::string direction_pb_to_str( + const gs::internal::Direction& direction) { + switch (direction) { + case gs::internal::Direction::kIn: + return "gs::Direction::In"; + case gs::internal::Direction::kOut: + return "gs::Direction::Out"; + case gs::internal::Direction::kBoth: + return "gs::Direction::Both"; + default: + throw std::runtime_error("Unknown direction: "); + } +} + +template +static std::string label_ids_to_array_str( + const std::vector& label_ids) { + std::stringstream ss; + ss << "std::array{"; + for (size_t i = 0; i < label_ids.size(); ++i) { // size_t index: same type as size(), no signed/unsigned comparison + ss << ensure_label_id(label_ids[i]); + if (i+1 != label_ids.size()) { // separator after every element except the last + ss << ", "; + } + } + ss << "}"; + return ss.str(); +} +} //
namespace gs + +#endif // CODEGEN_SRC_STRING_UTILS_H_ \ No newline at end of file diff --git a/flex/engines/CMakeLists.txt b/flex/engines/CMakeLists.txt index 7961cb012577..2438f1d51548 100644 --- a/flex/engines/CMakeLists.txt +++ b/flex/engines/CMakeLists.txt @@ -1,4 +1,9 @@ - add_subdirectory(graph_db) add_subdirectory(bsp) +add_subdirectory(http_server) +message(STATUS "BUILD_HQPS: ${BUILD_HQPS}") +if (BUILD_HQPS) + add_subdirectory(hqps_db) +endif() + diff --git a/flex/engines/graph_db/CMakeLists.txt b/flex/engines/graph_db/CMakeLists.txt index 53eca189d8e1..288bb0e15487 100644 --- a/flex/engines/graph_db/CMakeLists.txt +++ b/flex/engines/graph_db/CMakeLists.txt @@ -1,24 +1,3 @@ -find_package (Hiactor) -if (Hiactor_FOUND) - include (${Hiactor_CODEGEN_CMAKE_FILE}) - - hiactor_codegen (graph_db_actor_autogen graph_db_actor_autogen_files - SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/server - INCLUDE_PATHS ${Hiactor_INCLUDE_DIR},${CMAKE_CURRENT_SOURCE_DIR}/../../../) - - file(GLOB_RECURSE GRAPH_DB_SERVER_SRC_FILES "${CMAKE_CURRENT_SOURCE_DIR}/server/*") - - add_library(flex_graph_db_server STATIC - ${GRAPH_DB_SERVER_SRC_FILES} - ${graph_db_actor_autogen_files}) - add_dependencies(flex_graph_db_server graph_db_actor_autogen) - target_compile_options (flex_graph_db_server - PUBLIC - -Wno-attributes) - # target_link_libraries(flex_graph_db_server Hiactor::hiactor ${LIBGRAPELITE_LIBRARIES} flex_utils flex_rt_mutable_graph ${GLOG_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) - target_link_libraries(flex_graph_db_server Hiactor::hiactor) -endif () - file(GLOB_RECURSE GRAPH_DB_SRC_FILES "${CMAKE_CURRENT_SOURCE_DIR}/app/*.cc" "${CMAKE_CURRENT_SOURCE_DIR}/database/*.cc") diff --git a/flex/engines/graph_db/database/transaction_utils.h b/flex/engines/graph_db/database/transaction_utils.h index 68aa215bca64..f219414f15ec 100644 --- a/flex/engines/graph_db/database/transaction_utils.h +++ b/flex/engines/graph_db/database/transaction_utils.h @@ -39,6 +39,9 @@ inline void 
serialize_field(grape::InArchive& arc, const Any& prop) { case PropertyType::kInt64: arc << prop.value.l; break; + case PropertyType::kDouble: + arc << prop.value.db; + break; default: LOG(FATAL) << "Unexpected property type"; } @@ -60,6 +63,9 @@ inline void deserialize_field(grape::OutArchive& arc, Any& prop) { case PropertyType::kInt64: arc >> prop.value.l; break; + case PropertyType::kDouble: + arc >> prop.value.db; + break; default: LOG(FATAL) << "Unexpected property type"; } diff --git a/flex/engines/graph_db/grin/CMakeLists.txt b/flex/engines/graph_db/grin/CMakeLists.txt new file mode 100644 index 000000000000..3a066292b992 --- /dev/null +++ b/flex/engines/graph_db/grin/CMakeLists.txt @@ -0,0 +1,69 @@ +cmake_minimum_required(VERSION 3.1) + +set(GRIN_READER_MAJOR_VERSION 0) +set(GRIN_READER_MINOR_VERSION 1) +set(GRIN_READER_VERSION ${GRIN_READER_MAJOR_VERSION}.${GRIN_READER_MINOR_VERSION}) + +project(grin_reader LANGUAGES C CXX VERSION ${GRIN_READER_VERSION}) + +# Set flags +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99") + +# ------------------------------------------------------------------------------ +# find_libraries +# ------------------------------------------------------------------------------ + +find_package(libgrapelite REQUIRED) +include_directories(${LIBGRAPELITE_INCLUDE_DIRS}) + +include("../../../../flex/cmake/FindGFlags.cmake") +if (GFLAGS_FOUND) + include_directories(SYSTEM ${GFLAGS_INCLUDE_DIRS}) +else () + message(FATAL_ERROR "gflags not found") +endif () + +include("../../../../flex/cmake/FindGlog.cmake") +include_directories(SYSTEM ${GLOG_INCLUDE_DIRS}) +if (GLOG_FOUND) + set(CMAKE_REQUIRED_INCLUDES "${GLOG_INCLUDE_DIRS}") + set(CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES} ${GLOG_LIBRARIES}") +endif () + +find_package(yaml-cpp REQUIRED) +include_directories(SYSTEM ${yaml-cpp_INCLUDE_DIRS}) + +#set(yaml-cpp_INCLUDE_DIRS 
"/usr/local/Cellar/yaml-cpp/0.7.0/include") +#include_directories(SYSTEM ${yaml-cpp_INCLUDE_DIRS}) +#set(YAML_CPP_LIBRARIES "/usr/local/Cellar/yaml-cpp/0.7.0/lib/libyaml-cpp.0.7.0.dylib") + + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../..) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../..) +message(STATUS "${CMAKE_CURRENT_SOURCE_DIR}") +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../storages/rt_mutable_graph) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../utils/property) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) + +file(GLOB_RECURSE FILES_NEED_FORMAT "src/*.cc") + +add_custom_target(grin_clformat + COMMAND clang-format --style=file -i ${FILES_NEED_FORMAT} + COMMENT "Running clang-format." + VERBATIM) + +file(GLOB SOURCES "src/*.cc" "src/topology/*.cc" "src/property/*.cc" "src/index/*.cc" "src/common/*.cc" "../../../utils/property/*.cc" "../../../storages/rt_mutable_graph/*.cc") +add_library(flex_grin SHARED ${SOURCES}) +target_link_libraries(flex_grin ${LIBGRAPELITE_LIBRARIES} ${GFLAGS_LIBRARIES} ${CMAKE_DL_LIBS} ${YAML_CPP_LIBRARIES}) + + +add_executable(run_grin_test test/test.c) + +target_include_directories(run_grin_test PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps fragment) +target_link_libraries(run_grin_test flex_grin ${LIBGRAPELITE_LIBRARIES} ${GFLAGS_LIBRARIES} ${CMAKE_DL_LIBS}) +target_link_libraries(run_grin_test) diff --git a/flex/engines/graph_db/grin/predefine.h b/flex/engines/graph_db/grin/predefine.h new file mode 100644 index 000000000000..85578a8c4a8c --- /dev/null +++ b/flex/engines/graph_db/grin/predefine.h @@ -0,0 +1,296 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +/** + * @file predefine.h + * @brief This template file consists of four parts: + * 1. The predefined enumerate types of GRIN, which should NOT be modified. + * 2. The supported macros which should be specified by storage implementors + * based on storage features. + * 3. The typedefs of the enabled handles. This should be specified by storage. + * 4. The corresponding null values of the enabled handles. This should be + * specified by storage. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +/* 1. Predefined enumerate types of GRIN */ +/// Enumerates the directions of edges with respect to a certain vertex +typedef enum { + IN = 0, ///< incoming + OUT = 1, ///< outgoing + BOTH = 2, ///< incoming & outgoing +} GRIN_DIRECTION; + +/// Enumerates the datatype supported in the storage +typedef enum { + Undefined = 0, ///< other unknown types + Int32 = 1, ///< int + UInt32 = 2, ///< unsigned int + Int64 = 3, ///< long int + UInt64 = 4, ///< unsigned long int + Float = 5, ///< float + Double = 6, ///< double + String = 7, ///< string + Date32 = 8, ///< date + Time32 = 9, ///< Time32 + Timestamp64 = 10, ///< Timestamp +} GRIN_DATATYPE; + +/// Enumerates the error codes of grin +typedef enum { + NO_ERROR = 0, ///< success + UNKNOWN_ERROR = 1, ///< unknown error + INVALID_VALUE = 2, ///< invalid value + UNKNOWN_DATATYPE = 3, ///< unknown datatype +} GRIN_ERROR_CODE; + +/* 2. 
Define supported macros based on storage features */ +// Topology +#define GRIN_ASSUME_HAS_DIRECTED_GRAPH +#define GRIN_ASSUME_HAS_UNDIRECTED_GRAPH +#define GRIN_ASSUME_HAS_MULTI_EDGE_GRAPH +// #define GRIN_WITH_VERTEX_DATA +#define GRIN_WITH_EDGE_DATA +#define GRIN_ENABLE_VERTEX_LIST +// #define GRIN_ENABLE_VERTEX_LIST_ARRAY +#define GRIN_ENABLE_VERTEX_LIST_ITERATOR +#define GRIN_ENABLE_EDGE_LIST +// #define GRIN_ENABLE_EDGE_LIST_ARRAY +#define GRIN_ENABLE_EDGE_LIST_ITERATOR +#define GRIN_ENABLE_ADJACENT_LIST +// #define GRIN_ENABLE_ADJACENT_LIST_ARRAY +#define GRIN_ENABLE_ADJACENT_LIST_ITERATOR + +// Partition +/** +#define GRIN_ENABLE_GRAPH_PARTITION +#define GRIN_TRAIT_NATURAL_ID_FOR_PARTITION +#define GRIN_ENABLE_VERTEX_REF +#define GRIN_TRAIT_FAST_VERTEX_REF +#define GRIN_ENABLE_EDGE_REF +#define GRIN_ASSUME_ALL_REPLICATE_PARTITION +#define GRIN_ASSUME_EDGE_CUT_PARTITION +#define GRIN_ASSUME_EDGE_CUT_FOLLOW_SRC_PARTITION +#define GRIN_ASSUME_EDGE_CUT_FOLLOW_DST_PARTITION +#define GRIN_ASSUME_VERTEX_CUT_PARTITION +#define GRIN_ASSUME_MASTER_ONLY_PARTITION_FOR_VERTEX_DATA +#define GRIN_ASSUME_REPLICATE_MASTER_MIRROR_PARTITION_FOR_VERTEX_DATA +#define GRIN_ASSUME_MASTER_ONLY_PARTITION_FOR_EDGE_DATA +#define GRIN_ASSUME_REPLICATE_MASTER_MIRROR_PARTITION_FOR_EDGE_DATA +#define GRIN_TRAIT_MASTER_VERTEX_MIRROR_PARTITION_LIST +#define GRIN_TRAIT_MIRROR_VERTEX_MIRROR_PARTITION_LIST +#define GRIN_TRAIT_MASTER_EDGE_MIRROR_PARTITION_LIST +#define GRIN_TRAIT_MIRROR_EDGE_MIRROR_PARTITION_LIST +#define GRIN_TRAIT_SELECT_MASTER_FOR_VERTEX_LIST +#define GRIN_TRAIT_SELECT_PARTITION_FOR_VERTEX_LIST +#define GRIN_TRAIT_SELECT_MASTER_FOR_EDGE_LIST +#define GRIN_TRAIT_SELECT_PARTITION_FOR_EDGE_LIST +#define GRIN_TRAIT_SELECT_MASTER_NEIGHBOR_FOR_ADJACENT_LIST +#define GRIN_TRAIT_SELECT_NEIGHBOR_PARTITION_FOR_ADJACENT_LIST +*/ + +// Property +#define GRIN_ENABLE_ROW +#define GRIN_TRAIT_CONST_VALUE_PTR +#define GRIN_WITH_VERTEX_PROPERTY +#define GRIN_WITH_VERTEX_PROPERTY_NAME 
+#define GRIN_WITH_VERTEX_TYPE_NAME +#define GRIN_TRAIT_NATURAL_ID_FOR_VERTEX_TYPE +#define GRIN_ENABLE_VERTEX_PRIMARY_KEYS +#define GRIN_TRAIT_NATURAL_ID_FOR_VERTEX_PROPERTY +#define GRIN_WITH_EDGE_PROPERTY +// #define GRIN_WITH_EDGE_PROPERTY_NAME +#define GRIN_WITH_EDGE_TYPE_NAME +#define GRIN_TRAIT_NATURAL_ID_FOR_EDGE_TYPE +// #define GRIN_ENABLE_EDGE_PRIMARY_KEYS +// #define GRIN_TRAIT_NATURAL_ID_FOR_EDGE_PROPERTY +// #define GRIN_TRAIT_SPECIFIC_VEV_RELATION +// #define GRIN_ASSUME_MASTER_ONLY_PARTITION_FOR_VERTEX_PROPERTY +// #define GRIN_ASSUME_REPLICATE_MASTER_MIRROR_PARTITION_FOR_VERTEX_PROPERTY +// #define GRIN_ASSUME_SPLIT_MASTER_MIRROR_PARTITION_FOR_VERTEX_PROPERTY +// #define GRIN_ASSUME_MASTER_ONLY_PARTITION_FOR_EDGE_PROPERTY +// #define GRIN_ASSUME_REPLICATE_MASTER_MIRROR_PARTITION_FOR_EDGE_PROPERTY +// #define GRIN_ASSUME_SPLIT_MASTER_MIRROR_PARTITION_FOR_EDGE_PROPERTY +// Index +#define GRIN_WITH_VERTEX_LABEL +#define GRIN_WITH_EDGE_LABEL +/// #define GRIN_ASSUME_ALL_VERTEX_LIST_SORTED +#define GRIN_ENABLE_VERTEX_INTERNAL_ID_INDEX +#define GRIN_ENABLE_VERTEX_PK_INDEX +// #define GRIN_ENABLE_EDGE_PK_INDEX + +/* 3. 
Define the handles using typedef */ +typedef void* GRIN_GRAPH; +// label: 8, vid: 32 +typedef uint64_t GRIN_VERTEX; +typedef void* GRIN_EDGE; + +#ifdef GRIN_WITH_VERTEX_DATA +typedef void* GRIN_VERTEX_DATA; +#endif + +#ifdef GRIN_WITH_VERTEX_PROPERTY +typedef unsigned GRIN_VERTEX_TYPE; +typedef void* GRIN_VERTEX_TYPE_LIST; +// datatype: 8, vertex label: 8, property index:8 +typedef uint32_t GRIN_VERTEX_PROPERTY; + +typedef void* GRIN_VERTEX_PROPERTY_LIST; +#endif + +#ifdef GRIN_ENABLE_VERTEX_LIST +typedef struct GRIN_VERTEX_LIST { + size_t vertex_num; + GRIN_VERTEX_TYPE label; +} GRIN_VERTEX_LIST; +#endif + +#ifdef GRIN_ENABLE_VERTEX_LIST_ITERATOR +typedef struct GRIN_VERTEX_LIST_ITERATOR { + uint32_t* cur_vid; + GRIN_VERTEX_LIST vertex_list; +} GRIN_VERTEX_LIST_ITERATOR; +#endif + +#ifdef GRIN_WITH_EDGE_DATA +typedef void* GRIN_EDGE_DATA; +#endif + +#ifdef GRIN_ENABLE_EDGE_LIST +typedef void* GRIN_EDGE_LIST; +#endif + +#ifdef GRIN_ENABLE_EDGE_LIST_ITERATOR +typedef void* GRIN_EDGE_LIST_ITERATOR; +#endif + +/** +#ifdef GRIN_ENABLE_GRAPH_PARTITION +typedef void* GRIN_PARTITIONED_GRAPH; +typedef void* GRIN_PARTITION; +typedef void* GRIN_PARTITION_LIST; +#endif + +#ifdef GRIN_TRAIT_NATURAL_ID_FOR_PARTITION +typedef unsigned GRIN_PARTITION_ID; +#endif +*/ + +#ifdef GRIN_ENABLE_VERTEX_REF +typedef void* GRIN_VERTEX_REF; +#endif + +#ifdef GRIN_ENABLE_EDGE_REF +typedef void* GRIN_EDGE_REF; +#endif + +#ifdef GRIN_TRAIT_NATURAL_ID_FOR_VERTEX_TYPE +typedef unsigned GRIN_VERTEX_TYPE_ID; +#endif + +#ifdef GRIN_TRAIT_NATURAL_ID_FOR_VERTEX_PROPERTY +typedef unsigned GRIN_VERTEX_PROPERTY_ID; +#endif + +#ifdef GRIN_WITH_EDGE_PROPERTY +// src_label: 8, dst_label: 8, edge_label: 8 +typedef unsigned GRIN_EDGE_TYPE; +typedef void* GRIN_EDGE_TYPE_LIST; +// index: 8, src_label: 8, dst_label: 8, edge_label: 8 +typedef unsigned GRIN_EDGE_PROPERTY; +typedef void* GRIN_EDGE_PROPERTY_LIST; +#endif + +#ifdef GRIN_TRAIT_NATURAL_ID_FOR_EDGE_TYPE +typedef unsigned GRIN_EDGE_TYPE_ID; +#endif + 
+#ifdef GRIN_ENABLE_ADJACENT_LIST +typedef struct GRIN_ADJACENT_LIST { + GRIN_VERTEX v; + GRIN_DIRECTION dir; + GRIN_EDGE_TYPE edge_label; +} GRIN_ADJACENT_LIST; +#endif + +#ifdef GRIN_ENABLE_ADJACENT_LIST_ITERATOR +typedef struct GRIN_ADJACENT_LIST_ITERATOR { + // gs::MutableCsrConstEdgeIterBase* + void* edge_iter; + GRIN_ADJACENT_LIST adj_list; +} GRIN_ADJACENT_LIST_ITERATOR; +#endif + +#ifdef GRIN_TRAIT_NATURAL_ID_FOR_EDGE_PROPERTY +typedef unsigned GRIN_EDGE_PROPERTY_ID; +#endif + +#ifdef GRIN_ENABLE_ROW +typedef void* GRIN_ROW; +#endif + +#if defined(GRIN_WITH_VERTEX_LABEL) || defined(GRIN_WITH_EDGE_LABEL) +typedef void* GRIN_LABEL; +typedef void* GRIN_LABEL_LIST; +#endif + +/* 4. Define invalid values for returns of handles */ +#define GRIN_NULL_GRAPH NULL +#define GRIN_NULL_VERTEX (unsigned long long) ~0 +#define GRIN_NULL_EDGE NULL +#define GRIN_NULL_VERTEX_DATA NULL +#define GRIN_NULL_VERTEX_LIST NULL +#define GRIN_NULL_VERTEX_LIST_ITERATOR NULL +#define GRIN_NULL_ADJACENT_LIST NULL +#define GRIN_NULL_ADJACENT_LIST_ITERATOR NULL +#define GRIN_NULL_EDGE_DATA NULL +#define GRIN_NULL_EDGE_LIST NULL +#define GRIN_NULL_EDGE_LIST_ITERATOR NULL +#define GRIN_NULL_PARTITIONED_GRAPH NULL +#define GRIN_NULL_PARTITION NULL +#define GRIN_NULL_PARTITION_LIST NULL +#define GRIN_NULL_PARTITION_ID (unsigned) ~0 +#define GRIN_NULL_VERTEX_REF NULL +#define GRIN_NULL_EDGE_REF NULL +#define GRIN_NULL_VERTEX_TYPE (unsigned) ~0 +#define GRIN_NULL_VERTEX_TYPE_LIST NULL +#define GRIN_NULL_VERTEX_PROPERTY (unsigned) ~0 +#define GRIN_NULL_VERTEX_PROPERTY_LIST NULL +#define GRIN_NULL_VERTEX_TYPE_ID (unsigned) ~0 +#define GRIN_NULL_VERTEX_PROPERTY_ID (unsigned) ~0 +#define GRIN_NULL_EDGE_TYPE (unsigned) ~0 +#define GRIN_NULL_EDGE_TYPE_LIST NULL +#define GRIN_NULL_VEV_TYPE NULL +#define GRIN_NULL_VEV_TYPE_LIST NULL +#define GRIN_NULL_EDGE_PROPERTY (unsigned) ~0 /* GRIN_EDGE_PROPERTY is an unsigned handle, not a pointer: NULL would equal the valid encoding 0 */ +#define GRIN_NULL_EDGE_PROPERTY_LIST NULL +#define GRIN_NULL_EDGE_TYPE_ID (unsigned) ~0 +#define GRIN_NULL_EDGE_PROPERTY_ID
(unsigned) ~0 +#define GRIN_NULL_ROW NULL +#define GRIN_NULL_LABEL NULL +#define GRIN_NULL_LABEL_LIST NULL +#define GRIN_NULL_SIZE (unsigned) ~0 +#define GRIN_NULL_NAME NULL + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/flex/engines/graph_db/grin/src/common/error.cc b/flex/engines/graph_db/grin/src/common/error.cc new file mode 100644 index 000000000000..d910f0b74892 --- /dev/null +++ b/flex/engines/graph_db/grin/src/common/error.cc @@ -0,0 +1,24 @@ +/** Copyright 2020 Alibaba Group Holding Limited. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +/** + @file error.cc + @brief Implement the error code related APIs: the thread-local last error code and its getter +*/ + +#include "grin/src/predefine.h" + +#include "grin/include/common/error.h" + +__thread GRIN_ERROR_CODE grin_error_code = GRIN_ERROR_CODE::NO_ERROR; + +GRIN_ERROR_CODE grin_get_last_error_code() { return grin_error_code; } \ No newline at end of file diff --git a/flex/engines/graph_db/grin/src/index/internal_id.cc b/flex/engines/graph_db/grin/src/index/internal_id.cc new file mode 100644 index 000000000000..923f5692d988 --- /dev/null +++ b/flex/engines/graph_db/grin/src/index/internal_id.cc @@ -0,0 +1,101 @@ +/** Copyright 2020 Alibaba Group Holding Limited. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "grin/src/predefine.h" + +#include "grin/include/index/internal_id.h" + +#if defined(GRIN_ENABLE_VERTEX_INTERNAL_ID_INDEX) && \ + !defined(GRIN_WITH_VERTEX_PROPERTY) +/** + * @brief Get the int64 internal id of a vertex + * @param GRIN_GRAPH The graph + * @param GRIN_VERTEX The vertex + * @return The int64 internal id of the vertex + */ +long long int grin_get_vertex_internal_id(GRIN_GRAPH, GRIN_VERTEX); + +/** + * @brief Get the vertex by internal id. + * Different from pk_of_int64, the internal id is unique over all vertex types. + * @param GRIN_GRAPH The graph + * @param id The internal id of the vertex + * @return The vertex + */ +GRIN_VERTEX grin_get_vertex_by_internal_id(GRIN_GRAPH, long long int id); + +/** + * @brief Get the upper bound of internal id. + * @param GRIN_GRAPH The graph + * @return The upper bound + */ +long long int grin_get_vertex_internal_id_upper_bound(GRIN_GRAPH); + +/** + * @brief Get the lower bound of internal id. 
+ * @param GRIN_GRAPH The graph + * @return The lower bound + */ +long long int grin_get_vertex_internal_id_lower_bound(GRIN_GRAPH); +#endif + +#if defined(GRIN_ENABLE_VERTEX_INTERNAL_ID_INDEX) && \ + defined(GRIN_WITH_VERTEX_PROPERTY) +/** + * @brief Get the int64 internal id of a vertex + * @param GRIN_GRAPH The graph + * @param GRIN_VERTEX The vertex + * @return The int64 internal id of the vertex + */ +long long int grin_get_vertex_internal_id_by_type(GRIN_GRAPH g, + GRIN_VERTEX_TYPE vt, + GRIN_VERTEX v) { + return v & (0xffffffff); +} + +/** + * @brief Get the vertex by internal id under type + * @param GRIN_GRAPH The graph + * @param GRIN_VERTEX_TYPE The vertex type + * @param id The internal id of the vertex under type + * @return The vertex + */ +GRIN_VERTEX grin_get_vertex_by_internal_id_by_type(GRIN_GRAPH g, + GRIN_VERTEX_TYPE vt, + long long int id) { + auto v = static_cast(vt); + return (v << 32) + id; +} + +/** + * @brief Get the upper bound of internal id under type. + * @param GRIN_GRAPH The graph + * @param GRIN_VERTEX_TYPE The vertex type + * @return The upper bound of internal id under type + */ +long long int grin_get_vertex_internal_id_upper_bound_by_type( + GRIN_GRAPH g, GRIN_VERTEX_TYPE vt) { + auto _g = static_cast(g); + return _g->g.vertex_num(vt); +} + +/** + * @brief Get the lower bound internal id under type. + * @param GRIN_GRAPH The graph + * @param GRIN_VERTEX_TYPE The vertex type + * @return The lower bound internal id under type + */ +long long int grin_get_vertex_internal_id_lower_bound_by_type( + GRIN_GRAPH g, GRIN_VERTEX_TYPE vt) { + return 0; +} +#endif \ No newline at end of file diff --git a/flex/engines/graph_db/grin/src/index/pk.cc b/flex/engines/graph_db/grin/src/index/pk.cc new file mode 100644 index 000000000000..6227edd46482 --- /dev/null +++ b/flex/engines/graph_db/grin/src/index/pk.cc @@ -0,0 +1,43 @@ +/** Copyright 2020 Alibaba Group Holding Limited. 
+Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "grin/src/predefine.h" + +#include "grin/include/index/pk.h" + +#if defined(GRIN_ENABLE_VERTEX_PK_INDEX) && \ + defined(GRIN_ENABLE_VERTEX_PRIMARY_KEYS) +/** + * @brief Get the vertex by primary keys row. + * The values in the row must be in the same order as the primary keys + * properties, which can be obtained by + * ``grin_get_primary_keys_by_vertex_type``. + * @param GRIN_GRAPH The graph. + * @param GRIN_VERTEX_TYPE The vertex type. + * @param GRIN_ROW The values row of primary keys properties. + * @return The vertex. 
+ */ +GRIN_VERTEX grin_get_vertex_by_primary_keys_row(GRIN_GRAPH g, + GRIN_VERTEX_TYPE label, + GRIN_ROW r) { + auto _r = static_cast(r); + auto _g = static_cast(g); + auto oid = *static_cast((*_r)[0]); + uint32_t vid; + + if (!_g->g.get_lid(label, oid, vid)) { + return GRIN_NULL_VERTEX; + } + uint64_t v = ((label * 1ull) << 32) + vid; + return v; +} +#endif \ No newline at end of file diff --git a/flex/engines/graph_db/grin/src/predefine.cc b/flex/engines/graph_db/grin/src/predefine.cc new file mode 100644 index 000000000000..75ee84d8e3b0 --- /dev/null +++ b/flex/engines/graph_db/grin/src/predefine.cc @@ -0,0 +1,53 @@ +#include "grin/src/predefine.h" + +GRIN_DATATYPE _get_data_type(const gs::PropertyType& type) { + if (type == gs::PropertyType::kInt32) { + return GRIN_DATATYPE::Int32; + } else if (type == gs::PropertyType::kInt64) { + return GRIN_DATATYPE::Int64; + } else if (type == gs::PropertyType::kString) { + return GRIN_DATATYPE::String; + } else if (type == gs::PropertyType::kDate) { + return GRIN_DATATYPE::Timestamp64; + } else if (type == gs::PropertyType::kDouble) { + return GRIN_DATATYPE::Double; + } else { + return GRIN_DATATYPE::Undefined; + } +} + +void init_cache(GRIN_GRAPH_T* g) { + auto v_label_num = g->g.vertex_label_num_; + for (size_t i = 0; i < v_label_num; ++i) { + std::vector tmp; + const auto& vec = g->g.schema().get_vertex_properties(i); + const auto& table = g->g.get_vertex_table(i); + for (size_t idx = 0; idx < vec.size(); ++idx) { + const auto& type = vec[idx]; + if (type == gs::PropertyType::kInt32) { + tmp.emplace_back(std::dynamic_pointer_cast( + table.get_column_by_id(idx)) + .get()); + } else if (type == gs::PropertyType::kInt64) { + tmp.emplace_back(std::dynamic_pointer_cast( + table.get_column_by_id(idx)) + .get()); + } else if (type == gs::PropertyType::kString) { + tmp.emplace_back(std::dynamic_pointer_cast( + table.get_column_by_id(idx)) + .get()); + } else if (type == gs::PropertyType::kDate) { + 
tmp.emplace_back(std::dynamic_pointer_cast( + table.get_column_by_id(idx)) + .get()); + } else if (type == gs::PropertyType::kDouble) { + tmp.emplace_back(std::dynamic_pointer_cast( + table.get_column_by_id(idx)) + .get()); + } else { + tmp.emplace_back((const void*) NULL); + } + } + g->vproperties.emplace_back(tmp); + } +} \ No newline at end of file diff --git a/flex/engines/graph_db/grin/src/predefine.h b/flex/engines/graph_db/grin/src/predefine.h new file mode 100644 index 000000000000..a04ccf186565 --- /dev/null +++ b/flex/engines/graph_db/grin/src/predefine.h @@ -0,0 +1,38 @@ +#include + +#include "grin/predefine.h" +#include "storages/rt_mutable_graph/mutable_property_fragment.h" + +typedef gs::oid_t GRIN_OID_T; +typedef gs::vid_t GRIN_VID_T; + +typedef struct GRIN_GRAPH_T { + gs::MutablePropertyFragment g; + std::vector> vproperties; + // std::vector> eproperties; +} GRIN_GRAPH_T; + +typedef struct GRIN_EDGE_T { + GRIN_VERTEX dst; + GRIN_VERTEX src; + GRIN_DIRECTION dir; + gs::label_t label; + gs::Any data; +} GRIN_EDGE_T; + +#ifdef GRIN_WITH_VERTEX_PROPERTY +typedef std::vector GRIN_VERTEX_TYPE_LIST_T; +typedef std::vector GRIN_VERTEX_PROPERTY_LIST_T; +#endif + +#if defined(GRIN_WITH_VERTEX_PROPERTY) || defined(GRIN_WITH_EDGE_PROPERTY) +typedef std::vector GRIN_ROW_T; +#endif + +#ifdef GRIN_WITH_EDGE_PROPERTY +typedef std::vector GRIN_EDGE_TYPE_LIST_T; +typedef std::vector GRIN_EDGE_PROPERTY_LIST_T; +#endif + +GRIN_DATATYPE _get_data_type(const gs::PropertyType& type); +void init_cache(GRIN_GRAPH_T* g); \ No newline at end of file diff --git a/flex/engines/graph_db/grin/src/property/primarykey.cc b/flex/engines/graph_db/grin/src/property/primarykey.cc new file mode 100644 index 000000000000..878fa0b2b32f --- /dev/null +++ b/flex/engines/graph_db/grin/src/property/primarykey.cc @@ -0,0 +1,57 @@ +#include "grin/src/predefine.h" + +#include "grin/include/common/error.h" +#include "grin/include/property/primarykey.h" +#ifdef GRIN_ENABLE_VERTEX_PRIMARY_KEYS 
+/** + * @brief Get the vertex types that have primary keys + * In some graph, not every vertex type has primary keys. + * @param GRIN_GRAPH The graph + * @return The vertex type list of types that have primary keys + */ +GRIN_VERTEX_TYPE_LIST grin_get_vertex_types_with_primary_keys(GRIN_GRAPH g) { + auto _g = static_cast(g); + GRIN_VERTEX_TYPE_LIST_T* vtl = new GRIN_VERTEX_TYPE_LIST_T(); + for (size_t idx = 0; idx < _g->g.vertex_label_num_; ++idx) { + vtl->push_back(idx); + } + return vtl; +} + +/** + * @brief Get the primary keys properties of a vertex type + * The primary keys properties are the properties that can be used to identify a + * vertex. They are a subset of the properties of a vertex type. + * @param GRIN_GRAPH The graph + * @param GRIN_VERTEX_TYPE The vertex type + * @return The primary keys properties list + */ +GRIN_VERTEX_PROPERTY_LIST grin_get_primary_keys_by_vertex_type( + GRIN_GRAPH, GRIN_VERTEX_TYPE label) { + GRIN_VERTEX_PROPERTY_LIST_T* vpl = new GRIN_VERTEX_PROPERTY_LIST_T(); + GRIN_VERTEX_PROPERTY vp; + vp = 0; + vp += (label * 1u) << 8; + vp += (GRIN_DATATYPE::Int64 * 1u) << 16; + vpl->emplace_back(vp); + return vpl; +} + +/** + * @brief Get the primary keys values row of a vertex + * The values in the row are in the same order as the primary keys properties. 
+ * @param GRIN_GRAPH The graph + * @param GRIN_VERTEX The vertex + * @return The primary keys values row + */ +GRIN_ROW grin_get_vertex_primary_keys_row(GRIN_GRAPH g, GRIN_VERTEX v) { + GRIN_ROW_T* row = new GRIN_ROW_T(); + auto _g = static_cast(g); + auto vid = v & (0xffffffff); + auto label = v >> 32; + auto oid = _g->g.get_oid(label, vid); + auto p = new gs::oid_t(oid); + row->emplace_back(p); + return row; +} +#endif diff --git a/flex/engines/graph_db/grin/src/property/property.cc b/flex/engines/graph_db/grin/src/property/property.cc new file mode 100644 index 000000000000..0a5c2d2f4262 --- /dev/null +++ b/flex/engines/graph_db/grin/src/property/property.cc @@ -0,0 +1,447 @@ +/** Copyright 2020 Alibaba Group Holding Limited. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#include "grin/src/predefine.h" + +#include "grin/include/common/error.h" +#include "grin/include/property/property.h" + +void grin_destroy_string_value(GRIN_GRAPH g, const char* value) { + delete[] value; /* string values are allocated with new char[len], so the array form is required */ +} + +#ifdef GRIN_WITH_VERTEX_PROPERTY_NAME +const char* grin_get_vertex_property_name(GRIN_GRAPH g, GRIN_VERTEX_TYPE vt, + GRIN_VERTEX_PROPERTY vp) { + auto _g = static_cast(g); + auto& table = _g->g.get_vertex_table(vt); + + const auto& name = table.column_name(vp & (0xff)); + auto len = name.length() + 1; + char* out = new char[len]; + snprintf(out, len, "%s", name.c_str()); + return out; +} + +GRIN_VERTEX_PROPERTY grin_get_vertex_property_by_name(GRIN_GRAPH g, + GRIN_VERTEX_TYPE vt, + const char* name) { + auto _g = static_cast(g); + auto& table = _g->g.get_vertex_table(vt); + auto col = table.get_column(name); + if (col == nullptr) { + return GRIN_NULL_VERTEX_PROPERTY; + } + GRIN_VERTEX_PROPERTY vp; + vp = table.get_column_id_by_name(name); + vp += (vt * 1u) << 8; + vp += (_get_data_type(col->type()) * 1u) << 16; + return vp; +} + +GRIN_VERTEX_PROPERTY_LIST grin_get_vertex_properties_by_name(GRIN_GRAPH g, + const char* name) { + auto _g = static_cast(g); + std::string prop_name(name); + auto vps = new GRIN_VERTEX_PROPERTY_LIST_T(); + std::string _name = std::string(name); + for (auto idx = 0; idx < _g->g.vertex_label_num_; idx++) { + auto& table = _g->g.get_vertex_table(static_cast(idx)); + + auto col = table.get_column(name); + + if (col != nullptr) { + GRIN_VERTEX_PROPERTY vp; + vp = table.get_column_id_by_name(name); + vp += (idx * 1u) << 8; + vp += (_get_data_type(col->type()) * 1u) << 16; + vps->emplace_back(vp); + } + } + if (vps->size() == 0) { + delete vps; + return GRIN_NULL_VERTEX_PROPERTY_LIST; + } + return vps; +} +#endif + +#ifdef GRIN_WITH_VERTEX_PROPERTY +bool grin_equal_vertex_property(GRIN_GRAPH g, GRIN_VERTEX_PROPERTY vp1, + GRIN_VERTEX_PROPERTY vp2) { + return vp1 == vp2; +} + +void grin_destroy_vertex_property(GRIN_GRAPH g,
GRIN_VERTEX_PROPERTY vp) {} + +/** + * @TODO add type for GRIN_VERTEX_PROPERTY_T + */ +GRIN_DATATYPE grin_get_vertex_property_datatype(GRIN_GRAPH g, + GRIN_VERTEX_PROPERTY vp) { + return (GRIN_DATATYPE) (vp >> 16); +} + +int grin_get_vertex_property_value_of_int32(GRIN_GRAPH g, GRIN_VERTEX v, + GRIN_VERTEX_PROPERTY vp) { + auto _g = static_cast(g); + auto label = v >> 32; + auto vid = v & (0xffffffff); + auto plabel = (vp >> 8) & (0xff); + auto pdt = (vp >> 16); + auto pid = vp & (0xff); + if (label != plabel || pdt != GRIN_DATATYPE::Int32) { + grin_error_code = INVALID_VALUE; + return 0; + } + if (label >= _g->g.vertex_label_num_ || + pid >= _g->vproperties[label].size()) { + grin_error_code = INVALID_VALUE; + return 0; + } + auto pcol = _g->vproperties[label][pid]; + if (pcol == NULL) { + grin_error_code = INVALID_VALUE; + return 0; + } + auto col = static_cast(pcol); + return col->get_view(vid); +} + +unsigned int grin_get_vertex_property_value_of_uint32(GRIN_GRAPH g, + GRIN_VERTEX v, + GRIN_VERTEX_PROPERTY vp) { + grin_error_code = INVALID_VALUE; + return 0; +} + +long long int grin_get_vertex_property_value_of_int64(GRIN_GRAPH g, + GRIN_VERTEX v, + GRIN_VERTEX_PROPERTY vp) { + auto _g = static_cast(g); + auto label = v >> 32; + auto vid = v & (0xffffffff); + auto plabel = (vp >> 8) & (0xff); + auto pdt = (vp >> 16); + auto pid = vp & (0xff); + + if (label != plabel || pdt != GRIN_DATATYPE::Int64) { + grin_error_code = INVALID_VALUE; + return 0; + } + + if (label >= _g->g.vertex_label_num_ || + pid >= _g->vproperties[label].size()) { + grin_error_code = INVALID_VALUE; + return 0; + } + auto pcol = _g->vproperties[label][pid]; + if (pcol == NULL) { + grin_error_code = INVALID_VALUE; + return 0; /* integer error value, same as the other error paths of this getter */ + } + auto col = static_cast(pcol); + return col->get_view(vid); +} + +unsigned long long int grin_get_vertex_property_value_of_uint64( + GRIN_GRAPH g, GRIN_VERTEX v, GRIN_VERTEX_PROPERTY vp) { + grin_error_code = INVALID_VALUE; + return 0; +} + +float
grin_get_vertex_property_value_of_float(GRIN_GRAPH g, GRIN_VERTEX v, + GRIN_VERTEX_PROPERTY vp) { + grin_error_code = INVALID_VALUE; + return 0.0f; +} + +double grin_get_vertex_property_value_of_double(GRIN_GRAPH g, GRIN_VERTEX v, + GRIN_VERTEX_PROPERTY vp) { + auto _g = static_cast(g); + auto label = v >> 32; + auto vid = v & (0xffffffff); + auto plabel = (vp >> 8) & (0xff); + auto pdt = (vp >> 16); + auto pid = vp & (0xff); + + if (label != plabel || pdt != GRIN_DATATYPE::Double) { + grin_error_code = INVALID_VALUE; + return 0.0; + } + + if (label >= _g->g.vertex_label_num_ || + pid >= _g->vproperties[label].size()) { + grin_error_code = INVALID_VALUE; + return 0.0; + } + auto pcol = _g->vproperties[label][pid]; + if (pcol == NULL) { + grin_error_code = INVALID_VALUE; + return 0.0; + } + auto col = static_cast(pcol); + return col->get_view(vid); +} + +const char* grin_get_vertex_property_value_of_string(GRIN_GRAPH g, + GRIN_VERTEX v, + GRIN_VERTEX_PROPERTY vp) { + auto _g = static_cast(g); + auto label = v >> 32; + auto vid = v & (0xffffffff); + auto plabel = (vp >> 8) & (0xff); + auto pdt = (vp >> 16); + auto pid = vp & (0xff); + + if (label != plabel || pdt != GRIN_DATATYPE::String) { + grin_error_code = INVALID_VALUE; + return NULL; + } + + if (label >= _g->g.vertex_label_num_ || + pid >= _g->vproperties[label].size()) { + grin_error_code = INVALID_VALUE; + return NULL; /* never return a literal: results are released with grin_destroy_string_value */ + } + auto pcol = _g->vproperties[label][pid]; + if (pcol == NULL) { + grin_error_code = INVALID_VALUE; + return NULL; /* same error contract as the checks above */ + } + auto col = static_cast(pcol); + + auto s = col->get_view(vid); + auto len = s.size() + 1; + char* out = new char[len]; + snprintf(out, len, "%.*s", static_cast<int>(s.size()), s.data()); /* bounded copy: the view's data() may not be NUL-terminated */ + return out; +} + +int grin_get_vertex_property_value_of_date32(GRIN_GRAPH g, GRIN_VERTEX v, + GRIN_VERTEX_PROPERTY vp) { + grin_error_code = INVALID_VALUE; + return 0; +} + +int grin_get_vertex_property_value_of_time32(GRIN_GRAPH g, GRIN_VERTEX v, + GRIN_VERTEX_PROPERTY vp) { + grin_error_code = INVALID_VALUE; + return 0;
+} + +long long int grin_get_vertex_property_value_of_timestamp64( + GRIN_GRAPH g, GRIN_VERTEX v, GRIN_VERTEX_PROPERTY vp) { + auto _g = static_cast(g); + auto label = v >> 32; + auto vid = v & (0xffffffff); + auto plabel = (vp >> 8) & (0xff); + auto pdt = (vp >> 16); + auto pid = vp & (0xff); + + if (label != plabel || pdt != GRIN_DATATYPE::Timestamp64) { + grin_error_code = INVALID_VALUE; + return 0; + } + + if (label >= _g->g.vertex_label_num_ || + pid >= _g->vproperties[label].size()) { + grin_error_code = INVALID_VALUE; + return 0; + } + auto pcol = _g->vproperties[label][pid]; + if (pcol == NULL) { + grin_error_code = INVALID_VALUE; + return 0; + } + auto col = static_cast(pcol); + return col->get_view(vid).milli_second; +} + +GRIN_VERTEX_TYPE grin_get_vertex_type_from_property(GRIN_GRAPH g, + GRIN_VERTEX_PROPERTY vp) { + return (vp >> 8) & (0xff); +} +#endif + +#if defined(GRIN_WITH_VERTEX_PROPERTY) && defined(GRIN_TRAIT_CONST_VALUE_PTR) +const void* grin_get_vertex_property_value(GRIN_GRAPH g, GRIN_VERTEX v, + GRIN_VERTEX_PROPERTY vp) { + auto plabel = (vp >> 8) & (0xff); + auto type = (vp >> 16); + auto pid = vp & (0xff); + + auto _g = static_cast(g); + + if (plabel >= _g->g.vertex_label_num_ || + pid >= _g->vproperties[plabel].size()) { + grin_error_code = INVALID_VALUE; + return 0; + } + auto col = _g->vproperties[plabel][pid]; + if (col == NULL) { + grin_error_code = UNKNOWN_DATATYPE; + return 0; + } + + auto vid = v & (0xffffffff); + + switch (type) { + case GRIN_DATATYPE::Int32: { + auto _col = static_cast(col); + return _col->buffer().data() + vid; + } + case GRIN_DATATYPE::Int64: { + auto _col = static_cast(col); + return _col->buffer().data() + vid; + } + case GRIN_DATATYPE::String: { + auto _col = static_cast(col); + return _col->buffer()[vid].data(); + } + case GRIN_DATATYPE::Timestamp64: { + auto _col = static_cast(col); + return _col->buffer().data() + vid; + } + case GRIN_DATATYPE::Double: { + auto _col = static_cast(col); + return 
_col->buffer().data() + vid; + } + default: + grin_error_code = UNKNOWN_DATATYPE; + return NULL; + } +} +#endif + +#ifdef GRIN_WITH_EDGE_PROPERTY +bool grin_equal_edge_property(GRIN_GRAPH g, GRIN_EDGE_PROPERTY ep1, + GRIN_EDGE_PROPERTY ep2) { + return ep1 == ep2; +} + +void grin_destroy_edge_property(GRIN_GRAPH g, GRIN_EDGE_PROPERTY ep) {} + +GRIN_DATATYPE grin_get_edge_property_datatype(GRIN_GRAPH g, + GRIN_EDGE_PROPERTY ep) { + auto _g = static_cast(g); + auto src_label_i = (ep >> 16) & 0xff; + const auto& src_label = _g->g.schema().get_vertex_label_name(src_label_i); + auto dst_label_i = (ep >> 8) & 0xff; + const auto& dst_label = _g->g.schema().get_vertex_label_name(dst_label_i); + auto edge_label_i = ep & 0xff; + const auto& edge_label = _g->g.schema().get_edge_label_name(edge_label_i); + const auto& type = + _g->g.schema().get_edge_properties(src_label, dst_label, edge_label); + auto idx = ep >> 24; + return _get_data_type(type[idx]); +} + +int grin_get_edge_property_value_of_int32(GRIN_GRAPH g, GRIN_EDGE e, + GRIN_EDGE_PROPERTY ep) { + auto _e = static_cast(e); + auto idx = ep >> 24; + if (idx > 0 || _get_data_type(_e->data.type) != GRIN_DATATYPE::Int32) { + grin_error_code = INVALID_VALUE; + return 0; + } + return _e->data.value.i; +} + +unsigned int grin_get_edge_property_value_of_uint32(GRIN_GRAPH g, GRIN_EDGE e, + GRIN_EDGE_PROPERTY ep) { + grin_error_code = INVALID_VALUE; + return 0; +} + +long long int grin_get_edge_property_value_of_int64(GRIN_GRAPH g, GRIN_EDGE e, + GRIN_EDGE_PROPERTY ep) { + auto _e = static_cast(e); + auto idx = ep >> 24; + if (idx > 0 || _get_data_type(_e->data.type) != GRIN_DATATYPE::Int64) { + grin_error_code = INVALID_VALUE; + return 0; + } + return _e->data.value.l; +} + +unsigned long long int grin_get_edge_property_value_of_uint64( + GRIN_GRAPH g, GRIN_EDGE e, GRIN_EDGE_PROPERTY ep) { + grin_error_code = INVALID_VALUE; + return 0; +} + +float grin_get_edge_property_value_of_float(GRIN_GRAPH g, GRIN_EDGE e, + 
GRIN_EDGE_PROPERTY ep) { + grin_error_code = INVALID_VALUE; + return 0.0; +} +double grin_get_edge_property_value_of_double(GRIN_GRAPH g, GRIN_EDGE e, + GRIN_EDGE_PROPERTY ep) { + auto _e = static_cast(e); + auto idx = ep >> 24; + if (idx > 0 || _get_data_type(_e->data.type) != GRIN_DATATYPE::Double) { + grin_error_code = INVALID_VALUE; + return 0.0; + } + return _e->data.value.db; +} + +const char* grin_get_edge_property_value_of_string(GRIN_GRAPH g, GRIN_EDGE e, + GRIN_EDGE_PROPERTY ep) { + auto _e = static_cast(e); + auto idx = ep >> 24; + if (idx > 0 || _get_data_type(_e->data.type) != GRIN_DATATYPE::String) { + grin_error_code = INVALID_VALUE; + return NULL; + } + auto s = _e->data.value.s; + auto len = s.size() + 1; + char* out = new char[len]; + snprintf(out, len, "%s", s.data()); + return out; +} + +int grin_get_edge_property_value_of_date32(GRIN_GRAPH g, GRIN_EDGE e, + GRIN_EDGE_PROPERTY ep) { + grin_error_code = INVALID_VALUE; + return 0; +} +int grin_get_edge_property_value_of_time32(GRIN_GRAPH g, GRIN_EDGE e, + GRIN_EDGE_PROPERTY ep) { + grin_error_code = INVALID_VALUE; + return 0; +} + +long long int grin_get_edge_property_value_of_timestamp64( + GRIN_GRAPH g, GRIN_EDGE e, GRIN_EDGE_PROPERTY ep) { + auto _e = static_cast(e); + auto idx = ep >> 24; + if (idx > 0 || _get_data_type(_e->data.type) != GRIN_DATATYPE::Timestamp64) { + grin_error_code = INVALID_VALUE; + return 0; + } + return _e->data.value.d.milli_second; +} + +GRIN_EDGE_TYPE grin_get_edge_type_from_property(GRIN_GRAPH g, + GRIN_EDGE_PROPERTY ep) { + return ep & (~0xff000000); +} +#endif + +#if defined(GRIN_WITH_EDGE_PROPERTY) && defined(GRIN_TRAIT_CONST_VALUE_PTR) +const void* grin_get_edge_property_value(GRIN_GRAPH g, GRIN_EDGE e, + GRIN_EDGE_PROPERTY ep) { + return NULL; +} +#endif \ No newline at end of file diff --git a/flex/engines/graph_db/grin/src/property/propertylist.cc b/flex/engines/graph_db/grin/src/property/propertylist.cc new file mode 100644 index 000000000000..a52639f105c9 
--- /dev/null +++ b/flex/engines/graph_db/grin/src/property/propertylist.cc @@ -0,0 +1,147 @@ +/** Copyright 2020 Alibaba Group Holding Limited. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "grin/src/predefine.h" + +#include "grin/include/property/propertylist.h" + +#ifdef GRIN_WITH_VERTEX_PROPERTY +GRIN_VERTEX_PROPERTY_LIST grin_get_vertex_property_list_by_type( + GRIN_GRAPH g, GRIN_VERTEX_TYPE vt) { + auto _g = static_cast(g); + auto& table = _g->g.get_vertex_table(vt); + + auto vertex_prop_num = table.col_num(); + GRIN_VERTEX_PROPERTY_LIST_T* vpl = new GRIN_VERTEX_PROPERTY_LIST_T(); + const auto& prop_types = table.column_types(); + for (size_t i = 0; i < vertex_prop_num; ++i) { + GRIN_VERTEX_PROPERTY vp; + vp = i; + vp += (vt * 1u) << 8; + vp += (_get_data_type(prop_types[i]) * 1u) << 16; + vpl->emplace_back(vp); + } + return vpl; +} + +size_t grin_get_vertex_property_list_size(GRIN_GRAPH g, + GRIN_VERTEX_PROPERTY_LIST vpl) { + auto _vpl = static_cast(vpl); + return _vpl->size(); +} + +GRIN_VERTEX_PROPERTY grin_get_vertex_property_from_list( + GRIN_GRAPH g, GRIN_VERTEX_PROPERTY_LIST vpl, size_t idx) { + auto _vpl = static_cast(vpl); + return idx < _vpl->size() ? (*_vpl)[idx] : GRIN_NULL_VERTEX_PROPERTY; /* out-of-range index yields the null handle, like the edge variant */ +} + +GRIN_VERTEX_PROPERTY_LIST grin_create_vertex_property_list(GRIN_GRAPH g) { + return new GRIN_VERTEX_PROPERTY_LIST_T(); +} + +void grin_destroy_vertex_property_list(GRIN_GRAPH g, + GRIN_VERTEX_PROPERTY_LIST vpl) { + auto _vpl = static_cast(vpl); + delete _vpl; +} + +bool
grin_insert_vertex_property_to_list(GRIN_GRAPH g, + GRIN_VERTEX_PROPERTY_LIST vpl, + GRIN_VERTEX_PROPERTY vp) { + auto _vpl = static_cast(vpl); + _vpl->push_back(vp); + return true; +} +#endif + +#ifdef GRIN_TRAIT_NATURAL_ID_FOR_VERTEX_PROPERTY +GRIN_VERTEX_PROPERTY grin_get_vertex_property_by_id( + GRIN_GRAPH g, GRIN_VERTEX_TYPE vt, GRIN_VERTEX_PROPERTY_ID pid) { + auto _g = static_cast(g); + + auto& table = _g->g.get_vertex_table(vt); + auto vertex_prop_num = table.col_num(); + + if (pid >= vertex_prop_num) { + return GRIN_NULL_VERTEX_PROPERTY; + } + const auto& prop_types = table.column_types(); + GRIN_VERTEX_PROPERTY vp; + vp = pid; + vp += (vt * 1u) << 8; + vp += (_get_data_type(prop_types[pid]) * 1u) << 16; + return vp; +} + +GRIN_VERTEX_PROPERTY_ID grin_get_vertex_property_id(GRIN_GRAPH g, + GRIN_VERTEX_TYPE vt, + GRIN_VERTEX_PROPERTY vp) { + return vp & (0xff); +} +#endif + +#ifdef GRIN_WITH_EDGE_PROPERTY +GRIN_EDGE_PROPERTY_LIST grin_get_edge_property_list_by_type(GRIN_GRAPH g, + GRIN_EDGE_TYPE et) { + GRIN_EDGE_PROPERTY_LIST_T* p = new GRIN_EDGE_PROPERTY_LIST_T(); + + auto _g = static_cast(g); + auto src_label_i = et >> 16; + auto src_label = _g->g.schema().get_vertex_label_name(src_label_i); + auto dst_label_i = (et >> 8) & (0xff); + auto dst_label = _g->g.schema().get_vertex_label_name(dst_label_i); + auto edge_label_i = et & 0xff; + auto edge_label = _g->g.schema().get_edge_label_name(edge_label_i); + auto sz = _g->g.schema() + .get_edge_properties(src_label, dst_label, edge_label) + .size(); + for (size_t i = 0; i < sz; ++i) { + p->emplace_back(et + (i << 24)); + } + return p; +} + +size_t grin_get_edge_property_list_size(GRIN_GRAPH g, + GRIN_EDGE_PROPERTY_LIST epl) { + auto _epl = static_cast(epl); + return _epl->size(); +} + +GRIN_EDGE_PROPERTY grin_get_edge_property_from_list(GRIN_GRAPH g, + GRIN_EDGE_PROPERTY_LIST epl, + size_t idx) { + auto _epl = static_cast(epl); + if (_epl->size() <= idx) { + return GRIN_NULL_EDGE_PROPERTY; + } + return 
(*_epl)[idx]; +} + +GRIN_EDGE_PROPERTY_LIST grin_create_edge_property_list(GRIN_GRAPH g) { + return new GRIN_EDGE_PROPERTY_LIST_T(); +} + +void grin_destroy_edge_property_list(GRIN_GRAPH g, + GRIN_EDGE_PROPERTY_LIST epl) { + auto _epl = static_cast(epl); + delete _epl; +} + +bool grin_insert_edge_property_to_list(GRIN_GRAPH g, + GRIN_EDGE_PROPERTY_LIST epl, + GRIN_EDGE_PROPERTY ep) { + auto _epl = static_cast(epl); + _epl->emplace_back(ep); + return true; +} +#endif diff --git a/flex/engines/graph_db/grin/src/property/row.cc b/flex/engines/graph_db/grin/src/property/row.cc new file mode 100644 index 000000000000..c81af3f08099 --- /dev/null +++ b/flex/engines/graph_db/grin/src/property/row.cc @@ -0,0 +1,268 @@ +/** Copyright 2020 Alibaba Group Holding Limited. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#include "grin/src/predefine.h" + +#include "grin/include/property/row.h" + +#ifdef GRIN_ENABLE_ROW +void grin_destroy_row(GRIN_GRAPH g, GRIN_ROW r) { + auto _r = static_cast(r); + delete _r; +} + +int grin_get_int32_from_row(GRIN_GRAPH g, GRIN_ROW r, size_t idx) { + auto _r = static_cast(r); + return *static_cast((*_r)[idx]); +} + +unsigned int grin_get_uint32_from_row(GRIN_GRAPH g, GRIN_ROW r, size_t idx) { + auto _r = static_cast(r); + return *static_cast((*_r)[idx]); +} + +long long int grin_get_int64_from_row(GRIN_GRAPH g, GRIN_ROW r, size_t idx) { + auto _r = static_cast(r); + return *static_cast((*_r)[idx]); +} + +unsigned long long int grin_get_uint64_from_row(GRIN_GRAPH g, GRIN_ROW r, + size_t idx) { + auto _r = static_cast(r); + return *static_cast((*_r)[idx]); +} + +float grin_get_float_from_row(GRIN_GRAPH g, GRIN_ROW r, size_t idx) { + auto _r = static_cast(r); + return *static_cast((*_r)[idx]); +} + +double grin_get_double_from_row(GRIN_GRAPH g, GRIN_ROW r, size_t idx) { + auto _r = static_cast(r); + return *static_cast((*_r)[idx]); +} + +const char* grin_get_string_from_row(GRIN_GRAPH g, GRIN_ROW r, size_t idx) { + auto _r = static_cast(r); + auto s = static_cast((*_r)[idx]); + return s; +} + +int grin_get_date32_from_row(GRIN_GRAPH g, GRIN_ROW r, size_t idx) { + auto _r = static_cast(r); + return *static_cast((*_r)[idx]); +} + +int grin_get_time32_from_row(GRIN_GRAPH g, GRIN_ROW r, size_t idx) { + auto _r = static_cast(r); + return *static_cast((*_r)[idx]); +} + +long long int grin_get_timestamp64_from_row(GRIN_GRAPH g, GRIN_ROW r, + size_t idx) { + auto _r = static_cast(r); + return (*static_cast((*_r)[idx])); +} + +GRIN_ROW grin_create_row(GRIN_GRAPH g) { + auto r = new GRIN_ROW_T(); + return r; +} + +bool grin_insert_int32_to_row(GRIN_GRAPH g, GRIN_ROW r, int value) { + auto _r = static_cast(r); + _r->push_back(new int32_t(value)); + return true; +} + +bool grin_insert_uint32_to_row(GRIN_GRAPH g, GRIN_ROW r, unsigned int value) { + auto _r 
= static_cast(r); + _r->push_back(new uint32_t(value)); + return true; +} + +bool grin_insert_int64_to_row(GRIN_GRAPH g, GRIN_ROW r, long long int value) { + auto _r = static_cast(r); + _r->push_back(new int64_t(value)); + return true; +} + +bool grin_insert_uint64_to_row(GRIN_GRAPH g, GRIN_ROW r, + unsigned long long int value) { + auto _r = static_cast(r); + _r->push_back(new uint64_t(value)); + return true; +} + +bool grin_insert_float_to_row(GRIN_GRAPH g, GRIN_ROW r, float value) { + auto _r = static_cast(r); + _r->push_back(new float(value)); + return true; +} + +bool grin_insert_double_to_row(GRIN_GRAPH g, GRIN_ROW r, double value) { + auto _r = static_cast(r); + _r->push_back(new double(value)); + return true; +} + +bool grin_insert_string_to_row(GRIN_GRAPH g, GRIN_ROW r, const char* value) { + auto _r = static_cast(r); + _r->push_back(value); + return true; +} + +bool grin_insert_date32_to_row(GRIN_GRAPH g, GRIN_ROW r, int value) { + auto _r = static_cast(r); + _r->push_back(new int32_t(value)); + return true; +} + +bool grin_insert_time32_to_row(GRIN_GRAPH g, GRIN_ROW r, int value) { + auto _r = static_cast(r); + _r->push_back(new int32_t(value)); + return true; +} + +bool grin_insert_timestamp64_to_row(GRIN_GRAPH g, GRIN_ROW r, + long long int value) { + auto _r = static_cast(r); + _r->push_back(new int64_t(value)); + return true; +} +#endif + +#if defined(GRIN_ENABLE_ROW) && defined(GRIN_TRAIT_CONST_VALUE_PTR) +/** @brief the value of a property from row by its position in row */ +const void* grin_get_value_from_row(GRIN_GRAPH g, GRIN_ROW r, GRIN_DATATYPE dt, + size_t idx) { + auto _r = static_cast(r); + switch (dt) { + case GRIN_DATATYPE::Int32: + return static_cast((*_r)[idx]); + case GRIN_DATATYPE::UInt32: + return static_cast((*_r)[idx]); + case GRIN_DATATYPE::Int64: + return static_cast((*_r)[idx]); + case GRIN_DATATYPE::UInt64: + return static_cast((*_r)[idx]); + case GRIN_DATATYPE::Float: + return static_cast((*_r)[idx]); + case 
GRIN_DATATYPE::Double: + return static_cast((*_r)[idx]); + case GRIN_DATATYPE::String: + return static_cast((*_r)[idx]); + case GRIN_DATATYPE::Date32: + return static_cast((*_r)[idx]); + case GRIN_DATATYPE::Time32: + return static_cast((*_r)[idx]); + case GRIN_DATATYPE::Timestamp64: + return static_cast((*_r)[idx]); + default: + return NULL; + } + return NULL; +} +#endif +///@} + +#if defined(GRIN_WITH_VERTEX_PROPERTY) && defined(GRIN_ENABLE_ROW) +GRIN_ROW grin_get_vertex_row(GRIN_GRAPH g, GRIN_VERTEX v) { + auto _g = static_cast(g); + auto vid = v & (0xffffffff); + auto label = v >> 32; + + if (label >= _g->g.vertex_label_num_) { + return NULL; + } + auto& table = _g->g.get_vertex_table(label); + auto prop_size = table.col_num(); + const auto& types = table.column_types(); + auto r = new GRIN_ROW_T(); + for (size_t prop_id = 0; prop_id < prop_size; prop_id++) { + auto col = _g->vproperties[label][prop_id]; + auto type = _get_data_type(types[prop_id]); + switch (type) { + case GRIN_DATATYPE::Int32: { + auto _col = static_cast(col); + r->emplace_back(_col->buffer().data() + vid); + break; + } + case GRIN_DATATYPE::Int64: { + auto _col = static_cast(col); + r->emplace_back(_col->buffer().data() + vid); + break; + } + case GRIN_DATATYPE::String: { + auto _col = static_cast(col); + auto s = _col->get_view(vid); + auto len = s.size() + 1; + char* out = new char[len]; + snprintf(out, len, "%s", s.data()); + r->emplace_back(out); + break; + } + case GRIN_DATATYPE::Timestamp64: { + auto _col = static_cast(col); + r->emplace_back(_col->buffer().data() + vid); + break; + } + case GRIN_DATATYPE::Double: { + auto _col = static_cast(col); + r->emplace_back(_col->buffer().data() + vid); + break; + } + default: + r->emplace_back(static_cast(NULL)); + } + } + return r; +} +#endif + +#if defined(GRIN_WITH_EDGE_PROPERTY) && defined(GRIN_ENABLE_ROW) +GRIN_ROW grin_get_edge_row(GRIN_GRAPH g, GRIN_EDGE e) { + auto _e = static_cast(e); + auto type = _get_data_type(_e->data.type); + 
GRIN_ROW_T* r = new GRIN_ROW_T(); + switch (type) { + case GRIN_DATATYPE::Int32: { + r->emplace_back(new int(_e->data.value.i)); + break; + } + case GRIN_DATATYPE::Int64: { + r->emplace_back(new int64_t(_e->data.value.l)); + break; + } + case GRIN_DATATYPE::String: { + auto s = _e->data.value.s; + auto len = s.size() + 1; + char* out = new char[len]; + snprintf(out, len, "%s", s.data()); + + r->emplace_back(out); + break; + } + case GRIN_DATATYPE::Timestamp64: { + r->emplace_back(new int64_t(_e->data.value.d.milli_second)); + break; + } + case GRIN_DATATYPE::Double: { + r->emplace_back(new double(_e->data.value.db)); + break; + } + default: + r->emplace_back(static_cast(NULL)); + } + return r; +} +#endif \ No newline at end of file diff --git a/flex/engines/graph_db/grin/src/property/topology.cc b/flex/engines/graph_db/grin/src/property/topology.cc new file mode 100644 index 000000000000..5a58e07ecb03 --- /dev/null +++ b/flex/engines/graph_db/grin/src/property/topology.cc @@ -0,0 +1,75 @@ +/** Copyright 2020 Alibaba Group Holding Limited. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#include "grin/src/predefine.h" + +#include "grin/include/property/topology.h" + +#ifdef GRIN_WITH_VERTEX_PROPERTY +size_t grin_get_vertex_num_by_type(GRIN_GRAPH g, GRIN_VERTEX_TYPE vt) { + auto _g = static_cast(g); + return _g->g.vertex_num(vt); +} +#endif + +#ifdef GRIN_WITH_EDGE_PROPERTY +size_t grin_get_edge_num_by_type(GRIN_GRAPH g, GRIN_EDGE_TYPE et) { + auto src_label = et >> 16; + auto dst_label = (et >> 8) & (0xff); + auto edge_label = et & (0xff); + auto _g = static_cast(g); + + auto oe = _g->g.get_oe_csr(src_label, dst_label, edge_label); + auto vertex_num = _g->g.vertex_num(src_label); + size_t edge_num = 0; + for (size_t i = 0; i < vertex_num; ++i) { + edge_num += oe->edge_iter(i)->size(); + } + if (edge_num != 0) { + return edge_num; + } + auto ie = _g->g.get_ie_csr(dst_label, src_label, edge_label); + vertex_num = _g->g.vertex_num(dst_label); + for (size_t i = 0; i < vertex_num; ++i) { + edge_num += ie->edge_iter(i)->size(); + } + return edge_num; +} +#endif + +#if defined(GRIN_ENABLE_VERTEX_LIST) && defined(GRIN_WITH_VERTEX_PROPERTY) +GRIN_VERTEX_LIST grin_get_vertex_list_by_type(GRIN_GRAPH g, + GRIN_VERTEX_TYPE vt) { + GRIN_VERTEX_LIST vl; + vl.label = vt; + auto _g = static_cast(g); + vl.vertex_num = _g->g.vertex_num(vt); + return vl; +} +#endif + +#if defined(GRIN_ENABLE_EDGE_LIST) && defined(GRIN_WITH_EDGE_PROPERTY) +GRIN_EDGE_LIST grin_get_edge_list_by_type(GRIN_GRAPH, GRIN_EDGE_TYPE); +#endif + +#if defined(GRIN_ENABLE_ADJACENT_LIST) && defined(GRIN_WITH_EDGE_PROPERTY) +GRIN_ADJACENT_LIST grin_get_adjacent_list_by_edge_type(GRIN_GRAPH g, + GRIN_DIRECTION dir, + GRIN_VERTEX v, + GRIN_EDGE_TYPE et) { + GRIN_ADJACENT_LIST adj_list; + adj_list.v = v; + adj_list.dir = dir; + adj_list.edge_label = et; + return adj_list; +} +#endif \ No newline at end of file diff --git a/flex/engines/graph_db/grin/src/property/type.cc b/flex/engines/graph_db/grin/src/property/type.cc new file mode 100644 index 000000000000..ea40c524b980 --- /dev/null +++ 
b/flex/engines/graph_db/grin/src/property/type.cc @@ -0,0 +1,275 @@ +/** Copyright 2020 Alibaba Group Holding Limited. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "grin/src/predefine.h" + +#include "grin/include/property/type.h" + +#ifdef GRIN_WITH_VERTEX_PROPERTY +// Vertex type +bool grin_equal_vertex_type(GRIN_GRAPH g, GRIN_VERTEX_TYPE vt1, + GRIN_VERTEX_TYPE vt2) { + return (vt1 == vt2); +} + +GRIN_VERTEX_TYPE grin_get_vertex_type(GRIN_GRAPH g, GRIN_VERTEX v) { + return v >> 32; +} + +void grin_destroy_vertex_type(GRIN_GRAPH g, GRIN_VERTEX_TYPE vt) {} + +// Vertex type list +GRIN_VERTEX_TYPE_LIST grin_get_vertex_type_list(GRIN_GRAPH g) { + auto _g = static_cast(g); + auto vtl = new GRIN_VERTEX_TYPE_LIST_T(); + auto vertex_label_num = _g->g.schema().vertex_label_num(); + for (size_t idx = 0; idx < vertex_label_num; ++idx) { + vtl->push_back(idx); + } + return vtl; +} + +void grin_destroy_vertex_type_list(GRIN_GRAPH g, GRIN_VERTEX_TYPE_LIST vtl) { + auto _vtl = static_cast(vtl); + delete _vtl; +} + +GRIN_VERTEX_TYPE_LIST grin_create_vertex_type_list(GRIN_GRAPH g) { + auto vtl = new GRIN_VERTEX_TYPE_LIST_T(); + return vtl; +} + +bool grin_insert_vertex_type_to_list(GRIN_GRAPH g, GRIN_VERTEX_TYPE_LIST vtl, + GRIN_VERTEX_TYPE vt) { + auto _vtl = static_cast(vtl); + _vtl->push_back(vt); + return true; +} + +size_t grin_get_vertex_type_list_size(GRIN_GRAPH g, GRIN_VERTEX_TYPE_LIST vtl) { + auto _vtl = static_cast(vtl); + return _vtl->size(); +} + +GRIN_VERTEX_TYPE 
grin_get_vertex_type_from_list(GRIN_GRAPH g, + GRIN_VERTEX_TYPE_LIST vtl, + size_t idx) { + auto _vtl = static_cast(vtl); + return (*_vtl)[idx]; +} +#endif + +#ifdef GRIN_WITH_VERTEX_TYPE_NAME +const char* grin_get_vertex_type_name(GRIN_GRAPH g, GRIN_VERTEX_TYPE vt) { + auto _g = static_cast(g); + std::string type_name = _g->g.schema().get_vertex_label_name(vt); + auto len = type_name.length() + 1; + char* out = new char[len]; + snprintf(out, len, "%s", type_name.c_str()); + return out; +} + +GRIN_VERTEX_TYPE grin_get_vertex_type_by_name(GRIN_GRAPH g, const char* name) { + auto _g = static_cast(g); + std::string type_name(name); + if ((!_g->g.schema().contains_vertex_label(type_name))) { + return GRIN_NULL_VERTEX_TYPE; + } + auto type = _g->g.schema().get_vertex_label_id(type_name); + return type; +} +#endif + +#ifdef GRIN_TRAIT_NATURAL_ID_FOR_VERTEX_TYPE +GRIN_VERTEX_TYPE_ID grin_get_vertex_type_id(GRIN_GRAPH g, GRIN_VERTEX_TYPE vt) { + return vt; +} + +GRIN_VERTEX_TYPE grin_get_vertex_type_by_id(GRIN_GRAPH g, + GRIN_VERTEX_TYPE_ID tid) { + return tid; +} +#endif + +#ifdef GRIN_WITH_EDGE_PROPERTY +// Edge type +bool grin_equal_edge_type(GRIN_GRAPH g, GRIN_EDGE_TYPE et1, + GRIN_EDGE_TYPE et2) { + return (et1 == et2); +} + +GRIN_EDGE_TYPE grin_get_edge_type(GRIN_GRAPH g, GRIN_EDGE e) { + auto _e = static_cast(e); + auto src_label = _e->src >> 32; + auto dst_label = _e->dst >> 32; + return _e->label + (src_label << 16) + (dst_label << 8); +} + +void grin_destroy_edge_type(GRIN_GRAPH g, GRIN_EDGE_TYPE et) { + // do nothing +} + +// Edge type list +GRIN_EDGE_TYPE_LIST grin_get_edge_type_list(GRIN_GRAPH g) { + auto _g = static_cast(g); + auto etl = new GRIN_EDGE_TYPE_LIST_T(); + auto edge_label_num = _g->g.edge_label_num_; + auto vertex_label_num = _g->g.vertex_label_num_; + for (size_t src_label_i = 0; src_label_i < vertex_label_num; ++src_label_i) { + const auto& src_label = _g->g.schema().get_vertex_label_name(src_label_i); + for (size_t dst_label_i = 0; dst_label_i 
< vertex_label_num; + ++dst_label_i) { + const auto& dst_label = _g->g.schema().get_vertex_label_name(dst_label_i); + for (size_t edge_label_i = 0; edge_label_i < edge_label_num; + ++edge_label_i) { + const auto& edge_label = + _g->g.schema().get_edge_label_name(edge_label_i); + if (_g->g.schema().exist(src_label, dst_label, edge_label)) { + auto label = (src_label_i << 16) + (dst_label_i << 8) + edge_label_i; + etl->push_back(label); + } + } + } + } + return etl; +} + +void grin_destroy_edge_type_list(GRIN_GRAPH g, GRIN_EDGE_TYPE_LIST etl) { + auto _etl = static_cast(etl); + delete _etl; +} + +GRIN_EDGE_TYPE_LIST grin_create_edge_type_list(GRIN_GRAPH g) { + auto etl = new GRIN_EDGE_TYPE_LIST_T(); + return etl; +} + +bool grin_insert_edge_type_to_list(GRIN_GRAPH g, GRIN_EDGE_TYPE_LIST etl, + GRIN_EDGE_TYPE et) { + auto _etl = static_cast(etl); + _etl->push_back(et); + return true; +} + +size_t grin_get_edge_type_list_size(GRIN_GRAPH g, GRIN_EDGE_TYPE_LIST etl) { + auto _etl = static_cast(etl); + return _etl->size(); +} + +GRIN_EDGE_TYPE grin_get_edge_type_from_list(GRIN_GRAPH g, + GRIN_EDGE_TYPE_LIST etl, + size_t idx) { + auto _etl = static_cast(etl); + return (*_etl)[idx]; +} +#endif + +#ifdef GRIN_WITH_EDGE_TYPE_NAME +const char* grin_get_edge_type_name(GRIN_GRAPH g, GRIN_EDGE_TYPE et) { + auto _g = static_cast(g); + const auto& schema = _g->g.schema(); + auto edge_label_i = et & 0xff; + auto src_label_i = et >> 16; + auto dst_label_i = (et >> 8) & 0xff; + const auto& edge_label = schema.get_edge_label_name(edge_label_i); + const auto& src_label = schema.get_vertex_label_name(src_label_i); + const auto& dst_label = schema.get_vertex_label_name(dst_label_i); + auto label = src_label + "#" + dst_label + "#" + edge_label; + auto len = label.length() + 1; + char* out = new char[len]; + snprintf(out, len, "%s", label.c_str()); + return out; +} + +GRIN_EDGE_TYPE grin_get_edge_type_by_name(GRIN_GRAPH g, const char* name) { + auto _g = static_cast(g); + + const auto& 
schema = _g->g.schema(); + std::vector vec; + size_t len = strlen(name); + std::string ss{}; + for (size_t i = 0; i <= len; ++i) { + if (name[i] == '#' || name[i] == '\0') { + vec.emplace_back(ss); + ss = ""; + } else { + ss += name[i]; + } + } + + if (vec.size() != 3) { + return GRIN_NULL_EDGE_TYPE; + } + if ((!schema.contains_vertex_label(vec[0])) || + (!schema.contains_vertex_label(vec[1])) || + (!schema.contains_edge_label(vec[2]))) { + return GRIN_NULL_EDGE_TYPE; + } + auto src_label = schema.get_vertex_label_id(vec[0]); + auto dst_label = schema.get_vertex_label_id(vec[1]); + auto edge_label = schema.get_edge_label_id(vec[2]); + return (src_label << 16) + (dst_label << 8) + edge_label; +} +#endif + +#ifdef GRIN_TRAIT_NATURAL_ID_FOR_EDGE_TYPE +GRIN_EDGE_TYPE_ID grin_get_edge_type_id(GRIN_GRAPH g, GRIN_EDGE_TYPE et) { + return et; +} + +GRIN_EDGE_TYPE grin_get_edge_type_by_id(GRIN_GRAPH g, GRIN_EDGE_TYPE_ID etid) { + return etid; +} +#endif + +#if defined(GRIN_WITH_VERTEX_PROPERTY) && defined(GRIN_WITH_EDGE_PROPERTY) +/** @brief the src vertex type list */ +GRIN_VERTEX_TYPE_LIST grin_get_src_types_by_edge_type(GRIN_GRAPH g, + GRIN_EDGE_TYPE et) { + auto vtl = new GRIN_VERTEX_TYPE_LIST_T(); + vtl->emplace_back(et >> 16); + return vtl; +} + +/** @brief get the dst vertex type list */ +GRIN_VERTEX_TYPE_LIST grin_get_dst_types_by_edge_type(GRIN_GRAPH g, + GRIN_EDGE_TYPE et) { + auto vtl = new GRIN_VERTEX_TYPE_LIST_T(); + vtl->emplace_back((et >> 8) & (0xff)); + return vtl; +} + +/** @brief get the edge type list related to a given pair of vertex types */ +GRIN_EDGE_TYPE_LIST grin_get_edge_types_by_vertex_type_pair( + GRIN_GRAPH g, GRIN_VERTEX_TYPE vt1, GRIN_VERTEX_TYPE vt2) { + auto _g = static_cast(g); + + auto vtl = new GRIN_EDGE_TYPE_LIST_T(); // returned as GRIN_EDGE_TYPE_LIST, so allocate the edge list type + const auto& schema = _g->g.schema(); + auto edge_label_num = _g->g.edge_label_num_; + std::string src_label = + schema.get_vertex_label_name(static_cast(vt1)); + std::string dst_label = +
schema.get_vertex_label_name(static_cast(vt2)); + auto label = (vt1 << 16) + (vt2 << 8); + for (size_t edge_label_i = 0; edge_label_i != edge_label_num; + ++edge_label_i) { + std::string edge_label = + schema.get_edge_label_name(static_cast(edge_label_i)); // edge_label_i ranges over edge labels, not vertex labels + if (schema.exist(src_label, dst_label, edge_label)) { + vtl->push_back(label + edge_label_i); + } + } + return vtl; +} +#endif +///@} diff --git a/flex/engines/graph_db/grin/src/topology/adjacentlist.cc b/flex/engines/graph_db/grin/src/topology/adjacentlist.cc new file mode 100644 index 000000000000..112da2e552d3 --- /dev/null +++ b/flex/engines/graph_db/grin/src/topology/adjacentlist.cc @@ -0,0 +1,125 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+*/ + +#include "grin/src/predefine.h" + +#include "grin/include/topology/adjacentlist.h" + +#if defined(GRIN_ENABLE_ADJACENT_LIST) && !defined(GRIN_ENABLE_EDGE_PROPERTY) +GRIN_ADJACENT_LIST grin_get_adjacent_list(GRIN_GRAPH g, GRIN_DIRECTION dir, + GRIN_VERTEX v) { + GRIN_ADJACENT_LIST alt; + alt.dir = dir; + alt.v = v; + return alt; +} +#endif + +#ifdef GRIN_ENABLE_ADJACENT_LIST +void grin_destroy_adjacent_list(GRIN_GRAPH g, GRIN_ADJACENT_LIST adj_list) {} +#endif + +#ifdef GRIN_ENABLE_ADJACENT_LIST_ITERATOR +GRIN_ADJACENT_LIST_ITERATOR grin_get_adjacent_list_begin( + GRIN_GRAPH g, GRIN_ADJACENT_LIST adj_list) { + auto _g = static_cast(g); + GRIN_ADJACENT_LIST_ITERATOR iter; + auto& v = adj_list.v; + iter.adj_list = adj_list; + auto label = adj_list.edge_label; + auto src_label = label >> 16; + auto dst_label = (label >> 8) & 0xff; + auto edge_label = label & 0xff; + auto v_label = v >> 32; + auto vid = v & (0xffffffff); + if (adj_list.dir == GRIN_DIRECTION::OUT) { + if (src_label == v_label) { + iter.edge_iter = + _g->g.get_outgoing_edges_raw(src_label, vid, dst_label, edge_label); + + } else { + iter.edge_iter = nullptr; + } + } else { + if (dst_label == v_label) { + iter.edge_iter = + _g->g.get_incoming_edges_raw(dst_label, vid, src_label, edge_label); + } else { + iter.edge_iter = nullptr; + } + } + return iter; +} + +void grin_destroy_adjacent_list_iter(GRIN_GRAPH g, + GRIN_ADJACENT_LIST_ITERATOR iter) { + if (iter.edge_iter != nullptr) { + auto edge_iter = + static_cast(iter.edge_iter); + delete edge_iter; + } +} + +void grin_get_next_adjacent_list_iter(GRIN_GRAPH g, + GRIN_ADJACENT_LIST_ITERATOR iter) { + auto edge_iter = + static_cast(iter.edge_iter); + edge_iter->next(); +} + +bool grin_is_adjacent_list_end(GRIN_GRAPH g, GRIN_ADJACENT_LIST_ITERATOR iter) { + if (iter.edge_iter == nullptr) { + return true; + } + auto edge_iter = + static_cast(iter.edge_iter); + return !edge_iter->is_valid(); +} + +GRIN_VERTEX grin_get_neighbor_from_adjacent_list_iter( + 
GRIN_GRAPH g, GRIN_ADJACENT_LIST_ITERATOR iter) { + auto edge_iter = + static_cast(iter.edge_iter); + auto vid = edge_iter->get_neighbor(); + auto label = iter.adj_list.edge_label; + + if (iter.adj_list.dir == GRIN_DIRECTION::OUT) { + label = (label >> 8) & 0xff; + } else { + label = label >> 16; + } + return ((label * 1ull) << 32) + vid; +} + +GRIN_EDGE grin_get_edge_from_adjacent_list_iter( + GRIN_GRAPH g, GRIN_ADJACENT_LIST_ITERATOR iter) { + auto edge_iter = + static_cast(iter.edge_iter); + GRIN_EDGE_T* edge = new GRIN_EDGE_T(); + auto nbr = grin_get_neighbor_from_adjacent_list_iter(g, iter); + if (iter.adj_list.dir == GRIN_DIRECTION::IN) { + edge->src = nbr; + edge->dst = iter.adj_list.v; + } else { + edge->src = iter.adj_list.v; + edge->dst = nbr; + } + edge->dir = iter.adj_list.dir; + edge->data = edge_iter->get_data(); + auto label = iter.adj_list.edge_label; + edge->label = label & 0xff; + return edge; +} +#endif \ No newline at end of file diff --git a/flex/engines/graph_db/grin/src/topology/structure.cc b/flex/engines/graph_db/grin/src/topology/structure.cc new file mode 100644 index 000000000000..469cfa672c33 --- /dev/null +++ b/flex/engines/graph_db/grin/src/topology/structure.cc @@ -0,0 +1,146 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "grin/src/predefine.h" + +#include "grin/include/topology/structure.h" + +/** + * @brief Get a (non-partitioned) graph from storage + * @param uri The URI of the graph. 
+ * Current URI format: + * flex://{path_to_yaml} + * @return A graph handle. + */ +GRIN_GRAPH grin_get_graph_from_storage(const char* uri) { + std::string _uri(uri); + std::string::size_type pos = _uri.find("://"); + if (pos == std::string::npos) { + return GRIN_NULL_GRAPH; + } + auto protocol = _uri.substr(0, pos); + if (protocol != "flex") { + return GRIN_NULL_GRAPH; + } + _uri = _uri.substr(pos + 3); + std::string graph_schema_path = _uri + "/modern_graph.yaml"; + std::string data_path = uri; + std::string bulk_load_config_path = _uri + "/bulk_load.yaml"; + if (!std::filesystem::exists(graph_schema_path) || + !(std::filesystem::exists(bulk_load_config_path))) { + return GRIN_NULL_GRAPH; + } + auto ret = gs::Schema::LoadFromYaml(graph_schema_path, bulk_load_config_path); + const auto& schema = std::get<0>(ret); + auto& vertex_files = std::get<1>(ret); + + auto& edge_files = std::get<2>(ret); + + GRIN_GRAPH_T* g = new GRIN_GRAPH_T(); + g->g.Init(schema, vertex_files, edge_files); + init_cache(g); + return g; +} + +void grin_destroy_graph(GRIN_GRAPH g) { + auto _g = static_cast(g); + delete _g; +} + +// Graph +#if defined(GRIN_ASSUME_HAS_DIRECTED_GRAPH) && \ + defined(GRIN_ASSUME_HAS_UNDIRECTED_GRAPH) +bool grin_is_directed(GRIN_GRAPH g) { return true; } +#endif + +#ifdef GRIN_ASSUME_HAS_MULTI_EDGE_GRAPH +bool grin_is_multigraph(GRIN_GRAPH) { return true; } +#endif + +#ifndef GRIN_WITH_VERTEX_PROPERTY +size_t grin_get_vertex_num(GRIN_GRAPH); +#endif + +#ifndef GRIN_WITH_EDGE_PROPERTY +size_t grin_get_edge_num(GRIN_GRAPH); +#endif + +// Vertex +void grin_destroy_vertex(GRIN_GRAPH, GRIN_VERTEX v) {} + +bool grin_equal_vertex(GRIN_GRAPH g, GRIN_VERTEX v1, GRIN_VERTEX v2) { + return v1 == v2; +} + +// Data +#ifdef GRIN_WITH_VERTEX_DATA +GRIN_DATATYPE grin_get_vertex_data_datatype(GRIN_GRAPH, GRIN_VERTEX); + +const void* grin_get_vertex_data_value(GRIN_GRAPH, GRIN_VERTEX); +#endif + +// Edge +void grin_destroy_edge(GRIN_GRAPH, GRIN_EDGE e) { + auto _e = static_cast(e); 
+ delete _e; +} + +GRIN_VERTEX grin_get_src_vertex_from_edge(GRIN_GRAPH, GRIN_EDGE e) { + auto _e = static_cast(e); + auto v = _e->src; + return v; +} + +GRIN_VERTEX grin_get_dst_vertex_from_edge(GRIN_GRAPH, GRIN_EDGE e) { + auto _e = static_cast(e); + auto v = _e->dst; + return v; +} + +#ifdef GRIN_WITH_EDGE_DATA +GRIN_DATATYPE grin_get_edge_data_datatype(GRIN_GRAPH, GRIN_EDGE e) { + auto _e = static_cast(e); + auto type = _e->data.type; + return _get_data_type(type); +} + +const void* grin_get_edge_data_value(GRIN_GRAPH, GRIN_EDGE e) { + auto _e = static_cast(e); + auto type = _e->data.type; + switch (_get_data_type(type)) { + case GRIN_DATATYPE::Int32: { + return new int32_t(_e->data.value.i); + } + case GRIN_DATATYPE::Int64: { + return new int64_t(_e->data.value.l); + } + case GRIN_DATATYPE::Double: { + return new double(_e->data.value.db); + } + case GRIN_DATATYPE::String: { + auto s = _e->data.value.s; + auto len = s.size() + 1; + char* out = new char[len]; + snprintf(out, len, "%s", s.data()); + return out; + } + case GRIN_DATATYPE::Timestamp64: { + return new int64_t(_e->data.value.d.milli_second); + } + default: + return GRIN_NULL_EDGE_DATA; + } +} +#endif \ No newline at end of file diff --git a/flex/engines/graph_db/grin/src/topology/vertexlist.cc b/flex/engines/graph_db/grin/src/topology/vertexlist.cc new file mode 100644 index 000000000000..413525e24980 --- /dev/null +++ b/flex/engines/graph_db/grin/src/topology/vertexlist.cc @@ -0,0 +1,58 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "grin/src/predefine.h" + +#include "grin/include/topology/vertexlist.h" + +#if defined(GRIN_ENABLE_VERTEX_LIST) && !defined(GRIN_WITH_VERTEX_PROPERTY) +GRIN_VERTEX_LIST grin_get_vertex_list(GRIN_GRAPH g) {} +#endif + +#ifdef GRIN_ENABLE_VERTEX_LIST +void grin_destroy_vertex_list(GRIN_GRAPH g, GRIN_VERTEX_LIST vl) {} +#endif + +#ifdef GRIN_ENABLE_VERTEX_LIST_ITERATOR +GRIN_VERTEX_LIST_ITERATOR grin_get_vertex_list_begin(GRIN_GRAPH g, + GRIN_VERTEX_LIST vl) { + GRIN_VERTEX_LIST_ITERATOR vlt; + vlt.cur_vid = new uint32_t(0); + vlt.vertex_list = vl; + return vlt; +} + +void grin_destroy_vertex_list_iter(GRIN_GRAPH g, + GRIN_VERTEX_LIST_ITERATOR iter) { + delete iter.cur_vid; +} + +void grin_get_next_vertex_list_iter(GRIN_GRAPH g, + GRIN_VERTEX_LIST_ITERATOR iter) { + if (*iter.cur_vid < iter.vertex_list.vertex_num) + ++(*iter.cur_vid); +} + +bool grin_is_vertex_list_end(GRIN_GRAPH g, GRIN_VERTEX_LIST_ITERATOR iter) { + return *iter.cur_vid == iter.vertex_list.vertex_num; +} + +GRIN_VERTEX grin_get_vertex_from_iter(GRIN_GRAPH g, + GRIN_VERTEX_LIST_ITERATOR iter) { + auto label = iter.vertex_list.label; + auto vid = *iter.cur_vid; + return ((label * 1ull) << 32) + vid; +} +#endif \ No newline at end of file diff --git a/flex/engines/graph_db/grin/test/test.c b/flex/engines/graph_db/grin/test/test.c new file mode 100644 index 000000000000..4612d041885b --- /dev/null +++ b/flex/engines/graph_db/grin/test/test.c @@ -0,0 +1,1065 @@ +#include +#include +#include +#include +#include + +#include "grin/predefine.h" + +#include "grin/include/common/error.h" +#include "grin/include/index/internal_id.h" +#include "grin/include/index/label.h" +#include "grin/include/index/order.h" +#include "grin/include/index/pk.h" +#include "grin/include/partition/partition.h" +#include "grin/include/partition/reference.h" +#include "grin/include/partition/topology.h" +#include 
"grin/include/property/partition.h" +#include "grin/include/property/primarykey.h" +#include "grin/include/property/property.h" +#include "grin/include/property/propertylist.h" +#include "grin/include/property/row.h" +#include "grin/include/property/topology.h" +#include "grin/include/property/type.h" +#include "grin/include/topology/adjacentlist.h" +#include "grin/include/topology/edgelist.h" +#include "grin/include/topology/structure.h" +#include "grin/include/topology/vertexlist.h" + +#define eps (1e-8) +#define FOR_VERTEX_BEGIN(g, vl, v) \ + GRIN_VERTEX_LIST_ITERATOR __vli = grin_get_vertex_list_begin(g, vl); \ + unsigned __vcnt = 0; \ + while (!grin_is_vertex_list_end(g, __vli)) { \ + GRIN_VERTEX v = grin_get_vertex_from_iter(g, __vli); + +#ifdef GRIN_WITH_VERTEX_PROPERTY +#define FOR_VERTEX_END(g, vl, v) \ + grin_destroy_vertex(g, v); \ + __vcnt++; \ + grin_get_next_vertex_list_iter(g, __vli); \ + } \ + printf("vertex type %s, checked: %u\n", vt_names[__vtl_i], __vcnt); + +#define FOR_VERTEX_LIST_BEGIN(g, vl) \ + { \ + GRIN_VERTEX_TYPE_LIST __vtl = grin_get_vertex_type_list(g); \ + size_t __vtl_sz = grin_get_vertex_type_list_size(g, __vtl); \ + for (size_t __vtl_i = 0; __vtl_i < __vtl_sz; ++__vtl_i) { \ + GRIN_VERTEX_TYPE __vt = \ + grin_get_vertex_type_from_list(g, __vtl, __vtl_i); \ + GRIN_VERTEX_LIST vl = grin_get_vertex_list_by_type(g, __vt); \ + grin_destroy_vertex_type(g, __vt); + +#define FOR_VERTEX_LIST_SELECT_MASTER_BEGIN(g, vl) \ + { \ + GRIN_VERTEX_TYPE_LIST __vtl = grin_get_vertex_type_list(g); \ + size_t __vtl_sz = grin_get_vertex_type_list_size(g, __vtl); \ + for (size_t __vtl_i = 0; __vtl_i < __vtl_sz; ++__vtl_i) { \ + GRIN_VERTEX_TYPE __vt = \ + grin_get_vertex_type_from_list(g, __vtl, __vtl_i); \ + GRIN_VERTEX_LIST vl = \ + grin_get_vertex_list_by_type_select_master(g, __vt); \ + grin_destroy_vertex_type(g, __vt); + +#define FOR_VERTEX_LIST_SELECT_MIRROR_BEGIN(g, vl) \ + { \ + GRIN_VERTEX_TYPE_LIST __vtl = grin_get_vertex_type_list(g); \ + 
size_t __vtl_sz = grin_get_vertex_type_list_size(g, __vtl); \ + for (size_t __vtl_i = 0; __vtl_i < __vtl_sz; ++__vtl_i) { \ + GRIN_VERTEX_TYPE __vt = \ + grin_get_vertex_type_from_list(g, __vtl, __vtl_i); \ + GRIN_VERTEX_LIST vl = \ + grin_get_vertex_list_by_type_select_mirror(g, __vt); \ + grin_destroy_vertex_type(g, __vt); + +#define FOR_VERTEX_LIST_END(g, vl) \ + grin_destroy_vertex_list(g, vl); \ + } \ + grin_destroy_vertex_type_list(g, __vtl); \ + } +#else +#define FOR_VERTEX_END(g, vl) \ + grin_destroy_vertex(g, v); \ + __vcnt++; \ + grin_get_next_vertex_list_iter(g, __vli); \ + } \ + printf("vertex checked: %u\n", __vcnt); + +#define FOR_VERTEX_LIST_BEGIN(g, vl) \ + GRIN_VERTEX_LIST vl = grin_get_vertex_list(g); + +#define FOR_VERTEX_LIST_SELECT_MASTER_BEGIN(g, vl) \ + GRIN_VERTEX_LIST vl = grin_get_vertex_list_select_master(g); + +#define FOR_VERTEX_LIST_SELECT_MIRROR_BEGIN(g, vl) \ + GRIN_VERTEX_LIST vl = grin_get_vertex_list_select_mirror(g); + +#define FOR_VERTEX_LIST_END(g, vl) grin_destroy_vertex_list(g, vl); +#endif + +#ifdef GRIN_WITH_EDGE_PROPERTY +#define FOR_ADJ_LIST_BEGIN(g, dir, v, al) \ + { \ + GRIN_EDGE_TYPE_LIST __etl = grin_get_edge_type_list(g); \ + size_t __etl_size = grin_get_edge_type_list_size(g, __etl); \ + for (size_t __etl_i = 0; __etl_i < __etl_size; ++__etl_i) { \ + GRIN_EDGE_TYPE __et = grin_get_edge_type_from_list(g, __etl, __etl_i); \ + GRIN_ADJACENT_LIST al = \ + grin_get_adjacent_list_by_edge_type(g, dir, v, __et); \ + grin_destroy_edge_type(g, __et); +#define FOR_ADJ_LIST_END(g, al) \ + grin_destroy_adjacent_list(g, al); \ + } \ + grin_destroy_edge_type_list(g, __etl); \ + } +#else +#define FOR_ADJ_LIST_BEGIN(g, dir, v, al) \ + GRIN_ADJACENT_LIST al = grin_get_adjacent_list(g, dir, v); +#define FOR_ADJ_LIST_END(g, al) grin_destroy_adjacent_list(g, al); +#endif + +const char* vt_names[] = {"person", "software"}; +const char* et_names[] = {"knows", "created"}; + +const char* v_names[][4] = {{"josh", "vadas", "peter", "marko"}, 
+ {"lop", "ripple", "wrong", + "wrong"}}; // TODO align with order in local graph + +GRIN_GRAPH get_graph(const char* uri_str, int p) { +#ifdef GRIN_ENABLE_GRAPH_PARTITION + GRIN_PARTITIONED_GRAPH pg = grin_get_partitioned_graph_from_storage(uri_str); // argv is not in scope here; use the uri parameter + GRIN_PARTITION_LIST local_partitions = grin_get_local_partition_list(pg); + assert(p < grin_get_partition_list_size(pg, local_partitions)); + GRIN_PARTITION partition = + grin_get_partition_from_list(pg, local_partitions, p); + GRIN_PARTITION_ID partition_id = grin_get_partition_id(pg, partition); + GRIN_PARTITION p1 = grin_get_partition_by_id(pg, partition_id); + if (!grin_equal_partition(pg, partition, p1)) { + printf("partition not match\n"); + } + grin_destroy_partition(pg, p1); + GRIN_GRAPH g = grin_get_local_graph_by_partition(pg, partition); + grin_destroy_partition(pg, partition); + grin_destroy_partition_list(pg, local_partitions); + grin_destroy_partitioned_graph(pg); +#else + GRIN_GRAPH g = grin_get_graph_from_storage(uri_str); +#endif + return g; +} + +#ifdef GRIN_ENABLE_GRAPH_PARTITION +GRIN_VERTEX get_one_master_person(GRIN_GRAPH g) { + GRIN_VERTEX_TYPE vt = grin_get_vertex_type_by_name(g, "person"); + GRIN_VERTEX_LIST vl = grin_get_vertex_list_by_type_select_master(g, vt); + grin_destroy_vertex_type(g, vt); + GRIN_VERTEX_LIST_ITERATOR vli = grin_get_vertex_list_begin(g, vl); + GRIN_VERTEX v = grin_get_vertex_from_iter(g, vli); + grin_destroy_vertex_list_iter(g, vli); + grin_destroy_vertex_list(g, vl); +#ifdef GRIN_ENABLE_VERTEX_INTERNAL_ID_INDEX + printf("Got vertex %s\n", + v_names[vt][grin_get_vertex_internal_id_by_type(g, vt, v)]); +#endif + return v; +} +#endif + +GRIN_VERTEX get_one_person(GRIN_GRAPH g) { + GRIN_VERTEX_TYPE vt = grin_get_vertex_type_by_name(g, "person"); + GRIN_VERTEX_LIST vl = grin_get_vertex_list_by_type(g, vt); + grin_destroy_vertex_type(g, vt); + GRIN_VERTEX_LIST_ITERATOR vli = grin_get_vertex_list_begin(g, vl); + GRIN_VERTEX v = grin_get_vertex_from_iter(g, vli); +
grin_destroy_vertex_list_iter(g, vli); + grin_destroy_vertex_list(g, vl); +#ifdef GRIN_ENABLE_VERTEX_INTERNAL_ID_INDEX + printf("Got vertex %s\n", + v_names[vt][grin_get_vertex_internal_id_by_type(g, vt, v)]); +#endif + return v; +} + +void test_property_type(const char* uri_str) { + printf("+++++++++++++++++++++ Test property/type +++++++++++++++++++++\n"); + + GRIN_GRAPH g = get_graph(uri_str, 0); + + printf("------------ Vertex Type ------------\n"); + GRIN_VERTEX_TYPE_LIST vtl = grin_get_vertex_type_list(g); + size_t vtl_size = grin_get_vertex_type_list_size(g, vtl); + printf("vertex type list size: %zu\n", vtl_size); + + for (size_t i = 0; i < vtl_size; ++i) { + printf("------------ Iterate the %zu-th vertex type ------------\n", i); + GRIN_VERTEX_TYPE vt = grin_get_vertex_type_from_list(g, vtl, i); +#ifdef GRIN_WITH_VERTEX_TYPE_NAME + const char* vt_name = grin_get_vertex_type_name(g, vt); + printf("vertex type name: %s\n", vt_name); + GRIN_VERTEX_TYPE vt0 = grin_get_vertex_type_by_name(g, vt_name); + if (!grin_equal_vertex_type(g, vt, vt0)) { + printf("vertex type name not match\n"); + } + grin_destroy_vertex_type(g, vt0); +#endif +#ifdef GRIN_TRAIT_NATURAL_ID_FOR_VERTEX_TYPE + printf("vertex type id: %u\n", grin_get_vertex_type_id(g, vt)); + GRIN_VERTEX_TYPE vt1 = + grin_get_vertex_type_by_id(g, grin_get_vertex_type_id(g, vt)); + if (!grin_equal_vertex_type(g, vt, vt1)) { + printf("vertex type id not match\n"); + } + grin_destroy_vertex_type(g, vt1); +#endif + } + grin_destroy_vertex_type_list(g, vtl); + + printf( + "------------ Create a vertex type list of one type \"person\" " + "------------\n"); + GRIN_VERTEX_TYPE_LIST vtl2 = grin_create_vertex_type_list(g); +#ifdef GRIN_WITH_VERTEX_TYPE_NAME + GRIN_VERTEX_TYPE vt2_w = grin_get_vertex_type_by_name(g, "knows"); + if (vt2_w == GRIN_NULL_VERTEX_TYPE) { + printf("(Correct) vertex type of knows does not exists\n"); + } + + GRIN_VERTEX_TYPE vt2 = grin_get_vertex_type_by_name(g, "person"); + if (vt2 == 
GRIN_NULL_VERTEX_TYPE) { + printf("(Wrong) vertex type of person can not be found\n"); + } else { + const char* vt2_name = grin_get_vertex_type_name(g, vt2); + printf("vertex type name: %s\n", vt2_name); + } +#else + GRIN_VERTEX_TYPE vt2 = get_one_vertex_type(g); +#endif + grin_insert_vertex_type_to_list(g, vtl2, vt2); + size_t vtl2_size = grin_get_vertex_type_list_size(g, vtl2); + printf("created vertex type list size: %zu\n", vtl2_size); + GRIN_VERTEX_TYPE vt3 = grin_get_vertex_type_from_list(g, vtl2, 0); + if (!grin_equal_vertex_type(g, vt2, vt3)) { + printf("vertex type not match\n"); + } + grin_destroy_vertex_type(g, vt2); + grin_destroy_vertex_type(g, vt3); + grin_destroy_vertex_type_list(g, vtl2); + + // edge + printf("------------ Edge Type ------------\n"); + GRIN_EDGE_TYPE_LIST etl = grin_get_edge_type_list(g); + size_t etl_size = grin_get_edge_type_list_size(g, etl); + printf("edge type list size: %zu\n", etl_size); + + for (size_t i = 0; i < etl_size; ++i) { + printf("------------ Iterate the %zu-th edge type ------------\n", i); + GRIN_EDGE_TYPE et = grin_get_edge_type_from_list(g, etl, i); +#ifdef GRIN_WITH_EDGE_TYPE_NAME + const char* et_name = grin_get_edge_type_name(g, et); + printf("edge type name: %s\n", et_name); + GRIN_EDGE_TYPE et0 = grin_get_edge_type_by_name(g, et_name); + if (!grin_equal_edge_type(g, et, et0)) { + printf("edge type name not match\n"); + } + grin_destroy_edge_type(g, et0); +#endif +#ifdef GRIN_TRAIT_NATURAL_ID_FOR_EDGE_TYPE + printf("edge type id: %u\n", grin_get_edge_type_id(g, et)); + GRIN_EDGE_TYPE et1 = + grin_get_edge_type_by_id(g, grin_get_edge_type_id(g, et)); + if (!grin_equal_edge_type(g, et, et1)) { + printf("edge type id not match\n"); + } + grin_destroy_edge_type(g, et1); +#endif + // relation + GRIN_VERTEX_TYPE_LIST src_vtl = grin_get_src_types_by_edge_type(g, et); + size_t src_vtl_size = grin_get_vertex_type_list_size(g, src_vtl); + printf("source vertex type list size: %zu\n", src_vtl_size); + + 
GRIN_VERTEX_TYPE_LIST dst_vtl = grin_get_dst_types_by_edge_type(g, et); + + size_t dst_vtl_size = grin_get_vertex_type_list_size(g, dst_vtl); + printf("destination vertex type list size: %zu\n", dst_vtl_size); + + if (src_vtl_size != dst_vtl_size) { + printf("source and destination vertex type list size not match\n"); + } + for (size_t j = 0; j < src_vtl_size; ++j) { + GRIN_VERTEX_TYPE src_vt = grin_get_vertex_type_from_list(g, src_vtl, j); + GRIN_VERTEX_TYPE dst_vt = grin_get_vertex_type_from_list(g, dst_vtl, j); + const char* src_vt_name = grin_get_vertex_type_name(g, src_vt); + const char* dst_vt_name = grin_get_vertex_type_name(g, dst_vt); + const char* et_name = grin_get_edge_type_name(g, et); + printf("edge type name: %s-%s-%s\n", src_vt_name, et_name, dst_vt_name); + grin_destroy_vertex_type(g, src_vt); + grin_destroy_vertex_type(g, dst_vt); + } + grin_destroy_vertex_type_list(g, src_vtl); + grin_destroy_vertex_type_list(g, dst_vtl); + } + grin_destroy_edge_type_list(g, etl); + + printf( + "------------ Create an edge type list of one type \"created\" " + "------------\n"); + GRIN_EDGE_TYPE_LIST etl2 = grin_create_edge_type_list(g); +#ifdef GRIN_WITH_EDGE_TYPE_NAME + GRIN_EDGE_TYPE et2_w = grin_get_edge_type_by_name(g, "person"); + if (et2_w == GRIN_NULL_EDGE_TYPE) { + printf("(Correct) edge type of person does not exists\n"); + } + GRIN_EDGE_TYPE et2 = grin_get_edge_type_by_name(g, "created"); + if (et2 == GRIN_NULL_EDGE_TYPE) { + printf("(Wrong) edge type of created can not be found\n"); + } else { + const char* et2_name = grin_get_edge_type_name(g, et2); + printf("edge type name: %s\n", et2_name); + } +#else + GRIN_EDGE_TYPE et2 = get_one_edge_type(g); +#endif + grin_insert_edge_type_to_list(g, etl2, et2); + size_t etl2_size = grin_get_edge_type_list_size(g, etl2); + printf("created edge type list size: %zu\n", etl2_size); + GRIN_EDGE_TYPE et3 = grin_get_edge_type_from_list(g, etl2, 0); + if (!grin_equal_edge_type(g, et2, et3)) { + printf("edge type not 
match\n"); + } + grin_destroy_edge_type(g, et2); + grin_destroy_edge_type(g, et3); + grin_destroy_edge_type_list(g, etl2); + + grin_destroy_graph(g); +} + +void test_property_vertex_property_value(const char* uri_str) { + printf("------------ Test Vertex property value ------------\n"); + GRIN_GRAPH g = get_graph(uri_str, 0); + + // value check + printf("------ check value ------\n"); + FOR_VERTEX_LIST_BEGIN(g, vl) + GRIN_VERTEX_PROPERTY_LIST vpl = + grin_get_vertex_property_list_by_type(g, __vt); + size_t vpl_size = grin_get_vertex_property_list_size(g, vpl); + FOR_VERTEX_BEGIN(g, vl, v) +#ifdef GRIN_ENABLE_VERTEX_INTERNAL_ID_INDEX + long long int vid = grin_get_vertex_internal_id_by_type(g, __vt, v); +#else + long long int vid = __vcnt; +#endif +#ifdef GRIN_ENABLE_ROW + GRIN_ROW row = grin_get_vertex_row(g, v); +#endif + for (size_t j = 0; j < vpl_size; ++j) { + GRIN_VERTEX_PROPERTY vp = grin_get_vertex_property_from_list(g, vpl, j); + GRIN_DATATYPE dt = grin_get_vertex_property_datatype(g, vp); + if (dt == Int64) { + long long int pv = grin_get_vertex_property_value_of_int64(g, v, vp); + assert(grin_get_last_error_code() == NO_ERROR); +#ifdef GRIN_ENABLE_ROW + long long int rv = grin_get_int64_from_row(g, row, j); + assert(pv == rv); +#endif +#ifdef GRIN_WITH_VERTEX_PROPERTY_NAME + printf("%s %s: %lld\n", v_names[__vt][vid], + grin_get_vertex_property_name(g, __vt, vp), pv); +#else + printf("%s %zu: %lld\n", v_names[__vt][vid], j, pv); +#endif + } else if (dt == String) { + const char* pv = grin_get_vertex_property_value_of_string(g, v, vp); + assert(grin_get_last_error_code() == NO_ERROR); +#ifdef GRIN_ENABLE_ROW + const char* rv = grin_get_string_from_row(g, row, j); + assert(strcmp(pv, rv) == 0); +#endif +#ifdef GRIN_WITH_VERTEX_PROPERTY_NAME + printf("%s %s: %s\n", v_names[__vt][vid], + grin_get_vertex_property_name(g, __vt, vp), pv); +#else + printf("%s %zu: %s\n", v_names[__vt][vid], j, pv); +#endif + grin_destroy_string_value(g, pv); + 
#ifdef GRIN_ENABLE_ROW
+        // `rv` is declared under this same guard, so its release has to be
+        // guarded as well or the build breaks when rows are disabled.
+        grin_destroy_string_value(g, rv);
+#endif
+      }
+      grin_destroy_vertex_property(g, vp);
+    }
+#ifdef GRIN_ENABLE_ROW
+    grin_destroy_row(g, row);
+#endif
+  FOR_VERTEX_END(g, vl, v)
+  grin_destroy_vertex_property_list(g, vpl);
+  FOR_VERTEX_LIST_END(g, vl)
+
+  // check schema: every property must map back to its vertex type and be
+  // retrievable again by id and by name.
+  printf("------ check schema ------\n");
+  GRIN_VERTEX_TYPE_LIST vtl = grin_get_vertex_type_list(g);
+  size_t vtl_size = grin_get_vertex_type_list_size(g, vtl);
+  for (size_t i = 0; i < vtl_size; ++i) {
+    GRIN_VERTEX_TYPE vt = grin_get_vertex_type_from_list(g, vtl, i);
+    GRIN_VERTEX_PROPERTY_LIST vpl =
+        grin_get_vertex_property_list_by_type(g, vt);
+    size_t vpl_size = grin_get_vertex_property_list_size(g, vpl);
+    for (size_t j = 0; j < vpl_size; ++j) {
+      GRIN_VERTEX_PROPERTY vp = grin_get_vertex_property_from_list(g, vpl, j);
+      GRIN_VERTEX_TYPE vt1 = grin_get_vertex_type_from_property(g, vp);
+      assert(grin_equal_vertex_type(g, vt, vt1));
+      grin_destroy_vertex_type(g, vt1);
+
+#ifdef GRIN_TRAIT_NATURAL_ID_FOR_VERTEX_PROPERTY
+      unsigned int id = grin_get_vertex_property_id(g, vt, vp);
+      GRIN_VERTEX_PROPERTY vp1 = grin_get_vertex_property_by_id(g, vt, id);
+      assert(grin_equal_vertex_property(g, vp, vp1));
+      grin_destroy_vertex_property(g, vp1);
+#else
+      // Without natural ids, report the property's position in the list
+      // (j), not the index of the enclosing vertex type.
+      unsigned int id = (unsigned int) j;
+#endif
+
+#ifdef GRIN_WITH_VERTEX_PROPERTY_NAME
+      const char* vp_name = grin_get_vertex_property_name(g, vt, vp);
+      GRIN_VERTEX_PROPERTY vp2 =
+          grin_get_vertex_property_by_name(g, vt, vp_name);
+      assert(grin_equal_vertex_property(g, vp, vp2));
+      grin_destroy_vertex_property(g, vp2);
+#else
+      const char* vp_name = "unknown";
+#endif
+      printf("%s %u %s checked\n", vt_names[i], id, vp_name);
+      // Release the per-iteration handle, as the value-check loop does.
+      grin_destroy_vertex_property(g, vp);
+    }
+    grin_destroy_vertex_property_list(g, vpl);
+
+    // corner case
+#ifdef GRIN_TRAIT_NATURAL_ID_FOR_VERTEX_PROPERTY
+    GRIN_VERTEX_PROPERTY vp3 = grin_get_vertex_property_by_id(g, vt, vpl_size);
+    assert(vp3 == GRIN_NULL_VERTEX_PROPERTY);
+#endif
+
+#ifdef GRIN_WITH_VERTEX_PROPERTY_NAME
+    GRIN_VERTEX_PROPERTY vp4 =
+        grin_get_vertex_property_by_name(g, vt, "unknown");
+    assert(vp4 ==
GRIN_NULL_VERTEX_PROPERTY);
+#endif
+    grin_destroy_vertex_type(g, vt);
+  }
+  grin_destroy_vertex_type_list(g, vtl);
+
+  // corner case
+#ifdef GRIN_WITH_VERTEX_PROPERTY_NAME
+  GRIN_VERTEX_PROPERTY_LIST vpl1 =
+      grin_get_vertex_properties_by_name(g, "unknown");
+  assert(vpl1 == GRIN_NULL_VERTEX_PROPERTY_LIST);
+
+  GRIN_VERTEX_PROPERTY_LIST vpl2 =
+      grin_get_vertex_properties_by_name(g, "name");
+  assert(vpl2 != GRIN_NULL_VERTEX_PROPERTY_LIST);
+
+  size_t vpl2_size = grin_get_vertex_property_list_size(g, vpl2);
+  for (size_t i = 0; i < vpl2_size; ++i) {
+    GRIN_VERTEX_PROPERTY vp5 = grin_get_vertex_property_from_list(g, vpl2, i);
+    GRIN_VERTEX_TYPE vt5 = grin_get_vertex_type_from_property(g, vp5);
+    const char* vp5_name = grin_get_vertex_property_name(g, vt5, vp5);
+    assert(strcmp(vp5_name, "name") == 0);
+    // Release the per-iteration handles.
+    grin_destroy_vertex_type(g, vt5);
+    grin_destroy_vertex_property(g, vp5);
+  }
+  grin_destroy_vertex_property_list(g, vpl2);
+#endif
+
+  grin_destroy_graph(g);
+}
+
+/**
+ * Edge property test for one traversal direction.
+ *
+ * Value check: for every vertex and every adjacent list in direction `dir`,
+ * read each Int64 / Double / String edge property, compare it with the value
+ * stored in the edge row when GRIN_ENABLE_ROW is defined, and print it.
+ */
+void test_property_edge_property_value(const char* uri_str,
+                                       GRIN_DIRECTION dir) {
+  printf("------------ Test Edge property value ------------\n");
+  GRIN_GRAPH g = get_graph(uri_str, 0);
+
+  // value check
+  printf("------ check value ------\n");
+  FOR_VERTEX_LIST_BEGIN(g, vl)
+    FOR_VERTEX_BEGIN(g, vl, v)
+      FOR_ADJ_LIST_BEGIN(g, dir, v, al)
+      GRIN_EDGE_PROPERTY_LIST epl = grin_get_edge_property_list_by_type(g, __et);
+      size_t epl_size = grin_get_edge_property_list_size(g, epl);
+      GRIN_ADJACENT_LIST_ITERATOR ali = grin_get_adjacent_list_begin(g, al);
+      size_t acnt = 0;
+      while (!grin_is_adjacent_list_end(g, ali)) {
+        GRIN_EDGE e = grin_get_edge_from_adjacent_list_iter(g, ali);
+        GRIN_VERTEX u = grin_get_neighbor_from_adjacent_list_iter(g, ali);
+#ifdef GRIN_ENABLE_VERTEX_INTERNAL_ID_INDEX
+        // `ut` indexes v_names in the printf calls below, so it is released
+        // after the property loop rather than right here.
+        // NOTE(review): those printf calls read `ut` whenever
+        // GRIN_WITH_EDGE_PROPERTY_NAME is set - confirm that trait is never
+        // enabled without GRIN_ENABLE_VERTEX_INTERNAL_ID_INDEX.
+        GRIN_VERTEX_TYPE ut = grin_get_vertex_type(g, u);
+        long long int vid = grin_get_vertex_internal_id_by_type(g, __vt, v);
+        long long int uid = grin_get_vertex_internal_id_by_type(g, ut, u);
+#else
+        long long int vid = __vcnt;
+        long long int uid = acnt;
+#endif
+#ifdef GRIN_ENABLE_ROW
+        GRIN_ROW row = grin_get_edge_row(g, e);
+
+#endif
+        for (size_t j = 0; j < epl_size; ++j) {
+          GRIN_EDGE_PROPERTY ep = grin_get_edge_property_from_list(g, epl, j);
+          GRIN_DATATYPE dt = grin_get_edge_property_datatype(g, ep);
+          if (dt == Int64) {
+            long long int pv = grin_get_edge_property_value_of_int64(g, e, ep);
+            assert(grin_get_last_error_code() == NO_ERROR);
+#ifdef GRIN_ENABLE_ROW
+            long long int rv = grin_get_int64_from_row(g, row, j);
+            assert(pv == rv);
+#endif
+#ifdef GRIN_WITH_EDGE_PROPERTY_NAME
+            printf("%s %s %s: %lld\n", v_names[__vt][vid], v_names[ut][uid],
+                   grin_get_edge_property_name(g, __et, ep), pv);
+#else
+            printf("%s %zu %lld: %lld\n", v_names[__vt][vid], j, uid, pv);
+#endif
+          } else if (dt == Double) {
+            double pv = grin_get_edge_property_value_of_double(g, e, ep);
+            assert(grin_get_last_error_code() == NO_ERROR);
+#ifdef GRIN_ENABLE_ROW
+            double rv = grin_get_double_from_row(g, row, j);
+            assert(fabs(pv - rv) < eps);
+#endif
+#ifdef GRIN_WITH_EDGE_PROPERTY_NAME
+            printf("%s %s %s: %lf\n", v_names[__vt][vid], v_names[ut][uid],
+                   grin_get_edge_property_name(g, __et, ep), pv);
+#else
+            printf("%s %zu %lld: %lf\n", v_names[__vt][vid], j, uid, pv);
+#endif
+          } else if (dt == String) {
+            const char* pv = grin_get_edge_property_value_of_string(g, e, ep);
+            assert(grin_get_last_error_code() == NO_ERROR);
+#ifdef GRIN_ENABLE_ROW
+            const char* rv = grin_get_string_from_row(g, row, j);
+            assert(strcmp(pv, rv) == 0);
+#endif
+#ifdef GRIN_WITH_EDGE_PROPERTY_NAME
+            printf("%s %s %s: %s\n", v_names[__vt][vid], v_names[ut][uid],
+                   grin_get_edge_property_name(g, __et, ep), pv);
+#else
+            printf("%s %zu %lld: %s\n", v_names[__vt][vid], j, uid, pv);
+#endif
+            // Release the string values, mirroring the vertex property test.
+            grin_destroy_string_value(g, pv);
+#ifdef GRIN_ENABLE_ROW
+            grin_destroy_string_value(g, rv);
+#endif
+          }
+          grin_destroy_edge_property(g, ep);
+        }
+#ifdef GRIN_ENABLE_VERTEX_INTERNAL_ID_INDEX
+        grin_destroy_vertex_type(g, ut);
+#endif
+#ifdef GRIN_ENABLE_ROW
+        grin_destroy_row(g, row);
+#endif
+        grin_destroy_edge(g, e);
+        grin_destroy_vertex(g, u);
+        acnt++;
+        grin_get_next_adjacent_list_iter(g, ali);
+      }
+      grin_destroy_adjacent_list_iter(g, ali);
+      grin_destroy_edge_property_list(g, epl);
+
FOR_ADJ_LIST_END(g, al) + FOR_VERTEX_END(g, vl, v) + FOR_VERTEX_LIST_END(g, vl) + + // check schema + printf("------ check schema ------\n"); + GRIN_EDGE_TYPE_LIST etl = grin_get_edge_type_list(g); + size_t etl_size = grin_get_edge_type_list_size(g, etl); + for (size_t i = 0; i < etl_size; ++i) { + GRIN_EDGE_TYPE et = grin_get_edge_type_from_list(g, etl, i); + GRIN_EDGE_PROPERTY_LIST epl = grin_get_edge_property_list_by_type(g, et); + size_t epl_size = grin_get_edge_property_list_size(g, epl); + for (size_t j = 0; j < epl_size; ++j) { + GRIN_EDGE_PROPERTY ep = grin_get_edge_property_from_list(g, epl, j); + GRIN_EDGE_TYPE et1 = grin_get_edge_type_from_property(g, ep); + assert(grin_equal_edge_type(g, et, et1)); + grin_destroy_edge_type(g, et1); + +#ifdef GRIN_TRAIT_NATURAL_ID_FOR_EDGE_PROPERTY + unsigned int id = grin_get_edge_property_id(g, et, ep); + GRIN_EDGE_PROPERTY ep1 = grin_get_edge_property_by_id(g, et, id); + assert(grin_equal_edge_property(g, ep, ep1)); + grin_destroy_edge_property(g, ep1); +#else + unsigned int id = i; +#endif + +#ifdef GRIN_WITH_EDGE_PROPERTY_NAME + const char* ep_name = grin_get_edge_property_name(g, et, ep); + GRIN_EDGE_PROPERTY ep2 = grin_get_edge_property_by_name(g, et, ep_name); + assert(grin_equal_edge_property(g, ep, ep2)); +#else + const char* ep_name = "unknown"; +#endif + printf("%s %u %s checked\n", et_names[i], id, ep_name); + } + grin_destroy_edge_property_list(g, epl); + + // corner case +#ifdef GRIN_TRAIT_NATURAL_ID_FOR_EDGE_PROPERTY + GRIN_EDGE_PROPERTY ep3 = grin_get_edge_property_by_id(g, et, epl_size); + assert(ep3 == GRIN_NULL_EDGE_PROPERTY); +#endif + +#ifdef GRIN_WITH_EDGE_PROPERTY_NAME + GRIN_EDGE_PROPERTY ep4 = grin_get_edge_property_by_name(g, et, "unknown"); + assert(ep4 == GRIN_NULL_EDGE_PROPERTY); +#endif + grin_destroy_edge_type(g, et); + } + grin_destroy_edge_type_list(g, etl); + + // corner case +#ifdef GRIN_WITH_EDGE_PROPERTY_NAME + GRIN_EDGE_PROPERTY_LIST epl1 = grin_get_edge_properties_by_name(g, 
"unknown"); + assert(epl1 == GRIN_NULL_EDGE_PROPERTY_LIST); + + GRIN_EDGE_PROPERTY_LIST epl2 = grin_get_edge_properties_by_name(g, "weight"); + assert(epl2 != GRIN_NULL_EDGE_PROPERTY_LIST); + + size_t epl2_size = grin_get_edge_property_list_size(g, epl2); + for (size_t i = 0; i < epl2_size; ++i) { + GRIN_EDGE_PROPERTY ep5 = grin_get_edge_property_from_list(g, epl2, i); + GRIN_EDGE_TYPE et5 = grin_get_edge_type_from_property(g, ep5); + const char* ep5_name = grin_get_edge_property_name(g, et5, ep5); + assert(strcmp(ep5_name, "weight") == 0); + } + grin_destroy_edge_property_list(g, epl2); +#endif + + grin_destroy_graph(g); +} + +#ifdef GRIN_ENABLE_VERTEX_PRIMARY_KEYS +void test_property_primary_key(const char* uri_str) { + printf( + "+++++++++++++++++++++ Test property/primary key " + "+++++++++++++++++++++\n"); + GRIN_GRAPH g = get_graph(uri_str, 0); + GRIN_VERTEX_TYPE_LIST vtl = grin_get_vertex_types_with_primary_keys(g); + size_t vtl_size = grin_get_vertex_type_list_size(g, vtl); + printf("vertex type num with primary key: %zu\n", vtl_size); + + unsigned id_type[7] = {(~0), 0, 0, 1, 0, 1, 0}; + + for (size_t i = 0; i < vtl_size; ++i) { + GRIN_VERTEX_TYPE vt = grin_get_vertex_type_from_list(g, vtl, i); + const char* vt_name = grin_get_vertex_type_name(g, vt); + printf("vertex type name: %s\n", vt_name); + + GRIN_VERTEX_PROPERTY_LIST vpl = grin_get_primary_keys_by_vertex_type(g, vt); + size_t vpl_size = grin_get_vertex_property_list_size(g, vpl); + assert(vpl_size == 1); + + for (size_t j = 0; j < vpl_size; ++j) { + GRIN_VERTEX_PROPERTY vp = grin_get_vertex_property_from_list(g, vpl, j); + const char* vp_name = grin_get_vertex_property_name(g, vt, vp); + printf("primary key name: %s\n", vp_name); + grin_destroy_vertex_property(g, vp); + } + + GRIN_VERTEX_PROPERTY vp = grin_get_vertex_property_from_list(g, vpl, 0); + GRIN_DATATYPE dt = grin_get_vertex_property_datatype(g, vp); + + for (size_t j = 1; j <= 6; ++j) { + GRIN_ROW r = grin_create_row(g); + assert(dt == 
Int64);
+      grin_insert_int64_to_row(g, r, j);
+#ifdef GRIN_ENABLE_VERTEX_PK_INDEX
+      GRIN_VERTEX v = grin_get_vertex_by_primary_keys_row(g, vt, r);
+      if (v != GRIN_NULL_VERTEX && id_type[j] == i) {
+        GRIN_ROW nr = grin_get_vertex_primary_keys_row(g, v);
+        long long int k = grin_get_int64_from_row(g, nr, 0);
+        assert(k == j);
+        grin_destroy_row(g, nr);
+        grin_destroy_vertex(g, v);
+      }
+#endif
+      grin_destroy_row(g, r);
+    }
+
+    grin_destroy_vertex_property(g, vp);
+    grin_destroy_vertex_property_list(g, vpl);
+    grin_destroy_vertex_type(g, vt);
+  }
+
+  grin_destroy_vertex_type_list(g, vtl);
+  grin_destroy_graph(g);
+}
+#endif
+
+/**
+ * Error-code test: read the "lang" property, looked up on the "software"
+ * vertex type, from a "person" vertex and expect the last error code to be
+ * INVALID_VALUE. Every handle acquired here is released before returning.
+ */
+void test_error_code(const char* uri_str) {
+  printf("+++++++++++++++++++++ Test error code +++++++++++++++++++++\n");
+  GRIN_GRAPH g = get_graph(uri_str, 0);
+
+  GRIN_VERTEX_TYPE vt1 = grin_get_vertex_type_by_name(g, "person");
+  GRIN_VERTEX_TYPE vt2 = grin_get_vertex_type_by_name(g, "software");
+  GRIN_VERTEX_PROPERTY vp = grin_get_vertex_property_by_name(g, vt2, "lang");
+#ifdef GRIN_ENABLE_GRAPH_PARTITION
+  GRIN_VERTEX v = get_one_master_person(g);
+#else
+  GRIN_VERTEX v = get_one_person(g);
+#endif
+
+  const char* value = grin_get_vertex_property_value_of_string(g, v, vp);
+  assert(grin_get_last_error_code() == INVALID_VALUE);
+  (void) value;  // only the error code is checked
+
+  // Release everything acquired above.
+  grin_destroy_vertex(g, v);
+  grin_destroy_vertex_property(g, vp);
+  grin_destroy_vertex_type(g, vt2);
+  grin_destroy_vertex_type(g, vt1);
+  grin_destroy_graph(g);
+}
+
+void test_property(const char* uri_str) {
+  test_property_type(uri_str);
+  test_property_vertex_property_value(uri_str);
+  test_property_edge_property_value(uri_str, OUT);
+  test_property_edge_property_value(uri_str, IN);
+#ifdef GRIN_ENABLE_VERTEX_PRIMARY_KEYS
+  test_property_primary_key(uri_str);
+#endif
+#ifdef GRIN_WITH_VERTEX_PROPERTY_NAME
+  test_error_code(uri_str);
+#endif
+}
+/**
+void test_partition_reference(const char* uri_str) {
+  printf("+++++++++++++++++++++ Test partition/reference ++++++++++++++++++++++\n");
+GRIN_PARTITIONED_GRAPH pg =
+grin_get_partitioned_graph_from_storage(argv[1]);
+GRIN_PARTITION_LIST
+local_partitions = grin_get_local_partition_list(pg);
+
assert(grin_get_partition_list_size(pg, local_partitions) >= 2); + + GRIN_PARTITION p0 = grin_get_partition_from_list(pg, local_partitions, 0); + GRIN_PARTITION p1 = grin_get_partition_from_list(pg, local_partitions, 1); + GRIN_GRAPH g0 = grin_get_local_graph_by_partition(pg, p0); + GRIN_GRAPH g1 = grin_get_local_graph_by_partition(pg, p1); + +FOR_VERTEX_LIST_BEGIN(g0, vl0) + size_t mcnt = 0; + FOR_VERTEX_BEGIN(g0, vl0, v0) + GRIN_VERTEX_REF vref0 = grin_get_vertex_ref_by_vertex(g0, v0); + if (grin_is_master_vertex(g0, v0)) { + mcnt++; +#ifdef GRIN_TRAIT_FAST_VERTEX_REF + long long int sref = grin_serialize_vertex_ref_as_int64(g0, vref0); + GRIN_VERTEX_REF vref1 = grin_deserialize_int64_to_vertex_ref(g0, sref); +#else + const char* sref = grin_serialize_vertex_ref(g0, vref0); + GRIN_VERTEX_REF vref1 = grin_deserialize_vertex_ref(g0, sref); + grin_destroy_string_value(g0, sref); +#endif + GRIN_VERTEX v1 = grin_get_vertex_from_vertex_ref(g0, vref1); + if (!grin_equal_vertex(g0, v0, v1)) { + printf("vertex not match after deserialize\n"); + } + GRIN_PARTITION p = grin_get_master_partition_from_vertex_ref(g0, vref0); + if (!grin_equal_partition(g0, p, p0)) { + printf("(Wrong) partition not match in vertex ref\n"); + } + grin_destroy_partition(pg, p); + grin_destroy_vertex(g0, v1); + grin_destroy_vertex_ref(g0, vref1); + } else if (grin_is_mirror_vertex(g0, v0)) { +#ifdef GRIN_TRAIT_FAST_VERTEX_REF + long long int sref = grin_serialize_vertex_ref_as_int64(g0, vref0); + GRIN_VERTEX_REF vref1 = grin_deserialize_int64_to_vertex_ref(g1, sref); +#else + const char* sref = grin_serialize_vertex_ref(g0, vref0); + GRIN_VERTEX_REF vref1 = grin_deserialize_vertex_ref(g1, sref); + grin_destroy_string_value(g0, sref); +#endif + GRIN_VERTEX v1 = grin_get_vertex_from_vertex_ref(g1, vref1); + if (!grin_is_master_vertex(g1, v1)) { + printf("(Wrong) vertex not master after deserialize\n"); + } + GRIN_PARTITION p = grin_get_master_partition_from_vertex_ref(g0, vref0); + if 
(!grin_equal_partition(g0, p, p1)) { + printf("(Wrong) partition not match in vertex ref\n"); + } + grin_destroy_partition(pg, p); + grin_destroy_vertex(g1, v1); + grin_destroy_vertex_ref(g1, vref1); + } else { + printf("(Wrong) vertex other than master or mirror\n"); + } + grin_destroy_vertex_ref(g0, vref0); + FOR_VERTEX_END(g0, vl0, v0) + printf("master checked: %zu\n", mcnt); +FOR_VERTEX_LIST_END(g0, vl0) + + grin_destroy_partition(pg, p0); + grin_destroy_partition(pg, p1); + grin_destroy_graph(g0); + grin_destroy_graph(g1); + grin_destroy_partition_list(pg, local_partitions); + grin_destroy_partitioned_graph(pg); +} + + +void test_partition_topology(const char* uri_str) { + printf("+++++++++++++++++++++ Test partition/topology ++++++++++++++++++++++\n"); GRIN_GRAPH g = get_graph(uri_str, 0); + + printf("----- check master ----- \n"); +FOR_VERTEX_LIST_SELECT_MASTER_BEGIN(g, vl) + FOR_VERTEX_BEGIN(g, vl, v) + #ifdef GRIN_ENABLE_VERTEX_LIST_ARRAY + GRIN_VERTEX v1 = grin_get_vertex_from_list(g, vl, __vcnt); + assert(grin_equal_vertex(g, v, v1)); + grin_destroy_vertex(g, v1); + #endif + assert(grin_is_master_vertex(g, v)); + FOR_VERTEX_END(g, vl, v) +FOR_VERTEX_LIST_END(g, vl) + + printf("----- check mirror ----- \n"); +FOR_VERTEX_LIST_SELECT_MIRROR_BEGIN(g, vl) + FOR_VERTEX_BEGIN(g, vl, v) + #ifdef GRIN_ENABLE_VERTEX_LIST_ARRAY + GRIN_VERTEX v1 = grin_get_vertex_from_list(g, vl, __vcnt); + assert(grin_equal_vertex(g, v, v1)); + grin_destroy_vertex(g, v1); + #endif + assert(grin_is_mirror_vertex(g, v)); + FOR_VERTEX_END(g, vl, v) +FOR_VERTEX_LIST_END(g, vl) + + grin_destroy_graph(g); +} + +void test_partition(const char* uri_str) { +#ifdef GRIN_ENABLE_GRAPH_PARTITION + test_partition_reference(uri_str); + test_partition_topology(uri_str); +#endif +} +*/ +void test_topology_structure(const char* uri_str) { + printf( + "+++++++++++++++++++++ Test topology/structure +++++++++++++++++++++\n"); + GRIN_GRAPH g = get_graph(uri_str, 0); +#ifndef GRIN_WITH_VERTEX_PROPERTY + 
printf("vertex num: %zu\n", grin_get_vertex_num(g)); +#endif + +#ifndef GRIN_WITH_EDGE_PROPERTY + printf("edge num: %zu\n", grin_get_edge_num(g)); +#endif + grin_destroy_graph(g); +} + +void test_topology_vertex_list(const char* uri_str) { + printf( + "+++++++++++++++++++++ Test topology/vertex_list " + "+++++++++++++++++++++\n"); + GRIN_GRAPH g = get_graph(uri_str, 0); + + FOR_VERTEX_LIST_BEGIN(g, vl) + FOR_VERTEX_BEGIN(g, vl, v) +#ifdef GRIN_ENABLE_VERTEX_LIST_ARRAY + GRIN_VERTEX v1 = grin_get_vertex_from_list(g, vl, __vcnt); + assert(grin_equal_vertex(g, v, v1)); + grin_destroy_vertex(g, v1); +#endif + FOR_VERTEX_END(g, vl, v) + FOR_VERTEX_LIST_END(g, vl) + + grin_destroy_graph(g); +} + +void test_topology_adjacent_list(const char* uri_str, GRIN_DIRECTION dir) { + if (dir == IN) { + printf( + "+++++++++++++++++++++ Test topology/adjacent_list IN " + "+++++++++++++++++++++\n"); + } else { + printf( + "+++++++++++++++++++++ Test topology/adjacent_list OUT " + "+++++++++++++++++++++\n"); + } + + GRIN_GRAPH g = get_graph(uri_str, 0); + + FOR_VERTEX_LIST_BEGIN(g, vl) + FOR_VERTEX_BEGIN(g, vl, v) +#ifdef GRIN_ENABLE_VERTEX_INTERNAL_ID_INDEX + long long int vid = grin_get_vertex_internal_id_by_type(g, __vt, v); +#else + long long int vid = __vcnt; +#endif +#ifdef GRIN_ENABLE_GRAPH_PARTITION + if (!grin_is_master_vertex(g, v)) { + grin_destroy_vertex(g, v); + grin_get_next_vertex_list_iter(g, __vli); + continue; + } +#endif + FOR_ADJ_LIST_BEGIN(g, dir, v, al) + GRIN_ADJACENT_LIST_ITERATOR ali = grin_get_adjacent_list_begin(g, al); + size_t acnt = 0; + while (!grin_is_adjacent_list_end(g, ali)) { + GRIN_EDGE e = grin_get_edge_from_adjacent_list_iter(g, ali); + GRIN_VERTEX v1 = grin_get_src_vertex_from_edge(g, e); + GRIN_VERTEX v2 = grin_get_dst_vertex_from_edge(g, e); + GRIN_VERTEX u = grin_get_neighbor_from_adjacent_list_iter(g, ali); + +#ifdef GRIN_ENABLE_ADJACENT_LIST_ARRAY + GRIN_EDGE e1 = grin_get_edge_from_adjacent_list(g, al, acnt); + GRIN_VERTEX e1v1 = 
grin_get_src_vertex_from_edge(g, e1);
+        GRIN_VERTEX e1v2 = grin_get_dst_vertex_from_edge(g, e1);
+        assert(grin_equal_vertex(g, v1, e1v1));
+        assert(grin_equal_vertex(g, v2, e1v2));
+        grin_destroy_edge(g, e1);
+        grin_destroy_vertex(g, e1v1);
+        grin_destroy_vertex(g, e1v2);
+#endif
+
+        // The vertex being expanded is the source of an OUT edge and the
+        // destination of an IN edge; the neighbour is the other endpoint.
+        if (dir == OUT) {
+          assert(grin_equal_vertex(g, v, v1));
+          assert(grin_equal_vertex(g, v2, u));
+        } else {
+          assert(grin_equal_vertex(g, v, v2));
+          assert(grin_equal_vertex(g, v1, u));
+        }
+        grin_destroy_vertex(g, v1);
+        grin_destroy_vertex(g, v2);
+        grin_destroy_vertex(g, u);
+        grin_destroy_edge(g, e);
+
+        acnt++;
+        grin_get_next_adjacent_list_iter(g, ali);
+      }
+      // Guard spelled like the array check earlier in this loop; the former
+      // misspelling meant this size assertion was never compiled in.
+#ifdef GRIN_ENABLE_ADJACENT_LIST_ARRAY
+      assert(acnt == grin_get_adjacent_list_size(g, al));
+#endif
+      grin_destroy_adjacent_list_iter(g, ali);
+#ifdef GRIN_WITH_EDGE_PROPERTY
+      printf("vertex %s adjlist, edgetype: %s, checked num: %zu\n",
+             v_names[__vt][vid], et_names[__etl_i], acnt);
+#else
+      printf("vertex %s adjlist, checked num: %zu\n", v_names[__vt][vid], acnt);
+#endif
+      FOR_ADJ_LIST_END(g, al)
+    FOR_VERTEX_END(g, vl, v)
+  FOR_VERTEX_LIST_END(g, vl)
+  grin_destroy_graph(g);
+}
+
+// Run all topology tests: structure, vertex list, then adjacent lists in
+// both directions.
+void test_topology(const char* uri_str) {
+  test_topology_structure(uri_str);
+  test_topology_vertex_list(uri_str);
+  test_topology_adjacent_list(uri_str, OUT);
+  test_topology_adjacent_list(uri_str, IN);
+}
+
+#if defined(GRIN_ASSUME_ALL_VERTEX_LIST_SORTED) && \
+    defined(GRIN_ENABLE_VERTEX_LIST_ARRAY)
+void test_index_order(const char* uri_str) {
+  printf("+++++++++++++++++++++ Test index order +++++++++++++++++++++\n");
+  GRIN_GRAPH g = get_graph(uri_str, 0);
+
+  FOR_VERTEX_LIST_BEGIN(g, vl)
+    FOR_VERTEX_BEGIN(g, vl, v)
+      size_t pos = grin_get_position_of_vertex_from_sorted_list(g, vl, v);
+      assert(pos == __vcnt);
+    FOR_VERTEX_END(g, vl, v)
+#ifdef GRIN_ENABLE_GRAPH_PARTITION
+  {
+    GRIN_VERTEX_LIST mvlist =
+        grin_get_vertex_list_by_type_select_master(g, __vt);
+    size_t mvlist_sz = grin_get_vertex_list_size(g, mvlist);
+    for (size_t i = 0;
i < mvlist_sz; ++i) { + GRIN_VERTEX v = grin_get_vertex_from_list(g, mvlist, i); + size_t pos = grin_get_position_of_vertex_from_sorted_list(g, mvlist, v); + assert(pos == i); + size_t pos1 = grin_get_position_of_vertex_from_sorted_list(g, vl, v); + GRIN_VERTEX v1 = grin_get_vertex_from_list(g, vl, pos1); + assert(grin_equal_vertex(g, v, v1)); + grin_destroy_vertex(g, v1); + grin_destroy_vertex(g, v); + } + grin_destroy_vertex_list(g, mvlist); + } + { + GRIN_VERTEX_LIST mvlist = + grin_get_vertex_list_by_type_select_mirror(g, __vt); + size_t mvlist_sz = grin_get_vertex_list_size(g, mvlist); + for (size_t i = 0; i < mvlist_sz; ++i) { + GRIN_VERTEX v = grin_get_vertex_from_list(g, mvlist, i); + size_t pos = grin_get_position_of_vertex_from_sorted_list(g, mvlist, v); + assert(pos == i); + size_t pos1 = grin_get_position_of_vertex_from_sorted_list(g, vl, v); + GRIN_VERTEX v1 = grin_get_vertex_from_list(g, vl, pos1); + assert(grin_equal_vertex(g, v, v1)); + grin_destroy_vertex(g, v1); + grin_destroy_vertex(g, v); + } + grin_destroy_vertex_list(g, mvlist); + } +#endif + FOR_VERTEX_LIST_END(g, vl) + + grin_destroy_graph(g); +} +#endif + +void test_index_internal_id(const char* uri_str) { + printf( + "+++++++++++++++++++++ Test index internal id +++++++++++++++++++++\n"); + GRIN_GRAPH g = get_graph(uri_str, 0); + + FOR_VERTEX_LIST_BEGIN(g, vl) + long long int min = grin_get_vertex_internal_id_lower_bound_by_type(g, __vt); + long long int max = grin_get_vertex_internal_id_upper_bound_by_type(g, __vt); + FOR_VERTEX_BEGIN(g, vl, v) +#ifdef GRIN_ENABLE_VERTEX_INTERNAL_ID_INDEX + long long int oid = grin_get_vertex_internal_id_by_type(g, __vt, v); + assert(oid >= min && oid < max); + GRIN_VERTEX v1 = grin_get_vertex_by_internal_id_by_type(g, __vt, oid); + assert(grin_equal_vertex(g, v, v1)); + grin_destroy_vertex(g, v1); +#endif + FOR_VERTEX_END(g, vl, v) + FOR_VERTEX_LIST_END(g, vl) + + grin_destroy_graph(g); +} + +void test_index(const char* uri_str) { +#if 
defined(GRIN_ASSUME_ALL_VERTEX_LIST_SORTED) && \ + defined(GRIN_ENABLE_VERTEX_LIST_ARRAY) + test_index_order(uri_str); +#endif +#ifdef GRIN_ENABLE_VERTEX_INTERNAL_ID_INDEX + test_index_internal_id(uri_str); +#endif +} + +void test_vertex_property_value(const char* uri_str) { + GRIN_GRAPH g = get_graph(uri_str, 0); + GRIN_VERTEX_TYPE vt = grin_get_vertex_type_by_name(g, "person"); + GRIN_VERTEX_PROPERTY vp = grin_get_vertex_property_by_name(g, vt, "age"); + GRIN_VERTEX v = get_one_person(g); + + struct timeval t1, t2; + gettimeofday(&t1, NULL); + for (int i = 0; i < 1000000; ++i) { + long long int age = grin_get_vertex_property_value_of_int32(g, v, vp); + } + gettimeofday(&t2, NULL); + double elapsedTime = (t2.tv_sec - t1.tv_sec) * 1000.0; + elapsedTime += (t2.tv_usec - t1.tv_usec) / 1000.0; + printf("%f ms.\n", elapsedTime); + grin_destroy_vertex(g, v); + grin_destroy_vertex_property(g, vp); + grin_destroy_vertex_type(g, vt); + grin_destroy_graph(g); +} + +void test_perf(const char* uri_str) { test_vertex_property_value(uri_str); } + +int main(int argc, char** argv) { + const char* uri_str = + "flex://" + "../../../../storages/rt_mutable_graph/modern_graph/"; + + test_index(uri_str); + test_property(uri_str); + // test_partition(uri_str); + test_topology(uri_str); + test_perf(uri_str); + return 0; +} diff --git a/flex/engines/graph_db/server/executor.act.cc b/flex/engines/graph_db/server/executor.act.cc deleted file mode 100644 index 389b26211996..000000000000 --- a/flex/engines/graph_db/server/executor.act.cc +++ /dev/null @@ -1,45 +0,0 @@ -/** Copyright 2020 Alibaba Group Holding Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "flex/engines/graph_db/server/executor.act.h" - -#include "flex/engines/graph_db/database/graph_db.h" -#include "flex/engines/graph_db/database/graph_db_session.h" - -#include - -namespace server { - -executor::~executor() { - // finalization - // ... -} - -executor::executor(hiactor::actor_base* exec_ctx, const hiactor::byte_t* addr) - : hiactor::actor(exec_ctx, addr) { - set_max_concurrency(1); // set max concurrency for task reentrancy (stateful) - // initialization - // ... -} - -seastar::future executor::run_query(query_param&& param) { - auto ret = gs::GraphDB::get() - .GetSession(hiactor::local_shard_id()) - .Eval(param.content); - seastar::sstring content(ret.data(), ret.size()); - return seastar::make_ready_future(std::move(content)); -} - -} // namespace server diff --git a/flex/engines/hqps_db/CMakeLists.txt b/flex/engines/hqps_db/CMakeLists.txt new file mode 100644 index 000000000000..c5c4c4131681 --- /dev/null +++ b/flex/engines/hqps_db/CMakeLists.txt @@ -0,0 +1,55 @@ + +find_package(Protobuf REQUIRED) +include_directories(${Protobuf_INCLUDE_DIRS}) + + +set(GIE_COMPILER_PROTO_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../interactive_engine/executor/ir/proto/) +set(GIE_COMPILER_PROTO_JOB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../interactive_engine/executor/engine/pegasus/server/proto/) +# proto files of gie compiler +set(CODE_GEN_PROTOBUF_FILES + ${GIE_COMPILER_PROTO_DIR}/algebra.proto + ${GIE_COMPILER_PROTO_DIR}/common.proto + ${GIE_COMPILER_PROTO_DIR}/expr.proto + ${GIE_COMPILER_PROTO_DIR}/physical.proto + 
${GIE_COMPILER_PROTO_DIR}/results.proto + ${GIE_COMPILER_PROTO_DIR}/schema.proto + ${GIE_COMPILER_PROTO_DIR}/type.proto + ${GIE_COMPILER_PROTO_DIR}/stored_procedure.proto + ${GIE_COMPILER_PROTO_JOB_DIR}/job_service.proto +) + +#create directory first +file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/proto_generated_gie) + +# proto gen for gie proto +protobuf_generate(APPEND_PATH + TARGET ${LOCAL_EXE_NAME} + LANGUAGE cpp + OUT_VAR PROTO_SRCS_GIE + PROTOS ${CODE_GEN_PROTOBUF_FILES} + IMPORT_DIRS ${GIE_COMPILER_PROTO_DIR} + PROTOC_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/proto_generated_gie +) + +add_library(hqps_plan_proto SHARED ${PROTO_SRCS_GIE}) +target_include_directories(hqps_plan_proto PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) +target_link_libraries(hqps_plan_proto PUBLIC ${Protobuf_LIBRARIES}) + + + +install(TARGETS hqps_plan_proto + RUNTIME DESTINATION bin + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib) + +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + DESTINATION include/flex/ + FILES_MATCHING + PATTERN "*.h" +) +#install proto_generated_files +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/proto_generated_gie/ + DESTINATION include/flex/ + FILES_MATCHING + PATTERN "*.h" +) diff --git a/flex/engines/hqps_db/README.md b/flex/engines/hqps_db/README.md new file mode 100644 index 000000000000..e2b2286a2c53 --- /dev/null +++ b/flex/engines/hqps_db/README.md @@ -0,0 +1,3 @@ +## HighQPS Engine + +HighQPS Engine is a graph computing engine based on Hiactor. \ No newline at end of file diff --git a/flex/engines/hqps_db/app/hqps_app_base.h b/flex/engines/hqps_db/app/hqps_app_base.h new file mode 100644 index 000000000000..0cea66f408d2 --- /dev/null +++ b/flex/engines/hqps_db/app/hqps_app_base.h @@ -0,0 +1,48 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef ENGINES_HQPS_APP_CYPHER_APP_BASE_H_ +#define ENGINES_HQPS_APP_CYPHER_APP_BASE_H_ + +#include "flex/engines/hqps_db/database/mutable_csr_interface.h" +#include "flex/utils/app_utils.h" +#include "proto_generated_gie/results.pb.h" + +namespace gs { + +enum class GraphStoreType { + Grape = 0, +}; + +template +class HqpsAppBase { + public: + /** + * @brief Construct a new Hqps App Base object + */ + virtual ~HqpsAppBase() = default; + /** + * @brief Query the graph with the given input + * + * @param graph The graph to query + * @param input The input to query + * @return virtual results::CollectiveResults The query result + */ + virtual results::CollectiveResults Query(const GRAPH_TYPE& graph, + Decoder& input) const = 0; +}; + +} // namespace gs + +#endif // ENGINES_HQPS_APP_CYPHER_APP_BASE_H_ \ No newline at end of file diff --git a/flex/engines/hqps_db/core/base_engine.h b/flex/engines/hqps_db/core/base_engine.h new file mode 100644 index 000000000000..49ee40e1856c --- /dev/null +++ b/flex/engines/hqps_db/core/base_engine.h @@ -0,0 +1,1187 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef ENGINES_HQPS_ENGINE_BASE_ENGINE_H_ +#define ENGINES_HQPS_ENGINE_BASE_ENGINE_H_ + +#include +#include +#include +#include +#include +#include + +#include + +#include "flex/engines/hqps_db/core/context.h" +#include "flex/engines/hqps_db/core/params.h" + +#include "flex/engines/hqps_db/core/params.h" +#include "flex/engines/hqps_db/core/utils/hqps_utils.h" + +#include "flex/engines/hqps_db/structures/multi_vertex_set/multi_label_vertex_set.h" +#include "flex/storages/rt_mutable_graph/types.h" +#include "flex/utils/property/column.h" +#include "grape/utils/bitset.h" + +#include "flex/engines/hqps_db/core/operator/group_by.h" +#include "flex/engines/hqps_db/core/operator/project.h" +#include "flex/engines/hqps_db/core/operator/sink.h" +#include "flex/engines/hqps_db/core/operator/sort.h" + +#include "flex/engines/hqps_db/core/null_record.h" + +#include "glog/logging.h" + +namespace gs { + +template +struct BuilderTuple; + +template +struct BuilderTuple> { + using type = std::tuple; +}; + +class BaseEngine { + public: + template > + static RES_T Alias(Context&& prev) { + return prev.template Alias(); + } + + //--------- Sink the context to output-------- + template + static auto Sink( + Context& ctx, + std::array::col_num> + tag_ids) { + return SinkOp::Sink(ctx, tag_ids); + } + + //////////////////////////////////////Dedup///////////////////////// + // Only can dedup head node. + template < + int alias_to_use, typename CTX_HEAD_T, int cur_alias, int base_tag, + typename... 
CTX_PREV, + typename RES_T = Context> + static RES_T Dedup( + Context&& ctx) { + if constexpr (alias_to_use != cur_alias) { + // When we dedup a intermediate node, we need to + // 1) first dedup current node, no duplicate in us. + // 2) then iterate whole context, for later nodes, we only preserve the + // first element met. + // + // the result context type should be same with previous. + // 1 -> (2, 3) + // 2 -> (4, 5), 3 -> (6, 7); + // + // dedup on col 2, then we 1 -> (2, 3), 2 -> 4, 3 -> 6; + + // first remove all possible duplication introduced by later csr. + ctx.template Dedup(); + } + auto& select_node = gs::Get(ctx); + // dedup inplace, and return the offset_array to old node. + auto offset_to_old_node = select_node.Dedup(); + // The offset need to be changed. + ctx.template UpdateChildNode(std::move(offset_to_old_node)); + return ctx; + } + + /// @brief /////////////Dedup on multiple keys//////////////// + /// @param ctx + /// @return + template < + int... alias_to_use, typename CTX_HEAD_T, int cur_alias, int base_tag, + typename... 
CTX_PREV, + typename RES_T = Context, + typename std::enable_if<(sizeof...(alias_to_use) > 1)>::type* = nullptr> + static RES_T Dedup( + Context&& ctx) { + // get all ele_t of context + using CTX_T = Context; + using ctx_iter_t = typename CTX_T::iterator; + using ctx_all_ele_t = std::remove_reference_t().GetAllElement())>; + using dedup_tuple_t = + std::tuple...>; + std::unordered_set> dedup_set; + std::vector active_indices; + std::vector new_offset; + auto& cur_ = ctx.GetHead(); + new_offset.reserve(cur_.Size()); + new_offset.emplace_back(0); + size_t cnt = 0; + for (auto iter : ctx) { + auto eles = iter.GetAllElement(); + dedup_tuple_t dedup_tuple = + std::make_tuple(std::get(eles)...); + if (dedup_set.find(dedup_tuple) == dedup_set.end()) { + dedup_set.insert(dedup_tuple); + active_indices.emplace_back(cnt); + } + cnt += 1; + new_offset.emplace_back(active_indices.size()); + } + + cur_.SubSetWithIndices(active_indices); + ctx.merge_offset_with_back(new_offset); + return ctx; + } + + /////////////////////////Apply/////////////////////////////// + /// With a apply function, we get the result, and join with current node. + // append the result data to current traversal. + template < + JoinKind join_kind, typename CTX_HEAD_T, int cur_alias, int base_tag, + typename... CTX_PREV, typename FUNC_T, + typename std::enable_if::type* = nullptr> + static auto Apply(Context&& ctx, + FUNC_T&& func) { + VLOG(10) << "[Apply AntiJoin]: "; + static constexpr size_t start_tag = + (cur_alias == -1 ? base_tag + sizeof...(CTX_PREV) : cur_alias); + // create a copied ctx + auto copied_ctx(ctx); + copied_ctx.set_sub_task_start_tag(start_tag); + + auto inner_ctx = func(std::move(copied_ctx)); + // We shall obtain the active indcies in res_ctx via csr offset + // arrays. + + std::vector tmp_vec = inner_ctx.ObtainOffsetFromTag(start_tag); + // Filter the total context with keys. + // The res_alias is currently not used. 
+ ctx.FilterWithOffsets(tmp_vec, join_kind); + return ctx; + } + /////////////////////////OuterJoin/////////////////////////////// + // Join on two context, and append the result data to current traversal. + template ::type* = nullptr> + static auto Join(CTX_X&& ctx_x, CTX_Y&& ctx_y) { + LOG(INFO) << "[LeftOuterJoin] with left ele: " << ctx_x.GetHead().Size() + << ", right: " << ctx_y.GetHead().Size(); + // get all tuples from two context. + using ctx_x_iter_t = typename CTX_X::iterator; + using ctx_y_iter_t = typename CTX_Y::iterator; + using ctx_x_all_ele_t = std::remove_reference_t().GetAllElement())>; + using ctx_y_all_ele_t = std::remove_reference_t().GetAllElement())>; + using ctx_y_all_ind_ele_t = std::remove_reference_t().GetAllIndexElement())>; + using ctx_y_all_data_t = std::remove_reference_t().GetAllData())>; + static constexpr size_t x_ele_num = std::tuple_size_v; + static constexpr size_t y_ele_num = std::tuple_size_v; + static constexpr int x_base_tag = CTX_X::base_tag_id; + static constexpr int y_base_tag = CTX_Y::base_tag_id; + LOG(INFO) << "x ele: " << x_ele_num << ", y ele num: " << y_ele_num; + + static constexpr size_t real_x_ind = + alias_x == -1 ? x_ele_num - 1 : alias_x - x_base_tag; + static constexpr size_t real_y_ind = + alias_y == -1 ? y_ele_num - 1 : alias_y - y_base_tag; + using ctx_x_ele_t = std::tuple_element_t; + using ctx_y_ele_t = std::tuple_element_t; + using ctx_y_res_ele_t = + typename gs::remove_ith_type::type; + using ctx_y_res_data_t = + typename gs::remove_ith_type::type; + static_assert(std::is_same_v, + "Join on different type is not supported."); + // We shall preserve the records on the left, and append the right context' + // columns(which is not in ctx_x) to ctx_x + // For CodegenBuilder, the mapping from tagId to tag_ind should be updated. 
+ auto y_builder_tuple_init = ctx_y.CreateSetBuilder(); + auto x_builder_tuple_init = ctx_x.CreateSetBuilder(); + auto y_builder_tuple = remove_nth_element(y_builder_tuple_init); + // auto x_builder_tuple = + // remove_nth_element(x_builder_tuple_init); + auto builder_tuple = std::tuple_cat(x_builder_tuple_init, y_builder_tuple); + static_assert(is_tuple::value); + static_assert(is_tuple::value); + std::unordered_map< + ctx_y_ele_t, std::vector>, + boost::hash> + y_ele_to_ind; + { + // fillin ele_to_ind + for (auto iter : ctx_y) { + auto ele = iter.GetAllElement(); + auto index_ele = iter.GetAllIndexElement(); + auto data = iter.GetAllData(); + auto y_ele = std::get(ele); + y_ele_to_ind[y_ele].emplace_back( + std::make_pair(remove_nth_element(index_ele), + remove_nth_element(data))); + } + } + + { + double t0 = -grape::GetCurrentTime(); + for (auto iter_x : ctx_x) { + auto ele = iter_x.GetAllElement(); + auto ind_ele = iter_x.GetAllIndexElement(); + auto data_tuple = iter_x.GetAllData(); + auto x_ele = std::get(ele); + // auto x_ele_to_insert = remove_nth_element(ele); + auto iter = y_ele_to_ind.find(x_ele); + if (iter != y_ele_to_ind.end()) { + for (auto& y_ele_data : iter->second) { + auto copied_ele = y_ele_data.first; + auto new_ele = + std::tuple_cat(std::move(ind_ele), std::move(copied_ele)); + auto new_data = std::tuple_cat(std::move(data_tuple), + std::move(y_ele_data.second)); + insert_into_builder_v2(builder_tuple, new_ele, new_data); + } + } else { + LOG(INFO) << "no y ele found"; + auto new_ele = + std::tuple_cat(std::move(ind_ele), + NullRecordCreator::GetNull()); + LOG(INFO) << "new ele: " << gs::to_string(new_ele); + auto new_data = + std::tuple_cat(std::move(data_tuple), + NullRecordCreator::GetNull()); + LOG(INFO) << "new data: " << gs::to_string(new_data); + insert_into_builder_v2(builder_tuple, new_ele, new_data); + } + } + LOG(INFO) << "here"; + t0 += grape::GetCurrentTime(); + LOG(INFO) << "Join cost: " << t0; + } + static constexpr size_t 
final_col_num = x_ele_num + y_ele_num - 1; + auto built_tuple = builder_finish( + builder_tuple, std::make_index_sequence{}); + LOG(INFO) << "after build, size: " << std::get<0>(built_tuple).Size(); + auto offset_vec = + make_offset_vector(final_col_num - 1, std::get<0>(built_tuple).Size()); + VLOG(10) << "offset vec size: " << offset_vec.size(); + auto prev_tuple = gs::remove_nth_element(built_tuple); + auto head_tuple = std::get(built_tuple); + + return make_context<0, final_col_num - 1>( + std::move(prev_tuple), std::move(head_tuple), std::move(offset_vec)); + } + + // Join on two context, and append the result data to current traversal. + template ::type* = nullptr> + static auto Join(CTX_X&& ctx_x, CTX_Y&& ctx_y) { + LOG(INFO) << "[LeftOuterJoin] with "; + // get all tuples from two context. + using ctx_x_iter_t = typename CTX_X::iterator; + using ctx_y_iter_t = typename CTX_Y::iterator; + using ctx_x_all_ele_t = std::remove_reference_t().GetAllElement())>; + using ctx_y_all_ele_t = std::remove_reference_t().GetAllElement())>; + using ctx_y_all_ind_ele_t = std::remove_reference_t().GetAllIndexElement())>; + using ctx_y_all_data_t = std::remove_reference_t().GetAllData())>; + static constexpr size_t x_ele_num = std::tuple_size_v; + static constexpr size_t y_ele_num = std::tuple_size_v; + static constexpr int x_base_tag = CTX_X::base_tag_id; + static constexpr int y_base_tag = CTX_Y::base_tag_id; + LOG(INFO) << "x ele: " << x_ele_num << ", y ele num: " << y_ele_num; + + static constexpr size_t real_x_ind0 = + alias_x0 == -1 ? x_ele_num - 1 : alias_x0 - x_base_tag; + static constexpr size_t real_x_ind1 = + alias_x1 == -1 ? x_ele_num - 1 : alias_x1 - x_base_tag; + static constexpr size_t real_y_ind0 = + alias_y0 == -1 ? y_ele_num - 1 : alias_y0 - y_base_tag; + static constexpr size_t real_y_ind1 = + alias_y1 == -1 ? 
y_ele_num - 1 : alias_y1 - y_base_tag; + using ctx_x_ele_t = + std::pair, + std::tuple_element_t>; + using ctx_y_ele_t = + std::pair, + std::tuple_element_t>; + + using ctx_y_res_ele_t = + typename gs::remove_ith_jth_type::type; + using ctx_y_res_data_t = + typename gs::remove_ith_jth_type::type; + static_assert(std::is_same_v, + "Join on different type is not supported."); + // if + // contexpr(y_ele_num == 2) {} + // We shall preserve the records on the left, and append the right context' + // columns(which is not in ctx_x) to ctx_x + // For CodegenBuilder, the mapping from tagId to tag_ind should be updated. + auto y_builder_tuple_init = ctx_y.CreateSetBuilder(); + auto x_builder_tuple_init = ctx_x.CreateSetBuilder(); + auto y_builder_tuple = + remove_ith_jth_element(y_builder_tuple_init); + // auto x_builder_tuple = + // remove_nth_element(x_builder_tuple_init); + auto builder_tuple = std::tuple_cat(std::move(x_builder_tuple_init), + std::move(y_builder_tuple)); + static_assert(is_tuple::value); + static_assert(is_tuple::value); + std::unordered_map< + ctx_y_ele_t, std::vector>, + boost::hash> + y_ele_to_ind; + + LOG(INFO) << "ctx x: " << ctx_x.GetHead().Size() + << ", ctx y:" << ctx_y.GetHead().Size(); + { + auto t0 = -grape::GetCurrentTime(); + // fillin ele_to_ind + for (auto iter : ctx_y) { + auto ele = iter.GetAllElement(); + auto index_ele = iter.GetAllIndexElement(); + auto data = iter.GetAllData(); + auto y_ele = std::make_pair(std::get(ele), + std::get(ele)); + y_ele_to_ind[y_ele].emplace_back(std::make_pair( + remove_ith_jth_element(index_ele), + remove_ith_jth_element(data))); + } + t0 += grape::GetCurrentTime(); + LOG(INFO) << "fillin ele_to_ind takes " << t0 << "s"; + } + + { + double t0 = -grape::GetCurrentTime(); + for (auto iter_x : ctx_x) { + auto ele = iter_x.GetAllElement(); + auto ind_ele = iter_x.GetAllIndexElement(); + auto data_tuple = iter_x.GetAllData(); + auto x_ele = std::make_pair(std::get(ele), + std::get(ele)); + // auto 
x_ele_to_insert = remove_nth_element(ele); + auto iter = y_ele_to_ind.find(x_ele); + if (iter != y_ele_to_ind.end()) { + for (auto& y_ele_data : iter->second) { + auto copied_ele = y_ele_data.first; + auto new_ele = + std::tuple_cat(std::move(ind_ele), std::move(copied_ele)); + auto new_data = std::tuple_cat(std::move(data_tuple), + std::move(y_ele_data.second)); + insert_into_builder_v2(builder_tuple, new_ele, new_data); + } + } else { + auto new_ele = + std::tuple_cat(std::move(ind_ele), + NullRecordCreator::GetNull()); + auto new_data = + std::tuple_cat(std::move(data_tuple), + NullRecordCreator::GetNull()); + insert_into_builder_v2(builder_tuple, new_ele, new_data); + } + } + t0 += grape::GetCurrentTime(); + LOG(INFO) << "Join cost: " << t0; + } + static constexpr size_t final_col_num = x_ele_num + y_ele_num - 2; + auto head_index_seq = + gs::make_index_range{}; + auto other_index_seq = gs::make_index_range<0, final_col_num - 1>{}; + auto head_tuple = builder_finish(builder_tuple, head_index_seq); + auto prev_tuple = builder_finish(builder_tuple, other_index_seq); + LOG(INFO) << "after build, size: " << std::get<0>(head_tuple).Size(); + auto offset_vec = + make_offset_vector(final_col_num - 1, std::get<0>(head_tuple).Size()); + VLOG(10) << "offset vec size: " << offset_vec.size(); + // TODO: avoid copy here. + + return make_context<0, final_col_num - 1>( + std::move(prev_tuple), std::move(std::get<0>(head_tuple)), + std::move(offset_vec)); + } + + /////////////////////////Apply/////////////////////////////// + template ::type* = + nullptr> + static auto Apply(Context&& ctx, + FUNC_T&& func) { + VLOG(10) << "[Apply Innerjoin]: "; + static constexpr size_t start_tag = + (cur_alias == -1 ? base_tag + sizeof...(CTX_PREV) : cur_alias); + + // create a copied ctx + auto copied_ctx(ctx); + copied_ctx.set_sub_task_start_tag(start_tag); + + auto inner_ctx = func(std::move(copied_ctx)); + // append the record appears in copied_ctx(only last col) to current ctx. 
+ + VLOG(10) << "After sub plan, try to obtain offset vec from: " << start_tag; + // Obtain the mapping/offset vector between subtask'result's head and old + // ctx's head. + std::vector tmp_vec = inner_ctx.ObtainOffsetFromSubTaskStart(); + // NOTE: With fold op considered, we may lost recording in start_tag, when + // they are filtered in sub plan. + // We need to add them back to form a complete result. + auto& inner_ctx_head = inner_ctx.GetHead(); + if (ctx.template GetNode().Size() > inner_ctx_head.Size()) { + VLOG(10) << "Make up empty entries filtered in subplan" + << ctx.template GetNode().Size() << ", " + << inner_ctx_head.Size(); + size_t old_size = inner_ctx_head.Size(); + inner_ctx_head.MakeUpTo(ctx.template GetNode().Size()); + // extend tmp_vec; + size_t new_size = inner_ctx_head.Size(); + VLOG(10) << "old size: " << old_size << ", new size: " << new_size; + for (auto i = old_size; i < new_size; ++i) { + tmp_vec.emplace_back(i + 1); + } + } + + VLOG(10) << "head node size: " << inner_ctx_head.Size(); + VLOG(10) << "Obtain tmp_vec, size:" << tmp_vec.size(); + return ctx.template ApplyNode(std::move(inner_ctx_head), + std::move(tmp_vec)); + } + + template + static auto BuilderConcate(const std::tuple& x_builders, + const std::tuple& y_builders) { + auto remove_x_th_col = remove_nth_element(x_builders); + auto remove_y_th_col = remove_nth_element(y_builders); + return std::tuple_cat(std::move(remove_x_th_col), + std::move(remove_y_th_col)); + } + + template + static auto create_builder_tuple( + const Context& + ctx_x, + const Context& + ctx_y) { + using CTX_X = Context; + using CTX_Y = Context; + + auto ctx_x_builder_tuple = ctx_x.CreateSetBuilder(); + auto ctx_y_builder_tuple = ctx_y.CreateSetBuilder(); + auto concated_builder_tuple = BuilderConcate( + ctx_x_builder_tuple, ctx_y_builder_tuple); + return std::make_pair(concated_builder_tuple, + std::get(ctx_x_builder_tuple)); + } + + template + static auto create_builder_tuple( + const Context& + ctx_x) { 
+ using CTX_X = Context; + + auto ctx_x_builder_tuple = ctx_x.CreateSetBuilder(); + return std::make_pair(remove_nth_element(ctx_x_builder_tuple), + std::get(ctx_x_builder_tuple)); + } + + template + static auto create_builder_tuple_for_join_pair( + const Context& + ctx_x, + const Context& + ctx_y) { + static_assert(sizeof...(CTX_PREV_Y) == + 1); // expect ctx_y has only two columns + using CTX_X = Context; + using CTX_Y = Context; + + auto ctx_x_builder_tuple = ctx_x.CreateSetBuilder(); + return ctx_x_builder_tuple; + } + + // InnerJoin + // for example, join (a,b,c) with (b,c,d) we got (a,b,c,d); + // prob: the mapping of tag_id to tag_inds may change. + // prob: builing new columns. + template ::type* = + nullptr> + static auto Join(CTX_X&& ctx_x, CTX_Y&& ctx_y) { + // static_assert(alias_x == alias_y); + LOG(INFO) << "Join context with :" << gs::to_string(join_kind); + // get all tuples from two context. + using ctx_x_iter_t = typename CTX_X::iterator; + using ctx_y_iter_t = typename CTX_Y::iterator; + using ctx_x_all_ele_t = std::remove_reference_t().GetAllElement())>; + using ctx_x_all_data_t = std::remove_reference_t().GetAllData())>; + using ctx_y_all_ele_t = std::remove_reference_t().GetAllElement())>; + using ctx_y_all_data_t = std::remove_reference_t().GetAllData())>; + static constexpr size_t x_ele_num = std::tuple_size_v; + static constexpr size_t y_ele_num = std::tuple_size_v; + static constexpr int x_base_tag = CTX_X::base_tag_id; + static constexpr int y_base_tag = CTX_Y::base_tag_id; + LOG(INFO) << "x ele: " << x_ele_num << ", y ele num: " << y_ele_num; + + static constexpr size_t real_x_ind = + alias_x == -1 ? x_ele_num - 1 : alias_x - x_base_tag; + static constexpr size_t real_y_ind = + alias_y == -1 ? 
y_ele_num - 1 : alias_y - y_base_tag; + using ctx_x_ele_t = std::tuple_element_t; + using ctx_y_ele_t = std::tuple_element_t; + using ctx_y_res_ele_t = + typename gs::remove_ith_type::type; + using ctx_y_res_data_t = + typename gs::remove_ith_type::type; + static_assert(std::is_same_v, + "Join on different type is not supported."); + + auto x_builder_tuple_init = ctx_x.CreateSetBuilder(); + auto y_builder_tuple_init = ctx_y.CreateSetBuilder(); + auto y_builder_tuple = remove_nth_element(y_builder_tuple_init); + auto all_builder = std::tuple_cat(x_builder_tuple_init, y_builder_tuple); + + double t0 = -grape::GetCurrentTime(); + std::unordered_map< + ctx_x_ele_t, std::vector>> + join_key_map; + { + for (auto iter : ctx_y) { + auto y_ele = iter.GetAllElement(); + auto y_data = iter.GetAllData(); + auto y_key = std::get(y_ele); + if (join_key_map.find(y_key) == join_key_map.end()) { + join_key_map[y_key] = + std::vector>(); + } + auto y_res_ele = remove_nth_element(y_ele); + auto y_res_data = remove_nth_element(y_data); + join_key_map[y_key].emplace_back( + std::make_tuple(y_res_ele, y_res_data)); + } + } + LOG(INFO) << " key map valid num: " << join_key_map.size(); + + for (auto x_iter : ctx_x) { + auto ele = x_iter.GetAllElement(); + auto data = x_iter.GetAllData(); + // the sequence of x_tuple shall not change + auto x_key = std::get(ele); + if (join_key_map.find(x_key) != join_key_map.end()) { + for (auto y_res : join_key_map[x_key]) { + auto y_res_ele = std::get<0>(y_res); + auto y_res_data = std::get<1>(y_res); + auto res_ele = std::tuple_cat(ele, y_res_ele); + auto res_data = std::tuple_cat(data, y_res_data); + insert_into_builder_v2(all_builder, res_ele, res_data); + } + } + } + + auto built_tuple = builder_finish( + all_builder, std::make_index_sequence{}); + LOG(INFO) << "after build, size: " << std::get<0>(built_tuple).Size(); + auto offset_vec = make_offset_vector(x_ele_num + y_ele_num - 2, + std::get<0>(built_tuple).Size()); + VLOG(10) << "offset vec size: 
" << offset_vec.size(); + auto prev_tuple = + gs::remove_nth_element(built_tuple); + auto head_tuple = std::get(built_tuple); + + return make_context<0, x_ele_num + y_ele_num - 2>( + std::move(prev_tuple), std::move(head_tuple), std::move(offset_vec)); + } + + // We assume ctx_x and ctx_y doesn't contains duplicates. + // Can only join on last tag. + // join on alias_x0 == alias_y0, alias_x1 == alias_y1. + // the resulted context will contains the ctx x, we assume ctx_y contains no + // additional ele. + template ::type* = + nullptr> + static auto Join(CTX_X&& ctx_x, CTX_Y&& ctx_y) { + LOG(INFO) << "Join context with :" << gs::to_string(join_kind); + + // get all tuples from two context. + using ctx_x_iter_t = typename CTX_X::iterator; + using ctx_y_iter_t = typename CTX_Y::iterator; + using ctx_x_all_ele_t = std::remove_reference_t().GetAllElement())>; + using ctx_y_all_ele_t = std::remove_reference_t().GetAllElement())>; + static constexpr size_t x_ele_num = std::tuple_size_v; + static constexpr size_t y_ele_num = std::tuple_size_v; + static constexpr int x_base_tag = CTX_X::base_tag_id; + static constexpr int y_base_tag = CTX_Y::base_tag_id; + LOG(INFO) << "x ele: " << x_ele_num << ", y ele num: " << y_ele_num; + + static constexpr size_t real_x_ind0 = + alias_x0 == -1 ? x_ele_num - 1 : alias_x0 - x_base_tag; + static constexpr size_t real_x_ind1 = + alias_x1 == -1 ? x_ele_num - 1 : alias_x1 - x_base_tag; + static constexpr size_t real_y_ind0 = + alias_y0 == -1 ? y_ele_num - 1 : alias_y0 - y_base_tag; + static constexpr size_t real_y_ind1 = + alias_y1 == -1 ? 
y_ele_num - 1 : alias_y1 - y_base_tag; + using ctx_x_ele_t0 = std::tuple_element_t; + using ctx_x_ele_t1 = std::tuple_element_t; + using ctx_y_ele_t0 = std::tuple_element_t; + using ctx_y_ele_t1 = std::tuple_element_t; + using ctx_x_ele_t = std::pair; + using ctx_y_ele_t = std::pair; + static_assert(std::is_same_v, + "Join on different type is not supported."); + + auto builder_tuple = + create_builder_tuple_for_join_pair(ctx_x, + ctx_y); + + double t0 = -grape::GetCurrentTime(); + std::unordered_map> join_key_map; + { + for (auto iter : ctx_x) { + auto x_ele = iter.GetAllElement(); + auto pair = std::make_pair(std::get(x_ele), + std::get(x_ele)); + if (join_key_map.find(pair) == join_key_map.end()) { + join_key_map[pair] = 1; + } + } + } + { + for (auto iter : ctx_y) { + auto y_ele = iter.GetAllElement(); + auto pair = std::make_pair(std::get(y_ele), + std::get(y_ele)); + if (join_key_map.find(pair) != join_key_map.end()) { + join_key_map[pair] += 1; + } + } + } + LOG(INFO) << "total entry size in map: " << join_key_map.size(); + + t0 += grape::GetCurrentTime(); + + for (auto iter : ctx_x) { + auto eles = iter.GetAllElement(); + auto datas = iter.GetAllData(); + auto pair = std::make_pair(std::get(eles), + std::get(eles)); + if (join_key_map.find(pair) != join_key_map.end() && + join_key_map[pair] == 2) { + // join. + insert_into_builder_v2(builder_tuple, eles, datas); + } + } + + auto built_tuple = + builder_finish(builder_tuple, std::make_index_sequence{}); + LOG(INFO) << "after build, size: " << std::get<0>(built_tuple).Size(); + auto offset_vec = + make_offset_vector(x_ele_num - 1, std::get<0>(built_tuple).Size()); + VLOG(10) << "offset vec size: " << offset_vec.size(); + auto prev_tuple = gs::remove_nth_element(built_tuple); + auto head_tuple = std::get(built_tuple); + + return make_context<0, x_ele_num - 1>( + std::move(prev_tuple), std::move(head_tuple), std::move(offset_vec)); + } + + // We assume ctx_x and ctx_y doesn't contains duplicates. ???? 
+ // After antijoin, we will only preserve elements in left ctx. + // 1. put all ctx_y eles into hash_set + // 2. iterate ctx_x, building a subset_indices array. + // 3. subset the head node and merge_offset with back + template < + int alias_x0, int alias_x1, int alias_y0, int alias_y1, + JoinKind join_kind, typename CTX_X, typename CTX_Y, + typename std::enable_if::type* = nullptr> + static auto Join(CTX_X&& ctx_x, CTX_Y&& ctx_y) { + LOG(INFO) << "Anti Join context with :" << gs::to_string(join_kind); + + // get all tuples from two context. + using ctx_x_iter_t = typename CTX_X::iterator; + using ctx_y_iter_t = typename CTX_Y::iterator; + using ctx_x_all_ele_t = std::remove_reference_t().GetAllElement())>; + using ctx_y_all_ele_t = std::remove_reference_t().GetAllElement())>; + static constexpr size_t x_ele_num = std::tuple_size_v; + static constexpr size_t y_ele_num = std::tuple_size_v; + static constexpr int x_base_tag = CTX_X::base_tag_id; + static constexpr int y_base_tag = CTX_Y::base_tag_id; + LOG(INFO) << "x ele: " << x_ele_num << ", y ele num: " << y_ele_num; + + static constexpr size_t real_x_ind0 = + alias_x0 == -1 ? x_ele_num - 1 : alias_x0 - x_base_tag; + static constexpr size_t real_x_ind1 = + alias_x1 == -1 ? x_ele_num - 1 : alias_x1 - x_base_tag; + static constexpr size_t real_y_ind0 = + alias_y0 == -1 ? y_ele_num - 1 : alias_y0 - y_base_tag; + static constexpr size_t real_y_ind1 = + alias_y1 == -1 ? 
y_ele_num - 1 : alias_y1 - y_base_tag; + using ctx_x_ele_t0 = std::tuple_element_t; + using ctx_x_ele_t1 = std::tuple_element_t; + using ctx_y_ele_t0 = std::tuple_element_t; + using ctx_y_ele_t1 = std::tuple_element_t; + using ctx_x_ele_t = std::pair; + using ctx_y_ele_t = std::pair; + static_assert(std::is_same_v, + "Join on different type is not supported."); + + double t0 = -grape::GetCurrentTime(); + std::unordered_set> join_key_set; + { + for (auto iter : ctx_y) { + auto y_ele = iter.GetAllElement(); + auto pair = std::make_pair(std::get(y_ele), + std::get(y_ele)); + + join_key_set.insert(pair); + } + } + auto& cur_ = ctx_x.GetHead(); + LOG(INFO) << "total entry size in set: " << join_key_set.size() + << ", ctx x size: " << cur_.Size(); + { + std::stringstream ss; + for (auto iter : join_key_set) { + ss << gs::to_string(iter) << ", "; + } + LOG(INFO) << "join key set: " << ss.str(); + } + + std::vector active_indices; + std::vector new_offsets; + new_offsets.reserve(cur_.Size() + 1); + new_offsets.emplace_back(0); + { + size_t cur_ind = 0; + ctx_x_ele_t prev_tuple; + bool prev_res = false; + for (auto iter : ctx_x) { + auto x_ele = iter.GetAllElement(); + auto pair = std::make_pair(std::get(x_ele), + std::get(x_ele)); + LOG(INFO) << "pair: " << pair.first << ", " << pair.second; + if (cur_ind != 0) { + if (prev_tuple == pair && prev_res) { + LOG(INFO) << gs::to_string(prev_tuple) + << " == " << gs::to_string(pair); + active_indices.emplace_back(cur_ind); + new_offsets.emplace_back(active_indices.size()); + cur_ind += 1; + continue; + } + } + if (join_key_set.find(pair) == join_key_set.end()) { + active_indices.emplace_back(cur_ind); + prev_res = true; + } else { + prev_res = false; + } + prev_tuple = pair; + cur_ind += 1; + new_offsets.emplace_back(active_indices.size()); + } + } + LOG(INFO) << "active indices size: " << active_indices.size(); + + t0 += grape::GetCurrentTime(); + LOG(INFO) << "filter time: " << t0; + + cur_.SubSetWithIndices(active_indices); + 
ctx_x.merge_offset_with_back(new_offsets); + return ctx_x; + } + + // We assume ctx_x and ctx_y doesn't contains duplicates. + // filter ctx_x with ctx_y; + template < + int alias_x, int alias_y, JoinKind join_kind, typename CTX_X, + typename CTX_Y, + typename std::enable_if::type* = nullptr> + static auto Join(CTX_X&& ctx_x, CTX_Y&& ctx_y) { + LOG(INFO) << "Join context with :" << gs::to_string(join_kind); + + // get all tuples from two context. + using ctx_x_iter_t = typename CTX_X::iterator; + using ctx_y_iter_t = typename CTX_Y::iterator; + using ctx_x_all_ele_t = std::remove_reference_t().GetAllElement())>; + using ctx_y_all_ele_t = std::remove_reference_t().GetAllElement())>; + static constexpr size_t x_ele_num = std::tuple_size_v; + static constexpr size_t real_x_ind = + alias_x == -1 ? x_ele_num - 1 : alias_x - CTX_X::base_tag_id; + static constexpr size_t y_ele_num = std::tuple_size_v; + static constexpr size_t real_y_ind = + alias_y == -1 ? y_ele_num - 1 : alias_y - CTX_Y::base_tag_id; + using ctx_x_join_key_t = + typename std::tuple_element::type; + using ctx_y_join_key_t = + typename gs::tuple_element::type; + static_assert(std::is_same_v); + + std::unordered_set key_set; + for (auto iter : ctx_y) { + auto ele = iter.GetAllElement(); + key_set.insert(gs::get_from_tuple(ele)); + } + std::vector active_indices; + std::vector new_offsets; + auto& x_head = ctx_x.GetHead(); + new_offsets.reserve(x_head.Size() + 1); + new_offsets.emplace_back(0); + size_t cur_ind = 0; + for (auto iter : ctx_x) { + auto ele = iter.GetAllElement(); + auto x_key = gs::get_from_tuple(ele); + if (key_set.find(x_key) == key_set.end()) { + active_indices.emplace_back(cur_ind); + } + cur_ind += 1; + new_offsets.emplace_back(active_indices.size()); + } + x_head.SubSetWithIndices(active_indices); + ctx_x.merge_offset_with_back(new_offsets); + return std::move(ctx_x); + } + + // intersect two context on the specified key, it is expected that two context + // only differs at the last 
column + template ::value>::type* = nullptr> + static auto Intersect(CTX_X&& ctx_x, CTX_Y&& ctx_y) { + using ctx_x_iter_t = typename CTX_X::iterator; + using ctx_y_iter_t = typename CTX_Y::iterator; + // the prev column (the last column in prev_tuple shoud be the same.) + using ctx_x_all_ele_t = std::remove_reference_t().GetAllElement())>; + using ctx_y_all_ele_t = std::remove_reference_t().GetAllElement())>; + static constexpr size_t x_ele_num = std::tuple_size_v; + static constexpr size_t y_ele_num = std::tuple_size_v; + static constexpr int x_base_tag = CTX_X::base_tag_id; + static constexpr int y_base_tag = CTX_Y::base_tag_id; + static constexpr size_t real_alias_x = + alias_x == -1 ? x_ele_num - 1 : alias_x - x_base_tag; + static constexpr size_t real_alias_y = + alias_y == -1 ? y_ele_num - 1 : alias_y - y_base_tag; + static_assert(real_alias_x > 0 && real_alias_y > 0); + static_assert(real_alias_x == real_alias_y); + static_assert(real_alias_x == x_ele_num - 1); + using x_head_ele_t = + std::tuple_element_t - 1, + ctx_x_all_ele_t>; + using y_head_ele_t = + std::tuple_element_t - 1, + ctx_y_all_ele_t>; + auto& head_x = ctx_x.GetHead(); + auto& head_y = ctx_y.GetHead(); + auto left_repeat_array = ctx_x.ObtainOffsetFromTag(real_alias_x - 1); + auto right_repeat_array = ctx_y.ObtainOffsetFromTag(real_alias_y - 1); + CHECK(left_repeat_array.size() == right_repeat_array.size()) + << "left size " << left_repeat_array.size() << " right size " + << right_repeat_array.size(); + + std::vector active_indices, new_offsets; + std::tie(active_indices, new_offsets) = + intersect_impl(head_x, head_y, left_repeat_array, right_repeat_array); + head_x.SubSetWithIndices(active_indices); + ctx_x.merge_offset_with_back(new_offsets); + return ctx_x; + } + + // intersect for rowVertexSet and twoLabelVertexSet + template + static std::pair, std::vector> intersect_impl( + const RowVertexSet& head_x, + const RowVertexSet& head_y, + const std::vector& left_repeat_array, + const 
std::vector& right_repeat_array) { + std::vector + active_indices; // got a active_indices array to filter ctx_x. + + std::vector new_offsets; + new_offsets.emplace_back(0); + + double t0 = -grape::GetCurrentTime(); + auto builder = head_x.CreateBuilder(); + auto x_vec = head_x.GetVertices(); + auto y_vec = head_y.GetVertices(); + active_indices.reserve(std::min(x_vec.size(), y_vec.size())); + VID_T max_vid = 0; + for (auto vid : x_vec) { + max_vid = std::max(max_vid, vid); + } + for (auto vid : y_vec) { + max_vid = std::max(max_vid, vid); + } + grape::Bitset bitset; + bitset.init(max_vid + 1); + CHECK(left_repeat_array.size() == right_repeat_array.size()); + for (auto i = 0; i + 1 < left_repeat_array.size(); ++i) { + auto x_start = left_repeat_array[i]; + auto x_end = left_repeat_array[i + 1]; + auto y_start = right_repeat_array[i]; + auto y_end = right_repeat_array[i + 1]; + if (x_start == x_end || y_start == y_end) { + for (auto i = x_start; i < x_end; ++i) { + new_offsets.emplace_back(active_indices.size()); + } + continue; + } else { + for (auto i = y_start; i < y_end; ++i) { + bitset.set_bit(y_vec[i]); + } + for (auto i = x_start; i < x_end; ++i) { + if (bitset.get_bit(x_vec[i])) { + active_indices.emplace_back(i); + } + new_offsets.emplace_back(active_indices.size()); + } + bitset.clear(); + } + } + + t0 += grape::GetCurrentTime(); + LOG(INFO) << "Intersect cost: " << t0; + return std::make_pair(std::move(active_indices), std::move(new_offsets)); + } + + // intersect for row set and two label set. + template + static std::pair, std::vector> intersect_impl( + const RowVertexSet& head_x, + const TwoLabelVertexSet& head_y, + const std::vector& left_repeat_array, + const std::vector& right_repeat_array) { + std::vector + active_indices; // got a active_indices array to filter ctx_x. 
+ std::vector new_offsets; + new_offsets.emplace_back(0); + + size_t ind_x = 0; + size_t ind_x_limit = head_x.Size(); + auto x_iter = head_x.begin(); + auto x_end = head_x.end(); + auto y_iter = head_y.begin(); + auto y_end = head_y.end(); + // check whether there is same label in head_y + auto& y_labels = head_y.GetLabels(); + int valid_label_ind = -1; + if (y_labels[0] == head_x.GetLabel()) { + valid_label_ind = 0; + } else if (y_labels[1] == head_x.GetLabel()) { + valid_label_ind = 1; + } + if (valid_label_ind == -1) { + while (x_iter != x_end) { + new_offsets.emplace_back(active_indices.size()); + ind_x += 1; + ++x_iter; + } + return std::make_pair(std::move(active_indices), std::move(new_offsets)); + } else { + auto& vertices = head_y.GetVertices(); + auto& bitset = head_y.GetBitset(); + for (auto i = 0; i + 1 < left_repeat_array.size(); ++i) { + auto left_min = left_repeat_array[i]; + auto left_max = left_repeat_array[i + 1]; + auto right_min = right_repeat_array[i]; + auto right_max = right_repeat_array[i + 1]; + if (left_min == left_max || right_min == right_max) { + // skip + for (auto tmp = left_min; tmp < left_max; ++tmp) { + new_offsets.emplace_back(active_indices.size()); + ind_x += 1; + ++x_iter; + } + for (auto tmp = right_min; tmp < right_max; ++tmp) { + ++y_iter; + } + } else { + // intersect + std::unordered_set set; + for (auto tmp = right_min; tmp < right_max; ++tmp) { + auto ele = y_iter.GetElement(); + if (ele.first == valid_label_ind) { + set.insert(ele.second); + } + ++y_iter; + } + for (auto tmp = left_min; tmp < left_max; ++tmp) { + auto ele = x_iter.GetElement(); + if (set.find(ele) != set.end()) { + active_indices.emplace_back(ind_x); + } + ind_x += 1; + ++x_iter; + new_offsets.emplace_back(active_indices.size()); + } + } + } + return std::make_pair(std::move(active_indices), std::move(new_offsets)); + } + } + + template + static auto builder_finish(std::tuple& builder_tuple, + std::index_sequence) { + return 
std::make_tuple(std::get(builder_tuple).Build()...); + } + + template + static auto builder_finish_right_impl(std::tuple& builder_tuple, + std::index_sequence) { + return std::make_tuple(std::get(builder_tuple).Build()...); + } + + template + static auto builder_finish_right(std::tuple& builder_tuple) { + auto ind_seq = std::make_index_sequence{}; + return builder_finish_right_impl(builder_tuple, ind_seq); + } + + // if Is < Ind + template ::type* = nullptr> + static auto builder_finish_left_impl(std::tuple& set_builder, + KeySetBuilder& key_builder) { + return std::get(set_builder).Build(); + } + // if Is == Ind + template ::type* = nullptr> + static auto builder_finish_left_impl(std::tuple& set_builder, + KeySetBuilder& key_builder) { + return key_builder.Build(); + } + + // if Is > Ind + template Ind)>::type* = nullptr> + static auto builder_finish_left_impl(std::tuple& set_builder, + KeySetBuilder& key_builder) { + return std::get(set_builder).Build(); + } + + template + static auto builder_finish_left_impl(std::tuple& builder_tuple, + KeyBuilder& key_builder, + std::index_sequence) { + return std::make_tuple( + builder_finish_left_impl(builder_tuple, key_builder)...); + } + + template + static auto builder_finish_left(std::tuple& builder_tuple, + KeyBuilder& key_builder) { + auto ind_seq = std::make_index_sequence{}; + return builder_finish_left_impl(builder_tuple, key_builder, ind_seq); + } + + template + static void insert_into_builder_v2(std::tuple& builder_tuple, + const std::tuple& ele, + const std::tuple& data) { + static_assert(sizeof...(BuilderT) == sizeof...(ELE), + "Builder number and element number not match"); + static_assert(sizeof...(BuilderT) == sizeof...(DATA), + "Builder number and data number not match"); + insert_into_builder_v2(builder_tuple, ele, data, + std::make_index_sequence{}); + } + + template + static void insert_into_builder_v2(std::tuple& builder_tuple, + const std::tuple& ele, + const std::tuple& data, + std::index_sequence) { + 
// (std::get(builder_tuple).Insert(std::get(ele)), ...); + (insert_into_builder_v2_impl(std::get(builder_tuple), std::get(ele), + std::get(data)), + ...); + } + + template + static void insert_into_builder(std::tuple& builder_tuple, + const std::tuple& ele, + std::index_sequence) { + (insert_into_builder(builder_tuple, ele), ...); + } + + // if cur_ind == real_ind, skip inserting. + template ::type* = nullptr> + static inline void insert_into_builder(std::tuple& builder_tuple, + const std::tuple& ele) {} + + // if cur_ind > real_ind, insert + template ::type* = nullptr> + static inline void insert_into_builder(std::tuple& builder_tuple, + const std::tuple& ele) { + static constexpr size_t new_ind = (cur_ind - 1); + std::get(builder_tuple).Insert(std::get(ele)); + } + + // if cur_ind < real_ind, insert + template cur_ind)>::type* = nullptr> + static inline void insert_into_builder(std::tuple& builder_tuple, + const std::tuple& ele) { + std::get(builder_tuple).Insert(std::get(ele)); + } +}; +} // namespace gs + +#endif // ENGINES_HQPS_ENGINE_BASE_ENGINE_H_ diff --git a/flex/engines/hqps_db/core/context.h b/flex/engines/hqps_db/core/context.h new file mode 100644 index 000000000000..28b4c96f7189 --- /dev/null +++ b/flex/engines/hqps_db/core/context.h @@ -0,0 +1,1374 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#ifndef ENGINES_HQPS_ENGINE_CONTEXT_H_ +#define ENGINES_HQPS_ENGINE_CONTEXT_H_ + +#include +#include + +#include "flex/engines/hqps_db/core/utils/hqps_utils.h" +#include "flex/storages/rt_mutable_graph/types.h" +#include "grape/types.h" + +namespace gs { + +static constexpr int INVALID_TAG = -2; + +// Convert a offset array to repeat array. +std::vector offset_array_to_repeat_array( + std::vector&& offset_array) { + std::vector repeat_array(offset_array.size() - 1); + for (auto i = 0; i < repeat_array.size(); ++i) { + repeat_array[i] = offset_array[i + 1] - offset_array[i]; + } + return repeat_array; +} + +/** + * @brief The iterator for context. + * + * @tparam SET_TS The sets we current holds, include head node. + * @tparam Enable For template specialization. + */ +template +class ContextIter; + +//-------------------------Definition for context iter-------------- + +// 0. specialization for only one vertex set +template +class ContextIter> { + public: + using tuple_t = typename SET_T::data_tuple_t; + using self_type_t = ContextIter>; + using head_iter = typename SET_T::iterator; + + ContextIter(head_iter&& iter) : iter_(std::move(iter)) {} + + // GetVertex for Vertex set and GetEdge for Edge set. 
+ auto GetElement() const { return iter_.GetElement(); } + + // template + auto GetAllIndexDataEle() const { + return std::make_tuple(GetAllData(), GetAllIndexElement()); + } + + auto GetAllElement() const { return std::make_tuple(GetElement()); } + + auto GetAllIndexElement() const { + return std::make_tuple(iter_.GetIndexElement()); + } + + auto GetData() const { return iter_.GetData(); } + + auto GetAllData() const { return std::make_tuple(GetData()); } + + inline self_type_t& operator++() { + ++iter_; + return *this; + }; + inline bool operator==(const self_type_t& rhs) const { + return iter_ == rhs.iter_; + } + inline bool operator!=(const self_type_t& rhs) const { + return iter_ != rhs.iter_; + } + inline bool operator<(const self_type_t& rhs) const { + return iter_ < rhs.iter_; + } + inline const self_type_t& operator*() const { return *this; } + inline const self_type_t* operator->() const { return this; } + + private: + typename SET_T::iterator iter_; +}; + +// 2. specialization for head node is vertex_set +template +class ContextIter> { + public: + static constexpr int num_others = sizeof...(PREV_SETS); + static constexpr auto index_seq = + std::make_integer_sequence{}; + using others_tuple_t = std::tuple; + using others_iter_tuple_t = std::tuple...>; + using head_iter_t = typename SET_T::iterator; + using tuple_t = + std::tuple...>; + using self_type_t = ContextIter>; + + ContextIter(head_iter_t&& cur_iter, others_iter_tuple_t&& others_iter_tuple, + const std::vector>& offsets) + : cur_iter_(cur_iter), + others_iter_tuple_(std::move(others_iter_tuple)), + offsets_arrays_(offsets), + cur_offset_(0) { + others_offset_.fill(0); + init_iter_tuple(); // init iterator tuple to a valid position. + } + + // GetVertex for Vertex set and GetEdge for Edge set, for head node. 
+ auto GetElement() const { return cur_iter_.GetElement(); } + + auto GetData() const { return cur_iter_.GetData(); } + + auto GetAllElement() const { return get_element_tuple_impl(index_seq); } + + template + auto get_element_tuple_impl(std::integer_sequence) const { + return std::make_tuple(std::get(others_iter_tuple_).GetElement()..., + cur_iter_.GetElement()); + } + + size_t GetTagOffset(int tag) const { + size_t real_tag = tag == -1 ? num_others : tag; + real_tag -= base_tag; + CHECK(real_tag <= others_offset_.size()); + return others_offset_[real_tag]; + } + + auto GetAllIndexElement() const { + return get_index_ele_tuple_impl(index_seq); + } + + template + auto get_index_ele_tuple_impl(std::integer_sequence) const { + return std::make_tuple( + std::get(others_iter_tuple_).GetIndexElement()..., + cur_iter_.GetIndexElement()); + } + + auto GetAllIndexDataEle() const { + return std::make_tuple(GetAllData(), GetAllIndexElement()); + } + + auto GetAllData() const { return get_data_tuple_impl(index_seq); } + + template + auto get_data_tuple_impl(std::integer_sequence) const { + return std::make_tuple(std::get(others_iter_tuple_).GetData()..., + cur_iter_.GetData()); + } + + inline self_type_t& operator++() { + cur_offset_ += 1; + // VLOG(10) << "cur offset:" << cur_offset_; + ++cur_iter_; + // VLOG(10) << "inc iter"; + // update with reverse index seq. 
+ update_other_iter(); + return *this; + } + + // General implementation for cols + template + inline typename std::enable_if<(Is > -1 && Is < num_others - 1)>::type + update_other_iter() { + // VLOG(10) << "updating " << Is; + auto child_cur_ind = others_offset_[Is + 1]; + bool flag = false; + auto& cur_off_array = offsets_arrays_[Is]; + + // quick check + if (child_cur_ind < other_offset_limit_[Is]) { + return; + } + // cache the upper bound to avoid repeated computation + while (others_offset_[Is] + 1 < cur_off_array.size() && + child_cur_ind >= cur_off_array[others_offset_[Is] + 1]) { + ++std::get(others_iter_tuple_); + ++others_offset_[Is]; + flag = true; + } + if (flag) { + other_offset_limit_[Is] = cur_off_array[others_offset_[Is] + 1]; + // propagate to iterator with smaller index + update_other_iter(); + } + } + + // Specialization for -1 col. + template + typename std::enable_if<(Is == -1)>::type update_other_iter() { + return; + } + + // Specialization for the first col. + template + inline typename std::enable_if<(Is == num_others - 1)>::type + update_other_iter() { + // VLOG(10) << "updating " << Is; + auto& my_cur_ind = others_offset_[Is]; + auto& cur_offset_array_ = offsets_arrays_[Is]; + bool flag = false; + + if (cur_offset_ < other_offset_limit_[Is]) { + return; + } + + while (my_cur_ind + 1 < cur_offset_array_.size() && + cur_offset_ >= cur_offset_array_[my_cur_ind + 1]) { + ++std::get(others_iter_tuple_); + others_offset_[Is] += 1; + flag = true; + } + if (flag) { + other_offset_limit_[Is] = cur_offset_array_[my_cur_ind + 1]; + // propagate to iterator with smaller index + update_other_iter(); + } + } + + inline bool operator==(const self_type_t& rhs) const { + return cur_iter_ == rhs.cur_iter_; + } + inline bool operator!=(const self_type_t& rhs) const { + return cur_iter_ != rhs.cur_iter_; + } + inline bool operator<(const self_type_t& rhs) const { + return cur_iter_ < rhs.cur_iter_; + } + inline const self_type_t& operator*() const { return 
*this; } + inline const self_type_t* operator->() const { return this; } + + private: + // Init iter tuple recursively. + // a = [1,2], b = [3,4,5,6], c = [7,8,9,10] + // offset_arrays = [[0,2,4], [0,0,1,3,4]] + // init : others_offset = [0,0]; + // after init_iter_tuple_impl<1>: [0,1] + // after init_iter_tuple_impl<0>: [0,1] + void init_iter_tuple() { init_iter_tuple_impl(); } + + template + typename std::enable_if<(Is == -1)>::type init_iter_tuple_impl() { + return; + } + + template + typename std::enable_if<(Is > -1 && Is < num_others - 1)>::type + init_iter_tuple_impl() { + size_t limit = offsets_arrays_[Is].size(); + size_t this_offset = 0; + size_t child_offset = others_offset_[Is + 1]; + while (this_offset + 1 < limit && + offsets_arrays_[Is][this_offset + 1] <= child_offset) { + this_offset += 1; + ++std::get(others_iter_tuple_); + } + others_offset_[Is] = this_offset; + other_offset_limit_[Is] = offsets_arrays_[Is][this_offset + 1]; + init_iter_tuple_impl(); + } + + template + typename std::enable_if<(Is == num_others - 1)>::type init_iter_tuple_impl() { + size_t limit = offsets_arrays_[num_others - 1].size(); + size_t this_offset = 0; + while (this_offset + 1 < limit && + offsets_arrays_[num_others - 1][this_offset + 1] <= + cur_offset_) { // cur_offset_ == 0 + this_offset += 1; + ++std::get(others_iter_tuple_); + } + others_offset_[num_others - 1] = this_offset; + other_offset_limit_[Is] = offsets_arrays_[num_others - 1][this_offset + 1]; + init_iter_tuple_impl(); + } + /////////////////////////////////////////////////////////////////////////////// + + typename SET_T::iterator cur_iter_; + const std::vector>& offsets_arrays_; + others_iter_tuple_t others_iter_tuple_; + std::array others_offset_; + std::array other_offset_limit_; + size_t cur_offset_; +}; + +/** + * @brief A data structure holding all the data we have in query. + * + * @tparam HEAD_T The current head node. + * @tparam base_tag The base tag based on which the tag id increases. 
Defaultly + * 0, set to non-zero for grouped sets. + * @tparam cur_alias To which col_id it is aliased. + * @tparam ALIAS_COL The saved obj in query up till now. + */ +template +class Context; +template +using FirstEntityType = std::tuple_element_t<0, std::tuple>; + +template +struct Dummy; + +template +struct ResultContextTImpl; + +// Get the correct result context type for returning. +template +struct ResultContextTImpl< + new_alias, NEW_HEAD_T, old_alias, OLD_HEAD_T, base_tag, + std::tuple, + typename std::enable_if< + old_alias != -1 && new_alias != old_alias && + !std::is_same>, + grape::EmptyType>::value, + Dummy>::type> { + using result_t = + Context; +}; + +template +struct ResultContextTImpl< + new_alias, NEW_HEAD_T, old_alias, OLD_HEAD_T, base_tag, + std::tuple, + typename std::enable_if< + old_alias != -1 && new_alias != old_alias && + std::is_same>, + grape::EmptyType>::value, + Dummy>::type> { + using result_t = Context; +}; + +template +struct ResultContextTImpl< + new_alias, NEW_HEAD_T, old_alias, OLD_HEAD_T, base_tag, + std::tuple, + typename std::enable_if>::type> { + using result_t = Context; +}; + +template +struct ResultContextTImpl< + new_alias, NEW_HEAD_T, old_alias, OLD_HEAD_T, base_tag, + std::tuple, + typename std::enable_if< + old_alias == -1 && new_alias != old_alias && + std::is_same>, + grape::EmptyType>::value, + Dummy>::type> { + using result_t = Context; +}; + +template +struct ResultContextTImpl< + new_alias, NEW_HEAD_T, old_alias, OLD_HEAD_T, base_tag, + std::tuple, + typename std::enable_if< + old_alias == -1 && new_alias != old_alias && + !std::is_same>, + grape::EmptyType>::value, + Dummy>::type> { + using result_t = Context; +}; + +template +struct ResultContextT { + static constexpr int32_t new_alias = + ResultColId::res_alias; + using result_t = typename ResultContextTImpl< + new_alias, NEW_HEAD_T, old_alias, OLD_HEAD_T, base_tag, + std::tuple, + Dummy>::result_t; +}; + +template +struct ResultContextT { + static constexpr 
int32_t new_alias = + ResultColId::res_alias; + using result_t = typename ResultContextTImpl< + new_alias, NEW_HEAD_T, old_alias, OLD_HEAD_T, base_tag, + std::tuple, + Dummy>::result_t; +}; + +std::vector obtain_offset_between_tags_impl( + const std::vector>& offsets, int dst_tag) { + CHECK(offsets.size() > dst_tag) + << "offset size" << offsets.size() << ", dst tag" << dst_tag; + std::vector res = offsets[dst_tag]; + // VLOG(10) << "init offset: " << gs::to_string(res); + for (auto i = dst_tag + 1; i < offsets.size(); ++i) { + for (auto j = 0; j < res.size(); ++j) { + res[j] = offsets[i][res[j]]; + } + } + return res; +} + +// specialization for Context with aliased at least 1 alias_col. +template +class Context { + public: + using head_t = HEAD_T; + static constexpr int prev_alias_num = sizeof...(ALIAS_COL); + static constexpr int cur_col_id = cur_alias; + static_assert(cur_alias == -1 || cur_alias == prev_alias_num + base_tag); + // alias_num equals to the count of aliases, not total alias num, not included + // ones below base_tag. + static constexpr size_t alias_num = + cur_alias == -1 ? prev_alias_num : prev_alias_num + 1; + static constexpr size_t col_num = sizeof...(ALIAS_COL) + 1; + // max tag id appeared in current context. + // static constexpr size_t max_tag_id = alias_num + base_tag; + static constexpr int max_tag_id = + cur_alias == -1 ? 
base_tag + prev_alias_num - 1 : cur_alias; + static constexpr int base_tag_id = base_tag; + + static constexpr auto index_seq = + std::make_integer_sequence{}; + using iterator = ContextIter>; + using others_iter_tuple_t = std::tuple...>; + using self_type_t = Context; + using index_ele_tuples_t = + std::tuple; + using prev_tuple_t = std::tuple; + + template + using nth_node_t = + std::tuple_element_t>; + + // Move constructor, passing the member not the context object + Context(HEAD_T&& head, std::tuple&& old_cols, + std::vector>&& offset, + int sub_task_start_tag = INVALID_TAG) + : cur_(std::move(head)), + offsets_arrays_(std::move(offset)), + prev_(std::move(old_cols)), + sub_task_start_tag_(sub_task_start_tag) {} + + Context(Context&& other) noexcept + : cur_(std::move(other.cur_)), + offsets_arrays_(std::move(other.offsets_arrays_)), + prev_(std::move(other.prev_)), + sub_task_start_tag_(other.sub_task_start_tag_) {} + + // copy constructor + Context(Context& other) + : cur_(other.cur_), + offsets_arrays_(other.offsets_arrays_), + prev_(other.prev_), + sub_task_start_tag_(other.sub_task_start_tag_) {} + + // Merge another node with a different head. We expect the other things, like + // prev nodes, prev offset array, are the same. We will create a new Node, + // UnionedNode, which contains two labels. <1,2,<>, 4> <1,2,3,<>> + + ~Context() {} + + HEAD_T& GetHead() { return cur_; } + + HEAD_T& GetMutableHead() { return cur_; } + + // we shall never change. + const std::tuple& GetPrevCols() const { return prev_; } + + auto CreateSetBuilder() const { + return std::tuple_cat( + create_prev_set_builder(std::make_index_sequence{}), + std::make_tuple(cur_.CreateBuilder())); + } + + template + auto create_prev_set_builder(std::index_sequence) const { + return std::make_tuple(std::get(prev_).CreateBuilder()...); + } + + // for the passing offset array, check whether the corresponding data is + // valid. + // only filter, no data is append. 
+ void FilterWithOffsets(std::vector& offset, JoinKind join_kind) { + std::vector active_indices; + for (auto i = 0; i < offset.size() - 1; ++i) { + if (offset[i] < offset[i + 1]) { + active_indices.emplace_back(i); + } + } + std::vector res_offset = + cur_.FilterWithIndices(active_indices, join_kind); + merge_offset(offsets_arrays_.back(), res_offset); + // this->Alias(); + } + + std::vector ObtainOffsetFromTag(int dst_tag) const { + CHECK(dst_tag > 0 || dst_tag <= prev_alias_num + base_tag); + if (dst_tag == -1) { + dst_tag = prev_alias_num + base_tag; + } + if (dst_tag < prev_alias_num + base_tag) { + return obtain_offset_between_tags_impl(offsets_arrays_, + dst_tag - base_tag); + } else { + std::vector res; + res.reserve(cur_.Size() + 1); + for (auto i = 0; i <= cur_.Size(); ++i) { + res.push_back(i); + } + return res; + } + } + + std::vector ObtainOffsetFromSubTaskStart() const { + CHECK(sub_task_start_tag_ != INVALID_TAG); + if (base_tag <= sub_task_start_tag_) { + size_t dst_tag = sub_task_start_tag_ - base_tag; + CHECK(offsets_arrays_.size() > dst_tag) + << "offset size" << offsets_arrays_.size() << ", dst tag" << dst_tag; + // VLOG(10) << "dst tag: " << dst_tag + // << ", offset size: " << offsets_arrays_.size(); + std::vector res = offsets_arrays_[dst_tag]; + for (auto i = dst_tag + 1; i < offsets_arrays_.size(); ++i) { + for (auto j = 0; j < res.size(); ++j) { + res[j] = offsets_arrays_[i][res[j]]; + } + } + return res; + // return the offset between sub_task_start_tag to head_tag; + } else { + // If we abandon the context's history, we must be one-one mapping. + // Only support fold, no + std::vector res; + res.reserve(cur_.Size() + 1); + for (auto i = 0; i <= cur_.Size(); ++i) { + res.push_back(i); + } + return res; + } + } + + // Is is counted from zero, not from base_tag. 
+ template = 0 && Is < prev_alias_num + base_tag)>::type* = nullptr> + auto& GetNode() { + return std::get(prev_); + } + + template ::type* = nullptr> + HEAD_T& GetNode() { + return cur_; + } + + template = 0 && Is < prev_alias_num + base_tag)>::type* = nullptr> + auto& GetMutableNode() { + return std::get(prev_); + } + + template ::type* = nullptr> + HEAD_T& GetMutableNode() { + return cur_; + } + + const std::vector& GetOffset(int ind) const { + if (ind == -1) { + ind = offsets_arrays_.size() - 1; + } + CHECK(offsets_arrays_.size() > ind); + return offsets_arrays_[ind]; + } + + std::vector& GetMutableOffset(int ind) { + if (ind == -1) { + ind = offsets_arrays_.size() - 1; + } + CHECK(offsets_arrays_.size() > ind); + return offsets_arrays_[ind]; + } + + HEAD_T&& MoveHead() { return std::move(cur_); } + + size_t AliasNum() const { return alias_num; } + + template + others_iter_tuple_t make_others_begin_iter_tuple( + std::integer_sequence) const { + return std::make_tuple(std::get(prev_).begin()...); + } + + iterator begin() const { + auto others_iter_tuple = make_others_begin_iter_tuple(index_seq); + return iterator(std::move(cur_.begin()), std::move(others_iter_tuple), + offsets_arrays_); + } + iterator end() const { + auto others_iter_tuple = make_others_begin_iter_tuple(index_seq); + return iterator(std::move(cur_.end()), std::move(others_iter_tuple), + offsets_arrays_); + } + + // Alias head node with a alias value. 
+ template > + RES_T Alias() { + static_assert(cur_alias == -1 && new_alias == prev_alias_num + base_tag); + return RES_T(std::move(cur_), std::move(prev_), std::move(offsets_arrays_), + sub_task_start_tag_); + } + + self_type_t ReplaceHead(HEAD_T&& new_head, std::vector&& offset) { + merge_offset(offsets_arrays_.back(), offset); + return self_type_t(std::move(new_head), std::move(prev_), + std::move(offsets_arrays_), sub_task_start_tag_); + } + + template ::type* = nullptr> + auto ApplyNode(NEW_HEAD_T&& new_head, std::vector&& offset) { + // Collection + auto new_set_and_offset = new_head.apply(offset); + CHECK(std::get<1>(new_set_and_offset).size() == cur_.Size() + 1); + return this->template AddNode( + std::move(std::get<0>(new_set_and_offset)), + std::move(std::get<1>(new_set_and_offset))); + } + + // For non-collection apply result, just append. + template < + AppendOpt append_opt, typename NEW_HEAD_T, + typename std::enable_if::type* = nullptr> + auto ApplyNode(NEW_HEAD_T&& new_head, std::vector&& offset) { + CHECK(offset.size() == cur_.Size() + 1); + return this->template AddNode(std::move(new_head), + std::move(offset)); + } + + // 0. add new node to obtain a new Context, if i'm not aliased + // alias_to_use indicates which column the input offset array is aligned to. + // we need to transform it to make it align with the ending column. + template < + AppendOpt opt, typename NEW_HEAD_T, + typename RES_T = Context< + NEW_HEAD_T, ResultColId::res_alias, + base_tag, ALIAS_COL...>, + typename std::enable_if<(cur_alias == -1), NEW_HEAD_T>::type* = nullptr> + RES_T AddNode(NEW_HEAD_T&& new_node, std::vector&& offset, + int alias_to_use = -1) { + VLOG(10) << "Replace head with cur_alias == -1, offset array size:" + << offsets_arrays_.size() << " alias to use: " << alias_to_use; + if (offsets_arrays_.size() == 0) { + offsets_arrays_.emplace_back(std::move(offset)); + } else { + // Make input offset array align with the last set. 
+ auto new_offset = align_offset(new_node, std::move(offset), + offsets_arrays_, alias_to_use); + // Combine the input offset array with offset array of + // offsets_arrays_[-1]. + merge_offset(offsets_arrays_.back(), new_offset); + } + return RES_T(std::move(new_node), std::move(prev_), + std::move(offsets_arrays_), sub_task_start_tag_); + } + + // 1. res_alias eq cur_alias, we need to replace the current head node. + template < + AppendOpt opt, typename NEW_HEAD_T, + typename RES_T = typename ResultContextT< + opt, NEW_HEAD_T, cur_alias, HEAD_T, base_tag, ALIAS_COL...>::result_t, + typename std::enable_if<(opt == AppendOpt::Replace), NEW_HEAD_T>::type* = + nullptr> + RES_T AddNode(NEW_HEAD_T&& new_node, std::vector&& offset, + int alias_to_use = -1) { + VLOG(10) << "Replace head with cur_alias" << cur_alias + << ", append opt:" << gs::to_string(opt) + << ",align to use:" << alias_to_use; + // append the offset to the offset array. + // Make input offset array align with the last set. + auto new_offset = align_offset(new_node, std::move(offset), offsets_arrays_, + alias_to_use); + merge_offset(offsets_arrays_.back(), new_offset); + return RES_T(std::move(new_node), std::move(prev_), + std::move(offsets_arrays_), sub_task_start_tag_); + } + + // 2. + // Replace current Head with new node, if i'm aliased to prev_alias_num. + template ::result_t, + typename std::enable_if<(append_opt != AppendOpt::Replace && + cur_alias != -1), + NEW_HEAD_T>::type* = nullptr> + RES_T AddNode(NEW_HEAD_T&& new_node, std::vector&& offset, + int alias_to_use = -1) { + VLOG(10) << "Replace head with cur_alias" << cur_alias + << ", append opt:" << gs::to_string(append_opt) << ",align to use" + << alias_to_use; + // append the offset to the offset array. + // Make input offset array align with the last set. 
+ { + std::vector offset_array_size; + for (auto& off : offsets_arrays_) { + offset_array_size.emplace_back(off.size()); + } + LOG(INFO) << "Cur ctx offset array size: " + << gs::to_string(offset_array_size); + LOG(INFO) << "input offset size: " << offset.size(); + } + auto new_offset = align_offset(new_node, std::move(offset), offsets_arrays_, + alias_to_use); + LOG(INFO) << "After alias " << new_offset.size(); + offsets_arrays_.emplace_back(std::move(new_offset)); + auto cated_tuple = + std::tuple_cat(std::move(prev_), std::make_tuple(std::move(cur_))); + return RES_T(std::move(new_node), std::move(cated_tuple), + std::move(offsets_arrays_), sub_task_start_tag_); + } + + template + typename std::enable_if<(Is == -1 || Is == prev_alias_num + base_tag)>::type + UpdateChildNode(std::vector&& offset) { + static constexpr size_t act_Is = prev_alias_num; + merge_offset(offsets_arrays_[act_Is - 1], offset); + } + + // Do not use this to replace head. + template + typename std::enable_if<(Is >= 0 && Is < prev_alias_num + base_tag)>::type + UpdateChildNode(std::vector&& offset) { + // The input offset is respect to the old node. + // we need + // 1) merge offset and offsets_array_[Is] to make sure (tag < Is) can be + // correctly visited. + // 2) propagate the change of offsets_array[Is] to later + // (tag > Is) nodes. 
+ static constexpr int act_Is = Is - base_tag; + // VLOG(10) << "use act_Is: " << act_Is << ", " << Is; + if constexpr (act_Is > 0) { + merge_offset(offsets_arrays_[act_Is - 1], offset); + } + + auto new_size = std::get(prev_).Size(); + + std::vector removed_indices; + // CHECK(removed_indices.size() != old_size); + for (auto i = 0; i < offset.size() - 1; ++i) { + if (offset[i] == offset[i + 1]) { + removed_indices.emplace_back(i); + } + } + if (removed_indices.size() == 0) { + VLOG(10) << "no ele is delete from tag: " << act_Is << ", return "; + return; + } + VLOG(10) << "removed indices" << gs::to_string(removed_indices); + CHECK(new_size == offsets_arrays_[act_Is - 1].back()); + // create a vector contains all indices before this update. so we can gather + // the offset range vec. + std::vector all_indices; + for (auto i = 0; i < offset.size(); ++i) { + all_indices.emplace_back(i); + } + + updateChildNodeAndOffset(all_indices, removed_indices); + } + + // For current tag, remove ele with respect to removed indices, and update the + // offset array. + template + typename std::enable_if<(Is < prev_alias_num + base_tag)>::type + updateChildNodeAndOffset(std::vector& all_indices, + std::vector& removed_indices) { + static constexpr size_t act_Is = Is - base_tag; + for (auto i = 0; i < all_indices.size(); ++i) { + all_indices[i] = offsets_arrays_[act_Is - 1][all_indices[i]]; + } + auto res_offset = std::get(prev_).SubSetWithRemovedIndices( + removed_indices, all_indices); + offsets_arrays_[act_Is - 1].swap(res_offset); + + // all_indices are changed after each run, while removed_indices never + // changes. 
+ updateChildNodeAndOffset(all_indices, removed_indices); + return; + } + + template + typename std::enable_if<(Is == prev_alias_num + base_tag)>::type + updateChildNodeAndOffset(std::vector& all_indices, + std::vector& removed_indices) { + static constexpr size_t act_Is = Is - base_tag; + for (auto i = 0; i < all_indices.size(); ++i) { + all_indices[i] = offsets_arrays_[act_Is - 1][all_indices[i]]; + } + auto res_offset = + cur_.SubSetWithRemovedIndices(removed_indices, all_indices); + offsets_arrays_[act_Is - 1].swap(res_offset); + return; + } + + template > + RES_T Flat(std::vector>&& index_eles) { + static_assert(std::tuple_size_v> == + 1 + prev_alias_num); + VLOG(10) << "Context: Flat"; + size_t old_head_size = cur_.Size(); + auto flat_head = cur_.template Flat(index_eles); + auto flat_prev = flat_prev_tuple(index_eles); + // now all values are 1-to-1 mapping. + std::vector> new_offset_array; + CHECK(offsets_arrays_.size() == prev_alias_num); + new_offset_array.reserve(offsets_arrays_.size()); + size_t num_eles = index_eles.size(); + + std::vector offset_vec(num_eles + 1, 0); + for (auto i = 0; i <= num_eles; ++i) { + offset_vec[i] = i; + } + + for (auto i = 0; i < prev_alias_num; ++i) { + new_offset_array.push_back(offset_vec); + } + VLOG(10) << "FInish flat"; + return RES_T(std::move(flat_head), std::move(flat_prev), + std::move(new_offset_array), sub_task_start_tag_); + } + + template + typename std::enable_if<(deduped_tag == prev_alias_num + base_tag || + deduped_tag == -1)>::type + Dedup() { + VLOG(10) << "Dedup on tag:" << deduped_tag << " means nothing" + << std::to_string(deduped_tag); + } + + // This dedup doesn't clear deplication in indivdual set.!!!!! + // start from alias_to_use, simplify all later csr. + // no meaning to dedup with tag == 0; + template + typename std::enable_if<(raw_deduped_tag < prev_alias_num + base_tag && + raw_deduped_tag >= 0)>::type + Dedup() { + // modify static variable? 
+ static constexpr int deduped_tag = raw_deduped_tag - base_tag; + static_assert(deduped_tag >= 0); + // do stuff. + std::vector indices; + std::vector offset_vec; + // the offset vec with index deduped_tag - 1 's last elements indicates + // how many elements in `deduped_tag` + size_t num_deduped_ele = std::get(prev_).Size(); + VLOG(10) << "dedup at col:" << deduped_tag << ", with " << num_deduped_ele + << " eles"; + { + if constexpr (deduped_tag > 0) { + auto& vec = offsets_arrays_[deduped_tag - 1]; + CHECK(num_deduped_ele == vec[vec.size() - 1]); + } + } + { + auto offset_vec_toward_head = ObtainOffsetFromTag(deduped_tag); + offset_vec.reserve(offset_vec_toward_head.size()); + offset_vec.emplace_back(0); + for (auto i = 0; i < offset_vec_toward_head.size() - 1; ++i) { + if (offset_vec_toward_head[i] < offset_vec_toward_head[i + 1]) { + indices.emplace_back(i); + } + offset_vec.emplace_back(indices.size()); + } + } + std::vector> all_indices; + all_indices.push_back(indices); + for (auto i = deduped_tag; i < prev_alias_num; ++i) { + std::vector new_indices; + auto& cur_offset_vec = offsets_arrays_[i]; + VLOG(10) << "tag: " << i << " indices: " << gs::to_string(indices); + for (auto ind : indices) { + // select the first ele. + // if not element, skip. + if (cur_offset_vec[ind] < cur_offset_vec[ind + 1]) { + new_indices.emplace_back(cur_offset_vec[ind]); + } + } + VLOG(10) << "for tag: " << i + << ", new indices: " << gs::to_string(new_indices); + all_indices.push_back(new_indices); + indices.swap(new_indices); + } + // apply indices on all later sets + // if constexpr (prev_alias_num > deduped_tag + 1) { + + // first subset deduped col, get offset, then apply to later ones. 
+ if constexpr (deduped_tag > 0) { + merge_offset(offsets_arrays_[deduped_tag - 1], offset_vec); + } + + static constexpr size_t num_prev_set_to_update = + prev_alias_num - deduped_tag; + if constexpr (num_prev_set_to_update > 0) { + auto index_seq = std::make_index_sequence(); + CHECK(all_indices.size() == prev_alias_num - deduped_tag + 1); + SubSetWithIndices(index_seq, all_indices); + VLOG(10) << "Finish subseting"; + } else { + VLOG(10) << "no prev set need subseting"; + } + // } + // if constexpr (prev_alias_num >= deduped_tag + 1) { + VLOG(10) << "Subset current set"; + cur_.SubSetWithIndices(all_indices[prev_alias_num - deduped_tag]); + // } + // start from deduped_tag + 1, + + updateOffsetVec(); + } + + template + void SubSetWithIndices(std::index_sequence index_seq, + std::vector>& new_indices) { + VLOG(10) << "subset context from tag: " << start_tag + << "with new_indices size:" << new_indices.size() + << " index_seq size: " << sizeof...(Is) + << ",start tag: " << start_tag; + ((std::get(prev_).SubSetWithIndices(new_indices[Is])), ...); + } + + void set_sub_task_start_tag(int sub_task_start_tag) { + if (sub_task_start_tag == sub_task_start_tag_) { + LOG(WARNING) << "in sub task already set to " << sub_task_start_tag_; + } else { + sub_task_start_tag_ = sub_task_start_tag; + } + } + + int get_sub_task_start_tag() const { return sub_task_start_tag_; } + + void merge_offset_with_back(std::vector& new_offset_array) { + merge_offset(offsets_arrays_.back(), new_offset_array); + } + + private: + template + typename std::enable_if<(Is <= prev_alias_num + base_tag)>::type + updateOffsetVec() { + static constexpr size_t act_Is = Is - base_tag; + VLOG(10) << "updateOffsetVec: tag: " << Is << ", act: " << act_Is; + + auto size = 0; + if constexpr (act_Is < prev_alias_num) { + size = std::get(prev_).Size(); + } else { + size = cur_.Size(); + } + VLOG(10) << "in updateOffsetVec: tag: " << act_Is << ",size: " << size; + auto& offset_vec = offsets_arrays_[act_Is - 1]; + 
offset_vec.clear(); + offset_vec.reserve(size + 1); + for (auto i = 0; i <= size; ++i) { + offset_vec.emplace_back(i); + } + updateOffsetVec(); + } + + template + typename std::enable_if<(Is > prev_alias_num + base_tag)>::type + updateOffsetVec() {} + + template + auto flat_prev_tuple_impl(std::vector& index_eles, + std::index_sequence) { + return std::make_tuple( + std::move(std::get(prev_).template Flat(index_eles))...); + } + + template + auto flat_prev_tuple(std::vector& index_eles) { + return flat_prev_tuple_impl(index_eles, + std::make_index_sequence()); + } + + void merge_offset(std::vector& old_offset_array, + std::vector& new_offset_array) { + VLOG(10) << "merging offset"; + CHECK(new_offset_array.size() == old_offset_array.back() + 1) + << "new size " << new_offset_array.size() << ", old back" + << old_offset_array.back(); + for (auto i = 0; i < old_offset_array.size(); ++i) { + old_offset_array[i] = new_offset_array[old_offset_array[i]]; + } + } + + /// @brief Align input offset with previous offset. Finally, it is aligned + /// with offset_array.back(). + /// @param offset + /// @param offset_array + /// @param from_ind count from zero, need to minus base_tag + template + std::vector align_offset( + NODE_T& new_node, std::vector&& offset, + std::vector>& offset_array, int from_ind) { + if (from_ind != -1) { + from_ind = from_ind - base_tag; + } + if (from_ind == -1 || from_ind == offset_array.size()) { + VLOG(10) << "No need to align with backend " << from_ind + << ", offsets size: " << offset_array.size(); + return std::move(offset); + } + // First got offset array which indicate the repeated times. 
+ CHECK(from_ind <= offset_array.size()) + << "out of range: " << from_ind << ", " << offset_array.size(); + std::vector copied = offset_array[from_ind]; + VLOG(10) << "copied: " << gs::to_string(copied); + for (auto i = from_ind + 1; i < offset_array.size(); ++i) { + for (auto j = 0; j < copied.size(); ++j) { + copied[j] = offset_array[i][copied[j]]; + } + } + CHECK(copied.size() == offset.size()); + // indicate the value at ind repeat how many times due to our chain. + VLOG(10) << "repeat array is :" << gs::to_string(copied); + VLOG(10) << "current offset:" << gs::to_string(offset); + new_node.Repeat(offset, copied); + std::vector res_offset; + { + // apply repeat on offset array; + size_t cur = 0; + for (auto i = 0; i + 1 < offset.size(); ++i) { + if (copied[i] < copied[i + 1]) { + int gap = offset[i + 1] - offset[i]; + int times_to_copy = copied[i + 1] - copied[i]; + + for (auto j = 0; j < times_to_copy; ++j) { + res_offset.push_back(cur); + cur += gap; + } + } else { + // NO action if there is no child nodes for i. + // res_offset.push_back(cur); + } + } + res_offset.push_back(cur); + } + LOG(INFO) << "res_offset size: " << res_offset.size(); + VLOG(10) << "res offset: " << gs::to_string(res_offset); + return res_offset; + } + HEAD_T cur_; + std::tuple prev_; + std::vector> offsets_arrays_; + int sub_task_start_tag_; +}; + +// Specialization for Context with no alias +template +class Context { + public: + using head_t = HEAD_T; + using iterator = ContextIter>; + static constexpr size_t alias_num = cur_alias == -1 ? 
0 : 1; + static constexpr size_t col_num = 1; + static constexpr int prev_alias_num = 0; + static constexpr int max_tag_id = cur_alias; + static constexpr int base_tag_id = base_tag; + using self_type_t = Context; + using prev_tuple_t = std::tuple; + + using index_ele_tuples_t = std::tuple; + + template + using nth_node_t = std::tuple_element_t>; + + Context(HEAD_T&& head, int sub_task_start_tag = INVALID_TAG) + : cur_(std::move(head)), sub_task_start_tag_(sub_task_start_tag) {} + + Context( + Context&& other) noexcept + : cur_(std::move(other.cur_)), + sub_task_start_tag_(other.sub_task_start_tag_) {} + + Context(const Context& other) + : cur_(other.cur_), sub_task_start_tag_(other.sub_task_start_tag_) {} + + ~Context() {} + + template + void SelectInPlace(EXPR& expr) { + // The result context can be defined by the selected indices of the head + // node. We can got the result context by applying selected indices. + // auto new_head_and_offset = select_node.Filter(std::move(expr), + // col_tuples); + std::vector select_indices; + offset_t cur_ind = 0; + for (auto iter : *this) { + auto ele_tuple = iter.GetAllElement(); + auto data_tuple = iter.GetAllData(); + if (expr(ele_tuple, data_tuple)) { + select_indices.emplace_back(cur_ind); + } + cur_ind += 1; + } + // The offset need to be changed. + // replace head in place + cur_.SubSetWithIndices(select_indices); + } + + size_t AliasNum() const { return alias_num; } + + HEAD_T& GetHead() { return cur_; } + + HEAD_T& GetMutableHead() { return cur_; } + + // This tag is the absolute tag, start from zero. + template + HEAD_T& GetNode() { + static_assert((Is - base_tag) == 0 || Is == -1); + return cur_; + } + + template + HEAD_T& GetMutableNode() { + static_assert((Is - base_tag) == 0 || Is == -1); + return cur_; + } + + auto CreateSetBuilder() const { + return std::make_tuple(cur_.CreateBuilder()); + } + + // Towards which tag we will align on. 
+ + HEAD_T&& MoveHead() { return std::move(cur_); } + + iterator begin() const { return iterator(cur_.begin()); } + iterator end() const { return iterator(cur_.end()); } + + // Alias a Context with only head node. + template > + RES_T Alias() { + static_assert(alias == base_tag && cur_alias == -1); + return RES_T(std::move(cur_), sub_task_start_tag_); + } + + // must return 1,1,1,. + std::vector ObtainOffsetFromTag(int dst_tag) const { + CHECK(dst_tag == cur_alias); + auto size = cur_.Size(); + std::vector res; + res.reserve(size + 1); + for (auto i = 0; i <= size; ++i) { + res.push_back(i); + } + return res; + } + + std::vector ObtainOffsetFromSubTaskStart() const { + CHECK(sub_task_start_tag_ != INVALID_TAG); + std::vector res; + res.reserve(cur_.Size() + 1); + for (auto i = 0; i <= cur_.Size(); ++i) { + res.push_back(i); + } + return res; + } + + template ::type* = nullptr> + auto ApplyNode(NEW_HEAD_T&& new_head, std::vector&& offset) { + // Collection + auto new_set_and_offset = new_head.apply(offset); + CHECK(std::get<1>(new_set_and_offset).size() == cur_.Size() + 1); + return this->template AddNode( + std::move(std::get<0>(new_set_and_offset)), + std::move(std::get<1>(new_set_and_offset))); + } + + // For non-collection apply result, just append. + template < + AppendOpt append_opt, typename NEW_HEAD_T, + typename std::enable_if::type* = nullptr> + auto ApplyNode(NEW_HEAD_T&& new_head, std::vector&& offset) { + CHECK(offset.size() == cur_.Size() + 1); + return this->template AddNode(std::move(new_head), + std::move(offset)); + } + + // 0. 
Replace current HEAD to obtain a new Traversal, if i'm not aliased + template < + AppendOpt opt, typename NEW_HEAD_T, + typename RES_T = Context< + NEW_HEAD_T, ResultColId::res_alias, + base_tag, grape::EmptyType>, + typename std::enable_if<(cur_alias == -1), NEW_HEAD_T>::type* = nullptr> + RES_T AddNode(NEW_HEAD_T&& new_node, std::vector&& offset, + int alias_to_use = -1) { // offset vector and alias_to_use + // is dummy in this case + CHECK(alias_to_use == cur_alias || alias_to_use == -1); + // VLOG(10) << "[AddNode:] offset size " << offset.size(); + return RES_T(std::move(new_node), sub_task_start_tag_); + } + + // 1. Replace current Head with new node, if i'm aliased to 0. + template < + AppendOpt opt, typename NEW_HEAD_T, + typename RES_T = Context< + NEW_HEAD_T, ResultColId::res_alias, + base_tag, HEAD_T>, + typename std::enable_if<(cur_alias != -1), NEW_HEAD_T>::type* = nullptr> + RES_T AddNode(NEW_HEAD_T&& new_node, std::vector&& offset, + int alias_to_use = -1) { + CHECK(alias_to_use == cur_alias || alias_to_use == -1); + std::vector> offsets; + offsets.emplace_back(std::move(offset)); + return RES_T(std::move(new_node), std::make_tuple(std::move(cur_)), + std::move(offsets), sub_task_start_tag_); + } + + self_type_t ReplaceHead(HEAD_T&& new_head, std::vector&& offset) { + return self_type_t(std::move(new_head)); + } + + template > + RES_T Flat(std::vector&& index_eles) { + static_assert(std::tuple_size_v == 1); + return RES_T(std::move(cur_.template Flat<0>(index_eles)), + sub_task_start_tag_); + } + + // Created Empty grouped context. + // later we will insert into. + template + RES_T ToGrouped(AGG_T& agg) { + // create a empty keyed set. 
+ return RES_T(std::move(cur_.CreatedKeyed(agg)), sub_task_start_tag_); + } + + template + typename std::enable_if<(Is == -1 || Is == 0)>::type UpdateChildNode( + std::vector&& offset) { + return; + } + + void set_sub_task_start_tag(int sub_task_start_tag) { + if (sub_task_start_tag == sub_task_start_tag_) { + LOG(WARNING) << "in sub task already set to " << sub_task_start_tag_; + } else { + sub_task_start_tag_ = sub_task_start_tag; + } + } + + int get_sub_task_start_tag() const { return sub_task_start_tag_; } + + // for + // template + void FilterWithOffsets(std::vector& offset, JoinKind join_kind) { + CHECK(join_kind == JoinKind::AntiJoin); + std::vector active_indices; + for (auto i = 0; i < offset.size() - 1; ++i) { + if (offset[i] < offset[i + 1]) { + active_indices.emplace_back(i); + } + } + VLOG(10) << "[Filter with offsets:], active indices: " + << gs::to_string(active_indices) + << " join kind: " << gs::to_string(join_kind); + std::vector res_offset = + cur_.FilterWithIndices(active_indices, join_kind); + } + + private: + HEAD_T cur_; + int sub_task_start_tag_; +}; +// deduped + +template ::type* = nullptr> +auto& Get(Context& ctx) { + return ctx.GetHead(); +} + +template ::type* = nullptr> +auto& Get(Context& ctx) { + return ctx.GetHead(); +} + +template ::type* = nullptr> +auto& Get(Context& ctx) { + return ctx.template GetNode(); +} + +template ::type* = nullptr> +auto&& Move(Context& ctx) { + return ctx.MoveHead(); +} +template ::type* = nullptr> +auto&& Move(Context& ctx) { + return ctx.MoveHead(); +} + +template +using DefaultContext = Context; +// For get inner nodes, not implemented yet. 
+ +template ::type* = nullptr> +static auto make_context(std::tuple&& prev_sets, HEAD_T&& head, + std::vector>&& offsets) { + return Context( + std::move(head)); +} + +template 0)>::type* = nullptr> +static auto make_context(std::tuple&& prev_sets, HEAD_T&& head, + std::vector>&& offsets) { + return Context( + std::move(head), std::move(prev_sets), std::move(offsets)); +} + +} // namespace gs + +#endif // ENGINES_HQPS_ENGINE_CONTEXT_H_ diff --git a/flex/engines/hqps_db/core/null_record.h b/flex/engines/hqps_db/core/null_record.h new file mode 100644 index 000000000000..3a25e32f2e27 --- /dev/null +++ b/flex/engines/hqps_db/core/null_record.h @@ -0,0 +1,87 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef ENGINES_HQPS_ENGINE_NULL_RECORD_H_ +#define ENGINES_HQPS_ENGINE_NULL_RECORD_H_ + +#include +#include +#include "flex/engines/hqps_db/core/utils/hqps_utils.h" + +namespace gs { + +struct None {}; + +static constexpr const None NONE; + +template +struct NullRecordCreator { + static inline T GetNull() { + static T null_value = std::numeric_limits::max(); + return null_value; + } +}; + +template <> +struct NullRecordCreator { + static inline std::string_view GetNull() { return ""; } +}; + +template +struct NullRecordCreator> { + static inline std::tuple GetNull() { + return std::make_tuple(NullRecordCreator::GetNull()...); + } +}; + +template +static inline bool IsNull(const std::tuple& tuple) { + if constexpr (Ind + 1 < sizeof...(T)) { + return (std::get<0>(tuple) == + NullRecordCreator< + std::tuple_element_t<0, std::tuple>>::GetNull()) && + IsNull(tuple); + } else { + return std::get<0>(tuple) == + NullRecordCreator< + std::tuple_element_t<0, std::tuple>>::GetNull(); + } +} + +template +static inline bool IsNull(const std::vector& vec) { + if (vec.empty()) { + return true; + } + for (auto& v : vec) { + if (!IsNull(v)) { + return false; + } + } + return true; +} + +template +static inline bool IsNull(const T& opt) { + return opt == NullRecordCreator::GetNull(); +} + +// customized operator == +template +bool operator==(const T& lhs, const None& rhs) { + return IsNull(lhs); +} +} // namespace gs + +#endif // ENGINES_HQPS_ENGINE_NULL_RECORD_H_ \ No newline at end of file diff --git a/flex/engines/hqps_db/core/operator/edge_expand.h b/flex/engines/hqps_db/core/operator/edge_expand.h new file mode 100644 index 000000000000..25846a586b60 --- /dev/null +++ b/flex/engines/hqps_db/core/operator/edge_expand.h @@ -0,0 +1,1423 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef ENGINES_HQPS_ENGINE_OPERATOR_EDGE_EXPAND_H_ +#define ENGINES_HQPS_ENGINE_OPERATOR_EDGE_EXPAND_H_ + +#include +#include + +#include "flex/engines/hqps_db/core/utils/hqps_utils.h" + +#include "flex/engines/hqps_db/structures/multi_edge_set/adj_edge_set.h" +#include "flex/engines/hqps_db/structures/multi_edge_set/flat_edge_set.h" +#include "flex/engines/hqps_db/structures/multi_edge_set/general_edge_set.h" +#include "flex/engines/hqps_db/structures/multi_edge_set/multi_label_dst_edge_set.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/keyed_row_vertex_set.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/two_label_vertex_set.h" +#include "grape/utils/bitset.h" + +namespace gs { + +// In expand to edges, we just need to keep the reference of vertex set. +template +struct EdgeExpandVState { + const GRAPH_INTERFACE& graph_; + const VERTEX_SET_T& cur_vertex_set_; + Direction direction_; + typename GRAPH_INTERFACE::label_id_t edge_label_, other_label_; + size_t limit_; + EDGE_FILTER_T edge_filter_; + + EdgeExpandVState(const GRAPH_INTERFACE& frag, const VERTEX_SET_T& v_set, + Direction direction, + typename GRAPH_INTERFACE::label_id_t edge_label, + typename GRAPH_INTERFACE::label_id_t other_label, + EDGE_FILTER_T&& edge_filter, size_t limit) + : graph_(frag), + cur_vertex_set_(v_set), + direction_(direction), + edge_label_(edge_label), + other_label_(other_label), + edge_filter_(std::move(edge_filter)), + limit_(limit) {} +}; + +// In expand to edges, we need to create a new copy of vertex set. 
+template +struct EdgeExpandEState { + const GRAPH_INTERFACE& graph_; + VERTEX_SET_T& cur_vertex_set_; + Direction direction_; + typename GRAPH_INTERFACE::label_id_t edge_label_, other_label_; + const PropNameArray& prop_names_; + const EDGE_FILTER_T& edge_filter_; + size_t limit_; + + EdgeExpandEState(const GRAPH_INTERFACE& frag, VERTEX_SET_T& v_set, + Direction direction, + typename GRAPH_INTERFACE::label_id_t edge_label, + typename GRAPH_INTERFACE::label_id_t other_label, + const PropNameArray& prop_names, + const EDGE_FILTER_T& edge_filter, size_t limit) + : graph_(frag), + cur_vertex_set_(v_set), + direction_(direction), + edge_label_(edge_label), + other_label_(other_label), + limit_(limit), + prop_names_(prop_names), + edge_filter_(edge_filter) {} +}; + +template +struct EdgeExpandEMutltiDstState { + const GRAPH_INTERFACE& graph_; + VERTEX_SET_T& cur_vertex_set_; + Direction direction_; + typename GRAPH_INTERFACE::label_id_t edge_label_; + std::array other_label_; + const PropNameArray& prop_names_; + const EDGE_FILTER_T& edge_filter_; + size_t limit_; + + EdgeExpandEMutltiDstState( + const GRAPH_INTERFACE& frag, VERTEX_SET_T& v_set, Direction direction, + typename GRAPH_INTERFACE::label_id_t edge_label, + std::array other_label, + const PropNameArray& prop_names, const EDGE_FILTER_T& edge_filter, + size_t limit) + : graph_(frag), + cur_vertex_set_(v_set), + direction_(direction), + edge_label_(edge_label), + other_label_(other_label), + limit_(limit), + prop_names_(prop_names), + edge_filter_(edge_filter) {} +}; + +template +class EdgeExpand { + using label_id_t = typename GRAPH_INTERFACE::label_id_t; + using vertex_id_t = typename GRAPH_INTERFACE::vertex_id_t; + using vertex_set_t = DefaultRowVertexSet; + + public: + /// @brief Directly obtain vertices from edge. + /// Activation: RowVertexSet, TruePredicate. 
+ /// @tparam EDATA_T + /// @tparam VERTEX_SET_T + /// @param frag + /// @param v_sets + /// @param edge_expand_opt + /// @return + template >> + static RES_T EdgeExpandV( + const GRAPH_INTERFACE& graph, + const RowVertexSet& cur_vertex_set, + Direction direction, label_id_t edge_label, label_id_t other_label, + EDGE_FILTER_T&& edge_filter, size_t limit = INT_MAX) { + auto state = EdgeExpandVState(graph, cur_vertex_set, direction, edge_label, + other_label, std::move(edge_filter), limit); + return EdgeExpandVFromSingleLabel(state); + } + + /// @brief Directly obtain vertices from keyed row vertex set, via edge. + /// @tparam EDATA_T + /// @tparam VERTEX_SET_T + /// @param frag + /// @param v_sets + /// @param edge_expand_opt + /// @return + template >> + static RES_T EdgeExpandV( + const GRAPH_INTERFACE& graph, + const KeyedRowVertexSet& + cur_vertex_set, + Direction direction, label_id_t edge_label, label_id_t other_label, + EDGE_FILTER_T&& edge_filter, size_t limit = INT_MAX) { + auto state = EdgeExpandVState(graph, cur_vertex_set, direction, edge_label, + other_label, std::move(edge_filter), limit); + return EdgeExpandVFromSingleLabel(state); + } + + /// @brief Directly obtain vertices from edge, without property and apply from + /// multi label set, Activation: MultiLabelVertexSet, TruePredicate. + /// @tparam EDATA_T + /// @tparam VERTEX_SET_T + /// @param frag + /// @param v_sets + /// @param edge_expand_opt + /// @return + template < + typename VERTEX_SET_T, typename... 
SELECTOR, + typename RES_T = std::pair>, + typename std::enable_if::type* = nullptr> + static RES_T EdgeExpandV(const GRAPH_INTERFACE& graph, + const VERTEX_SET_T& cur_vertex_set, + Direction direction, label_id_t edge_label, + label_id_t other_label, + Filter&& edge_filter, + size_t limit = INT_MAX) { + auto state = EdgeExpandVState(graph, cur_vertex_set, direction, edge_label, + other_label, std::move(edge_filter), limit); + + std::vector vids; + std::vector offset; + static constexpr size_t num_src_labels = VERTEX_SET_T::num_labels; + using nbr_list_array_t = typename GRAPH_INTERFACE::nbr_list_array_t; + std::vector nbr_lists; + for (auto i = 0; i < num_src_labels; ++i) { + auto& cur_set = state.cur_vertex_set_.GetSet(i); + label_id_t src_label, dst_label; + std::tie(src_label, dst_label) = get_graph_label_pair( + direction, cur_set.GetLabel(), state.other_label_); + VLOG(10) << "[EdgeExpandVMultiSrcLabel: from label: " + << cur_set.GetLabel() << ", other label: " << state.other_label_ + << ",edge label: " << state.edge_label_ << "src: " << src_label + << ",dst: " << dst_label << ",dire: " << state.direction_; + auto nbr_list_array = state.graph_.GetOtherVertices( + src_label, dst_label, state.edge_label_, cur_set.GetVertices(), + gs::to_string(state.direction_), state.limit_); + nbr_lists.emplace_back(std::move(nbr_list_array)); + } + + offset.reserve(state.cur_vertex_set_.Size() + 1); + // first gather size. + offset.emplace_back(vids.size()); + for (auto iter : state.cur_vertex_set_) { + auto vid = iter.GetVertex(); + auto cur_set_ind = iter.GetCurInd(); + auto set_inner_ind = iter.GetCurSetInnerInd(); + CHECK(nbr_lists.size() > cur_set_ind); + CHECK(nbr_lists[cur_set_ind].size() > set_inner_ind); + auto& cur_array = nbr_lists[cur_set_ind]; + auto cur_nbr_list = cur_array.get(set_inner_ind); + // VLOG(10) << "vertex: " << vid << ", num nbrs: " << cur_nbr_list.size(); + + for (auto nbr : cur_nbr_list) { + // TODO: use edge_filter to filter. 
+ vids.emplace_back(nbr.neighbor()); + } + offset.emplace_back(vids.size()); + } + VLOG(10) << "vids size: " << vids.size(); + VLOG(10) << "offset: " << gs::to_string(offset); + vertex_set_t result_set(std::move(vids), state.other_label_); + auto pair = std::make_pair(std::move(result_set), std::move(offset)); + return pair; + } + + /// @brief Directly obtain vertices from two label vertex set. + /// multi label set + /// Activation: From two label set, TruePredicate. + /// @tparam EDATA_T + /// @tparam VERTEX_SET_T + /// @param frag + /// @param v_sets + /// @param edge_expand_opt + /// @return + template < + typename VERTEX_SET_T, typename... SELECTOR, + typename RES_T = std::pair>, + typename std::enable_if::type* = nullptr> + static RES_T EdgeExpandV(const GRAPH_INTERFACE& graph, + const VERTEX_SET_T& cur_vertex_set, + Direction direction, label_id_t edge_label, + label_id_t other_label, + Filter&& edge_filter, + size_t limit = INT_MAX) { + VLOG(10) << "[EdgeExpandV] for two label vertex set size: " + << cur_vertex_set.Size(); + auto state = EdgeExpandVState(graph, cur_vertex_set, direction, edge_label, + other_label, std::move(edge_filter), limit); + + std::vector vids; + std::vector offset; + static constexpr size_t num_src_labels = VERTEX_SET_T::num_labels; + using nbr_list_t = typename GRAPH_INTERFACE::nbr_list_t; + using nbr_list_array_t = typename GRAPH_INTERFACE::nbr_list_array_t; + nbr_list_array_t nbr_list_array; + nbr_list_array.resize(state.cur_vertex_set_.Size()); + + for (auto i = 0; i < num_src_labels; ++i) { + std::vector cur_vids; + std::vector active_inds; + std::tie(cur_vids, active_inds) = state.cur_vertex_set_.GetVertices(i); + label_id_t cur_label = state.cur_vertex_set_.GetLabel(i); + label_id_t src_label, dst_label; + std::tie(src_label, dst_label) = + get_graph_label_pair(direction, cur_label, state.other_label_); + + VLOG(10) << "[EdgeExpandV-TwoLabelSet]: from label: " + << ",edge label: " << state.edge_label_ << "src: " << src_label + 
<< ",dst: " << dst_label << ",dire: " << state.direction_; + auto tmp_nbr_list_array = state.graph_.GetOtherVertices( + src_label, dst_label, state.edge_label_, cur_vids, + gs::to_string(state.direction_), state.limit_); + // nbr_lists.emplace_back(std::move(nbr_list_array)); + + CHECK(tmp_nbr_list_array.size() == active_inds.size()); + for (auto i = 0; i < active_inds.size(); ++i) { + auto dst_ind = active_inds[i]; + CHECK(nbr_list_array.get(dst_ind).size() == 0); + nbr_list_array.get_vector(dst_ind).swap( + tmp_nbr_list_array.get_vector(i)); + } + } + CHECK(nbr_list_array.size() == state.cur_vertex_set_.Size()); + + offset.reserve(state.cur_vertex_set_.Size() + 1); + // first gather size. + offset.emplace_back(vids.size()); + for (auto i = 0; i < nbr_list_array.size(); ++i) { + for (auto nbr : nbr_list_array.get(i)) { + // TODO: use edge_filter to filter. + vids.emplace_back(nbr.neighbor()); + } + offset.emplace_back(vids.size()); + } + vertex_set_t result_set(std::move(vids), state.other_label_); + auto pair = std::make_pair(std::move(result_set), std::move(offset)); + return pair; + } + + /// @brief Directly obtain vertices from edge, without property and apply from + /// multi label set + /// Activation: From Generate vertex set, TruePredicate. + /// @tparam EDATA_T + /// @tparam VERTEX_SET_T + /// @param frag + /// @param v_sets + /// @param edge_expand_opt + /// @return + template < + typename VERTEX_SET_T, typename... 
SELECTOR, + typename RES_T = std::pair>, + typename std::enable_if::type* = nullptr> + static RES_T EdgeExpandV(const GRAPH_INTERFACE& graph, + const VERTEX_SET_T& cur_vertex_set, + Direction direction, label_id_t edge_label, + label_id_t other_label, + Filter&& edge_filter, + size_t limit = INT_MAX) { + VLOG(10) << "[EdgeExpandV] for general vertex set size: " + << cur_vertex_set.Size(); + auto state = EdgeExpandVState(graph, cur_vertex_set, direction, edge_label, + other_label, std::move(edge_filter), limit); + + std::vector vids; + std::vector offset; + static constexpr size_t num_src_labels = VERTEX_SET_T::num_labels; + using nbr_list_t = typename GRAPH_INTERFACE::nbr_list_t; + using nbr_list_array_t = typename GRAPH_INTERFACE::nbr_list_array_t; + nbr_list_array_t nbr_list_array; + nbr_list_array.resize(state.cur_vertex_set_.Size()); + + for (auto i = 0; i < num_src_labels; ++i) { + std::vector cur_vids; + std::vector active_inds; + std::tie(cur_vids, active_inds) = state.cur_vertex_set_.GetVertices(i); + label_id_t cur_label = state.cur_vertex_set_.GetLabel(i); + label_id_t src_label, dst_label; + std::tie(src_label, dst_label) = + get_graph_label_pair(direction, cur_label, state.other_label_); + + VLOG(10) << "[EdgeExpandV]: from label: " + << ",edge label: " << state.edge_label_ << "src: " << src_label + << ",dst: " << dst_label << ",dire: " << state.direction_; + auto tmp_nbr_list_array = state.graph_.GetOtherVertices( + src_label, dst_label, state.edge_label_, cur_vids, + gs::to_string(state.direction_), state.limit_); + // nbr_lists.emplace_back(std::move(nbr_list_array)); + + CHECK(tmp_nbr_list_array.size() == active_inds.size()); + for (auto i = 0; i < active_inds.size(); ++i) { + auto dst_ind = active_inds[i]; + CHECK(nbr_list_array.get(dst_ind).size() == 0); + nbr_list_array.get_vector(dst_ind).swap( + tmp_nbr_list_array.get_vector(i)); + } + } + CHECK(nbr_list_array.size() == state.cur_vertex_set_.Size()); + + 
offset.reserve(state.cur_vertex_set_.Size() + 1); + // first gather size. + offset.emplace_back(vids.size()); + for (auto i = 0; i < nbr_list_array.size(); ++i) { + for (auto nbr : nbr_list_array.get(i)) { + // TODO: use edge_filter to filter. + vids.emplace_back(nbr.neighbor()); + } + offset.emplace_back(vids.size()); + } + VLOG(10) << "vids size: " << vids.size(); + VLOG(10) << "offset: " << gs::to_string(offset); + vertex_set_t result_set(std::move(vids), state.other_label_); + auto pair = std::make_pair(std::move(result_set), std::move(offset)); + return pair; + } + + /// @brief Directly obtain vertices from edge. + /// @tparam EDATA_T + /// @tparam VERTEX_SET_T + /// @param frag + /// @param v_sets + /// @param edge_expand_opt + /// @return + template >, + typename std::enable_if< + !IsTruePredicate::value>::type* = nullptr> + static RES_T EdgeExpandV( + const GRAPH_INTERFACE& graph, + const RowVertexSet& cur_vertex_set, + Direction direction, label_id_t edge_label, label_id_t other_label, + Filter&& edge_filter, + size_t limit = INT_MAX) { + auto state = EdgeExpandVState(graph, cur_vertex_set, direction, edge_label, + other_label, std::move(edge_filter), limit); + label_id_t src_label, dst_label; + std::tie(src_label, dst_label) = get_graph_label_pair( + state.direction_, state.cur_vertex_set_.GetLabel(), state.other_label_); + + VLOG(10) << "edgeExpandV: from label: " << state.cur_vertex_set_.GetLabel() + << ", other label: " << state.other_label_ + << ",edge label: " << state.edge_label_ + << ",dire: " << state.direction_ << ", propert name: "; + auto selectors = state.edge_filter_.selectors_; + auto adj_list_array = + get_adj_list_array_with_filter(state, src_label, dst_label, selectors); + VLOG(10) << "got adj list array: " << adj_list_array.size(); + + std::vector vids; + std::vector offset; + offset.reserve(state.cur_vertex_set_.Size() + 1); + CHECK(adj_list_array.size() == state.cur_vertex_set_.Size()); + // first gather size. 
+ offset.emplace_back(vids.size()); + auto cur_v_set_size = cur_vertex_set.Size(); + + // for (auto iter : state.cur_vertex_set_) { + for (auto i = 0; i < cur_v_set_size; ++i) { + auto adj_list = adj_list_array.get(i); + for (auto adj : adj_list) { + // if (edge_filter(adj.properties())) { + if (std::apply(edge_filter.expr_, adj.properties())) { + vids.emplace_back(adj.neighbor()); + } + } + offset.emplace_back(vids.size()); + } + VLOG(10) << "vids size: " << vids.size(); + // VLOG(10) << "offset: " << gs::to_string(offset); + vertex_set_t result_set(std::move(vids), state.other_label_); + auto pair = std::make_pair(std::move(result_set), std::move(offset)); + return pair; + } + + /// @brief Directly obtain multiple label vertices from edge. + /// @tparam EDATA_T + /// @tparam VERTEX_SET_T + /// @param frag + /// @param v_sets + /// @param edge_expand_opt + /// @return + template ::type* = nullptr, + typename RES_T = + std::pair, + std::vector>> + static RES_T EdgeExpandV(const GRAPH_INTERFACE& graph, + const VERTEX_SET_T& cur_vertex_set, + Direction direction, label_id_t edge_label, + std::array& other_labels, + std::array&& edge_filter, + std::index_sequence) { + auto tuple = std::make_tuple(EdgeExpandV( + graph, cur_vertex_set, direction, edge_label, + std::get(other_labels), std::move(std::get(edge_filter)))...); + + size_t offset_array_size = std::get<0>(tuple).second.size(); + // std::vector res_offset(offset_array_size, 0); + VLOG(10) << "prev set size: " << cur_vertex_set.Size() + << ", new offset size: " << offset_array_size; + CHECK(offset_array_size == cur_vertex_set.Size() + 1); + size_t prev_set_size = cur_vertex_set.Size(); + + auto set_offset_array = + get_set_from_pair_tuple(tuple, std::make_index_sequence()); + auto& offset_arrays = std::get<1>(set_offset_array); + auto& vertex_sets = std::get<0>(set_offset_array); + + std::vector res_vids; + std::array res_bitset; + std::vector res_offset; + + size_t total_size = 0; + for (auto i = 0; i < 
vertex_sets.size(); ++i) { + total_size += vertex_sets[i].Size(); + } + VLOG(10) << "total size: " << total_size; + res_vids.reserve(total_size); + res_offset.reserve(prev_set_size + 1); + for (auto i = 0; i < num_labels; ++i) { + res_bitset[i].init(total_size); + } + + size_t cur_ind = 0; + res_offset.emplace_back(0); + for (auto i = 0; i < prev_set_size; ++i) { + for (auto j = 0; j < num_labels; ++j) { + auto& vec = vertex_sets[j].GetVertices(); + auto start_off = offset_arrays[j][i]; + auto end_off = offset_arrays[j][i + 1]; + for (auto k = start_off; k < end_off; ++k) { + res_vids.emplace_back(vec[k]); + res_bitset[j].set_bit(cur_ind); + cur_ind += 1; + } + } + res_offset.emplace_back(cur_ind); + } + CHECK(cur_ind == total_size); + auto copied_labels(other_labels); + GeneralVertexSet res_set( + std::move(res_vids), std::move(copied_labels), std::move(res_bitset)); + + return std::make_pair(std::move(res_set), std::move(res_offset)); + } + + /// @brief Directly obtain multiple label vertices from edge. specialization + /// for two label. 
+ /// @tparam EDATA_T + /// @tparam VERTEX_SET_T + /// @param frag + /// @param v_sets + /// @param edge_expand_opt + /// @return + template ::type* = nullptr, + typename RES_T = std::pair< + TwoLabelVertexSet, + std::vector>> + static RES_T EdgeExpandV( + const GRAPH_INTERFACE& graph, + const RowVertexSet& cur_vertex_set, + Direction direction, label_id_t edge_label, + std::array& other_labels, + std::array, num_labels>&& edge_filter, + std::index_sequence) { + label_id_t src_label, dst_label; + + std::tie(src_label, dst_label) = get_graph_label_pair( + direction, cur_vertex_set.GetLabel(), other_labels[0]); + LOG(INFO) << "EdgeExpandV: with two dst labels" + << gs::to_string(other_labels); + + auto vid_and_offset1 = graph.GetOtherVerticesV2( + src_label, dst_label, edge_label, cur_vertex_set.GetVertices(), + gs::to_string(direction), INT_MAX); + + if (direction == Direction::In) { + src_label = other_labels[1]; + } else { + dst_label = other_labels[1]; + } + + auto vid_and_offset2 = graph.GetOtherVerticesV2( + src_label, dst_label, edge_label, cur_vertex_set.GetVertices(), + gs::to_string(direction), INT_MAX); + + auto& vids1 = vid_and_offset1.first; + auto& off1 = vid_and_offset1.second; + auto& vids2 = vid_and_offset2.first; + auto& off2 = vid_and_offset2.second; + size_t prev_set_size = cur_vertex_set.Size(); + CHECK(off1.size() == prev_set_size + 1); + CHECK(off2.size() == prev_set_size + 1); + + std::vector res_vids; + grape::Bitset res_bitset; + + size_t total_size = vids1.size() + vids2.size(); + VLOG(10) << "total size: " << total_size; + res_vids.reserve(total_size); + res_bitset.init(total_size); + + size_t cur = 0; + for (auto i = 0; i < prev_set_size; ++i) { + auto start_off = off1[i]; + auto end_off = off1[i + 1]; + for (auto k = start_off; k < end_off; ++k) { + res_vids.emplace_back(vids1[k]); + res_bitset.set_bit(cur++); + } + start_off = off2[i]; + end_off = off2[i + 1]; + for (auto k = start_off; k < end_off; ++k) { + 
res_vids.emplace_back(vids2[k]); + cur++; + } + } + { + for (auto i = 0; i < off1.size(); ++i) { + off1[i] += off2[i]; + } + } + CHECK(cur == total_size); + auto copied_labels(other_labels); + TwoLabelVertexSet res_set( + std::move(res_vids), std::move(copied_labels), std::move(res_bitset)); + return std::make_pair(std::move(res_set), std::move(off1)); + } + + // Transform tuple to array. + template + static auto get_set_from_pair_tuple(std::tuple& tuple, + std::index_sequence) { + using set_and_vec_t = typename std::tuple_element_t<0, std::tuple>; + using set_t = typename set_and_vec_t::first_type; + using vec_t = typename set_and_vec_t::second_type; + auto set_array = std::array{ + std::move(std::get<0>(std::get(tuple)))...}; + auto offset_array = std::array{ + std::move(std::get<1>(std::get(tuple)))...}; + return std::make_pair(std::move(set_array), std::move(offset_array)); + } + + ///////////////////////////////////////////////////////////////////////////// + /////////////////////////// Edge Expand E /////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////// + + /// @brief Obtain edge set from source vertices. with rvalue vertex set. + /// @tparam EDATA_T + /// @tparam VERTEX_SET_T general vertex set + /// @param frag + /// @param v_sets + /// @param edge_expand_opt + /// @return + // + template < + typename... 
T, size_t num_labels, typename EDGE_FILTER_T, + typename std::enable_if::type* = nullptr, + typename RES_T = std::pair< + MulLabelSrcGrootEdgeSet, + std::vector>> + static RES_T EdgeExpandE( + const GRAPH_INTERFACE& graph, + GeneralVertexSet& cur_vertex_set, + Direction direction, label_id_t edge_label, label_id_t other_label, + EDGE_FILTER_T& edge_filter, PropNameArray& props, + size_t limit = INT_MAX) { + static_assert("not implemented"); + auto state = + EdgeExpandEState, + EDGE_FILTER_T>(graph, cur_vertex_set, direction, + edge_label, other_label, props, + edge_filter, limit); + + return EdgeExpandENoPropImpl(state); + } + + // for input vertex set with only one label. + template ::type* = nullptr, + typename RES_T = std::pair, + std::vector>> + static RES_T EdgeExpandE( + const GRAPH_INTERFACE& graph, + RowVertexSet& cur_vertex_set, + Direction direction, label_id_t edge_label, label_id_t other_label, + EDGE_FILTER_T& edge_filter, PropNameArray& props, + size_t limit = INT_MAX) { + auto state = + EdgeExpandEState, + EDGE_FILTER_T>(graph, cur_vertex_set, direction, + edge_label, other_label, props, + edge_filter, limit); + + return EdgeExpandENoPropImpl(state); + } + // EdgeExpandE when input vertex are single label. and get multiple props + template < + typename... T, typename... SET_T, typename EDGE_FILTER_T, + typename std::enable_if<(sizeof...(T) > 0)>::type* = nullptr, + typename RES_T = std::pair, + std::vector>> + static RES_T EdgeExpandE( + const GRAPH_INTERFACE& graph, + RowVertexSet& cur_vertex_set, + Direction direction, label_id_t edge_label, label_id_t other_label, + EDGE_FILTER_T& edge_filter, PropNameArray& props, + size_t limit = INT_MAX) { + auto state = + EdgeExpandEState, + EDGE_FILTER_T, T...>(graph, cur_vertex_set, direction, + edge_label, other_label, props, + edge_filter, limit); + return EdgeExpandESingleLabelSrcImpl(state); + } + // EdgeExpandE when input vertex are single label. and get multiple props + // Input set is keyedVertexSet. 
+ template < + typename... T, typename... SET_T, typename EDGE_FILTER_T, + typename std::enable_if<(sizeof...(T) > 0)>::type* = nullptr, + typename RES_T = std::pair, + std::vector>> + static RES_T EdgeExpandE( + const GRAPH_INTERFACE& graph, + KeyedRowVertexSet& + cur_vertex_set, + Direction direction, label_id_t edge_label, label_id_t other_label, + EDGE_FILTER_T& edge_filter, PropNameArray& props, + size_t limit = INT_MAX) { + LOG(INFO) << "EdgeExpandE from keyed vertex set"; + auto state = EdgeExpandEState< + GRAPH_INTERFACE, + KeyedRowVertexSet, + EDGE_FILTER_T, T...>(graph, cur_vertex_set, direction, edge_label, + other_label, props, edge_filter, limit); + return EdgeExpandESingleLabelSrcImpl(state); + } + + // EdgeExpandE when input vertex are multi label. + template < + typename... T, typename VERTEX_SET_T, typename EDGE_FILTER_T, + typename std::enable_if<(sizeof...(T) > 0) && + VERTEX_SET_T::is_multi_label && + !VERTEX_SET_T::is_general_set>::type* = nullptr, + typename RES_T = std::pair< + MulLabelSrcGrootEdgeSet, + std::vector>> + static RES_T EdgeExpandE(const GRAPH_INTERFACE& graph, + VERTEX_SET_T& cur_vertex_set, Direction direction, + label_id_t edge_label, label_id_t other_label, + EDGE_FILTER_T& edge_filter, + PropNameArray& props, size_t limit = INT_MAX) { + auto state = + EdgeExpandEState( + graph, cur_vertex_set, direction, edge_label, other_label, props, + edge_filter, limit); + return EdgeExpandEMultiLabelSrcImpl(state); + } + + // EdgeExpandE when input vertex are general set. + template < + typename... 
T, size_t num_labels, typename EDGE_FILTER_T, + typename std::enable_if<(sizeof...(T) > 0)>::type* = nullptr, + typename RES_T = std::pair, + std::vector>> + static RES_T EdgeExpandE( + const GRAPH_INTERFACE& graph, + GeneralVertexSet& cur_vertex_set, + Direction direction, label_id_t edge_label, label_id_t other_label, + EDGE_FILTER_T& edge_filter, PropNameArray& props, + size_t limit = INT_MAX) { + auto state = + EdgeExpandEState, + EDGE_FILTER_T, T...>(graph, cur_vertex_set, direction, + edge_label, other_label, props, + edge_filter, limit); + return EdgeExpandEGeneralSetImpl(state); + } + + // EdgeExpand E for two-label vertex set + template + static auto EdgeExpandE( + const GRAPH_INTERFACE& graph, + TwoLabelVertexSet& cur_vertex_set, + Direction direction, label_id_t edge_label, label_id_t other_label, + EDGE_FILTER_T& edge_filter, PropNameArray& props, + size_t limit = INT_MAX) { + auto state = + EdgeExpandEState, + EDGE_FILTER_T, T...>(graph, cur_vertex_set, direction, + edge_label, other_label, props, + edge_filter, limit); + return EdgeExpandETwoLabelSetImpl(state); + } + + // EdgeExpand with single src vertex, one edge label, but multiple dst labels, + // no edge props + template < + typename... T, typename... 
SET_T, size_t num_labels, + typename EDGE_FILTER_T, + typename RES_T = std::pair< + MultiLabelDstEdgeSet, + std::vector>> + static RES_T EdgeExpandE( + const GRAPH_INTERFACE& graph, + RowVertexSet& cur_vertex_set, + Direction direction, label_id_t edge_label, + std::array other_label, + EDGE_FILTER_T& edge_filter, PropNameArray& props, + size_t limit = INT_MAX) { + auto state = EdgeExpandEMutltiDstState< + GRAPH_INTERFACE, RowVertexSet, + num_labels, EDGE_FILTER_T, T...>(graph, cur_vertex_set, direction, + edge_label, other_label, props, + edge_filter, limit); + return EdgeExpandESingleLabelSrcMutliDstImpl(state); + } + + // impl EdgeExpandESingleLabelSrcMutliDstImpl, no prop + template < + typename VERTEX_SET_T, size_t num_labels, typename EDGE_FILTER_T, + typename RES_T = std::pair< + MultiLabelDstEdgeSet, + std::vector>> + static RES_T EdgeExpandESingleLabelSrcMutliDstImpl( + EdgeExpandEMutltiDstState& state) { + auto& cur_set = state.cur_vertex_set_; + using adj_list_array_t = + typename GRAPH_INTERFACE::template adj_list_array_t<>; + std::array + res_adj_list_arrays; // one for each dst labels. 
+ + for (auto i = 0; i < num_labels; ++i) { + label_id_t src_label, dst_label; + if (state.direction_ == Direction::In) { + src_label = state.other_label_[i]; + dst_label = cur_set.GetLabel(); + } else { + src_label = cur_set.GetLabel(); + dst_label = state.other_label_[i]; + } + LOG(INFO) << "Obtaining edges from " << gs::to_string(src_label) << " to " + << gs::to_string(dst_label) << " with edge label " + << gs::to_string(state.edge_label_); + auto tmp = state.graph_.template GetEdges<>( + src_label, dst_label, state.edge_label_, cur_set.GetVertices(), + gs::to_string(state.direction_), state.limit_, {}); + res_adj_list_arrays[i].swap(tmp); + VLOG(10) << "fetch " << res_adj_list_arrays[i].size() << "edges from " + << cur_set.GetVertices().size() << "vertices"; + } + + std::vector offset; + size_t prev_set_size = cur_set.Size(); + offset.reserve(prev_set_size + 1); + size_t size = 0; + offset.emplace_back(size); + // Construct offset from adj_list. + for (auto i = 0; i < prev_set_size; ++i) { + for (auto j = 0; j < num_labels; ++j) { + auto edges = res_adj_list_arrays[j].get(i); + size += edges.size(); // number of edges in this AdjList + } + offset.emplace_back(size); + } + VLOG(10) << "num edges: " << size; + VLOG(10) << "offset: array: " << gs::to_string(offset); + auto copied_vids(cur_set.GetVertices()); + + // construct a edge set which contains dst vertices of multiple labels. 
+ MultiLabelDstEdgeSet + edge_set(std::move(copied_vids), std::move(res_adj_list_arrays), + state.edge_label_, cur_set.GetLabel(), state.other_label_, + state.direction_); + CHECK(offset.back() == edge_set.Size()) + << "offset: " << offset.back() << ", " << edge_set.Size(); + return std::make_pair(std::move(edge_set), std::move(offset)); + } + + private: + template + static auto EdgeExpandVFromSingleLabel( + EdgeExpandVState>& state) { + label_id_t src_label, dst_label; + std::tie(src_label, dst_label) = get_graph_label_pair( + state.direction_, state.cur_vertex_set_.GetLabel(), state.other_label_); + + VLOG(10) << "[EdgeExpandV]: from label: " + << std::to_string(state.cur_vertex_set_.GetLabel()) + << ", vertex num: " << std::to_string(state.cur_vertex_set_.Size()) + << ", other label: " << std::to_string(state.other_label_) + << ",edge label: " << std::to_string(state.edge_label_) + << "src: " << std::to_string(src_label) + << ",dst: " << std::to_string(dst_label) + << ",direction: " << state.direction_; + auto nbr_list_array = state.graph_.GetOtherVertices( + src_label, dst_label, state.edge_label_, + state.cur_vertex_set_.GetVertices(), gs::to_string(state.direction_), + state.limit_); + std::vector vids; + std::vector offset; + offset.reserve(state.cur_vertex_set_.Size() + 1); + CHECK(nbr_list_array.size() == state.cur_vertex_set_.Size()); + // first gather size. + offset.emplace_back(vids.size()); + for (auto i = 0; i < nbr_list_array.size(); ++i) { + auto nbr_list = nbr_list_array.get(i); + for (auto nbr : nbr_list) { + vids.emplace_back(nbr.neighbor()); + } + offset.emplace_back(vids.size()); + } + + vertex_set_t result_set(std::move(vids), state.other_label_); + auto pair = std::make_pair(std::move(result_set), std::move(offset)); + return pair; + } + // the input src is multilabel. + // construct a multi label vertex set whose's src are multi label, but dst are + // same label. 
+ // required props >= 1 + template + static auto EdgeExpandEMultiLabelSrcImpl( + EdgeExpandEState& + state) { + auto prop_names = state.prop_names_; + VLOG(10) << "[EdgeExpandEMultiLabelSrcImpl]" << prop_names.size(); + static constexpr size_t num_labels = VERTEX_SET_T::num_labels; + auto& multi_label_set = state.cur_vertex_set_; + using adj_list_array_t = + typename GRAPH_INTERFACE::template adj_list_array_t; + std::array res_adj_list_arrays; + std::array, num_labels> vids_arrays; + std::array, num_labels> offset_arrays; + + for (auto i = 0; i < num_labels; ++i) { + auto& cur_set = multi_label_set.GetSet(i); + vids_arrays[i] = cur_set.GetVertices(); + offset_arrays[i] = multi_label_set.GetOffset(i); + VLOG(10) << "offset array for: " << i + << "is: " << gs::to_string(offset_arrays[i]); + + label_id_t src_label, dst_label; + if (state.direction_ == Direction::In) { + src_label = state.other_label_; + dst_label = cur_set.GetLabel(); + } else { + src_label = cur_set.GetLabel(); + dst_label = state.other_label_; + } + + auto tmp = state.graph_.template GetEdges( + src_label, dst_label, state.edge_label_, cur_set.GetVertices(), + gs::to_string(state.direction_), state.limit_, prop_names); + res_adj_list_arrays[i].swap(tmp); + VLOG(10) << "fetch " << res_adj_list_arrays[i].size() << "edges from " + << cur_set.GetVertices().size() << "vertices"; + } + + std::vector offset; + offset.reserve(multi_label_set.Size() + 1); + size_t size = 0; + offset.emplace_back(size); + // Construct offset from adj_list. 
+ for (auto iter : multi_label_set) { + auto cur_set_ind = iter.GetCurInd(); + auto inner_ind = iter.GetCurSetInnerInd(); + auto edges = res_adj_list_arrays[cur_set_ind].get(inner_ind); + size += edges.size(); // number of edges in this AdjList + offset.emplace_back(size); + } + VLOG(10) << "num edges: " << size; + VLOG(10) << "offset: array: " << gs::to_string(offset); + auto copied_labels = multi_label_set.GetLabels(); + for (auto l : copied_labels) { + VLOG(10) << l; + } + MulLabelSrcGrootEdgeSet + edge_set(std::move(vids_arrays), std::move(offset_arrays), + std::move(res_adj_list_arrays), prop_names, state.edge_label_, + copied_labels, state.other_label_); + CHECK(offset.back() == edge_set.Size()) + << "offset: " << offset.back() << ", " << edge_set.Size(); + return std::make_pair(std::move(edge_set), std::move(offset)); + } + + // the input src is multilabel and is general set. + // construct a multi label vertex set whose's src are multi label, but dst are + // same label. + // required props >= 1 + template + static auto EdgeExpandEGeneralSetImpl( + EdgeExpandEState& + state) { + auto prop_names = state.prop_names_; + static constexpr size_t num_labels = VERTEX_SET_T::num_labels; + auto& general_set = state.cur_vertex_set_; + auto total_vertices_num = general_set.Size(); + VLOG(10) << "[EdgeExpandEGeneralSetImpl]" << prop_names.size() + << ", total vnum: " << total_vertices_num; + + using adj_list_t = typename GRAPH_INTERFACE::template adj_list_t; + std::vector res_adj_list_arrays(total_vertices_num); + // overall vid array. 
+ std::vector vids_arrays(general_set.GetVertices()); + std::array, num_labels> offset_arrays; + + label_id_t src_label, dst_label; + if (state.direction_ == Direction::In) { + src_label = state.other_label_; + dst_label = general_set.GetLabel(); + } else { + src_label = general_set.GetLabel(); + dst_label = state.other_label_; + } + + auto direction_str = gs::to_string(state.direction_); + for (auto i = 0; i < num_labels; ++i) { + std::vector cur_vids; + std::vector cur_active_inds; + std::tie(cur_vids, cur_active_inds) = general_set.GetVertices(i); + auto tmp = state.graph_.template GetEdges( + src_label, dst_label, state.edge_label_, cur_vids, direction_str, + state.limit_, prop_names); + CHECK(tmp.size() == cur_active_inds.size()); + for (size_t j = 0; j < cur_active_inds.size(); ++j) {  // j walks tmp / cur_active_inds; i is the outer label index and must not be advanced here + res_adj_list_arrays[cur_active_inds[j]] = tmp.get(j); + } + } + + std::vector offset; + offset.reserve(general_set.Size() + 1); + size_t size = 0; + offset.emplace_back(size); + // Construct offset from adj_list. 
+ for (auto edges : res_adj_list_arrays) { + size += edges.size(); // number of edges in this AdjList + offset.emplace_back(size); + } + VLOG(10) << "num edges: " << size; + VLOG(10) << "offset: array: " << gs::to_string(offset); + auto copied_labels(general_set.GetLabels()); + auto copied_bitsets(general_set.GetBitsets()); + + GeneralEdgeSet + edge_set(std::move(vids_arrays), std::move(res_adj_list_arrays), + std::move(copied_bitsets), prop_names, state.edge_label_, + copied_labels, state.other_label_, state.direction_); + CHECK(offset.back() == edge_set.Size()) + << "offset: " << offset.back() << ", " << edge_set.Size(); + return std::make_pair(std::move(edge_set), std::move(offset)); + } + + // the input set is two label set, and the result set is one label set + template + static auto EdgeExpandETwoLabelSetImpl( + EdgeExpandEState& + state) { + auto prop_names = state.prop_names_; + static constexpr size_t num_labels = VERTEX_SET_T::num_labels; + auto& general_set = state.cur_vertex_set_; + auto total_vertices_num = general_set.Size(); + VLOG(10) << "[EdgeExpandETwoLabelSetImpl]" << prop_names.size() + << ", total vnum: " << total_vertices_num; + + using adj_list_t = typename GRAPH_INTERFACE::template adj_list_t; + using adj_list_array_t = + typename GRAPH_INTERFACE::template adj_list_array_t; + // std::vector> res_adj_list_arrays(total_vertices_num); + adj_list_array_t res_adj_list_arrays; + res_adj_list_arrays.resize(total_vertices_num); + // overall vid array. 
+ std::vector vids_arrays(general_set.GetVertices()); + std::array, num_labels> offset_arrays; + + label_id_t src_label, dst_label; + + auto direction_str = gs::to_string(state.direction_); + for (auto i = 0; i < num_labels; ++i) { + if (state.direction_ == Direction::In) { + src_label = state.other_label_; + dst_label = general_set.GetLabel(i); + } else { + src_label = general_set.GetLabel(i); + dst_label = state.other_label_; + } + std::vector cur_vids; + std::vector cur_active_inds; + std::tie(cur_vids, cur_active_inds) = general_set.GetVertices(i); + auto tmp = state.graph_.template GetEdges( + src_label, dst_label, state.edge_label_, cur_vids, direction_str, + state.limit_, prop_names); + CHECK(tmp.size() == cur_active_inds.size()); + if constexpr (GRAPH_INTERFACE::is_grape) { + // for grape graph, we can use operator =, since all data is already in + // memory + for (auto j = 0; j < cur_active_inds.size(); ++j) { + // res_adj_list_arrays[cur_active_inds[j]] = tmp.get(j); + res_adj_list_arrays.set(cur_active_inds[j], tmp.get(j)); + } + } else { + for (auto j = 0; j < cur_active_inds.size(); ++j) { + res_adj_list_arrays.get_vector(cur_active_inds[j]) + .swap(tmp.get_vector(j)); + } + } + } + + std::vector offset; + offset.reserve(general_set.Size() + 1); + size_t size = 0; + offset.emplace_back(size); + // Construct offset from adj_list. 
+ for (auto i = 0; i < res_adj_list_arrays.size(); ++i) { + auto edges = res_adj_list_arrays.get(i); + size += edges.size(); // number of edges in this AdjList + offset.emplace_back(size); + } + VLOG(10) << "num edges: " << size; + VLOG(10) << "offset: array: " << gs::to_string(offset); + auto copied_labels(general_set.GetLabels()); + auto& old_bitset = general_set.GetBitset(); + grape::Bitset new_bitset; + new_bitset.init(old_bitset.cardinality()); + for (auto i = 0; i < old_bitset.cardinality(); ++i) { + new_bitset.set_bit(i); + } + + GeneralEdgeSet + edge_set(std::move(vids_arrays), std::move(res_adj_list_arrays), + std::move(new_bitset), prop_names, state.edge_label_, + copied_labels, state.other_label_, state.direction_); + CHECK(offset.back() == edge_set.Size()) + << "offset: " << offset.back() << ", " << edge_set.Size(); + return std::make_pair(std::move(edge_set), std::move(offset)); + } + + // optimize for filter expr is true predicate + template + static auto EdgeExpandESingleLabelSrcImpl( + EdgeExpandEState& + state) { + auto prop_names = state.prop_names_; + auto& cur_set = state.cur_vertex_set_; + VLOG(10) << "[EdgeExpandESingleLabelSrcImpl]" << prop_names.size() + << ", set size: " << cur_set.Size(); + for (auto v : prop_names) { + VLOG(10) << "prop:" << v; + } + + label_id_t src_label, dst_label; + if (state.direction_ == Direction::In) { + src_label = state.other_label_; + dst_label = cur_set.GetLabel(); + } else { + src_label = cur_set.GetLabel(); + dst_label = state.other_label_; + } + + auto adj_list_array = state.graph_.template GetEdges( + src_label, dst_label, state.edge_label_, cur_set.GetVertices(), + gs::to_string(state.direction_), state.limit_, prop_names); + + std::vector offset; + offset.reserve(cur_set.Size() + 1); + size_t size = 0; + offset.emplace_back(size); + CHECK(cur_set.Size() == adj_list_array.size()); + std::vector>> + prop_tuples; + prop_tuples.reserve(cur_set.Size() + 1); + // Construct offset from adj_list. 
+ auto cur_set_iter = cur_set.begin(); + auto end_iter = cur_set.end(); + for (auto i = 0; i < adj_list_array.size(); ++i) { + auto edges = adj_list_array.get(i); + CHECK(cur_set_iter != end_iter); + auto src = cur_set_iter.GetVertex(); + for (auto edge : edges) { + auto& props = edge.properties(); + // current hack impl for edge property + // TODO: better performance + if (run_expr_filter(state.edge_filter_.expr_, props)) { + prop_tuples.emplace_back( + std::make_tuple(src, edge.neighbor(), props)); + } + } + ++cur_set_iter; + offset.emplace_back(prop_tuples.size()); + } + VLOG(10) << "num edges: " << prop_tuples.size(); + // VLOG(10) << "offset: array: " << gs::to_string(offset); + // copy vids + auto copied_vids(cur_set.GetVertices()); + std::vector label_vec(prop_tuples.size(), cur_set.GetLabel()); + FlatEdgeSet edge_set( + std::move(prop_tuples), state.edge_label_, {cur_set.GetLabel()}, + state.other_label_, prop_names, std::move(label_vec), state.direction_); + + CHECK(offset.back() == edge_set.Size()) + << "offset: " << offset.back() << ", " << edge_set.Size(); + return std::make_pair(std::move(edge_set), std::move(offset)); + } + + template + static inline bool run_expr_filter(const FILTER_T& filter, + const std::tuple& props) { + return run_expr_filter(filter, props, + std::make_index_sequence()); + } + + template + static inline bool run_expr_filter(const FILTER_T& filter, + const std::tuple& props, + std::index_sequence) { + return filter(std::get(props)...); + } + + // EdgeExpandE for multilabel input vertex set. + template < + typename VERTEX_SET_T, typename EDGE_FILTER_T, + typename std::enable_if::type* = nullptr> + static auto EdgeExpandENoPropImpl( + EdgeExpandEState& state) { + // no prop. 
+ auto prop_names = state.prop_names_; + VLOG(10) << "[EdgeExpandEMultiLabelSrcImpl]" << prop_names.size(); + static constexpr size_t num_labels = VERTEX_SET_T::num_labels; + using adj_list_array_t = + typename GRAPH_INTERFACE::template adj_list_array_t<>; + auto& multi_label_set = state.cur_vertex_set_; + + std::array res_adj_list_arrays; + std::array, num_labels> vids_arrays; + std::array, num_labels> offset_arrays; + + for (auto i = 0; i < num_labels; ++i) { + auto& cur_set = multi_label_set.GetSet(i); + vids_arrays[i] = cur_set.GetVertices(); + offset_arrays[i] = multi_label_set.GetOffset(i); + VLOG(10) << "offset array for: " << i + << "is: " << gs::to_string(offset_arrays[i]); + + label_id_t src_label, dst_label; + if (state.direction_ == Direction::In) { + src_label = state.other_label_; + dst_label = cur_set.GetLabel(); + } else { + src_label = cur_set.GetLabel(); + dst_label = state.other_label_; + } + + auto tmp = state.graph_.template GetEdges<>( + src_label, dst_label, state.edge_label_, cur_set.GetVertices(), + gs::to_string(state.direction_), state.limit_, prop_names); + res_adj_list_arrays[i].swap(tmp); + VLOG(10) << "fetch " << res_adj_list_arrays[i].size() << "edges from " + << cur_set.GetVertices().size() << "vertices"; + } + + std::vector offset; + offset.reserve(multi_label_set.Size() + 1); + size_t size = 0; + offset.emplace_back(size); + // Construct offset from adj_list. 
+ for (auto iter : multi_label_set) { + auto cur_set_ind = iter.GetCurInd(); + auto inner_ind = iter.GetCurSetInnerInd(); + auto edges = res_adj_list_arrays[cur_set_ind].get(inner_ind); + size += edges.size(); // number of edges in this AdjList + offset.emplace_back(size); + } + VLOG(10) << "num edges: " << size; + VLOG(10) << "offset: array: " << gs::to_string(offset); + auto copied_labels = multi_label_set.GetLabels(); + for (auto l : copied_labels) { + VLOG(10) << l; + } + MulLabelSrcGrootEdgeSet + edge_set(std::move(vids_arrays), std::move(offset_arrays), + std::move(res_adj_list_arrays), state.edge_label_, + copied_labels, state.other_label_); + CHECK(offset.back() == edge_set.Size()) + << "offset: " << offset.back() << ", " << edge_set.Size(); + return std::make_pair(std::move(edge_set), std::move(offset)); + } + + // EdgeExpandE for general input vertex set. + template < + typename VERTEX_SET_T, typename EDGE_FILTER_T, + typename std::enable_if::type* = nullptr> + static auto EdgeExpandENoPropImplForGeneralSet( + EdgeExpandEState& state) { + // no prop. 
+ auto prop_names = state.prop_names_; + VLOG(10) << "[EdgeExpandENoPropImpl] for general vertex set of prop size: " + << prop_names.size(); + static constexpr size_t num_labels = VERTEX_SET_T::num_labels; + using adj_list_array_t = + typename GRAPH_INTERFACE::template adj_list_array_t<>; + auto& multi_label_set = state.cur_vertex_set_; + + std::array res_adj_list_arrays; + std::array, num_labels> vids_arrays; + std::array, num_labels> offset_arrays; + + for (auto i = 0; i < num_labels; ++i) { + auto& cur_set = multi_label_set.GetSet(i); + vids_arrays[i] = cur_set.GetVertices(); + offset_arrays[i] = multi_label_set.GetOffset(i); + VLOG(10) << "offset array for: " << i + << "is: " << gs::to_string(offset_arrays[i]); + + label_id_t src_label, dst_label; + if (state.direction_ == Direction::In) { + src_label = state.other_label_; + dst_label = cur_set.GetLabel(); + } else { + src_label = cur_set.GetLabel(); + dst_label = state.other_label_; + } + + auto tmp = state.graph_.template GetEdges<>( + src_label, dst_label, state.edge_label_, cur_set.GetVertices(), + gs::to_string(state.direction_), state.limit_, prop_names); + res_adj_list_arrays[i].swap(tmp); + VLOG(10) << "fetch " << res_adj_list_arrays[i].size() << "edges from " + << cur_set.GetVertices().size() << "vertices"; + } + + std::vector offset; + offset.reserve(multi_label_set.Size() + 1); + size_t size = 0; + offset.emplace_back(size); + // Construct offset from adj_list. 
+ for (auto iter : multi_label_set) { + auto cur_set_ind = iter.GetCurInd(); + auto inner_ind = iter.GetCurSetInnerInd(); + auto edges = res_adj_list_arrays[cur_set_ind].get(inner_ind); + size += edges.size(); // number of edges in this AdjList + offset.emplace_back(size); + } + VLOG(10) << "num edges: " << size; + VLOG(10) << "offset: array: " << gs::to_string(offset); + auto copied_labels = multi_label_set.GetLabels(); + for (auto l : copied_labels) { + VLOG(10) << l; + } + MulLabelSrcGrootEdgeSet + edge_set(std::move(vids_arrays), std::move(offset_arrays), + std::move(res_adj_list_arrays), state.edge_label_, + copied_labels, state.other_label_); + CHECK(offset.back() == edge_set.Size()) + << "offset: " << offset.back() << ", " << edge_set.Size(); + return std::make_pair(std::move(edge_set), std::move(offset)); + } + + // EdgeExpandE for single label input vertex set. + template + static auto EdgeExpandENoPropImpl( + EdgeExpandEState, + EDGE_FILTER_T>& state) { + // no prop. + auto prop_names = state.prop_names_; + label_id_t src_label, dst_label; + if (state.direction_ == Direction::In) { + src_label = state.other_label_; + dst_label = state.cur_vertex_set_.GetLabel(); + } else { + src_label = state.cur_vertex_set_.GetLabel(); + dst_label = state.other_label_; + } + LOG(INFO) << "[EdgeExpandENoPropImpl] for single label vertex set. " + << (int) src_label << " " << (int) dst_label; + auto adj_list_array = state.graph_.template GetEdges<>( + src_label, dst_label, state.edge_label_, + state.cur_vertex_set_.GetVertices(), gs::to_string(state.direction_), + state.limit_, prop_names); + LOG(INFO) << "after get edges"; + std::vector offset; + offset.reserve(state.cur_vertex_set_.Size() + 1); + size_t size = 0; + size_t adj_list_ind = 0; + offset.emplace_back(size); + // Construct offset from adj_list. 
+ for (auto iter : state.cur_vertex_set_) { + auto edges = adj_list_array.get(adj_list_ind); + size += edges.size(); // number of edges in this AdjList + offset.emplace_back(size); + adj_list_ind++; + } + LOG(INFO) << "total size of edges: " << size; + auto copied_vids(state.cur_vertex_set_.GetVertices()); + auto edge_set = + AdjEdgeSet( + std::move(copied_vids), std::move(adj_list_array), + state.edge_label_, state.cur_vertex_set_.GetLabel(), + state.other_label_, state.direction_); + return std::make_pair(std::move(edge_set), std::move(offset)); + } + + // only support one property + template + static auto get_adj_list_array_with_filter( + EdgeExpandVState& state, + LabelT src_label, LabelT dst_label, + std::tuple>& selectors) { + return get_adj_list_array_with_filter(state, src_label, dst_label, + std::get<0>(selectors)); + } + + template + static auto get_adj_list_array_with_filter( + EdgeExpandVState& state, + LabelT src_label, LabelT dst_label, PropertySelector& selector) { + VLOG(10) << "before get edges" << gs::to_string(selector.prop_name_); + std::array prop_names = {selector.prop_name_}; + auto adj_list_array = state.graph_.template GetEdges( + src_label, dst_label, state.edge_label_, + state.cur_vertex_set_.GetVertices(), gs::to_string(state.direction_), + state.limit_, prop_names); + return adj_list_array; + } + + static std::tuple get_graph_label_pair( + Direction& direction, label_id_t query_src_label, + label_id_t query_dst_label) { + label_id_t src_label, dst_label; + if (direction == Direction::In) { + src_label = query_dst_label; + dst_label = query_src_label; + } else { + src_label = query_src_label; + dst_label = query_dst_label; + } + return std::tuple{src_label, dst_label}; + } +}; + +} // namespace gs + +#endif // ENGINES_HQPS_ENGINE_OPERATOR_EDGE_EXPAND_H_ diff --git a/flex/engines/hqps_db/core/operator/get_v.h b/flex/engines/hqps_db/core/operator/get_v.h new file mode 100644 index 000000000000..c56076f8b836 --- /dev/null +++ 
b/flex/engines/hqps_db/core/operator/get_v.h @@ -0,0 +1,298 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef ENGINES_HQPS_ENGINE_OPERATOR_GET_V_H_ +#define ENGINES_HQPS_ENGINE_OPERATOR_GET_V_H_ + +#include +#include + +#include "flex/engines/hqps_db/core/utils/hqps_utils.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/multi_label_vertex_set.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/two_label_vertex_set.h" + +#include "flex/storages/rt_mutable_graph/types.h" +#include "grape/utils/bitset.h" + +namespace gs { + +template +class GetVertex { + public: + using label_id_t = typename GRAPH_INTERFACE::label_id_t; + using vertex_id_t = typename GRAPH_INTERFACE::vertex_id_t; + using default_vertex_set_t = DefaultRowVertexSet; + + template + using vertex_set_t = RowVertexSet; + + template ::type* = nullptr, + typename RES_T = std::pair>> + static RES_T GetNoPropV( + const GRAPH_INTERFACE& graph, const SET_T& set, + GetVOpt>& get_v_opt) { + // VLOG(10) << "[Get no PropertyV from vertex set]" << set.Size(); + return GetNoPropVSetFromVertexSet(graph, set, get_v_opt); + } + + // get no propv from common edge set. 
+ template < + typename SET_T, typename LabelT, size_t num_labels, typename EXPRESSION, + typename std::enable_if<(SET_T::is_edge_set && + !SET_T::is_multi_dst_label)>::type* = nullptr, + typename RES_T = std::pair>> + static RES_T GetNoPropVFromEdgeSet( + const GRAPH_INTERFACE& graph, const SET_T& set, + GetVOpt&& get_v_opt) { + VLOG(10) << "[Get no PropertyV from edge set]" << set.Size(); + return GetNoPropVSetFromSingleDstEdgeSet(graph, set, + std::move(get_v_opt)); + } + + // get no propv from two label dst edge set + template ::type* = nullptr, + typename RES_T = std::pair< + TwoLabelVertexSet, + std::vector>> + static RES_T GetNoPropVFromEdgeSet( + const GRAPH_INTERFACE& graph, const SET_T& set, + GetVOpt&& get_v_opt) { + VLOG(10) << "[Get no PropertyV from mutlti dst edge set]" << set.Size(); + return GetNoPropVSetFromMutliDstEdgeSet(graph, set, + std::move(get_v_opt)); + } + + // Result is multilabelVertexset. + template 1)>::type* = nullptr, + typename RES_T = + std::pair, num_labels>, + std::vector>> + static RES_T GetPropertyV( + const GRAPH_INTERFACE& graph, const SET_T& set, + GetVOpt&& get_v_opt) { + VLOG(10) << "[Get PropertyV from vertex set]" << set.Size(); + return GetMultiPropertyVSetFromVertexSet(graph, set, + std::move(get_v_opt)); + } + + /// Get vertex with properties from two label vertex set. + template + static auto GetPropertyVFromTwoLabelSet( + const GRAPH_INTERFACE& graph, const SET_T& set, + const GetVOpt& get_v_opt) { + auto v_opt = get_v_opt.v_opt_; + CHECK(v_opt == VOpt::Itself) + << "Can only get v from vertex set with v_opt == vopt::Itself"; + auto v_labels = get_v_opt.v_labels_; + auto props = get_v_opt.props_; + auto expr = get_v_opt.expr_; + // first extract properties, and create new properties, + // We assume the expr.props <= props. 
+ double t0 = -grape::GetCurrentTime(); + auto property_tuples = get_property_tuple_two_label(graph, set, props); + auto set_with_tuple = + set.WithData(std::move(property_tuples), std::move(props)); + t0 += grape::GetCurrentTime(); + LOG(INFO) << "Get property tuple for two label set of size: " << set.Size() + << " cost: " << t0; + double t1 = -grape::GetCurrentTime(); + auto res = set_with_tuple.project_vertices_internal(v_labels, expr); + t1 += grape::GetCurrentTime(); + LOG(INFO) << "Filter cost: " << t1; + return res; + //.project_vertices. + // create new vertices with indices and property vector. + } + + // specialization for two label vertex set, return two label vertex set + // with + // labels. + template < + typename SET_T, typename LabelT, typename... T, size_t num_labels, + typename EXPRESSION, + typename std::enable_if<(SET_T::is_vertex_set && + SET_T::is_two_label_set && + num_labels > 1)>::type* = nullptr, + typename RES_T = std::pair, + std::vector>> + static RES_T GetPropertyV( + const GRAPH_INTERFACE& graph, const SET_T& set, + GetVOpt&& get_v_opt) { + VLOG(10) << "[Get PropertyV from vertex set]" << set.Size(); + return GetPropertyVFromTwoLabelSet(graph, set, get_v_opt); + } + + /// Get vertex with properties from vertex set. + template + static RES_T GetMultiPropertyVSetFromVertexSet( + const GRAPH_INTERFACE& graph, const SET_T& set, + GetVOpt&& get_v_opt) { + static_assert(SET_T::is_multi_label); + auto v_opt = get_v_opt.v_opt_; + auto v_labels = get_v_opt.v_labels_; + auto props = get_v_opt.props_; + auto expr = get_v_opt.expr_; + + auto result_vertex_and_offset = do_project(graph, v_labels, expr, set); + /// Then combine columns. + // TODO: Shrink for vector-based columns. 
+ // auto col_tuple = GetColTuples(graph result_vertex_and_offset.first, + // props); + + static constexpr size_t multi_set_size = SET_T::num_labels; + auto array = get_multi_label_set_properties( + graph, std::move(result_vertex_and_offset.first), props, + std::make_index_sequence()); + typename RES_T::first_type multi_v_set(std::move(std::get<0>(array)), + std::move(std::get<1>(array))); + return std::make_pair(std::move(multi_v_set), + std::move(result_vertex_and_offset.second)); + } + + template + static auto get_multi_label_set_properties(const GRAPH_INTERFACE& graph, + SET_T&& multi_set, + PropNameArray& props, + std::index_sequence) { + using res_set_t = vertex_set_t; + static constexpr size_t num_labels = SET_T::num_labels; + std::array>, num_labels> res_data_tuples; + for (auto i = 0; i < num_labels; ++i) { + auto& cur_set = multi_set.GetSet(i); + VLOG(10) << "set: " << i << ", size: " << cur_set.Size(); + res_data_tuples[i] = graph.template GetVertexPropsFromVid( + cur_set.GetLabel(), cur_set.GetVertices(), props); + } + VLOG(10) << "Finish get data tuples"; + auto set_array = std::array{make_row_vertex_set( + std::move(multi_set.template GetSet().MoveVertices()), + multi_set.template GetSet().GetLabel(), + std::move(res_data_tuples[Is]), props)...}; + auto offset_array = std::array, num_labels>{ + std::move(multi_set.template GetOffset())...}; + return std::make_pair(std::move(set_array), std::move(offset_array)); + } + + template + static RES_T GetNoPropVSetFromVertexSet( + const GRAPH_INTERFACE& graph, const SET_T& set, + GetVOpt>& get_v_opt) { + auto v_opt = get_v_opt.v_opt_; + auto filter = get_v_opt.filter_; + return do_project(graph, get_v_opt.v_labels_, filter, set); + } + + // get single label from single dst edge label. 
+ template + static RES_T GetNoPropVSetFromSingleDstEdgeSet( + const GRAPH_INTERFACE& graph, const SET_T& set, + GetVOpt&& get_v_opt) { + auto v_opt = get_v_opt.v_opt_; + auto v_label = get_v_opt.v_labels_[0]; + auto expr = get_v_opt.filter_.expr_; + return set.GetVertices(get_v_opt.v_opt_, get_v_opt.v_labels_, expr); + } + + // get multiple label dst edge label. returns two label set. + template ::type* = nullptr> + static RES_T GetNoPropVSetFromMutliDstEdgeSet( + const GRAPH_INTERFACE& graph, + const MultiLabelDstEdgeSet& + set, + GetVOpt&& get_v_opt) { + auto v_opt = get_v_opt.v_opt_; + auto expr = get_v_opt.expr_; + return set.GetVertices(get_v_opt.v_opt_, get_v_opt.v_labels_, expr); + } + + private: + // User-defined expression + // for vertex set with multiple labels, i.e. two_label or general vertex set. + // do project. + template < + typename LabelT, size_t num_labels, typename EXPRESSION, + typename... SELECTOR, typename SET_T, + typename std::enable_if && + (SET_T::is_general_set || + SET_T::is_two_label_set)>::type* = nullptr> + static auto do_project(const GRAPH_INTERFACE& graph, + std::array& labels, + Filter& filter, + const SET_T& set) { + double t0 = -grape::GetCurrentTime(); + // array size : num_labels + auto property_getters_array = get_prop_getters_from_selectors( + graph, set.GetLabels(), filter.selectors_); + t0 += grape::GetCurrentTime(); + LOG(INFO) << "Get property tuple for general set of size: " << set.Size() + << " cost: " << t0; + return set.project_vertices(labels, filter.expr_, property_getters_array); + } + + // udf expression with single label. 
+ template + static auto do_project( + const GRAPH_INTERFACE& graph, std::array& labels, + Filter& filter, + const RowVertexSet& set) { + // TODO: support for multiple selectors + auto property_getters_array = std::array{get_prop_getter_from_selectors( + graph, set.GetLabel(), filter.selectors_)}; + return set.project_vertices(labels, filter.expr_, property_getters_array); + } + + // true predicate and single label. + template + static auto do_project( + const GRAPH_INTERFACE& graph, std::array& labels, + Filter& filter, + const RowVertexSet& set) { + // since expression always returns true, we provide set with a + // always-return-true prop getter. + return set.project_vertices(labels); + } + + // True predicate and multi label + template ::type* = nullptr> + static auto do_project(const GRAPH_INTERFACE& graph, + std::array& labels, + Filter& filter, const SET_T& set) { + return set.project_vertices(labels); + } +}; +} // namespace gs + +#endif // ENGINES_HQPS_ENGINE_OPERATOR_GET_V_H_ diff --git a/flex/engines/hqps_db/core/operator/group_by.h b/flex/engines/hqps_db/core/operator/group_by.h new file mode 100644 index 000000000000..f80a95992b56 --- /dev/null +++ b/flex/engines/hqps_db/core/operator/group_by.h @@ -0,0 +1,585 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#ifndef ENGINES_HQPS_ENGINE_OPERATOR_GROUP_H_ +#define ENGINES_HQPS_ENGINE_OPERATOR_GROUP_H_ + +#include +#include +#include + +#include "flex/engines/hqps_db/core/context.h" +#include "flex/engines/hqps_db/core/params.h" +#include "flex/engines/hqps_db/core/utils/keyed.h" +#include "flex/engines/hqps_db/structures/collection.h" + +namespace gs { + +// For each aggreator, return the type of applying aggregate on the desired col. +// with possible aggregate func. + +template +struct CommonBuilderT; + +template +struct CommonBuilderT> { + using set_t = std::remove_const_t().template GetNode())>>; + using builder_t = typename set_t::builder_t; + using result_t = typename builder_t::result_t; +}; + +template +struct GroupKeyResT; + +template +struct GroupKeyResT> { + using set_t = std::remove_const_t().template GetNode())>>; + using result_t = typename KeyedT>::keyed_set_t; +}; + +template +struct GroupValueResT; + +template +struct GroupValueResTImpl; + +template +struct GroupValueResT, + std::integer_sequence>> { + using old_set_t = std::remove_const_t().template GetNode())>>; + using result_t = + typename GroupValueResTImpl>::result_t; +}; + +// specialization for count for single tag +// TODO: count for pairs. +template +struct GroupValueResTImpl>> { + using result_t = Collection; +}; + +template +struct GroupValueResTImpl>> { + using result_t = Collection; +}; + +template +struct GroupValueResTImpl, AggFunc::SUM, + std::tuple>> { + using result_t = Collection; +}; + +// specialization for to_set +// TODO: to set for pairs. +template +struct GroupValueResTImpl, AggFunc::TO_SET, + std::tuple>> { + using result_t = Collection>; +}; + +template +struct GroupValueResTImpl, + AggFunc::TO_SET, + std::tuple>> { + using result_t = Collection>; +}; + +// specialization for to_list +// TODO: to set for pairs. 
+template +struct GroupValueResTImpl, AggFunc::TO_LIST, + std::tuple>> { + using result_t = Collection>; +}; + +// get the vertex's certain properties as list +template +struct GroupValueResTImpl, + AggFunc::TO_LIST, + std::tuple>> { + // using old_set_t = std::remove_const_t().template GetNode())>>; + using result_t = Collection>; +}; + +// get min value +template +struct GroupValueResTImpl, AggFunc::MIN, + std::tuple>> { + using result_t = Collection; +}; + +// support get max of vertexset's id +template +struct GroupValueResTImpl, AggFunc::MAX, + std::tuple>> { + using result_t = Collection; +}; + +// support get first from vertexset +template +struct GroupValueResTImpl, AggFunc::FIRST, + std::tuple>> { + // the old_set_t is vertex_set or collection + using result_t = + typename AggFirst>::result_t; +}; + +// support get first from two label vertex set +template +struct GroupValueResTImpl, + AggFunc::FIRST, + std::tuple>> { + // the old_set_t is vertex_set or collection + using result_t = + typename AggFirst>::result_t; +}; + +// get first from collection +template +struct GroupValueResTImpl, AggFunc::FIRST, + std::tuple>> { + // the old_set_t is vertex_set or collection + using result_t = typename AggFirst>::result_t; +}; + +template +struct UnWrapTuple; + +template +struct UnWrapTuple> { + using context_t = Context; +}; + +template +struct Rearrange { + using head_t = + std::tuple_element_t>; + using prev_t = + typename first_n>::type; + using context_t = + typename UnWrapTuple::context_t; +}; + +// only two nodees +// template +// struct Rearrange { +// using context_t = Context; +// }; + +// only one nodes +template +struct Rearrange { + using context_t = Context; +}; + +template +struct GroupResT; + +// We will return a brand new context. + +// after groupby, we will get a brand new context, and the tag_ids will start +// from 0. 
+template +struct GroupResT, std::tuple> { + static constexpr int new_cur_alias = +sizeof...(AGG_T); + // result ctx type + using result_t = typename Rearrange< + new_cur_alias, 0, typename GroupKeyResT::result_t, + typename GroupValueResT::result_t...>::context_t; +}; + +// keyed by two sets +template +struct GroupResT, std::tuple> { + static constexpr int new_cur_alias = + sizeof...(GROUP_KEY) + sizeof...(AGG_T) - 1; + // result ctx type + using result_t = typename Rearrange< + new_cur_alias, 0, typename CommonBuilderT::result_t..., + typename GroupValueResT::result_t...>::context_t; +}; + +template +struct FoldResT; + +// We will return a brand new context. +template +struct FoldResT> { + // take the largest alias in current context as base_tag. + static constexpr int base_tag = CTX_T::max_tag_id + 1; + static constexpr int new_head_tag = base_tag + sizeof...(AGG_T) - 1; + + // result ctx type + using result_t = typename Rearrange< + new_head_tag, base_tag, + typename GroupValueResT::result_t...>::context_t; +}; + +template +class GroupByOp { + using label_id_t = typename GRAPH_INTERFACE::label_id_t; + using vertex_id_t = typename GRAPH_INTERFACE::vertex_id_t; + using vertex_set_t = DefaultRowVertexSet; + + public: + template , + std::tuple>::result_t> + static RES_T GroupByWithoutKeyImpl( + const GRAPH_INTERFACE& graph, + Context&& ctx, + std::tuple&& group_opt) { + VLOG(10) << "new result_t, base tag: " << RES_T::base_tag_id; + // Currently we only support to to_count; + using agg_tuple_t = std::tuple; + using CTX_T = Context; + static constexpr size_t agg_num = std::tuple_size_v; + static constexpr size_t grouped_value_num = std::tuple_size_v; + // the result context must be one-to-one mapping. 
+ + if (ctx.get_sub_task_start_tag() == INVALID_TAG) { + LOG(FATAL) << "Not implemented now"; + } + + int start_tag = ctx.get_sub_task_start_tag(); + VLOG(10) << "start tag: " << start_tag; + auto& agg_tuple = group_opt; + + auto value_set_builder_tuple = create_keyed_value_set_builder_tuple( + graph, ctx.GetPrevCols(), ctx.GetHead(), agg_tuple, + std::make_index_sequence()); + VLOG(10) << "Create value set builders"; + + for (auto iter : ctx) { + auto ele_tuple = iter.GetAllIndexElement(); + auto data_tuple = iter.GetAllData(); + auto start_tag_ind = iter.GetTagOffset(start_tag); + // indicate at which index the start_tag element is in. + auto key = start_tag_ind; + + insert_to_value_set_builder(value_set_builder_tuple, ele_tuple, + data_tuple, start_tag_ind); + } + auto value_set_built = + build_value_set_tuple(std::move(value_set_builder_tuple), + std::make_index_sequence()); + return RES_T(std::move(std::get<0>(value_set_built)), + ctx.get_sub_task_start_tag()); + + // // create offset array with one-one mapping. 
+ // if (grouped_value_num == 1) { + // } else { + // auto offset_vec = make_offset_vector( + // grouped_value_num - 1, std::get<0>(value_set_built).size() + 1); + // VLOG(10) << "after group by, the set size: " << keyed_set_built.Size(); + // VLOG(10) << "offset vec: " << offset_vec.size(); + // VLOG(10) << "," << offset_vec[0].size(); + + // RES_T res(std::move(std::get(value_set_built)), + // std::move(gs::tuple_slice<0, grouped_value_num - 1>( + // std::move(value_set_built))), + // std::move(offset_vec)); + // return res; + // } + } + + // group by only one key_alias + template , + std::tuple, std::tuple>::result_t> + static RES_T GroupByImpl( + const GRAPH_INTERFACE& graph, + Context&& ctx, + std::tuple&& group_keys, std::tuple&& agg_tuple) { + VLOG(10) << "new result_t, base tag: " << RES_T::base_tag_id; + // Currently we only support to to_count; + using agg_tuple_t = std::tuple; + using key_alias_t = typename GROUP_KEY::selector_t; + using CTX_T = Context; + static constexpr size_t grouped_value_num = std::tuple_size_v; + static constexpr int keyed_tag_id = GROUP_KEY::col_id; + // the result context must be one-to-one mapping. + + auto& old_key_set = gs::Get(ctx); + using old_key_set_t = typename std::remove_const_t< + std::remove_reference_t>; + using keyed_set_builder_t = + typename KeyedT::builder_t; + auto keyed_set_size = old_key_set.Size(); + + // create a keyed set from the old key set. 
+ keyed_set_builder_t keyed_set_builder(old_key_set); + // VLOG(10) << "Create keyed set builder"; + auto value_set_builder_tuple = create_keyed_value_set_builder_tuple( + graph, ctx.GetPrevCols(), ctx.GetHead(), agg_tuple, + std::make_index_sequence()); + + // if group_key use property, we need property getter + // else we just insert into key_set + if constexpr (group_key_on_property::value) { + auto named_property = create_prop_desc_from_selector( + std::get<0>(group_keys).selector_); + auto prop_getter = + create_prop_getter_from_prop_desc(graph, ctx, named_property); + for (auto iter : ctx) { + auto ele_tuple = iter.GetAllIndexElement(); + auto data_tuple = iter.GetAllData(); + + auto key_ele = gs::get_from_tuple(ele_tuple); + auto data_ele = gs::get_from_tuple(data_tuple); + size_t ind = insert_to_keyed_set_with_prop_getter(keyed_set_builder, + prop_getter, key_ele); + + insert_to_value_set_builder(value_set_builder_tuple, ele_tuple, + data_tuple, ind); + } + } else { + for (auto iter : ctx) { + auto ele_tuple = iter.GetAllIndexElement(); + auto data_tuple = iter.GetAllData(); + + auto key_ele = gs::get_from_tuple(ele_tuple); + auto data_ele = gs::get_from_tuple(data_tuple); + size_t ind = insert_to_keyed_set(keyed_set_builder, key_ele, data_ele); + insert_to_value_set_builder(value_set_builder_tuple, ele_tuple, + data_tuple, ind); + } + } + + auto keyed_set_built = keyed_set_builder.Build(); + + auto value_set_built = + build_value_set_tuple(std::move(value_set_builder_tuple), + std::make_index_sequence()); + + // create offset array with one-one mapping. 
+ auto offset_vec = + make_offset_vector(grouped_value_num, keyed_set_built.Size()); + + auto new_tuple = std::tuple_cat(std::move(std::make_tuple(keyed_set_built)), + std::move(value_set_built)); + + RES_T res( + std::move(std::get(new_tuple)), + std::move(gs::tuple_slice<0, grouped_value_num>(std::move(new_tuple))), + std::move(offset_vec)); + + return res; + } + + // group by two key_alias, + template < + typename CTX_HEAD_T, int cur_alias, int base_tag, typename... CTX_PREV, + typename KEY_ALIAS0, typename KEY_ALIAS1, typename... AGG, + typename RES_T = typename GroupResT< + Context, + std::tuple, std::tuple>::result_t> + static RES_T GroupByImpl( + const GRAPH_INTERFACE& graph, + Context&& ctx, + std::tuple group_keys, + std::tuple&& aggs) { + VLOG(10) << "new result_t, base tag: " << RES_T::base_tag_id; + // Currently we only support to to_count; + using agg_tuple_t = std::tuple; + using key_alias0_t = KEY_ALIAS0; + using key_alias1_t = KEY_ALIAS1; + // we assume key_alias's tag are sequential. + + using CTX_T = Context; + static constexpr size_t grouped_value_num = std::tuple_size_v; + static constexpr int keyed_tag_id0 = key_alias0_t::col_id; + static constexpr int keyed_tag_id1 = key_alias1_t::col_id; + + // the result context must be one-to-one mapping. + + auto& old_key_set0 = gs::Get(ctx); + auto& old_key_set1 = gs::Get(ctx); + using old_key_set_t0 = typename std::remove_const_t< + std::remove_reference_t>; + using old_key_set_t1 = typename std::remove_const_t< + std::remove_reference_t>; + using old_key_set_iter_t0 = typename old_key_set_t0::iterator; + using old_key_set_iter_t1 = typename old_key_set_t1::iterator; + using old_key_set_ele0_t = std::remove_reference_t().GetIndexElement())>; + using old_key_set_ele1_t = std::remove_reference_t().GetIndexElement())>; + + // when grouping key is two key_alias, we use just set builder, not keyed + // builder. 
+ + auto& key_alias_opt0 = std::get<0>(group_keys); + auto& key_alias_opt1 = std::get<1>(group_keys); + // create a keyed set from the old key set. + // VLOG(10) << "Create keyed set builder"; + auto value_set_builder_tuple = create_keyed_value_set_builder_tuple( + graph, ctx.GetPrevCols(), ctx.GetHead(), aggs, + std::make_index_sequence()); + VLOG(10) << "Create value set builders"; + + if constexpr (!group_key_on_property::value && + !group_key_on_property::value) { + // NOTE: here when we create keyed set builder, we don't require it as a + // keyed builder,i.e. it doesn't need to deduplicate. + auto keyed_set_builder0 = old_key_set0.CreateBuilder(); + auto keyed_set_builder1 = old_key_set1.CreateBuilder(); + using con_key_ele_t = std::pair; + std::unordered_map> + key_tuple_set; + size_t cur_ind = 0; + for (auto iter : ctx) { + auto ele_tuple = iter.GetAllIndexElement(); + auto data_tuple = iter.GetAllData(); + + auto key_ele0 = gs::get_from_tuple(ele_tuple); + auto key_ele1 = gs::get_from_tuple(ele_tuple); + auto tmp_ele = std::make_pair(key_ele0, key_ele1); + + auto data_ele0 = gs::get_from_tuple(data_tuple); + auto data_ele1 = gs::get_from_tuple(data_tuple); + size_t ind = 0; + if (key_tuple_set.find(tmp_ele) != key_tuple_set.end()) { + // already exist + auto ind = key_tuple_set[tmp_ele]; + } else { + // not exist + ind = cur_ind++; + insert_into_builder_v2_impl(keyed_set_builder0, key_ele0, data_ele0); + insert_into_builder_v2_impl(keyed_set_builder1, key_ele1, data_ele1); + key_tuple_set[tmp_ele] = ind; + } + // CHECK insert key. 
+ insert_to_value_set_builder(value_set_builder_tuple, ele_tuple, + data_tuple, ind); + } + + auto keyed_set_built0 = keyed_set_builder0.Build(); + auto keyed_set_built1 = keyed_set_builder1.Build(); + CHECK(keyed_set_built0.Size() == keyed_set_built1.Size()) + << "size ueq: " << keyed_set_built0.Size() << " " + << keyed_set_built1.Size(); + + auto value_set_built = + build_value_set_tuple(std::move(value_set_builder_tuple), + std::make_index_sequence()); + // create offset array with one-one mapping. + auto offset_vec = + make_offset_vector(grouped_value_num + 1, keyed_set_built0.Size()); + + auto new_tuple = + std::tuple_cat(std::move(std::make_tuple(keyed_set_built0)), + std::move(std::make_tuple(keyed_set_built1)), + std::move(value_set_built)); + RES_T res(std::move(std::get(new_tuple)), + std::move(gs::tuple_slice<0, grouped_value_num + 1>( + std::move(new_tuple))), + std::move(offset_vec)); + + return res; + } else { + static_assert("Not implemented"); + } + } + + // ind is the index of the key in the key set + template + static void insert_to_value_set_builder( + std::tuple& value_set_builder, const ele_tuple_t& ele_tuple, + const data_tuple_t& data_tuple, size_t ind) { + std::get(value_set_builder).insert(ind, ele_tuple, data_tuple); + if constexpr (Is + 1 < sizeof...(SET_T)) { + insert_to_value_set_builder(value_set_builder, ele_tuple, + data_tuple, ind); + } + } + + template + static auto build_value_set_tuple(std::tuple&& builder_tuple, + std::index_sequence) { + return std::make_tuple(std::get(builder_tuple).Build()...); + } + + template + static auto create_keyed_value_set_builder_tuple( + const GRAPH_INTERFACE& graph, const std::tuple& prev, + const HEAD_T& head, std::tuple& agg_tuple, + std::index_sequence) { + return std::make_tuple(create_keyed_value_set_builder( + graph, prev, head, std::get(agg_tuple))...); + } + + template + static auto create_keyed_value_set_builder( + const GRAPH_INTERFACE& graph, const std::tuple& tuple, + const HEAD_T& 
head, + AggregateProp<_agg_func, std::tuple>, + std::integer_sequence>& agg) { + if constexpr (tag_id < sizeof...(SET_T)) { + auto old_set = gs::get_from_tuple(tuple); + using old_set_t = typename std::remove_const_t< + std::remove_reference_t>; + + return KeyedAggT, + std::integer_sequence>:: + create_agg_builder(old_set, graph, agg.selectors_); + } else { + return KeyedAggT, + std::integer_sequence>:: + create_agg_builder(head, graph, agg.selectors_); + } + } + + // insert_to_key_set with respect to property type + template + static inline auto insert_to_keyed_set_with_prop_getter( + BuilderT& builder, const PROP_GETTER& prop_getter, const ELE& ele) { + return builder.insert(prop_getter.get_view(ele)); + } + + // insert_into_bulder_v2_impl + template >>::type* = nullptr> + static inline auto insert_to_keyed_set(BuilderT& builder, const ELE& ele, + const DATA& data) { + return builder.insert(ele); + } + + // insert_into_bulder_v2_impl + template >>::type* = nullptr> + static inline auto insert_to_keyed_set(BuilderT& builder, const ELE& ele, + const DATA& data) { + return builder.insert(ele, data); + } +}; +} // namespace gs + +#endif // ENGINES_HQPS_ENGINE_OPERATOR_GROUP_H_ diff --git a/flex/engines/hqps_db/core/operator/path_expand.h b/flex/engines/hqps_db/core/operator/path_expand.h new file mode 100644 index 000000000000..43cf2e2f01cf --- /dev/null +++ b/flex/engines/hqps_db/core/operator/path_expand.h @@ -0,0 +1,378 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef ENGINES_HQPS_ENGINE_OPERATOR_PATH_EXPAND_H_ +#define ENGINES_HQPS_ENGINE_OPERATOR_PATH_EXPAND_H_ + +#include + +#include "flex/engines/hqps_db/core/params.h" +#include "flex/engines/hqps_db/core/utils/hqps_utils.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h" + +#include "flex/storages/rt_mutable_graph/types.h" +#include "flex/utils/property/column.h" +#include "grape/utils/bitset.h" + +namespace gs { + +/** + * Path Expand expand from vertices to vertices via path. + * Can result to two different kind of input. + * - DefaultVertexSet.(EndV) + * - Path Object.(AllV) + * + * Currently we only support path expand with only one edge label and only one + *dst label. + * The input vertex set must be of one labe. + **/ + +template +class PathExpand { + public: + using label_id_t = typename GRAPH_INTERFACE::label_id_t; + using vertex_id_t = typename GRAPH_INTERFACE::vertex_id_t; + + template + using vertex_set_t = RowVertexSet; + // Path expand to vertices with columns. + + // PathExpand to vertices with vertex properties also retreived + template 0)>::type* = nullptr, + typename RES_SET_T = vertex_set_t, // int32_t is the + // length. + typename RES_T = std::pair>> + static RES_T PathExpandV( + const GRAPH_INTERFACE& graph, + const RowVertexSet& vertex_set, + PathExpandOpt&& path_expand_opt) { + // + auto cur_label = vertex_set.GetLabel(); + auto& range = path_expand_opt.range_; + auto& edge_expand_opt = path_expand_opt.edge_expand_opt_; + auto& get_v_opt = path_expand_opt.get_v_opt_; + auto tuple = PathExpandRawVMultiV( + graph, cur_label, vertex_set.GetVertices(), range, edge_expand_opt); + + auto& vids_vec = std::get<0>(tuple); + auto tuple_vec = graph.template GetVertexPropsFromVid( + cur_label, vids_vec, get_v_opt.props_); + CHECK(tuple_vec.size() == vids_vec.size()); + // prepend dist info. 
+ auto new_tuple_vec = + prepend_tuple(std::move(std::get<1>(tuple)), std::move(tuple_vec)); + auto row_vertex_set = make_row_vertex_set( + std::move(std::get<0>(tuple)), edge_expand_opt.other_label_, + std::move(new_tuple_vec), {"dist"}); + return std::make_pair(std::move(row_vertex_set), + std::move(std::get<2>(tuple))); + } + + // PathExpandV for row vertex set as input. + template , + typename RES_T = std::pair>> + static RES_T PathExpandV( + const GRAPH_INTERFACE& graph, + const TwoLabelVertexSet& vertex_set, + PathExpandOpt&& path_expand_opt) { + // + auto& range = path_expand_opt.range_; + auto& edge_expand_opt = path_expand_opt.edge_expand_opt_; + auto& get_v_opt = path_expand_opt.get_v_opt_; + + std::vector input_v_0, input_v_1; + std::vector active_ind0, active_ind1; + std::tie(input_v_0, active_ind0) = vertex_set.GetVertices(0); + std::tie(input_v_1, active_ind1) = vertex_set.GetVertices(1); + + std::vector vids_vec0, vids_vec1; + std::vector dist_vec0, dist_vec1; + std::vector offsets0, offsets1; + std::tie(vids_vec0, dist_vec0, offsets0) = PathExpandRawVMultiV( + graph, vertex_set.GetLabel(0), input_v_0, range, edge_expand_opt); + std::tie(vids_vec1, dist_vec1, offsets1) = PathExpandRawVMultiV( + graph, vertex_set.GetLabel(1), input_v_1, range, edge_expand_opt); + // merge to label output together. + + // Default vertex set to vertex set. 
+ std::vector res_vids; + std::vector res_dist; + std::vector res_offsets; + res_vids.reserve(vids_vec0.size() + vids_vec1.size()); + res_dist.reserve(dist_vec0.size() + dist_vec1.size()); + res_offsets.reserve(offsets0.size() + offsets1.size()); + res_offsets.emplace_back(0); + auto& bitset = vertex_set.GetBitset(); + auto input_size = vertex_set.GetVertices().size(); + + size_t cur_0_cnt = 0, cur_1_cnt = 0; + CHECK(offsets0.size() + offsets1.size() == input_size + 2); + for (auto i = 0; i < input_size; ++i) { + if (bitset.get_bit(i)) { + CHECK(cur_0_cnt < offsets0.size() - 1); + auto start = offsets0[cur_0_cnt]; + auto end = offsets0[cur_0_cnt + 1]; + for (auto j = start; j < end; ++j) { + res_vids.emplace_back(vids_vec0[j]); + res_dist.emplace_back(dist_vec0[j]); + } + cur_0_cnt += 1; + } else { + CHECK(cur_1_cnt < offsets1.size() - 1); + auto start = offsets1[cur_1_cnt]; + auto end = offsets1[cur_1_cnt + 1]; + for (auto j = start; j < end; ++j) { + res_vids.emplace_back(vids_vec1[j]); + res_dist.emplace_back(dist_vec1[j]); + } + cur_1_cnt += 1; + } + res_offsets.emplace_back(res_vids.size()); + } + + auto tuple_vec = single_col_vec_to_tuple_vec(std::move(res_dist)); + auto row_vertex_set = + make_row_vertex_set(std::move(res_vids), edge_expand_opt.other_label_, + std::move(tuple_vec), {"dist"}); + return std::make_pair(std::move(row_vertex_set), std::move(res_offsets)); + } + + // PathExpandV for two_label_vertex set as input. + template , + typename RES_T = std::pair>> + static RES_T PathExpandV( + const GRAPH_INTERFACE& graph, + const RowVertexSet& vertex_set, + PathExpandOpt&& path_expand_opt) { + // + auto cur_label = vertex_set.GetLabel(); + auto& range = path_expand_opt.range_; + auto& edge_expand_opt = path_expand_opt.edge_expand_opt_; + auto& get_v_opt = path_expand_opt.get_v_opt_; + auto tuple = PathExpandRawVMultiV( + graph, cur_label, vertex_set.GetVertices(), range, edge_expand_opt); + + // Default vertex set to vertex set. 
+ auto& vids_vec = std::get<0>(tuple); + auto tuple_vec = single_col_vec_to_tuple_vec(std::move(std::get<1>(tuple))); + auto row_vertex_set = make_row_vertex_set(std::move(std::get<0>(tuple)), + edge_expand_opt.other_label_, + std::move(tuple_vec), {"dist"}); + return std::make_pair(std::move(row_vertex_set), + std::move(std::get<2>(tuple))); + } + + template + static std::tuple, std::vector, + std::vector> + PathExpandRawV2ForSingleV( + const GRAPH_INTERFACE& graph, LabelT src_label, + const std::vector& src_vertices_vec, Range& range, + EdgeExpandOpt& edge_expand_opt) { + // auto src_label = vertex_set.GetLabel(); + // auto src_vertices_vec = vertex_set.GetVertices(); + auto src_vertices_size = src_vertices_vec.size(); + vertex_id_t src_id = src_vertices_vec[0]; + + std::vector gids; + std::vector tmp_vec; + std::vector offsets; + // std::vector> gids; + // std::vector> offsets; + std::unordered_set visited_vertices; + std::vector dists; + + // init for index 0 + tmp_vec.emplace_back(src_id); + visited_vertices.insert(src_id); + if (range.start_ == 0) { + gids.emplace_back(src_id); + dists.emplace_back(0); + } + + double visit_array_time = 0.0; + for (auto cur_hop = 1; cur_hop < range.limit_; ++cur_hop) { + double t0 = -grape::GetCurrentTime(); + std::vector unused; + std::tie(tmp_vec, unused) = graph.GetOtherVerticesV2( + src_label, edge_expand_opt.other_label_, edge_expand_opt.edge_label_, + tmp_vec, gs::to_string(edge_expand_opt.dir_), INT_MAX); + // remove duplicate + size_t limit = 0; + for (auto i = 0; i < tmp_vec.size(); ++i) { + if (visited_vertices.find(tmp_vec[i]) == visited_vertices.end()) { + tmp_vec[limit++] = tmp_vec[i]; + } + } + tmp_vec.resize(limit); + if (cur_hop >= range.start_) { + // emplace tmp_vec to gids; + for (auto i = 0; i < tmp_vec.size(); ++i) { + auto nbr_gid = tmp_vec[i]; + auto insert_res = visited_vertices.insert(nbr_gid); + if (insert_res.second) { + gids.emplace_back(nbr_gid); + dists.emplace_back(cur_hop); + } + } + } else { + 
// when cur_hop is not included, we also need to insert vertices into + // set, to avoid duplicated. + for (auto i = 0; i < tmp_vec.size(); ++i) { + auto nbr_gid = tmp_vec[i]; + visited_vertices.insert(nbr_gid); + } + } + } + LOG(INFO) << "visit array time: " << visit_array_time + << ", gid size: " << gids.size(); + // select vetices that are in range. + offsets.emplace_back(0); + offsets.emplace_back(gids.size()); + + return std::make_tuple(std::move(gids), std::move(dists), + std::move(offsets)); + } + + // TODO: dedup can be used to speed up the query when the input vertices + // size if 1. + // const VERTEX_SET_T& vertex_set, + template + static std::tuple, std::vector, + std::vector> + PathExpandRawVMultiV(const GRAPH_INTERFACE& graph, LabelT src_label, + const std::vector& src_vertices_vec, + Range& range, + EdgeExpandOpt& edge_expand_opt) { + // auto src_label = vertex_set.GetLabel(); + // auto src_vertices_vec = vertex_set.GetVertices(); + auto src_vertices_size = src_vertices_vec.size(); + if (src_vertices_size == 1) { + LOG(INFO) + << "[NOTE:] PathExpandRawVMultiV is used for single vertex expand, " + "dedup is enabled."; + return PathExpandRawV2ForSingleV(graph, src_label, src_vertices_vec, + range, edge_expand_opt); + } + std::vector> gids; + std::vector> offsets; + std::unordered_set visited_vertices; + + gids.resize(range.limit_); + offsets.resize(range.limit_); + for (auto i = 0; i < range.limit_; ++i) { + offsets.reserve(src_vertices_size + 1); + } + + // init for index 0 + gids[0].insert(gids[0].begin(), src_vertices_vec.begin(), + src_vertices_vec.end()); + // offsets[0] set with all 1s + for (auto i = 0; i < src_vertices_size; ++i) { + offsets[0].emplace_back(i); + } + offsets[0].emplace_back(src_vertices_size); + visited_vertices.insert(src_vertices_vec.begin(), src_vertices_vec.end()); + + double visit_array_time = 0.0; + for (auto cur_hop = 1; cur_hop < range.limit_; ++cur_hop) { + double t0 = -grape::GetCurrentTime(); + auto pair = 
graph.GetOtherVerticesV2( + src_label, edge_expand_opt.other_label_, edge_expand_opt.edge_label_, + gids[cur_hop - 1], gs::to_string(edge_expand_opt.dir_), INT_MAX); + + gids[cur_hop].swap(pair.first); + CHECK(gids[cur_hop - 1].size() + 1 == pair.second.size()); + // offsets[cur_hop].swap(pair.second); + for (auto j = 0; j < offsets[cur_hop - 1].size(); ++j) { + auto& new_off_vec = pair.second; + offsets[cur_hop].emplace_back(new_off_vec[offsets[cur_hop - 1][j]]); + } + t0 += grape::GetCurrentTime(); + visit_array_time += t0; + } + LOG(INFO) << "visit array time: " << visit_array_time; + // select vetices that are in range. + std::vector flat_gids; + std::vector flat_offsets; + std::vector dists; + + { + size_t flat_size = 0; + for (auto i = range.start_; i < range.limit_; ++i) { + flat_size += gids[i].size(); + } + VLOG(10) << "flat size: " << flat_size; + flat_gids.reserve(flat_size); + dists.reserve(flat_size); + flat_offsets.reserve(src_vertices_size + 1); + + flat_offsets.emplace_back(0); + // for vertices already appears in [0, range.start_) + // we add vertices to vertex set, but we don't add them to flat_gids + // and dists. 
+ + for (auto i = 0; i < src_vertices_size; ++i) { + // size_t prev_size = flat_gids.size(); + for (auto j = range.start_; j < range.limit_; ++j) { + auto start = offsets[j][i]; + auto end = offsets[j][i + 1]; + for (auto k = start; k < end; ++k) { + auto gid = gids[j][k]; + flat_gids.emplace_back(gids[j][k]); + dists.emplace_back(j); + // } + } + } + flat_offsets.emplace_back(flat_gids.size()); + } + } + + return std::make_tuple(std::move(flat_gids), std::move(dists), + std::move(flat_offsets)); + } + + private: + template + static auto prepend_tuple(std::vector&& first_col, + std::vector>&& old_cols) { + CHECK(first_col.size() == old_cols.size()); + std::vector> res_vec; + res_vec.reserve(old_cols.size()); + for (auto i = 0; i < old_cols.size(); ++i) { + res_vec.emplace_back(std::tuple_cat(std::make_tuple(first_col[i]), + std::move(old_cols[i]))); + } + return res_vec; + } + + template + static auto single_col_vec_to_tuple_vec(std::vector&& vec) { + std::vector> res_vec; + res_vec.reserve(vec.size()); + for (auto i = 0; i < vec.size(); ++i) { + res_vec.emplace_back(std::make_tuple(vec[i])); + } + return res_vec; + } +}; + +} // namespace gs + +#endif // ENGINES_HQPS_ENGINE_OPERATOR_PATH_EXPAND_H_ diff --git a/flex/engines/hqps_db/core/operator/project.h b/flex/engines/hqps_db/core/operator/project.h new file mode 100644 index 000000000000..56393d13d891 --- /dev/null +++ b/flex/engines/hqps_db/core/operator/project.h @@ -0,0 +1,468 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef ENGINES_HQPS_ENGINE_OPERATOR_PROJECT_H_ +#define ENGINES_HQPS_ENGINE_OPERATOR_PROJECT_H_ + +#include +#include +#include + +#include "flex/engines/hqps_db/core/context.h" +#include "flex/engines/hqps_db/core/params.h" +#include "flex/engines/hqps_db/core/utils/keyed.h" + +#include "flex/engines/hqps_db/structures/collection.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/general_vertex_set.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/two_label_vertex_set.h" + +namespace gs { + +template +struct ResultOfContextKeyAlias; + +// project one single property +template +struct ResultOfContextKeyAlias< + Context, + IdentityMapper>> { + using context_t = Context; + using ctx_node_t = std::remove_reference_t().template GetNode())>; + using result_t = Collection; +}; + +// project the tag itself. +template +struct ResultOfContextKeyAlias< + Context, + IdentityMapper>> { + using context_t = Context; + using ctx_node_t = std::remove_reference_t().template GetNode())>; + using result_t = ctx_node_t; +}; + +template +struct ResultContextTWithPrevTuple; + +template +struct ResultContextTWithPrevTuple> { + // FIXME: use correct append_opt + using result_t = + typename ResultContextT::result_t; +}; + +template +struct ProjectResT; + +template +struct ProjectResT, + ProjectOpt> { + using old_ctx_t = Context; + using project_opt_t = ProjectOpt; + static constexpr size_t num_key_alias = sizeof...(KEY_ALIAS_T); + using last_key_alias_t = + std::tuple_element_t>; + + using new_head_t = + typename ResultOfContextKeyAlias::result_t; + static constexpr int new_head_alias = last_key_alias_t::res_alias; + using proj_res_tuple_t = std::tuple< + typename ResultOfContextKeyAlias::result_t...>; + using first_n_of_key_alias_tuple = + typename first_n::type; + using result_t = typename ResultContextTWithPrevTuple< + new_head_alias, new_head_t, 
cur_alias, CTX_HEAD_T, base_tag, + typename TupleCatT, + first_n_of_key_alias_tuple>::tuple_cat_t>::result_t; +}; + +template +struct ProjectResT, + ProjectOpt> { + using old_ctx_t = Context; + using project_opt_t = ProjectOpt; + static constexpr size_t num_key_alias = sizeof...(KEY_ALIAS_T); + using last_key_alias_t = + std::tuple_element_t>; + + using new_head_t = + typename ResultOfContextKeyAlias::result_t; + static constexpr int new_head_alias = last_key_alias_t::res_alias; + using proj_res_tuple_t = std::tuple< + typename ResultOfContextKeyAlias::result_t...>; + using first_n_of_key_alias_tuple = + typename first_n::type; + using result_t = typename ResultContextTWithPrevTuple< + new_head_alias, new_head_t, cur_alias, CTX_HEAD_T, base_tag, + typename TupleCatT, + first_n_of_key_alias_tuple>::tuple_cat_t>::result_t; +}; + +template +class ProjectOp { + public: + // specialized to append + // Project a previous tag and append to traversal. + template ::type* = nullptr> + static auto ProjectImpl( + const GRAPH_INTERFACE& graph, + Context&& ctx, + std::tuple&& mappers) { + auto node_size = gs::Get<-1>(ctx).Size(); + VLOG(10) << "Current head size: " << node_size; + + std::vector offsets(node_size + 1, 0); + for (auto i = 1; i <= node_size; ++i) { + offsets[i] = i; + } + + return apply_projects_append<0>(graph, std::move(ctx), mappers, offsets); + } + + // implementation for project is false, only proj one column + template ::type* = nullptr> + static auto ProjectImpl( + const GRAPH_INTERFACE& graph, + Context&& ctx, + std::tuple&& mappers) { + auto node_size = gs::Get<-1>(ctx).Size(); + VLOG(10) << "Current head size: " << node_size; + + auto head = apply_single_project(graph, ctx, std::get<0>(mappers)); + using new_head_t = + std::remove_const_t>; + return Context(std::move(head)); + } + + // implementation for project is false. 
project multiple columns + template + 1)>::type* = nullptr> + static auto ProjectImpl( + const GRAPH_INTERFACE& graph, + Context&& ctx, + std::tuple&& mappers) { + static constexpr size_t proj_num = sizeof...(ProjectMapper); + auto node_size = gs::Get<-1>(ctx).Size(); + // VLOG(10) << "Current head size: " << node_size; + std::vector offset(node_size + 1, 0); + for (auto i = 1; i <= node_size; ++i) { + offset[i] = i; + } + std::vector> offsets; + offsets.reserve(proj_num - 1); + for (auto i = 0; i < proj_num - 1; ++i) { + offsets.push_back(offset); + } + + // LOG(INFO) << "Projecting columns: " << proj_num; + auto head = + apply_single_project(graph, ctx, std::get(mappers)); + auto prev_tuple = apply_single_project_until( + graph, ctx, mappers, std::make_index_sequence{}); + using new_head_t = + std::remove_const_t>; + using first_alias_t = std::tuple_element_t<0, std::tuple>; + using last_alias_t = + std::tuple_element_t>; + return make_context<0, proj_num - 1>(std::move(prev_tuple), std::move(head), + std::move(offsets)); + } + + template ::type* = nullptr> + static auto apply_projects_append(const GRAPH_INTERFACE& graph, CTX_T&& ctx, + std::tuple& key_alias, + std::vector& offsets) { + auto new_node = apply_single_project(graph, ctx, std::get(key_alias)); + std::vector res_offsets(offsets); + auto res = ctx.template AddNode(std::move(new_node), + std::move(res_offsets)); + return apply_projects_append(graph, std::move(res), key_alias, + offsets); + } + + // For the last element, return the result + template ::type* = nullptr> + static auto apply_projects_append(const GRAPH_INTERFACE& graph, CTX_T&& ctx, + std::tuple& key_alias, + std::vector& offsets) { + auto new_node = apply_single_project(graph, ctx, std::get(key_alias)); + std::vector res_offsets(offsets); + return ctx.template AddNode(std::move(new_node), + std::move(res_offsets)); + } + + // Apply single project on old context's node until the indicated index of + // project opts + template + static auto 
apply_single_project_until( + const GRAPH_INTERFACE& graph, CTX_T& ctx, + std::tuple& proj_prop_tuple, std::index_sequence) { + static_assert(limit < sizeof...(PROJ_PROP)); + return std::make_tuple( + apply_single_project(graph, ctx, std::get(proj_prop_tuple))...); + } + + // Apply single project with IdentityMapper. + template + static auto apply_single_project( + const GRAPH_INTERFACE& graph, CTX_T& ctx, + IdentityMapper>& mapper) { + auto& node = ctx.template GetNode(); + // Create a empty copy. + auto offset_array = ctx.ObtainOffsetFromTag(in_col_id); + auto repeat_array = offset_array_to_repeat_array(std::move(offset_array)); + // A col describe what content is used to project + return apply_single_project_impl( + graph, node, mapper.selector_.prop_name_, repeat_array); + } + + // Project self. + template + static auto apply_single_project( + const GRAPH_INTERFACE& graph, CTX_T& ctx, + IdentityMapper& mapper) { + auto& node = ctx.template GetNode(); + // Create a empty copy. + auto offset_array = ctx.ObtainOffsetFromTag(in_col_id); + auto repeat_array = offset_array_to_repeat_array(std::move(offset_array)); + KeyAlias key_alias; + return node.ProjectWithRepeatArray(repeat_array, key_alias); + } + + // Project with single mapper + template + static auto apply_single_project( + const GRAPH_INTERFACE& graph, CTX_T& ctx, + MultiMapper, in_col_id...>& mapper) { + // using expr_trait = gs::function_traits())>; + + // using expr_result_t = typename expr_trait::result_type; + using expr_result_t = typename EXPR::result_t; + std::vector res_vec; + res_vec.reserve(ctx.GetHead().Size()); + auto expr = mapper.expr_; + auto prop_desc = + create_prop_descs_from_selectors(mapper.selectors_); + auto prop_getters = + create_prop_getters_from_prop_desc(graph, ctx, prop_desc); + LOG(INFO) << "In project with expression, successfully got prop getters"; + for (auto iter : ctx) { + auto ele_tuple = iter.GetAllElement(); + res_vec.emplace_back(evaluate_proj_expr(expr, ele_tuple, 
prop_getters)); + } + return Collection(std::move(res_vec)); + } + + ///////////////////Project implementation for all data structures. + + // single label vertex set. + template + static auto apply_single_project_impl( + const GRAPH_INTERFACE& graph, + RowVertexSetImpl& node, + const std::string& prop_name, const std::vector& repeat_array) { + // Get property from storage. + auto prop_tuple_vec = graph.template GetVertexPropsFromVid( + node.GetLabel(), node.GetVertices(), {prop_name}); + // VLOG(10) << "Finish fetching properties"; + node.fillBuiltinProps(prop_tuple_vec, {prop_name}, repeat_array); + std::vector res_prop_vec; + for (auto i = 0; i < repeat_array.size(); ++i) { + for (auto j = 0; j < repeat_array[i]; ++j) { + res_prop_vec.push_back(std::get<0>(prop_tuple_vec[i])); + } + } + // check builtin properties. + // Found if there is any builtin properties need. + + return Collection(std::move(res_prop_vec)); + } + + // single keyed label vertex set. + template + static auto apply_single_project_impl( + const GRAPH_INTERFACE& graph, + KeyedRowVertexSetImpl& node, + const std::string& prop_name, const std::vector& repeat_array) { + LOG(INFO) << "[Single project on KeyedRowVertexSet:]" << node.GetLabel(); + // Get property from storage. + auto prop_tuple_vec = graph.template GetVertexPropsFromVid( + node.GetLabel(), node.GetVertices(), {prop_name}); + // VLOG(10) << "Finish fetching properties"; + node.fillBuiltinProps(prop_tuple_vec, {prop_name}, repeat_array); + std::vector res_prop_vec; + for (auto i = 0; i < repeat_array.size(); ++i) { + for (auto j = 0; j < repeat_array[i]; ++j) { + res_prop_vec.push_back(std::get<0>(prop_tuple_vec[i])); + } + } + // check builtin properties. + // Found if there is any builtin properties need. + + return Collection(std::move(res_prop_vec)); + } + + // project for two label vertex set. 
+ template + static auto apply_single_project_impl( + const GRAPH_INTERFACE& graph, + TwoLabelVertexSetImpl& node, + const std::string& prop_name, const std::vector& repeat_array) { + auto tmp_prop_vec = + get_property_tuple_two_label(graph, node, {prop_name}); + + // make_repeat; + size_t sum = 0; + bool flag = true; + for (auto i = 0; i < repeat_array.size(); ++i) { + if (repeat_array[i] != 1) { + flag = false; + } + sum += repeat_array[i]; + } + std::vector res_prop_vec; + if (flag) { + { + // convert tuple to vector. + res_prop_vec.reserve(tmp_prop_vec.size()); + for (auto& ele : tmp_prop_vec) { + res_prop_vec.emplace_back(std::get<0>(ele)); + } + } + return Collection(std::move(res_prop_vec)); + } else { + res_prop_vec.reserve(sum); + for (auto i = 0; i < repeat_array.size(); ++i) { + for (auto j = 0; j < repeat_array[i]; ++j) { + res_prop_vec.emplace_back(std::get<0>(tmp_prop_vec[i])); + } + } + return Collection(std::move(res_prop_vec)); + } + } + + // general vertex set. + template + static auto apply_single_project_impl( + const GRAPH_INTERFACE& graph, GeneralVertexSet& node, + const std::string& prop_name_, const std::vector& repeat_array) { + VLOG(10) << "start fetching properties"; + auto tmp_prop_vec = + get_property_tuple_general(graph, node, {prop_name_}); + std::vector res_prop_vec; + // make_repeat; + size_t sum = 0; + bool flag = true; + for (auto i = 0; i < repeat_array.size(); ++i) { + if (repeat_array[i] != 1) { + flag = false; + } + sum += repeat_array[i]; + } + if (flag) { + { + // convert tmp_prop_vec to vector. 
+ res_prop_vec.reserve(tmp_prop_vec.size()); + for (auto& ele : tmp_prop_vec) { + res_prop_vec.push_back(ele); + } + } + return Collection(std::move(tmp_prop_vec)); + } else { + res_prop_vec.reserve(sum); + for (auto i = 0; i < repeat_array.size(); ++i) { + for (auto j = 0; j < repeat_array[i]; ++j) { + res_prop_vec.push_back(std::get<0>(tmp_prop_vec[i])); + } + } + return Collection(std::move(res_prop_vec)); + } + } + + // single label edge set + template ::type* = nullptr> + static auto apply_single_project_impl( + const GRAPH_INTERFACE& graph, NODE_T& node, const std::string& prop_name, + const std::vector& repeat_array) { + VLOG(10) << "Finish fetching properties"; + std::vector> tmp_prop_vec; + { + size_t sum = 0; + for (auto v : repeat_array) { + sum += v; + } + tmp_prop_vec.resize(sum); + } + // We assume edge properties are already got in getEdges. + node.fillBuiltinProps(tmp_prop_vec, {prop_name}, repeat_array); + + std::vector res_prop_vec; + { + // convert tmp_prop_vec to vector. 
+ res_prop_vec.reserve(tmp_prop_vec.size()); + for (auto& ele : tmp_prop_vec) { + res_prop_vec.push_back(std::get<0>(ele)); + } + } + + return Collection(std::move(res_prop_vec)); + } + + // evaluate expression in project op + template + static inline auto evaluate_proj_expr( + const EXPR& expr, std::tuple& eles, + std::tuple& prop_getter_tuple) { + return evaluate_proj_expr_impl( + expr, eles, prop_getter_tuple, + std::make_index_sequence()); + } + + template + static inline auto evaluate_proj_expr_impl( + const EXPR& expr, std::tuple& eles, + std::tuple& prop_getter_tuple, + std::index_sequence) { + return expr(std::get(prop_getter_tuple).get_from_all_element(eles)...); + } +}; +} // namespace gs + +#endif // ENGINES_HQPS_ENGINE_OPERATOR_PROJECT_H_ diff --git a/flex/engines/hqps_db/core/operator/scan.h b/flex/engines/hqps_db/core/operator/scan.h new file mode 100644 index 000000000000..340bd58ebfb6 --- /dev/null +++ b/flex/engines/hqps_db/core/operator/scan.h @@ -0,0 +1,131 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#ifndef ENGINES_HQPS_ENGINE_OPERATOR_SCAN_H_ +#define ENGINES_HQPS_ENGINE_OPERATOR_SCAN_H_ + +#include +#include + +#include "flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/two_label_vertex_set.h" + +namespace gs { + +// scan for a single vertex +template +class Scan { + public: + using label_id_t = typename GRAPH_INTERFACE::label_id_t; + using vertex_id_t = typename GRAPH_INTERFACE::vertex_id_t; + using vertex_set_t = DefaultRowVertexSet; + using two_label_set_t = + TwoLabelVertexSet; + + // scan vertex with expression, support label_key in expression, + template + static vertex_set_t ScanVertex(const GRAPH_INTERFACE& graph, + const label_id_t& v_label_id, + Filter&& filter) { + auto expr = filter.expr_; + auto selectors = filter.selectors_; + auto gids = scan_vertex_with_selector(graph, v_label_id, expr, selectors); + return MakeDefaultRowVertexSet(std::move(gids), + v_label_id); + } + + /// @brief Scan Vertex from two labels. 
+ /// @tparam FUNC + /// @param graph + /// @param v_label_id + /// @param e_label_id + /// @param func + /// @return + template + static two_label_set_t ScanVertex(const GRAPH_INTERFACE& graph, + std::array&& labels, + Filter&& filter) { + static_assert(N == 2, "ScanVertex only support two labels"); + auto expr = filter.expr_; + auto selectors = filter.selectors_; + auto gids0 = scan_vertex_with_selector(graph, labels[0], expr, selectors); + auto gids1 = scan_vertex_with_selector(graph, labels[1], expr, selectors); + + // merge gids0 and gids1 + std::vector gids; + gids.reserve(gids0.size() + gids1.size()); + gids.insert(gids.end(), gids0.begin(), gids0.end()); + gids.insert(gids.end(), gids1.begin(), gids1.end()); + + grape::Bitset bitset; + bitset.init(gids.size()); + for (auto i = 0; i < gids0.size(); ++i) { + bitset.set_bit(i); + } + return make_two_label_set(std::move(gids), std::move(labels), + std::move(bitset)); + } + + /// @brief Scan vertex with oid + /// @param graph + /// @param v_label_id + /// @param oid + /// @return + static vertex_set_t ScanVertexWithOid(const GRAPH_INTERFACE& graph, + const label_id_t& v_label_id, + int64_t oid) { + std::vector gids; + gids.emplace_back(graph.ScanVerticesWithOid(v_label_id, oid)); + return MakeDefaultRowVertexSet(std::move(gids), v_label_id); + } + + private: + template + static std::vector scan_vertex1_impl( + const GRAPH_INTERFACE& graph, const label_id_t& v_label_id, + const FUNC& func, const std::tuple& props) { + std::vector gids; + auto filter = [&](vertex_id_t v, + const std::tuple& real_props) { + if (apply_on_tuple(func, real_props)) { + gids.push_back(v); + } + }; + + graph.template ScanVertices(v_label_id, props, filter); + return gids; + } + + template + static std::vector scan_vertex_with_selector( + const GRAPH_INTERFACE& graph, const label_id_t& v_label_id, + const FUNC& func, const std::tuple& selectors) { + std::vector gids; + auto filter = + [&](vertex_id_t v, + const std::tuple& real_props) { + 
if (apply_on_tuple(func, real_props)) { + gids.push_back(v); + } + }; + + graph.template ScanVertices(v_label_id, selectors, filter); + return gids; + } +}; + +} // namespace gs + +#endif // ENGINES_HQPS_ENGINE_OPERATOR_SCAN_H_ diff --git a/flex/engines/hqps_db/core/operator/shorest_path.h b/flex/engines/hqps_db/core/operator/shorest_path.h new file mode 100644 index 000000000000..01148b0c705e --- /dev/null +++ b/flex/engines/hqps_db/core/operator/shorest_path.h @@ -0,0 +1,284 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef ENGINES_HQPS_ENGINE_OPERATOR_SHORTEST_PATH_H_ +#define ENGINES_HQPS_ENGINE_OPERATOR_SHORTEST_PATH_H_ + +#include +#include + +#include "flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h" +#include "flex/engines/hqps_db/structures/path.h" + +namespace gs { + +// scan for a single vertex +template +class ShortestPathOp { + public: + using label_id_t = typename GRAPH_INTERFACE::label_id_t; + using vertex_id_t = typename GRAPH_INTERFACE::vertex_id_t; + using vertex_set_t = DefaultRowVertexSet; + + // Specialize for only one label. + template ::value)>::type* = nullptr, + typename path_set_t = PathSet> + static std::pair> ShortestPath( + const GRAPH_INTERFACE& graph, const SET_T& set, + const ShortestPathOpt& + opt) { + CHECK(set.Size() == 1); + auto src_label = set.GetLabel(); + // find the vertices that satisfy the condition. 
+ auto dst_vertices = find_vertices_satisfy_condition( + graph, opt.until_condition_.expr_, set.GetLabel(), + opt.until_condition_.selectors_); + CHECK(dst_vertices.size() == 1); + CHECK(opt.edge_expand_opt_.other_label_ == src_label); + CHECK(opt.get_v_opt_.v_labels_[0] == src_label); + vertex_id_t src_vid = set.GetVertices()[0]; + vertex_id_t dst_vid = dst_vertices[0]; + VLOG(10) << "[ShortestPath]: src: " << src_vid << ", dst:" << dst_vid; + // only support one-to-one shortest path. + + auto path_set = + shortest_path_impl(graph, src_vid, dst_vid, opt.edge_expand_opt_.dir_, + opt.edge_expand_opt_.edge_label_, src_label); + + std::vector offsets{0, path_set.Size()}; + return std::make_pair(std::move(path_set), std::move(offsets)); + } + + private: + template + static PathSet shortest_path_impl( + const GRAPH_INTERFACE& graph, vertex_id_t src_vid, vertex_id_t dst_vid, + Direction direction, LabelT edge_label, LabelT vertex_label) { + std::unordered_map src_vid_dist; + std::unordered_map dst_vid_dist; + std::string direction_str = gs::to_string(direction); + int8_t src_dep = 0, dst_dep = 0; + std::queue src_q, dst_q; + std::queue tmp_q; + std::vector meet_vertices; // store the vertices meet. + src_vid_dist[src_vid] = 0; + dst_vid_dist[dst_vid] = 0; + src_q.push(src_vid); + dst_q.push(dst_vid); + while (true) { + if (!src_q.empty() && (src_q.size() <= dst_q.size())) { + // expand from src. + ++src_dep; + VLOG(10) << "Expand From src, current depth: " << src_dep + << " queue size: " << src_q.size(); + + expand_from_queue(graph, vertex_label, edge_label, direction_str, + src_dep, src_q, tmp_q, src_vid_dist, dst_vid_dist, + meet_vertices); + if (!meet_vertices.empty()) { + break; + } + std::swap(src_q, tmp_q); + } else { + // expand from dst. 
+ ++dst_dep; + expand_from_queue(graph, vertex_label, edge_label, direction_str, + dst_dep, dst_q, tmp_q, dst_vid_dist, src_vid_dist, + meet_vertices); + if (!meet_vertices.empty()) { + break; + } + std::swap(dst_q, tmp_q); + } + if (src_q.empty() || dst_q.empty()) { + break; + } + } + + if (meet_vertices.empty()) { + VLOG(10) << "no meet vertices found"; + return make_empty_path_set({vertex_label}); + } + + // to find the path. + return find_paths(graph, vertex_label, edge_label, direction_str, + meet_vertices, src_vid, dst_vid, src_vid_dist, + dst_vid_dist); + } + + template + static void expand_from_queue( + const GRAPH_INTERFACE& graph, LabelT v_label, LabelT edge_label, + const std::string& direction, int8_t depth, + std::queue& src_q, std::queue& tmp_q, + std::unordered_map& cur_vid_dist, + std::unordered_map& other_vid_dist, + std::vector& meeted_vertices) { + std::vector ids_to_query; + ids_to_query.reserve(src_q.size()); + while (!src_q.empty()) { + auto src_v = src_q.front(); + src_q.pop(); + ids_to_query.emplace_back(src_v); + } + auto nbr_list_array = graph.GetOtherVertices( + v_label, v_label, edge_label, ids_to_query, direction, INT_MAX); + for (auto i = 0; i < nbr_list_array.size(); ++i) { + for (auto nbr : nbr_list_array.get(i)) { + auto v = nbr.neighbor(); + if (cur_vid_dist.find(v) == cur_vid_dist.end()) { + cur_vid_dist[v] = depth; + tmp_q.push(v); + if (other_vid_dist.find(v) != other_vid_dist.end()) { + meeted_vertices.push_back(v); + } + } + } + } + VLOG(10) << "push " << tmp_q.size() << " ele to new queue" + << ", meeted vertices: " << meeted_vertices.size(); + } + + static void dfs( + vertex_id_t src_vid, vertex_id_t dst_vid, + std::vector& cur_path, + std::unordered_map dist_from_src, + std::unordered_set valid_vertex_set, + std::vector>& paths, + std::unordered_map>& + vid_to_nbr_list) { + VLOG(10) << "cur: " << src_vid << ", cur_path: " << gs::to_string(cur_path); + cur_path.push_back(src_vid); + + if (src_vid == dst_vid) { + VLOG(10) << 
"Reach dst : " << gs::to_string(cur_path); + paths.push_back(cur_path); + cur_path.pop_back(); + return; + } + CHECK(vid_to_nbr_list.find(src_vid) != vid_to_nbr_list.end()); + for (auto nbr : vid_to_nbr_list[src_vid]) { + CHECK(dist_from_src.count(src_vid) > 0); + auto v = nbr.neighbor(); + if (valid_vertex_set.find(v) != valid_vertex_set.end()) { + CHECK(dist_from_src.count(v) > 0) << "check failed for : " << v; + if (dist_from_src[src_vid] + 1 == dist_from_src[v]) { + dfs(v, dst_vid, cur_path, dist_from_src, valid_vertex_set, paths, + vid_to_nbr_list); + } + } + } + cur_path.pop_back(); + } + + template + static PathSet find_paths( + const GRAPH_INTERFACE& graph, LabelT v_label, LabelT edge_label, + const std::string& direction, std::vector& meet_vertices, + vertex_id_t src_vid, vertex_id_t dst_vid, + std::unordered_map& src_vid_dist, + std::unordered_map& dst_vid_dist) { + std::unordered_set vertex_set; + std::unordered_map> + vid_to_nbr_list; + std::queue q; + for (auto v : meet_vertices) { + vertex_set.insert(v); + q.push(v); + } + + std::vector tmp_vec; + while (!q.empty()) { + tmp_vec.clear(); + for (auto i = 0; i < q.size(); ++i) { + auto v = q.front(); + q.pop(); + tmp_vec.emplace_back(v); + } + + auto nbr_list_array = graph.GetOtherVertices(v_label, v_label, edge_label, + tmp_vec, direction, INT_MAX); + for (auto i = 0; i < nbr_list_array.size(); ++i) { + auto cur_v = tmp_vec[i]; + for (auto nbr : nbr_list_array.get(i)) { + auto v = nbr.neighbor(); + if (vertex_set.find(v) != vertex_set.end()) { + continue; + } + if (src_vid_dist.find(v) != src_vid_dist.end() && + src_vid_dist[v] + 1 == src_vid_dist[cur_v]) { + q.push(v); + vertex_set.insert(v); + } + if (dst_vid_dist.find(v) != dst_vid_dist.end() && + dst_vid_dist[v] + 1 == dst_vid_dist[cur_v]) { + q.push(v); + vertex_set.insert(v); + src_vid_dist[v] = src_vid_dist[cur_v] + 1; + } + } + if (vid_to_nbr_list.find(cur_v) == vid_to_nbr_list.end()) { + vid_to_nbr_list.insert({cur_v, 
nbr_list_array.get_vector(i)}); + VLOG(10) << "cache nbr list for v: " << cur_v; + } + } + } + + // dfs to find path. + std::vector> paths; + std::vector cur_path; + dfs(src_vid, dst_vid, cur_path, src_vid_dist, vertex_set, paths, + vid_to_nbr_list); + VLOG(10) << "Got path size: " << paths.size(); + for (auto path : paths) { + VLOG(10) << "path: " << gs::to_string(path); + } + auto path_set = PathSet({v_label}); + for (auto path : paths) { + size_t s = path.size(); + std::vector offset(s, 0); + Path new_path(std::move(path), std::move(offset)); + path_set.EmplacePath(std::move(new_path)); + } + return path_set; + } + + template + static std::vector find_vertices_satisfy_condition( + const GRAPH_INTERFACE& graph, UNTIL_CONDITION& condition, LabelT v_label, + const std::tuple>& selectors) { + std::vector gids; + auto filter = [&](vertex_id_t v, const std::tuple& props) { + if (condition(std::get<0>(props))) { + gids.push_back(v); + } + }; + // TODO: make label param? + // auto names = std::array{std::get<0>(selectors).prop_name_}; + graph.template ScanVertices(v_label, selectors, filter); + return gids; + } +}; + +} // namespace gs + +#endif // ENGINES_HQPS_ENGINE_OPERATOR_SHORTEST_PATH_H_ diff --git a/flex/engines/hqps_db/core/operator/sink.h b/flex/engines/hqps_db/core/operator/sink.h new file mode 100644 index 000000000000..8a0cb66aca48 --- /dev/null +++ b/flex/engines/hqps_db/core/operator/sink.h @@ -0,0 +1,463 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef ENGINES_HQPS_ENGINE_OPERATOR_SINK_H_ +#define ENGINES_HQPS_ENGINE_OPERATOR_SINK_H_ + +#include +#include +#include + +#include "flex/engines/hqps_db/core/context.h" +#include "flex/engines/hqps_db/core/params.h" +#include "flex/engines/hqps_db/core/utils/hqps_utils.h" +#include "flex/engines/hqps_db/core/utils/props.h" +#include "flex/engines/hqps_db/structures/multi_edge_set/flat_edge_set.h" +#include "flex/engines/hqps_db/structures/multi_edge_set/general_edge_set.h" + +#include "proto_generated_gie/results.pb.h" + +namespace gs { + +template +size_t SizeOf(const T& t) { + return sizeof(T); +} + +template +size_t SizeOf(const std::string& t) { + return t.size(); +} + +template +size_t SizeOf(const std::string_view& t) { + return t.size(); +} + +template +size_t SizeOf(const std::vector& t) { + return t.size() * sizeof(T); +} + +template +size_t SizeOfImpl(const std::tuple& t) { + if constexpr (Id < sizeof...(T)) { + return SizeOf(std::get(t)) + SizeOfImpl(t); + } else { + return 0; + } +} + +template +size_t SizeOf(const std::tuple& t) { + return SizeOfImpl<0>(t); +} + +template +void SumEleSize(std::tuple& t, size_t& size) { + if constexpr (Is < sizeof...(T)) { + size += SizeOf(std::get(t)); + if constexpr (Is + 1 < sizeof...(T)) { + SumEleSize(t, size); + } + } +} + +template )>::type* = nullptr> +void template_set_value(common::Value* value, T v) { + value->set_i32(v); +} +template )>::type* = nullptr> +void template_set_value(common::Value* value, T v) { + value->set_i32(v); +} + +template )>::type* = nullptr> +void template_set_value(common::Value* value, T v) { + value->set_boolean(v); +} + +template )>::type* = nullptr> +void template_set_value(common::Value* value, T v) { + if constexpr (sizeof(T) == 8) { + value->set_i64(v); + } else { + value->set_i32(v); + } +} + +template ) &&( + !std::is_same_v)>::type* = nullptr> +void 
template_set_value(common::Value* value, T v) { + value->set_i64(v); +} + +template )>::type* = nullptr> +void template_set_value(common::Value* value, T v) { + value->set_i64(v); +} + +template )>::type* = nullptr> +void template_set_value(common::Value* value, T v) { + value->set_str(v.data(), v.size()); +} + +template )>::type* = nullptr> +void template_set_value(common::Value* value, T v) { + value->set_f64(v); +} + +template +void template_set_tuple_value_impl(results::Collection* collection, + const std::tuple& t) { + if constexpr (Is < sizeof...(T)) { + auto cur_ele = collection->add_collection()->mutable_object(); + template_set_value(cur_ele, std::get(t)); + if constexpr (Is + 1 < sizeof...(T)) { + template_set_tuple_value_impl(collection, t); + } + } +} + +template +void template_set_tuple_value(results::Collection* collection, + const std::tuple& t) { + template_set_tuple_value_impl(collection, t); +} + +template +void template_set_tuple_value(results::Collection* collection, + const std::vector& t) { + for (auto i = 0; i < t.size(); ++i) { + auto cur_ele = collection->add_collection()->mutable_object(); + template_set_value(cur_ele, t[i]); + } +} + +class SinkOp { + public: + // sink current context to results_pb defined in results.proto + // return results::CollectiveResults + template + static results::CollectiveResults Sink( + Context& ctx, + std::array::col_num> + tag_ids) { + using CTX_T = Context; + + // prepare enough record rows. 
+ auto size = ctx.GetHead().Size(); + // std::vector results_vec(size); + results::CollectiveResults results_vec; + for (auto i = 0; i < size; ++i) { + results_vec.add_results(); + } + LOG(INFO) << "reserve " << size << " records"; + sink_column<0>(results_vec, ctx, tag_ids); + sink_head(results_vec, ctx, tag_ids[tag_ids.size() - 1]); + return results_vec; + } + + template < + size_t I, typename CTX_T, + typename std::enable_if<(I >= CTX_T::prev_alias_num)>::type* = nullptr> + static void sink_column(results::CollectiveResults& record, CTX_T& ctx, + const std::array& tag_ids) { + LOG(INFO) << "no prev columns to sink"; + } + + template < + size_t I, typename CTX_T, + typename std::enable_if<(I < CTX_T::prev_alias_num)>::type* = nullptr> + static void sink_column(results::CollectiveResults& record, CTX_T& ctx, + const std::array& tag_ids) { + if constexpr (I < CTX_T::prev_alias_num) { + LOG(INFO) << "Projecting col: " << I; + static constexpr size_t act_tag_id = CTX_T::base_tag_id + I; + auto offset_array = ctx.ObtainOffsetFromTag(act_tag_id); + auto repeat_array = offset_array_to_repeat_array(std::move(offset_array)); + sink_col_impl(record, ctx.template GetNode(), + repeat_array, tag_ids[I]); + } + if constexpr (I + 1 < CTX_T::prev_alias_num) { + sink_column(record, ctx, tag_ids); + } + } + + template + static void sink_head(results::CollectiveResults& record, CTX_T& ctx, + int32_t tag_id) { + auto& head = ctx.GetHead(); + sink_col_impl(record, head, {}, + tag_id); + } + + template + static void sink_col_impl( + results::CollectiveResults& results_vec, + const RowVertexSetImpl& vertex_set, + const std::vector& repeat_offsets, int32_t tag_id) { + auto label = vertex_set.GetLabel(); + auto& vids = vertex_set.GetVertices(); + return sink_col_impl_for_vertex_set( + label, vids, results_vec, repeat_offsets, tag_id); + } + + template + static void sink_col_impl( + results::CollectiveResults& results_vec, + const KeyedRowVertexSetImpl& + vertex_set, + const std::vector& 
repeat_offsets, int32_t tag_id) { + auto label = vertex_set.GetLabel(); + auto& vids = vertex_set.GetVertices(); + return sink_col_impl_for_vertex_set( + label, vids, results_vec, repeat_offsets, tag_id); + } + // sink row vertex set, if offsets is empty, we sink all vertices + // if offsets is set, we use offset to repeat + template + static void sink_col_impl_for_vertex_set( + LabelT label, const std::vector& vids, + results::CollectiveResults& results_vec, + const std::vector& repeat_offsets, int32_t tag_id) { + if (repeat_offsets.empty()) { + for (auto i = 0; i < vids.size(); ++i) { + auto row = results_vec.mutable_results(i); + CHECK(row->record().columns_size() == Ind); + auto record = row->mutable_record(); + auto new_col = record->add_columns(); + new_col->mutable_name_or_id()->set_id(tag_id); + new_col->mutable_entry()->mutable_element()->mutable_vertex()->set_id( + vids[i]); + new_col->mutable_entry() + ->mutable_element() + ->mutable_vertex() + ->mutable_label() + ->set_id(label); + } + } else { + CHECK(repeat_offsets.size() == vids.size()); + { + int32_t num_rows = 0; + for (auto i : repeat_offsets) { + num_rows += i; + } + CHECK(num_rows == results_vec.results_size()); + } + size_t cur_ind = 0; + for (auto i = 0; i < vids.size(); ++i) { + for (auto j = 0; j < repeat_offsets[i]; ++j) { + auto row = results_vec.mutable_results(cur_ind++); + auto record = row->mutable_record(); + auto new_col = record->add_columns(); + new_col->mutable_name_or_id()->set_id(tag_id); + new_col->mutable_entry()->mutable_element()->mutable_vertex()->set_id( + vids[i]); + new_col->mutable_entry() + ->mutable_element() + ->mutable_vertex() + ->mutable_label() + ->set_id(label); + } + } + } + } + + // sink collection of pod + template ::value) && + (!gs::is_tuple::value)>::type* = nullptr> + static void sink_col_impl(results::CollectiveResults& results_vec, + const Collection& collection, + const std::vector& repeat_offsets, + int32_t tag_id) { + if (repeat_offsets.empty()) { + 
CHECK(collection.Size() == results_vec.results_size()) + << "size neq " << collection.Size() << " " + << results_vec.results_size(); + for (auto i = 0; i < collection.Size(); ++i) { + auto row = results_vec.mutable_results(i); + CHECK(row->record().columns_size() == Ind); + auto record = row->mutable_record(); + auto new_col = record->add_columns(); + new_col->mutable_name_or_id()->set_id(tag_id); + auto common_value_ptr = + new_col->mutable_entry()->mutable_element()->mutable_object(); + template_set_value(common_value_ptr, collection.Get(i)); + } + } else { + CHECK(repeat_offsets.size() == collection.Size()); + size_t cur_ind = 0; + for (auto i = 0; i < collection.Size(); ++i) { + for (auto j = 0; j < repeat_offsets[i]; ++j) { + auto row = results_vec.mutable_results(cur_ind++); + auto record = row->mutable_record(); + auto new_col = record->add_columns(); + new_col->mutable_name_or_id()->set_id(tag_id); + auto common_value_ptr = + new_col->mutable_entry()->mutable_element()->mutable_object(); + template_set_value(common_value_ptr, collection.Get(i)); + } + } + } + } + + // sinke for tuple with one element + template ::value) && (gs::is_tuple::value) && + (gs::tuple_size::value == 1)>::type* = nullptr> + static void sink_col_impl(results::CollectiveResults& results_vec, + const Collection& collection, + const std::vector& repeat_offsets, + int32_t tag_id) { + if (repeat_offsets.empty()) { + CHECK(collection.Size() == results_vec.results_size()) + << "size neq " << collection.Size() << " " + << results_vec.results_size(); + for (auto i = 0; i < collection.Size(); ++i) { + auto row = results_vec.mutable_results(i); + CHECK(row->record().columns_size() == Ind) + << "record column size: " << row->record().columns_size() + << ", ind: " << Ind; + auto record = row->mutable_record(); + auto new_col = record->add_columns(); + new_col->mutable_name_or_id()->set_id(tag_id); + auto mutable_ele = + new_col->mutable_entry()->mutable_element()->mutable_object(); + 
template_set_value>( + mutable_ele, std::get<0>(collection.Get(i))); + } + } else { + CHECK(repeat_offsets.size() == collection.Size()); + size_t cur_ind = 0; + for (auto i = 0; i < collection.Size(); ++i) { + for (auto j = 0; j < repeat_offsets[i]; ++j) { + // auto& row = results_vec[cur_ind++]; + auto row = results_vec.mutable_results(i); + auto record = row->mutable_record(); + auto new_col = record->add_columns(); + new_col->mutable_name_or_id()->set_id(tag_id); + auto mutable_ele = + new_col->mutable_entry()->mutable_element()->mutable_object(); + template_set_value>( + mutable_ele, std::get<0>(collection.Get(i))); + } + } + } + } + + // sink for tuple, with more than one element + template ::value) && (gs::is_tuple::value) && + ((gs::tuple_size::value) > 1)>::type* = nullptr> + static void sink_col_impl(results::CollectiveResults& results_vec, + const Collection& collection, + const std::vector& repeat_offsets, + int32_t tag_id) { + if (repeat_offsets.empty()) { + CHECK(collection.Size() == results_vec.results_size()) + << "size neq " << collection.Size() << " " + << results_vec.results_size(); + for (auto i = 0; i < collection.Size(); ++i) { + auto row = results_vec.mutable_results(i); + CHECK(row->record().columns_size() == Ind) + << "record column size: " << row->record().columns_size() + << ", ind: " << Ind; + auto record = row->mutable_record(); + auto new_col = record->add_columns(); + new_col->mutable_name_or_id()->set_id(tag_id); + auto mutable_collection = + new_col->mutable_entry()->mutable_collection(); + template_set_tuple_value(mutable_collection, collection.Get(i)); + } + } else { + CHECK(repeat_offsets.size() == collection.Size()); + size_t cur_ind = 0; + for (auto i = 0; i < collection.Size(); ++i) { + for (auto j = 0; j < repeat_offsets[i]; ++j) { + // auto& row = results_vec[cur_ind++]; + auto row = results_vec.mutable_results(i); + auto record = row->mutable_record(); + auto new_col = record->add_columns(); + 
new_col->mutable_name_or_id()->set_id(tag_id); + auto mutable_collection = + new_col->mutable_entry()->mutable_collection(); + template_set_tuple_value(mutable_collection, collection.Get(i)); + } + } + } + } + + // sink for collection of vector. + template ::value)>::type* = nullptr> + static void sink_col_impl(results::CollectiveResults& results_vec, + const Collection& collection, + const std::vector& repeat_offsets, + int32_t tag_id) { + if (repeat_offsets.empty()) { + CHECK(collection.Size() == results_vec.results_size()) + << "size neq " << collection.Size() << " " + << results_vec.results_size(); + for (auto i = 0; i < collection.Size(); ++i) { + // auto& row = results_vec[i]; + auto row = results_vec.mutable_results(i); + CHECK(row->record().columns_size() == Ind) + << "record column size: " << row->record().columns_size() + << ", ind: " << Ind; + auto record = row->mutable_record(); + auto new_col = record->add_columns(); + new_col->mutable_name_or_id()->set_id(tag_id); + auto mutable_collection = + new_col->mutable_entry()->mutable_collection(); + template_set_tuple_value(mutable_collection, collection.Get(i)); + } + } else { + CHECK(repeat_offsets.size() == collection.Size()); + size_t cur_ind = 0; + for (auto i = 0; i < collection.Size(); ++i) { + for (auto j = 0; j < repeat_offsets[i]; ++j) { + // auto& row = results_vec[cur_ind++]; + auto row = results_vec.mutable_results(i); + auto record = row->mutable_record(); + auto new_col = record->add_columns(); + new_col->mutable_name_or_id()->set_id(tag_id); + auto mutable_collection = + new_col->mutable_entry()->mutable_collection(); + template_set_tuple_value(mutable_collection, collection.Get(i)); + } + } + } + } +}; +} // namespace gs + +#endif // ENGINES_HQPS_ENGINE_OPERATOR_SINK_H_ diff --git a/flex/engines/hqps_db/core/operator/sort.h b/flex/engines/hqps_db/core/operator/sort.h new file mode 100644 index 000000000000..7cabdf94c4b9 --- /dev/null +++ b/flex/engines/hqps_db/core/operator/sort.h @@ -0,0 
+1,431 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef ENGINES_HQPS_ENGINE_OPERATOR_SORT_H_ +#define ENGINES_HQPS_ENGINE_OPERATOR_SORT_H_ + +#include +#include + +#include "flex/engines/hqps_db/core/context.h" + +#include "flex/engines/hqps_db/core/params.h" +#include "flex/engines/hqps_db/core/utils/hqps_utils.h" +#include "flex/engines/hqps_db/structures/multi_edge_set/flat_edge_set.h" +#include "flex/engines/hqps_db/structures/multi_edge_set/general_edge_set.h" + +#include "flex/engines/hqps_db/core/utils/props.h" + +namespace gs { + +template +struct TupleComparator { + static constexpr size_t num_pairs = sizeof...(ORDER_PAIRS); + std::tuple order_pairs_; + + TupleComparator(std::tuple& order_pairs) + : order_pairs_(order_pairs) {} + + template + inline bool operator()(const TUPLE_T& left, const TUPLE_T& right) const { + return compare_impl<0>(left, right); + } + + template < + size_t Is, typename TUPLE_T, + typename std::enable_if< + (std::tuple_element_t>::sort_order == + SortOrder::ASC)>::type* = nullptr> + inline bool compare_impl(const TUPLE_T& left, const TUPLE_T& right) const { + auto lv = std::get(left); + auto rv = std::get(right); + if (lv < rv) { + return true; + } + if (lv > rv) { + return false; + } + if constexpr (Is + 1 < num_pairs) { + return compare_impl(left, right); + } else { + return true; + } + } + + template < + size_t Is, typename TUPLE_T, + typename std::enable_if< + 
(std::tuple_element_t>::sort_order == + SortOrder::DESC)>::type* = nullptr> + inline bool compare_impl(const TUPLE_T& left, const TUPLE_T& right) const { + auto lv = std::get(left); + auto rv = std::get(right); + if (lv > rv) { + return true; + } + if (lv < rv) { + return false; + } + if constexpr (Is + 1 < num_pairs) { + return compare_impl(left, right); + } else { + return true; + } + } +}; // namespace gs + +template +struct GeneralComparator { + static constexpr size_t num_pairs = sizeof...(ORDER_PAIRS); + static constexpr auto num_pair_ind_seq = + std::make_index_sequence(); + const std::tuple& order_pairs_; + GeneralComparator(const std::tuple& order_pairs) + : order_pairs_(order_pairs) {} + + template + inline bool operator()(const std::tuple& ele_tuple, + const std::tuple& top_tuple, + const std::tuple& getter) const { + return compare_impl<0>(ele_tuple, top_tuple, getter); + } + + template + inline auto get_sort_tuple(const std::tuple& ele_tuple, + const std::tuple& getter, + size_t cnt) const { + return get_sort_tuple(ele_tuple, getter, cnt, num_pair_ind_seq); + } + + template + inline auto get_sort_tuple(const std::tuple& ele_tuple, + const std::tuple& getter, size_t cnt, + std::index_sequence) const { + return std::tuple{get_sort_tuple(ele_tuple, getter)..., cnt}; + } + + template + inline auto get_sort_tuple(const std::tuple& ele_tuple, + const std::tuple& getter) const { + static constexpr int tag_id = + std::tuple_element_t>::tag_id; + if constexpr (tag_id == -1) { + static constexpr int new_tag_id = sizeof...(IND_ELE) - 1; + return std::get(getter).get_view(std::get(ele_tuple)); + } else { + static constexpr int new_tag_id = tag_id - base_tag; + return std::get(getter).get_view(std::get(ele_tuple)); + } + } + + template < + size_t Is, typename... IND_ELE, typename... T, typename... 
GETTER, + typename std::enable_if< + (std::tuple_element_t>::sort_order == + SortOrder::ASC)>::type* = nullptr> + inline bool compare_impl(const std::tuple& ele_tuple, + const std::tuple& top_tuple, + const std::tuple& getters) const { + auto& getter = std::get(getters); + static constexpr int tag_id = + std::tuple_element_t>::tag_id; + if constexpr (tag_id == -1) { + static constexpr int new_tag_id = sizeof...(IND_ELE) - 1; + auto data = getter.get_view(gs::get_from_tuple(ele_tuple)); + if (data < std::get(top_tuple)) { + return true; + } + if (data > std::get(top_tuple)) { + return false; + } + if constexpr (Is + 1 < num_pairs) { + return compare_impl(ele_tuple, top_tuple, getters); + } else { + return true; + } + } else { + static constexpr int new_tag_id = tag_id - base_tag; + auto data = getter.get_view(gs::get_from_tuple(ele_tuple)); + if (data < std::get(top_tuple)) { + return true; + } + if (data > std::get(top_tuple)) { + return false; + } + if constexpr (Is + 1 < num_pairs) { + return compare_impl(ele_tuple, top_tuple, getters); + } else { + return true; + } + } + } + + template < + size_t Is, typename... IND_ELE, typename... T, typename... 
GETTER, + typename std::enable_if< + (std::tuple_element_t>::sort_order == + SortOrder::DESC)>::type* = nullptr> + inline bool compare_impl(const std::tuple& ele_tuple, + const std::tuple& top_tuple, + const std::tuple& getters) const { + auto& getter = std::get(getters); + static constexpr int tag_id = + std::tuple_element_t>::tag_id; + if constexpr (tag_id == -1) { + static constexpr int new_tag_id = sizeof...(IND_ELE) - 1; + auto data = getter.get_view(gs::get_from_tuple(ele_tuple)); + if (data > std::get(top_tuple)) { + return true; + } + if (data < std::get(top_tuple)) { + return false; + } + if constexpr (Is + 1 < num_pairs) { + return compare_impl(ele_tuple, top_tuple, getters); + } else { + return true; + } + } else { + static constexpr int new_tag_id = tag_id - base_tag; + auto data = getter.get_view(gs::get_from_tuple(ele_tuple)); + if (data > std::get(top_tuple)) { + return true; + } + if (data < std::get(top_tuple)) { + return false; + } + if constexpr (Is + 1 < num_pairs) { + return compare_impl(ele_tuple, top_tuple, getters); + } else { + return true; + } + } + } +}; + +template +struct ResultTOfContextOrderPair; + +// Result of the data type after apply order pair +template +struct ResultTOfContextOrderPair< + Context, ORDER_PAIR> { + static constexpr int tag_id = ORDER_PAIR::tag_id; + static constexpr size_t col_id = ORDER_PAIR::col_id; + using context_t = Context; + using context_iter_t = typename context_t::iterator; + // using ctx_node_t = + // std::remove_reference_t().template + // GetNode())>; + using data_tuple_t = decltype(std::declval().GetAllData()); + + using tag_data_tuple_t = + typename gs::tuple_element::type; + using result_t = typename gs::tuple_element::type; +}; + +template +class SortOp { + public: + template ::index_ele_tuples_t> + static auto SortTopK( + const GRAPH_INTERFACE& graph, + Context&& ctx, + std::tuple&& tuples, size_t limit) { + VLOG(10) << "[SortTopK]: limit: " << limit + << ", input size: " << ctx.GetHead().Size(); 
+ std::apply( + [](auto&... args) { + ((LOG(INFO) << "SortTopK: " << args.name << " "), ...); + }, + tuples); + + // Generate tuples from ctx, with required key. + + using ctx_t = Context; + using sort_tuple_t = std::tuple; + + TupleComparator tuple_sorter(tuples); + std::priority_queue, + TupleComparator> + pq(tuple_sorter); + sort_tuple_t empty_tuple; + sort_tuple_t& top_tuple = empty_tuple; + + size_t cnt = 0; + auto sort_prop_getter_tuple = create_prop_getter_tuple( + tuples, ctx, graph, std::make_index_sequence()); + LOG(INFO) << "Finish create prop getter tuple."; + GeneralComparator comparator(tuples); + + double t0 = -grape::GetCurrentTime(); + for (auto iter : ctx) { + auto cur_tuple = iter.GetAllIndexElement(); + if (pq.size() < limit) { + pq.emplace( + comparator.get_sort_tuple(cur_tuple, sort_prop_getter_tuple, cnt)); + top_tuple = pq.top(); + } else if (pq.size() == limit) { + // update prop getter with index_ele. + if (comparator(cur_tuple, top_tuple, sort_prop_getter_tuple)) { + pq.pop(); + pq.emplace(comparator.get_sort_tuple(cur_tuple, + sort_prop_getter_tuple, cnt)); + top_tuple = pq.top(); + } + } + cnt += 1; + } + + t0 += grape::GetCurrentTime(); + LOG(INFO) << " sort tuple cost: " << t0; + // pop out all ele in priority_queue + double t1 = -grape::GetCurrentTime(); + std::vector> inds; + inds.reserve(pq.size()); + cnt = 0; + while (!pq.empty()) { + inds.emplace_back(std::make_pair(cnt, gs::get_from_tuple<-1>(pq.top()))); + pq.pop(); + cnt += 1; + } + sort(inds.begin(), inds.end(), + [](const auto& a, const auto& b) { return a.second < b.second; }); + std::vector index_eles; + index_eles.resize(inds.size()); + auto iter2 = ctx.begin(); + cnt = 0; + size_t inds_ind = 0; + while (inds_ind < inds.size()) { + auto pair = inds[inds_ind]; + while (cnt < pair.second) { + ++iter2; + ++cnt; + } + index_eles[pair.first] = (iter2.GetAllIndexElement()); + inds_ind += 1; + } + std::reverse(index_eles.begin(), index_eles.end()); + t1 += 
grape::GetCurrentTime(); + VLOG(10) << "Finish extract top k result, sort tuple time: " << t0 + << ", prepare index ele: " << t1 + << ", result num: " << index_eles.size(); + + return ctx.Flat(std::move(index_eles)); + } + + template + static inline void update_prop_getter(std::tuple& getters, + std::tuple& ind_eles) { + std::get(getters).set_ind_ele(ind_eles); + if constexpr (Is + 1 < sizeof...(GETTER)) { + update_prop_getter(getters, ind_eles); + } + } + + template + static auto create_prop_getter_tuple( + const std::tuple& pairs, + Context& ctx, + const GRAPH_INTERFACE& graph, std::index_sequence) { + return std::make_tuple(create_prop_getter_impl_for_order_pair( + std::get(pairs), ctx, graph)...); + } + + template + static auto create_prop_getter_impl_for_order_pair( + const ORDER_PAIR& ordering_pair, + Context& ctx, + const GRAPH_INTERFACE& graph) { + static constexpr int tag_id = ORDER_PAIR::tag_id; + auto& set = ctx.template GetNode(); + return create_prop_getter_impl( + set, graph, ordering_pair.name); + } + + // Get property getter for row vertex set, with ordinary properties. + template + static auto create_prop_getter_impl_for_order_pair( + const ORDER_PAIR& ordering_pair, + const RowVertexSet& set, + const GRAPH_INTERFACE& graph) { + return create_prop_getter_impl( + set, graph, ordering_pair.name); + } + + // return a pair of prop_getter, each for one label. 
+ template + static auto create_prop_getter_impl_for_order_pair( + const ORDER_PAIR& ordering_pair, + const TwoLabelVertexSet& set, + const GRAPH_INTERFACE& graph) { + return create_prop_getter_impl( + set, graph, ordering_pair.name); + } + + template + static auto create_prop_getter_impl_for_order_pair( + const ORDER_PAIR& ordering_pair, + const KeyedRowVertexSetImpl& set, + const GRAPH_INTERFACE& graph) { + return create_prop_getter_impl( + set, graph, ordering_pair.name); + } + + template + static auto create_prop_getter_impl_for_order_pair( + const ORDER_PAIR& ordering_pair, + const FlatEdgeSet& set, + const GRAPH_INTERFACE& graph) { + return FlatEdgeSetPropGetter< + ORDER_PAIR::tag_id, typename FlatEdgeSet:: + index_ele_tuple_t>(); + } + + template + static auto create_prop_getter_impl_for_order_pair( + const ORDER_PAIR& ordering_pair, + const GeneralEdgeSet& set, + const GRAPH_INTERFACE& graph) { + return GeneralEdgeSetPropGetter< + ORDER_PAIR::tag_id, + typename GeneralEdgeSet::index_ele_tuple_t>(); + } + + template + static auto create_prop_getter_impl_for_order_pair( + const ORDER_PAIR& ordering_pair, const Collection& set, + const GRAPH_INTERFACE& graph) { + CHECK(ordering_pair.name == "None" || ordering_pair.name == "none"); + return CollectionPropGetter(); + } +}; +} // namespace gs + +#endif // ENGINES_HQPS_ENGINE_OPERATOR_SORT_H_ diff --git a/flex/engines/hqps_db/core/params.h b/flex/engines/hqps_db/core/params.h new file mode 100644 index 000000000000..0aac0e4580ea --- /dev/null +++ b/flex/engines/hqps_db/core/params.h @@ -0,0 +1,767 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef ENGINES_HQPS_ENGINE_PARAMS_H_ +#define ENGINES_HQPS_ENGINE_PARAMS_H_ + +#include +#include +#include +// #include "grape/grape.h" +#include "flex/engines/hqps_db/core/utils/hqps_type.h" +#include "flex/storages/rt_mutable_graph/types.h" + +#include "grape/types.h" +namespace gs { + +// define a header to make input_col_id easy to understand +#define INPUT_COL_ID(x) (x) +#define LAST_COL -1 + +#define PROJ_TO_NEW false +#define PROJ_TO_APPEND true + +// Indicator flag for appending the result column to Context. +enum class AppendOpt { + Persist = 0, // persistently store the result column in Context. + Temp = 1, // temporally store the result column, will be replaced by the next + // column. + Replace = 2, // replace the last column in Context. +}; + +template +struct ResultColId { + static constexpr int32_t res_alias = + opt == AppendOpt::Temp + ? -1 + : (opt == AppendOpt::Replace + ? old_alias + : (old_alias == -1 ? (int32_t) sizeof...(PREV_COL) + : old_alias + 1)); +}; + +template +struct ResultColId { + static constexpr int32_t res_alias = + opt == AppendOpt::Temp + ? -1 + : (opt == AppendOpt::Replace ? old_alias + : (old_alias == -1 ? 0 : old_alias + 1)); +}; + +template +struct PropertySelector { + using prop_t = T; + std::string prop_name_; + PropertySelector(std::string prop_name) : prop_name_(std::move(prop_name)) {} + PropertySelector() = default; +}; + +using InternalIdSelector = PropertySelector; + +// @brief Mapping a vertex/edge to new data with expr& selector. 
+// @tparam EXPR +// @tparam ...SELECTOR +template +struct MultiMapper { + EXPR expr_; + SELECTOR_TUPLE selectors_; + MultiMapper(EXPR&& expr, SELECTOR_TUPLE&& selectors) + : expr_(std::move(expr)), selectors_(std::move(selectors)) {} +}; + +// Mapping the data selected by selector identically. +template +struct IdentityMapper { + SELECTOR selector_; + IdentityMapper(SELECTOR&& selector) : selector_(std::move(selector)) {} + IdentityMapper() = default; +}; + +template +struct Filter { + using expr_t = EXPR; + + EXPR expr_; + std::tuple selectors_; + Filter() = default; + Filter(EXPR&& expr, std::tuple&& selectors) + : expr_(std::move(expr)), selectors_(std::move(selectors)) {} +}; + +template +auto make_mapper_with_expr(EXPR&& expr, SELECTOR&&... selector) { + return MultiMapper, in_col_id...>( + std::move(expr), std::make_tuple(selector...)); +} + +template +auto make_mapper_with_variable(SELECTOR&& selector) { + return IdentityMapper(std::move(selector)); +} + +// makeFilter +template +auto make_filter(EXPR&& expr, SELECTOR&&... selectors) { + return Filter(std::move(expr), + std::make_tuple(std::move(selectors)...)); +} + +///////////////////////////////for group by//////////////////////////// +template +struct GroupKey { + using selector_t = PropertySelector; + static constexpr int col_id = _col_id; + PropertySelector selector_; + GroupKey(PropertySelector&& selector) : selector_(std::move(selector)) {} + GroupKey() = default; +}; + +enum class AggFunc { + SUM = 0, + MIN = 1, + MAX = 2, + COUNT = 3, + COUNT_DISTINCT = 4, + TO_LIST = 5, + TO_SET = 6, + AVG = 7, + FIRST = 8, +}; + +// Get the return type of this aggregation. +template +struct AggFuncReturnValue { + // default return t; + using return_t = T; +}; + +template +struct AggFuncReturnValue { + using return_t = size_t; +}; + +template +struct AggFuncReturnValue { + using return_t = size_t; +}; + +// for grouping values, for which key, and to which alias, applying which +// agg_func. 
// A (start, limit) pair of unsigned bounds. A default-constructed Range
// starts at 0 and has limit INT_MAX.
struct Range {
  Range() : Range(0, INT_MAX) {}
  Range(size_t s, size_t l) : start_(s), limit_(l) {}

  size_t start_;
  size_t limit_;
};
+ DESC = 2, // descending order +}; + +//@.name +//@a.name +template +struct OrderingPropPair { + using prop_t = T; + static constexpr int tag_id = tag; + // static constexpr size_t col_id = col; + static constexpr SortOrder sort_order = sort_order_; + std::string name; + OrderingPropPair(std::string n) : name(n) {} +}; + +// The query pay load for ordering. +template +struct SortOrderOpt { + std::tuple ordering_pairs_; + Range range_; + // SORT_FUNC sort_func_; + // sort_func_(std::move(sort_func)), + + SortOrderOpt(Range&& range, ORDER_PAIR&&... tuple) + : range_(std::move(range)), ordering_pairs_{tuple...} {} +}; + +template +auto make_sort_opt(Range&& range, ORDER_PAIR&&... pairs) { + return SortOrderOpt(std::move(range), + std::forward(pairs)...); +} + +enum JoinKind { + Semi = 0, + InnerJoin = 1, + AntiJoin = 2, + LeftOuterJoin = 3, +}; + +enum Direction { Out = 0, In = 1, Both = 2 }; +enum VOpt { + Start = 0, // The start vertex of current expanded edge. + End = 1, // the ending vertex of this expanding. + Other = 2, // the other vertices. + Both_V = 3, // both side + Itself = 4, // Get vertex from vertex set +}; + +enum PathOpt { + Arbitrary = 0, // can be duplicated path + Simple = 1, // a single path which contains no duplicated value. +}; + +enum ResultOpt { + EndV = 0, // Get the end vertex of path. i.e. [3],[4] + AllV = 1, // Get all the vertex on path. i.e. 
// Predicate that rejects every input. The call operator is const, matching
// TruePredicate, so the predicate can be invoked through const objects and
// const references.
struct FalsePredicate {
  template <typename T>
  bool operator()(T& t) const {
    return false;
  }
};
+ Filter edge_filter_; +}; + +template +struct EdgeExpandEMultiLabelOpt, T...> { + EdgeExpandEMultiLabelOpt(PropNameArray&& prop_names, Direction dir, + LabelT edge_label, + std::array other_label, + Filter&& edge_filter) + : prop_names_(std::move(prop_names)), + dir_(dir), + edge_label_(edge_label), + other_label_(std::move(other_label)), + edge_filter_(std::move(edge_filter)) {} + + EdgeExpandEMultiLabelOpt(Direction dir, LabelT edge_label, + std::array other_label, + Filter&& edge_filter) + : dir_(dir), + edge_label_(edge_label), + other_label_(std::move(other_label)), + edge_filter_(std::move(edge_filter)) {} + + PropNameArray prop_names_; + Direction dir_; + LabelT edge_label_; + std::array other_label_; + EDGE_FILTER_FUNC edge_filter_; +}; + +template +auto make_edge_expande_opt(PropNameArray&& prop_names, Direction dir, + LabelT edge_label, LabelT other_label, + Filter&& func) { + return EdgeExpandEOpt, + T...>(std::move(prop_names), dir, edge_label, + other_label, std::move(func)); +} + +template +auto make_edge_expande_opt(PropNameArray&& prop_names, Direction dir, + LabelT edge_label, LabelT other_label) { + return EdgeExpandEOpt, T...>( + std::move(prop_names), dir, edge_label, other_label, + Filter()); +} + +template +auto make_edge_expande_opt(Direction dir, LabelT edge_label, + LabelT other_label) { + return EdgeExpandEOpt>( + dir, edge_label, other_label, Filter()); +} + +template +auto make_edge_expande_opt(Direction dir, LabelT edge_label, + std::array other_labels) { + return EdgeExpandEMultiLabelOpt>( + dir, edge_label, other_labels, Filter()); +} + +// For edge expand with multiple labels. 
+template +struct EdgeExpandOptMultiLabel { + EdgeExpandOptMultiLabel( + Direction dir, LabelT edge_label, + std::array&& other_label, + std::array&& edge_filter) + : direction_(dir), + edge_label_(edge_label), + edge_filter_(std::move(edge_filter)), + other_labels_(std::move(other_label)) {} + + Direction direction_; + LabelT edge_label_; + // edge filter func can be apply to every label vertcies + std::array edge_filter_; + std::array + other_labels_; // There might be multiple dst labels. +}; + +template +auto make_edge_expandv_opt(Direction dir, LabelT edge_label, + std::array&& other_labels) { + return EdgeExpandOptMultiLabel( + dir, edge_label, std::move(other_labels), + std::array, num_labels>()); +} + +template +auto make_edge_expandv_opt(Direction dir, LabelT edge_label, + std::array&& other_labels, + std::array&& func) { + return EdgeExpandOptMultiLabel(dir, edge_label, std::move(other_labels), + std::move(func)); +} + +template +inline auto make_edge_expandv_opt(Direction dir, LabelT edge_label, + LabelT other_label) { + return EdgeExpandOpt(dir, edge_label, other_label, Filter()); +} + +template +auto make_edge_expandv_opt(Direction dir, LabelT edge_label, LabelT other_label, + Filter&& func) { + return EdgeExpandOpt(dir, edge_label, other_label, std::move(func)); +} + +template +struct GetVOpt; + +template +struct GetVOpt, T...> { + VOpt v_opt_; + // label of vertices we need. + std::array v_labels_; + // columns of vertices we need to fetch. 
+ Filter filter_; + std::tuple...> props_; + + GetVOpt(VOpt v_opt, std::array&& v_labels, + std::tuple...>&& props, + Filter&& filter) + : v_opt_(v_opt), + v_labels_(std::move(v_labels)), + props_(std::move(props)), + filter_(std::move(filter)) {} + + GetVOpt(VOpt v_opt, std::array&& v_labels, + std::tuple...>&& props) + : v_opt_(v_opt), + v_labels_(std::move(v_labels)), + props_(std::move(props)) {} + + GetVOpt(VOpt v_opt, std::array&& v_labels, + Filter&& filter) + : v_opt_(v_opt), + v_labels_(std::move(v_labels)), + filter_(std::move(filter)) {} + + // Only with v_labels. + GetVOpt(VOpt v_opt, std::array&& v_labels) + : v_opt_(v_opt), v_labels_(std::move(v_labels)) {} + // it is ok that other members will be initiate to default value. +}; + +template +using SimpleGetVOpt = GetVOpt; + +// make get_v opt with labels and props and expr(filters) +template +auto make_getv_opt(VOpt v_opt, std::array&& v_labels, + std::tuple...>&& props, + Filter&& filter) { + return GetVOpt, T...>( + v_opt, std::move(v_labels), std::move(props), std::move(filter)); +} + +template +auto make_getv_opt(VOpt v_opt, std::array&& v_labels, + Filter&& filter) { + return GetVOpt>( + v_opt, std::move(v_labels), std::move(filter)); +} + +// make get_v opt with labels and props. +template +auto make_getv_opt(VOpt v_opt, std::array&& v_labels, + PropNameArray&& props) { + return GetVOpt, T...>( + v_opt, std::move(v_labels), std::move(props)); +} + +// make get_v opt with labels. +// template +// auto make_getv_opt(VOpt v_opt, std::array&& v_labels) { +// return GetVOpt(v_opt, std::move(v_labels)); +// } + +// inline auto make_getv_opt(VOpt v_opt, std::string v_label) { +// return SimpleGetVOpt( +// v_opt, std::array{v_label}); +// } + +// Path expand with only one dst label. +// Path expand with until condition. 
+template +struct PathExpandOptImpl { + PathExpandOptImpl(EdgeExpandOpt&& edge_expand_opt, + SimpleGetVOpt&& get_v_opt, + Range&& range, UNTIL_CONDITION&& until_condition, + PathOpt path_opt = PathOpt::Arbitrary, + ResultOpt result_opt = ResultOpt::EndV) + : edge_expand_opt_(std::move(edge_expand_opt)), + get_v_opt_(std::move(get_v_opt)), + range_(std::move(range)), + until_condition_(std::move(until_condition)), + path_opt_(path_opt), + result_opt_(result_opt) {} + + EdgeExpandOpt edge_expand_opt_; + SimpleGetVOpt get_v_opt_; + Range range_; // Range for result vertices, default is [0,INT_MAX) + UNTIL_CONDITION until_condition_; + PathOpt path_opt_; // Single path or not. + ResultOpt result_opt_; // Get all vertices on Path or only ending vertices. +}; + +template +using PathExpandOpt = + PathExpandOptImpl, T...>; + +// opt used for simple path opt. +template +using ShortestPathOpt = + PathExpandOptImpl; + +template +auto make_path_expand_opt( + EdgeExpandOpt&& edge_expand_opt, + SimpleGetVOpt&& get_v_opt, Range&& range, + PathOpt path_opt = PathOpt::Arbitrary, + ResultOpt result_opt = ResultOpt::EndV) { + return PathExpandOpt( + std::move(edge_expand_opt), std::move(get_v_opt), std::move(range), + Filter(), path_opt, result_opt); +} + +template +auto make_shortest_path_opt( + EdgeExpandOpt&& edge_expand_opt, + SimpleGetVOpt&& get_v_opt, Range&& range, + Filter&& until_condition, + PathOpt path_opt = PathOpt::Arbitrary, + ResultOpt result_opt = ResultOpt::EndV) { + return ShortestPathOpt, T...>( + std::move(edge_expand_opt), std::move(get_v_opt), std::move(range), + std::move(until_condition), path_opt, result_opt); +} + +// Just filter with v_labels. 
+template +auto make_getv_opt(VOpt v_opt, std::array&& v_labels) { + return GetVOpt>( + v_opt, std::move(v_labels)); +} + +///////////////////////Group prams//////////////////////////// + +template +struct TagProp { + static constexpr int tag_id = _tag_id; + PropNameArray prop_names_; + using prop_tuple_t = std::tuple; + + TagProp(PropNameArray&& prop_names) + : prop_names_(std::move(prop_names)) {} +}; + +// tagPropWithAlias +template +struct AliasTagProp { + static constexpr int tag_id = _tag_id; + static constexpr int res_alias = _res_alias; + // the property name for projection. + TagProp<_tag_id, T...> tag_prop_; + // PropNameArray prop_names_; + AliasTagProp(PropNameArray&& prop_names) + : tag_prop_{std::move(prop_names)} {} +}; + +// Alias the property of multiple tags' multiple propty. + +// For the grouping key, use which property, and alias to what. +template +struct KeyAlias { + static constexpr int tag_id = _tag_id; +}; + +template +struct ProjectSelf { + static constexpr int tag_id = _tag_id; + static constexpr int res_alias = _res_alias; +}; + +// evalutate expression on previous context. +template +struct ProjectExpr { + static constexpr int res_alias = _res_alias; + EXPR expr_; + ProjectExpr(EXPR&& expr) : expr_(std::move(expr)) {} +}; + +template +auto make_project_expr(EXPR&& expr) { + // get the return type of expr() + // using RES_T = typename EXPR::result_t; + return ProjectExpr<_res_alias, RES_T, EXPR>(std::move(expr)); +} + +template +auto make_key_alias_prop(PropNameArray&& names) { + return AliasTagProp<_tag_id, _res_alias, T...>(std::move(names)); +} + +template +struct FoldOpt { + using agg_tuple_t = std::tuple<_AGGREGATE...>; + static constexpr size_t num_agg = sizeof...(_AGGREGATE); + agg_tuple_t aggregate_; + FoldOpt(agg_tuple_t&& aggregate) : aggregate_(std::move(aggregate)) {} + + FoldOpt(_AGGREGATE&&... aggregate) + : aggregate_(std::forward<_AGGREGATE>(aggregate)...) {} +}; + +template +auto make_fold_opt(_AGG_T&&... 
aggs) { + return FoldOpt(std::forward<_AGG_T>(aggs)...); +} + +// The res_alias of project opt's should be gte 0. +// As we append them on by one after each projection. +template +struct ProjectOpt { + std::tuple key_alias_tuple_; + static constexpr size_t num_proj_cols = sizeof...(KEY_ALIAS_PROP); + ProjectOpt(KEY_ALIAS_PROP&&... key_aliases) + : key_alias_tuple_(std::forward(key_aliases)...) {} +}; + +template +auto make_project_opt(KEY_ALIAS_PROP&&... key_alias) { + return ProjectOpt(std::forward(key_alias)...); +} + +// convert tag_alias_prop to named_property +// only support one type +template +NamedProperty alias_tag_prop_to_named_property( + const AliasTagProp<_tag_id, _res_alias, T>& alias_tag_prop) { + return NamedProperty(alias_tag_prop.tag_prop_.prop_names_[0]); +} + +// ShortestPath +/* +message ShortestPathExpand { + message WeightCal { + enum Aggregate { + SUM = 0; + MAX = 1; + MIN = 2; + AVG = 3; + MUL = 4; + } + // This optional expression defines how to calculate the weight on each +edge. In the expression, + // one can directly write start, end to indicate the start/edge vertices of +the edge. + // e.g. the expression: "start.value + end.value * weight" defines that the +weight of each edge + // is calculated by multiplying the edge vertex's value with the edge's +weight and then summing + // it with the start vertex's value. + common.Expression weight_each = 1; + // Define how to aggregate the calculated weight of each edge as the path +weight Aggregate aggregate = 2; + } + // A shortest path expansion has a base of path expansion + PathExpand path_expand = 1; + // An optional weight calculation function for shortest path. If not +specified, the weight is + // by default the length of the path. 
+ WeightCal weight_cal = 2; +} +*/ + +} // namespace gs + +namespace std { + +inline ostream& operator<<(ostream& os, const gs::Dist& g) { + os << g.dist; + return os; +} +} // namespace std + +#endif // ENGINES_HQPS_ENGINE_PARAMS_H_ diff --git a/flex/engines/hqps_db/core/sync_engine.h b/flex/engines/hqps_db/core/sync_engine.h new file mode 100644 index 000000000000..b9cd6e18eaf2 --- /dev/null +++ b/flex/engines/hqps_db/core/sync_engine.h @@ -0,0 +1,725 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef ENGINES_HQPS_ENGINE_SYNC_ENGINE_H_ +#define ENGINES_HQPS_ENGINE_SYNC_ENGINE_H_ + +#include + +#include "flex/engines/hqps_db/core/context.h" +#include "flex/engines/hqps_db/core/params.h" +#include "flex/engines/hqps_db/core/utils/hqps_utils.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/general_vertex_set.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/multi_label_vertex_set.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h" +#include "flex/engines/hqps_db/structures/path.h" +#include "flex/storages/rt_mutable_graph/types.h" +#include "flex/utils/property/column.h" +#include "grape/utils/bitset.h" + +#include "flex/engines/hqps_db/core/base_engine.h" +#include "flex/engines/hqps_db/core/operator/edge_expand.h" +#include "flex/engines/hqps_db/core/operator/get_v.h" +#include "flex/engines/hqps_db/core/operator/group_by.h" +#include "flex/engines/hqps_db/core/operator/path_expand.h" +#include "flex/engines/hqps_db/core/operator/scan.h" +#include "flex/engines/hqps_db/core/operator/shorest_path.h" +#include "flex/engines/hqps_db/core/operator/sink.h" +#include "flex/engines/hqps_db/core/utils/props.h" + +namespace gs { + +template +class SyncEngine : public BaseEngine { + using label_id_t = typename GRAPH_INTERFACE::label_id_t; + using vertex_id_t = typename GRAPH_INTERFACE::vertex_id_t; + using default_vertex_set_t = DefaultRowVertexSet; + using two_label_set_t = + TwoLabelVertexSet; + + template + using vertex_set_t = RowVertexSet; + + public: + ///////////////////////////// Scan Vertex///////////////////////////// + + /// @brief Scan for one label, with append_opt == AppendOpt::Persist + /// @tparam EXPR + /// @tparam COL_T + /// @tparam ...SELECTOR + /// @tparam append_opt + /// @param graph + /// @param v_label + /// @param filter + /// @return + template ::type* = + nullptr, + typename COL_T = default_vertex_set_t> + static Context ScanVertex( + const GRAPH_INTERFACE& graph, const 
label_id_t& v_label, + Filter&& filter) { + auto v_set_tuple = Scan::template ScanVertex( + graph, v_label, std::move(filter)); + + return Context(std::move(v_set_tuple)); + } + + // implement for append_opt == AppendOpt::temp + template < + AppendOpt append_opt, typename EXPR, typename... SELECTOR, + typename std::enable_if<(append_opt == AppendOpt::Temp)>::type* = nullptr, + typename COL_T = default_vertex_set_t> + static Context ScanVertex( + const GRAPH_INTERFACE& graph, const label_id_t& v_label, + Filter&& filter) { + auto v_set_tuple = Scan::template ScanVertex( + graph, v_label, std::move(filter)); + + return Context(std::move(v_set_tuple)); + } + + /// @brief Scan vertices with multiple labels + /// @tparam FUNC + /// @tparam COL_T + /// @tparam res_alias + /// @param time_stamp + /// @param graph + /// @param v_label + /// @param func + /// @return + template ::type* = + nullptr, + typename COL_T = two_label_set_t> + static Context ScanVertex( + const GRAPH_INTERFACE& graph, + std::array&& v_labels, + Filter&& filter) { + auto v_set_tuple = Scan::template ScanVertex( + graph, std::move(v_labels), std::move(filter)); + + return Context(std::move(v_set_tuple)); + } + + template < + AppendOpt append_opt, size_t num_labels, typename EXPR, + typename... 
SELECTOR, + typename std::enable_if<(append_opt == AppendOpt::Temp)>::type* = nullptr, + typename COL_T = two_label_set_t> + static Context ScanVertex( + const GRAPH_INTERFACE& graph, + std::array&& v_labels, + Filter&& filter) { + auto v_set_tuple = Scan::template ScanVertex( + graph, std::move(v_labels), std::move(filter)); + + return Context(std::move(v_set_tuple)); + } + + template ::type* = + nullptr, + typename COL_T = default_vertex_set_t> + static Context ScanVertexWithOid( + const GRAPH_INTERFACE& graph, LabelT v_label, int64_t oid) { + auto v_set_tuple = + Scan::ScanVertexWithOid(graph, v_label, oid); + + return Context(std::move(v_set_tuple)); + } + + template < + AppendOpt append_opt, typename LabelT, + typename std::enable_if<(append_opt == AppendOpt::Temp)>::type* = nullptr, + typename COL_T = default_vertex_set_t> + static Context ScanVertexWithOid( + const GRAPH_INTERFACE& graph, LabelT v_label, int64_t oid) { + auto v_set_tuple = + Scan::ScanVertexWithOid(graph, v_label, oid); + + return Context(std::move(v_set_tuple)); + } + + //////////////////////////EdgeExpand//////////////////////////// + + /// @brief //////// Edge Expand to vertex, the output is vertices with out any + /// property! + /// According to whether the alias_to_use is the head node, we shall have two + /// kind + /// of implementation + /// 0. If start from tag, all good. + /// 1. If from a previous alias, a additional repeat array should be provided, + /// to make sure the output vertices is aligned with the current head alias. 
+ /// @tparam EDATA_T + /// @tparam VERTEX_SET_T + /// @tparam + /// @tparam N + /// @param frag + /// @param v_sets + /// @param edge_expand_opt + /// @return + template ::result_t> + static RES_T EdgeExpandV( + const GRAPH_INTERFACE& graph, + Context&& ctx, + EdgeExpandOpt&& edge_expand_opt, + size_t limit = INT_MAX) { + auto& select_node = gs::Get(ctx); + + auto pair = EdgeExpand::template EdgeExpandV( + graph, select_node, edge_expand_opt.dir_, edge_expand_opt.edge_label_, + edge_expand_opt.other_label_, std::move(edge_expand_opt.edge_filter_), + limit); + return ctx.template AddNode( + std::move(pair.first), std::move(pair.second), input_col_id); + } + + /// @brief //////// Edge Expand to vertex, the output is vertices with out any + /// property! + /// @tparam EDATA_T + /// @tparam VERTEX_SET_T + /// @tparam + /// @tparam N + /// @param frag + /// @param v_sets + /// @param edge_expand_opt + /// @return + template + static auto EdgeExpandE( + const GRAPH_INTERFACE& graph, + Context&& ctx, + EdgeExpandEOpt, T...>&& + edge_expand_opt, + size_t limit = INT_MAX) { + // Unwrap params here. + auto& select_node = gs::Get(ctx); + // Modifiy offsets. + // pass select node by reference. + auto pair = EdgeExpand::template EdgeExpandE( + graph, select_node, edge_expand_opt.dir_, edge_expand_opt.edge_label_, + edge_expand_opt.other_label_, edge_expand_opt.edge_filter_, + edge_expand_opt.prop_names_, limit); + // create new context node, update offsets. + return ctx.template AddNode( + std::move(pair.first), std::move(pair.second), alias_to_use); + // old context will be abondon here. + } + + /// @brief //////// Edge Expand to Edge, with multiple dst vertex labels. 
+ /// @tparam ...T + /// @tparam CTX_HEAD_T + /// @tparam ...CTX_PREV + /// @tparam LabelT + /// @tparam EDGE_FILTER_T + /// @tparam res_alias + /// @tparam alias_to_use + /// @tparam cur_alias + /// @tparam base_tag + /// @param time_stamp + /// @param graph + /// @param ctx + /// @param edge_expand_opt + /// @param limit + /// @return + template + static auto EdgeExpandE( + const GRAPH_INTERFACE& graph, + Context&& ctx, + EdgeExpandEMultiLabelOpt&& + edge_expand_opt, + size_t limit = INT_MAX) { + // Unwrap params here. + auto& select_node = gs::Get(ctx); + // Modifiy offsets. + // pass select node by reference. + auto pair = EdgeExpand::template EdgeExpandE( + graph, select_node, edge_expand_opt.dir_, edge_expand_opt.edge_label_, + edge_expand_opt.other_label_, edge_expand_opt.edge_filter_, + edge_expand_opt.prop_names_, limit); + // create new context node, update offsets. + return ctx.template AddNode( + std::move(pair.first), std::move(pair.second), alias_to_use); + // old context will be abondon here. + } + + template + static auto EdgeExpandV( + const GRAPH_INTERFACE& graph, + Context&& ctx, + EdgeExpandOptMultiLabel&& + edge_expand_opt) { + // Unwrap params here. + auto& select_node = gs::Get(ctx); + // Modifiy offsets. + // pass select node by reference. + auto pair = EdgeExpand::EdgeExpandV( + graph, select_node, edge_expand_opt.direction_, + edge_expand_opt.edge_label_, edge_expand_opt.other_labels_, + std::move(edge_expand_opt.edge_filter_), + std::make_index_sequence()); + // create new context node, update offsets. + return ctx.template AddNode(std::move(pair.first), + std::move(pair.second), alias_to_use); + // old context will be abondon here. 
+ } + + //////////////////////////////////////Path Expand///////////////////////// + // Path Expand to vertices with columns + template , + typename RES_T = + typename ResultContextT::result_t> + static RES_T PathExpandV( + const GRAPH_INTERFACE& graph, + Context&& ctx, + PathExpandOpt&& path_expand_opt) { + if (path_expand_opt.path_opt_ != PathOpt::Arbitrary) { + LOG(FATAL) << "Only support Arbitrary path now"; + } + if (path_expand_opt.result_opt_ != ResultOpt::EndV) { + LOG(FATAL) << "Only support EndV now"; + } + auto& select_node = gs::Get(ctx); + auto pair = PathExpand::PathExpandV( + graph, select_node, std::move(path_expand_opt)); + + // create new context node, update offsets. + return ctx.template AddNode(std::move(pair.first), + std::move(pair.second), alias_to_use); + // old context will be abondon here. + } + + /////////////////////GetV, output vertices with columns ////////////////////// + // res_alias: the alias of output + // alias_to_use: the alias of col of current ctx we use as input. + // cur_alias: the alias of current head node. + // num_properties: the properties num to get from vertex. should eq + // sizeof...(COL_T) + template < + AppendOpt opt, int alias_to_use, typename CTX_HEAD_T, int cur_alias, + int base_tag, typename... CTX_PREV, typename LabelT, size_t num_labels, + typename EXPRESSION, typename... SELECTOR, typename... T, + typename std::enable_if<(num_labels > 1 && sizeof...(T) >= 1)>::type* = + nullptr> + static auto GetV(const GRAPH_INTERFACE& frag, + Context&& ctx, + GetVOpt, + T...>&& get_v_opt) { + auto& select = gs::Get(ctx); + auto pair = GetVertex::GetPropertyV(frag, select, + std::move(get_v_opt)); + return ctx.template AddNode(std::move(pair.first), + std::move(pair.second), alias_to_use); + } + + template < + AppendOpt opt, int alias_to_use, typename CTX_HEAD_T, int cur_alias, + int base_tag, typename... CTX_PREV, typename LabelT, size_t num_labels, + typename EXPRESSION, typename... SELECTOR, typename... 
T, + typename std::enable_if<(num_labels == 1 && sizeof...(T) >= 1)>::type* = + nullptr> + static auto GetV(const GRAPH_INTERFACE& frag, + Context&& ctx, + GetVOpt, + T...>&& get_v_opt) { + auto& select = gs::Get(ctx); + auto pair = GetVertex::GetPropertyV(frag, select, + std::move(get_v_opt)); + return ctx.template AddNode(std::move(pair.first), + std::move(pair.second), alias_to_use); + } + + // get no props, just filter + template < + AppendOpt opt, int alias_to_use, typename CTX_HEAD_T, int cur_alias, + int base_tag, typename... CTX_PREV, typename LabelT, size_t num_labels, + typename EXPRESSION, typename... SELECTOR, typename... T, + typename ctx_t = Context, + typename old_node_t = std::remove_reference_t< + decltype(std::declval().template GetNode())>, + typename std::enable_if<(old_node_t::is_vertex_set && + sizeof...(T) == 0)>::type* = nullptr, + typename NEW_HEAD_T = old_node_t, + typename RES_T = + typename ResultContextT::result_t> + static RES_T GetV(const GRAPH_INTERFACE& frag, + Context&& ctx, + GetVOpt, + T...>&& get_v_opt) { + auto& select = gs::Get(ctx); + auto pair = GetVertex::GetNoPropV(frag, select, get_v_opt); + return ctx.template AddNode(std::move(pair.first), + std::move(pair.second), alias_to_use); + } + + // get vertex from edge set + template < + AppendOpt opt, int alias_to_use, typename CTX_HEAD_T, int cur_alias, + int base_tag, typename... CTX_PREV, typename LabelT, size_t num_labels, + typename EXPRESSION, typename... SELECTOR, typename... 
T, + typename ctx_t = Context, + typename old_node_t = std::remove_reference_t< + decltype(std::declval().template GetNode())>, + typename std::enable_if<(old_node_t::is_edge_set && + sizeof...(T) == 0)>::type* = nullptr> + static auto GetV(const GRAPH_INTERFACE& graph, + Context&& ctx, + GetVOpt, + T...>&& get_v_opt) { + auto& select = gs::Get(ctx); + auto pair = GetVertex::GetNoPropVFromEdgeSet( + graph, select, std::move(get_v_opt)); + VLOG(10) << "new node's size: " << pair.first.Size(); + // << ", offset: " << gs::to_string(pair.second); + return ctx.template AddNode(std::move(pair.first), + std::move(pair.second), alias_to_use); + } + + //////////////////////////////////////Project///////////////////////// + // Project current relations to new columns, append or not. + // TODO: add type infere back: + // typename RES_T = typename ProjectResT< + // is_append, Context, + // PROJECT_OPT>::result_t + template + static auto Project( + const GRAPH_INTERFACE& graph, + Context&& ctx, + std::tuple&& proj_mappers) { + VLOG(10) << "[Project] with project opt size: " << sizeof...(ProjMapper); + return ProjectOp::template ProjectImpl( + graph, std::move(ctx), std::move(proj_mappers)); + } + + //////////////////////////////////////Sort/Order///////////////////////// + // From current context, do the sort. + // After sort, the corresponding order maintained by csr offsets will be + // cleaned. We need to flat current context to new context. Each node will + // be replaced by the flat one. The alignment between nodes will be 1-1. 
+ template + static auto Sort(const GRAPH_INTERFACE& graph, + Context&& ctx, + Range&& limit_range, + std::tuple&& ordering_pairs) { + if (limit_range.start_ != 0) { + LOG(FATAL) << "Current only support topk"; + } + if (limit_range.limit_ == 0) { + LOG(FATAL) << "Current only support empty range"; + } + + VLOG(10) << "[Sort: ] Sort with " << sizeof...(ORDER_PAIRS) << " keys"; + return SortOp::SortTopK( + graph, std::move(ctx), std::move(ordering_pairs), limit_range.limit_); + } + + //////////////////////////////////////Select/Filter///////////////////////// + // Select with head node. The type doesn't change + // select only head node. + template < + int in_col_id, typename CTX_HEAD_T, int cur_alias, int base_tag, + typename... CTX_PREV, typename EXPR, typename... Selector, + typename std::enable_if::type* = nullptr, + typename RES_T = Context> + static RES_T Select( + const GRAPH_INTERFACE& graph, + Context&& ctx, + Filter&& filter) { + VLOG(10) << "[Select]"; + auto expr = filter.expr_; + auto selectors = filter.selectors_; + + auto& head = ctx.GetMutableHead(); + auto labels = head.GetLabels(); + auto prop_getter_tuple = + get_prop_getters_from_selectors(graph, labels, selectors); + SelectTwoLabelSetImpl(ctx, head, prop_getter_tuple, expr); + + return std::move(ctx); + } + + template + static void SelectTwoLabelSetImpl( + CTX_T&& ctx, HEAD_T& head, + const std::array& prop_getter_tuple, const EXPR& expr) { + auto& bitset = head.GetMutableBitset(); + auto& vertices = head.GetMutableVertices(); + size_t cur = 0; + static_assert(HEAD_T::num_props == 0); + auto& last_offset = ctx.GetMutableOffset(-1); + double t0 = -grape::GetCurrentTime(); + grape::Bitset new_bitset; + new_bitset.init(vertices.size()); + size_t cur_begin = last_offset[0]; + for (auto i = 0; i < last_offset.size() - 1; ++i) { + auto limit = last_offset[i + 1]; + for (auto j = cur_begin; j < limit; ++j) { + auto vid = vertices[j]; + if (bitset.get_bit(j)) { + if (std::apply(expr, 
prop_getter_tuple[0].get_view(vid))) { + new_bitset.set_bit(cur); + if (cur < j) { + vertices[cur++] = vid; + } else { + cur++; + } + } + } else { + if (std::apply(expr, prop_getter_tuple[1].get_view(vid))) { + if (cur < j) { + vertices[cur++] = vid; + } else { + cur++; + } + } + } + } + cur_begin = last_offset[i + 1]; + last_offset[i + 1] = cur; + } + vertices.resize(cur); + bitset.swap(new_bitset); + t0 += grape::GetCurrentTime(); + VLOG(10) << "after filter: " << vertices.size() << ", time: " << t0; + } + + // Select from row vertex set. + // only the case of select head node is supported. + template < + int in_col_id, typename CTX_HEAD_T, int cur_alias, int base_tag, + typename... CTX_PREV, typename EXPR, typename... SELECTOR, + typename std::enable_if::type* = nullptr, + typename RES_T = Context> + static RES_T Select( + const GRAPH_INTERFACE& graph, + Context&& ctx, + Filter&& filter) { + VLOG(10) << "[Select]"; + using ctx_t = Context; + // Currently only support select with head node. 
+ auto expr = filter.expr_; + auto selectors = filter.selectors_; + + auto& head = ctx.GetMutableHead(); + auto label = head.GetLabel(); + auto prop_getter_tuple = + std::array{get_prop_getter_from_selectors(graph, label, selectors)}; + // TODO: implement + SelectRowVertexSetImpl(ctx, head, prop_getter_tuple, expr, + std::make_index_sequence()); + + return std::move(ctx); + } + + template + static void SelectRowVertexSetImpl( + CTX_T& ctx, HEAD_T& head, + const std::array& prop_getters, const EXPR& expr, + std::index_sequence) { + double t0 = -grape::GetCurrentTime(); + size_t cur = 0; + auto& vertices = head.GetMutableVertices(); + auto& prop_getter = prop_getters[0]; + if constexpr (CTX_T::prev_alias_num == 0) { + for (auto i = 0; i < vertices.size(); ++i) { + auto vid = vertices[i]; + if (std::apply(expr, prop_getter.get_view(vid))) { + if (cur < i) { + vertices[cur++] = vid; + } else { + cur++; + } + } + } + } else { + auto& last_offset = ctx.GetMutableOffset(-1); + + size_t cur_begin = last_offset[0]; + for (auto i = 0; i < last_offset.size() - 1; ++i) { + auto limit = last_offset[i + 1]; + for (auto j = cur_begin; j < limit; ++j) { + auto vid = vertices[j]; + if (std::apply(expr, prop_getter.get_view(vid))) { + if (cur < j) { + vertices[cur++] = vid; + } else { + cur++; + } + } + } + cur_begin = last_offset[i + 1]; + last_offset[i + 1] = cur; + } + } + vertices.resize(cur); + t0 += grape::GetCurrentTime(); + VLOG(10) << "after filter: " << vertices.size() << ", time: " << t0; + } + + //////////////////////////////////////Select/Filter///////////////////////// + // Select with head node. The type doesn't change + // select can possiblely applied on multiple tags + // (!CTX_HEAD_T::is_row_vertex_set) && (!CTX_HEAD_T::is_two_label_set) && + template < + int... in_col_id, typename CTX_HEAD_T, int cur_alias, int base_tag, + typename... CTX_PREV, typename EXPR, typename... 
SELECTOR, + typename std::enable_if<((sizeof...(in_col_id) > 1) && + (sizeof...(in_col_id) == + sizeof...(SELECTOR)))>::type* = nullptr, + typename RES_T = Context> + static RES_T Select( + const GRAPH_INTERFACE& graph, + Context&& ctx, + Filter&& filter) { + VLOG(10) << "[Context]: Select in place"; + auto expr = filter.expr_; + auto selectors = filter.selectors_; + + std::vector new_offsets; + std::vector select_indices; + new_offsets.emplace_back(0); + offset_t cur_offset = 0; + offset_t cur_ind = 0; + auto& cur_ = ctx.GetHead(); + select_indices.reserve(cur_.Size()); + // create prop_desc from in_col_id and selectors + auto prop_descs = create_prop_descs_from_selectors(selectors); + auto prop_getters_tuple = + create_prop_getters_from_prop_desc(graph, ctx, prop_descs); + for (auto iter : ctx) { + auto eles = iter.GetAllElement(); + // if (expr(eles)) { + // if (std::apply(expr, props)) { + if (run_expr_filter(expr, prop_getters_tuple, eles)) { + select_indices.emplace_back(cur_ind); + cur_offset += 1; + } + cur_ind += 1; + new_offsets.emplace_back(cur_offset); + } + VLOG(10) << "Select " << select_indices.size() << ", out of " << cur_ind + << " records" + << ", head size: " << cur_.Size(); + + cur_.SubSetWithIndices(select_indices); + ctx.merge_offset_with_back(new_offsets); + return std::move(ctx); + } + + template + static inline bool run_expr_filter( + const EXPR& expr, std::tuple& prop_getter_tuple, + std::tuple& eles) { + return run_expr_filter_impl( + expr, prop_getter_tuple, eles, + std::make_index_sequence()); + } + + template + static inline bool run_expr_filter_impl( + const EXPR& expr, std::tuple& prop_getter_tuple, + std::tuple& eles, std::index_sequence) { + return expr(std::get(prop_getter_tuple).get_from_all_element(eles)...); + } + + //////////////////////////////////////Group///////////////////////// + // We currently support group with one key, and possiblely multiple values. + // create a brand new context type. 
+ // group count is included in this implementation. + template , + std::tuple, std::tuple>::result_t> + static RES_T GroupBy( + const GRAPH_INTERFACE& graph, + Context&& ctx, + std::tuple&& group_key, + std::tuple&& agg_func) { + VLOG(10) << "[Group] with with group opt"; + return GroupByOp::GroupByImpl( + graph, std::move(ctx), std::move(group_key), std::move(agg_func)); + } + + template + static auto GroupByWithoutKey( + const GRAPH_INTERFACE& graph, + Context&& ctx, + std::tuple&& fold_opt) { + VLOG(10) << "[Group] with fold opt"; + return GroupByOp::GroupByWithoutKeyImpl( + graph, std::move(ctx), std::move(fold_opt)); + } + + //////////////////////////////////////Shortest Path///////////////////////// + // Return the path. + template , + typename RES_T = + typename ResultContextT::result_t> + static RES_T ShortestPath( + const GRAPH_INTERFACE& graph, + Context&& ctx, + ShortestPathOpt&& + shortest_path_opt) { + static_assert(alias_to_use == -1 || alias_to_use == cur_alias); + if (shortest_path_opt.path_opt_ != PathOpt::Simple) { + LOG(FATAL) << "Only support Simple path now"; + } + if (shortest_path_opt.result_opt_ != ResultOpt::AllV) { + LOG(FATAL) << "Only support AllV now"; + } + + auto& set = ctx.template GetNode(); + auto path_set_and_offset = ShortestPathOp::ShortestPath( + graph, set, std::move(shortest_path_opt)); + return ctx.template AddNode(std::move(path_set_and_offset.first), + std::move(path_set_and_offset.second)); + } +}; +} // namespace gs + +#endif // ENGINES_HQPS_ENGINE_SYNC_ENGINE_H_ diff --git a/flex/engines/hqps_db/core/utils/hqps_type.h b/flex/engines/hqps_db/core/utils/hqps_type.h new file mode 100644 index 000000000000..36173129cf0c --- /dev/null +++ b/flex/engines/hqps_db/core/utils/hqps_type.h @@ -0,0 +1,35 @@ +#ifndef ENGINES_HQPS_ENGINE_UTILS_TYPE_UTILS_H_ +#define ENGINES_HQPS_ENGINE_UTILS_TYPE_UTILS_H_ + +#include "flex/storages/rt_mutable_graph/types.h" + +namespace gs { +struct Dist { + int32_t dist = 0; + Dist(int32_t d) : 
dist(d) {} + Dist() : dist(0) {} + inline Dist& operator=(int32_t d) { + dist = d; + return *this; + } + + void set(int32_t i) { dist = i; } +}; + +inline bool operator<(const Dist& a, const Dist& b) { return a.dist < b.dist; } +inline bool operator>(const Dist& a, const Dist& b) { return a.dist > b.dist; } + +inline bool operator==(const Dist& a, const Dist& b) { + return a.dist == b.dist; +} + +// distance in path. +using dist_t = Dist; +static constexpr label_t INVALID_LABEL_ID = std::numeric_limits::max(); +using offset_t = size_t; +using vertex_set_key_t = size_t; +static constexpr vid_t INVALID_VID = std::numeric_limits::max(); + +} // namespace gs + +#endif // ENGINES_HQPS_ENGINE_UTILS_TYPE_UTILS_H_ \ No newline at end of file diff --git a/flex/engines/hqps_db/core/utils/hqps_utils.h b/flex/engines/hqps_db/core/utils/hqps_utils.h new file mode 100644 index 000000000000..8c8f11565eda --- /dev/null +++ b/flex/engines/hqps_db/core/utils/hqps_utils.h @@ -0,0 +1,983 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef ENGINES_HQPS_ENGINE_HQPS_UTILS_H_ +#define ENGINES_HQPS_ENGINE_HQPS_UTILS_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "flex/engines/hqps_db/core/params.h" + +#include "flex/storages/rt_mutable_graph/types.h" +#include "flex/utils/property/column.h" + +namespace gs { + +// demangle a c++ variable's class name + +template +std::string demangle(const T& t) { + int status; + char* demangled = abi::__cxa_demangle(typeid(T).name(), 0, 0, &status); + std::string ret(demangled); + free(demangled); + return ret; +} + +template +struct return_type; + +template +struct return_type { + using type = R; +}; + +template +struct return_type { + using type = R; +}; + +template +struct return_type { + using type = R; +}; + +template +struct return_type { + using type = R; +}; + +template +struct return_type { + using type = R; +}; + +template +struct return_type { + using type = R; +}; + +template +struct return_type { + using type = R; +}; + +template +struct return_type { + using type = R; +}; + +template +struct return_type { + using type = R; +}; + +template +struct return_type { + using type = R; +}; + +template +struct return_type { + using type = R; +}; + +template +struct return_type { + using type = R; +}; + +template +struct return_type { + using type = R; +}; + +template +struct return_type { + using type = R; +}; + +template +using return_type_t = typename return_type::type; + +template +struct is_tuple : std::false_type {}; + +template +struct is_tuple> : std::true_type {}; + +template +struct tuple_size {}; + +template +struct tuple_size::value>::type> { + static constexpr size_t value = std::tuple_size::value; +}; + +template +struct tuple_size::value>::type> { + static constexpr size_t value = 1; +}; + +// check whether the group key uses property +template +struct group_key_on_property : public std::true_type {}; + +template +struct group_key_on_property< + AliasTagProp> + : 
public std::false_type {}; + +template +struct group_key_on_property> : public std::true_type {}; + +template +struct group_key_on_property> + : public std::false_type {}; + +// check edge_dir and vopt consistency +inline bool check_edge_dir_consist_vopt(const Direction& dir, VOpt vopt) { + if (dir == Direction::Out) { + return vopt == VOpt::End || vopt == VOpt::Other; + } else if (dir == Direction::In) { + return vopt == VOpt::Start || vopt == VOpt::Other; + } else if (dir == Direction::Both) { + return vopt == VOpt::Other; + } + LOG(FATAL) << "Invalid direction: " << dir; + return false; +} + +// customized operator +// 0. WithIn +const struct WithIn_ { +} WithIn; + +template +struct WithProxy { + WithProxy(const T& t) : t_(t) {} + const T& t_; +}; + +template +WithProxy operator<(const T& lhs, const WithIn_& rhs) { + return WithProxy(lhs); +} + +template < + typename T, size_t N, + typename std::enable_if && (N == 1)>::type* = nullptr> +bool operator>(const WithProxy& lhs, const std::array& rhs) { + return lhs.t_ == rhs[0]; +} + +template && (N > 1)>::type* = nullptr> +bool operator>(const WithProxy& lhs, const std::array& rhs) { + return rhs.end() != std::find(rhs.begin(), rhs.end(), lhs.t_); +} + +template < + typename T, size_t N, + typename std::enable_if && (N == 0)>::type* = nullptr> +bool operator>(const WithProxy& lhs, const std::array& rhs) { + return false; +} + +template < + std::size_t nth, std::size_t... Head, std::size_t... Tail, + typename... Types, + typename std::enable_if<(nth + 1 != sizeof...(Types))>::type* = nullptr> +constexpr auto remove_nth_element_impl(std::index_sequence, + std::index_sequence, + const std::tuple& tup) { + return std::tuple{std::get(tup)..., + // We +1 to refer one element after the one removed + std::get(tup)...}; +} + +template < + std::size_t nth, std::size_t... Head, std::size_t... Tail, + typename... 
Types, + typename std::enable_if<(nth + 1 == sizeof...(Types))>::type* = nullptr> +constexpr auto remove_nth_element_impl(std::index_sequence, + std::index_sequence, + const std::tuple& tup) { + return std::tuple{std::get(tup)...}; +} + +template +constexpr auto remove_nth_element(const std::tuple& tup) { + static_assert(nth < sizeof...(Types)); + return remove_nth_element_impl( + std::make_index_sequence(), + std::make_index_sequence(), tup); +} + +template +constexpr auto remove_ith_jth_element(const std::tuple& tup) { + static_assert(ith < sizeof...(Types)); + static_assert(jth < sizeof...(Types)); + static_assert(ith != jth); + if constexpr (ith < jth) { + return remove_nth_element(remove_nth_element(tup)); + } else { + return remove_nth_element(remove_nth_element(tup)); + } +} + +template +struct remove_ith_type {}; + +template +struct remove_ith_type<0, std::tuple> { + typedef std::tuple type; +}; + +template +struct remove_ith_type> { + typedef decltype(std::tuple_cat( + std::declval>(), + std::declval>::type>())) + type; +}; + +// I != J +template +struct remove_ith_jth_type {}; + +template +struct remove_ith_jth_type, + typename std::enable_if<(I < J)>::type> { + using first_type = typename remove_ith_type>::type; + using type = typename remove_ith_type::type; +}; + +template +struct remove_ith_jth_type, + typename std::enable_if<(I > J)>::type> { + using type = typename remove_ith_jth_type>::type; +}; + +template +struct Edge; + +template < + size_t Is, typename... 
PROP_T, + typename std::enable_if::type* = nullptr> void + props_to_string_array( + std::tuple& props, + std::array>>& + res) { + res[Is] = std::get(props).property_name; + props_to_string_array(props, res); +} + +template ::type* = nullptr> +void props_to_string_array( + std::tuple& props, + std::array>>& res) { + res[Is] = std::get(props).property_name; +} +template +auto propsToStringArray(std::tuple& props) { + std::array res; + props_to_string_array<0>(props, res); + return res; +} + +template +struct tuple_element; + +// recursive case +template +struct tuple_element> + : gs::tuple_element> {}; + +// base case +template +struct tuple_element<0, std::tuple> { + using type = Head; +}; + +template +struct tuple_element<-1, std::tuple> + : gs::tuple_element> {}; +template +struct tuple_element<-1, std::tuple> { + using type = Head; +}; + +template +auto unwrap_future_tuple(std::tuple&& tuple) { + return unwrap_future_tuple(std::move(tuple), + std::make_index_sequence()); +} +template +auto unwrap_future_tuple(std::tuple&& tuple, std::index_sequence) { + return std::make_tuple(std::move(std::get(tuple).get0())...); +} + +// Merge two equally sized, non-empty offset arrays: the first entry is +// a[0] + b[0], and each later entry adds the increments of a and b at that +// index to the previous result. +inline std::vector merge_union_offset(std::vector& a, + std::vector& b) { + CHECK(a.size() == b.size() && a.size() > 0); + std::vector res; + // resize (not reserve): the elements are assigned by index below, so they + // must exist and the returned vector must hold a.size() entries. + res.resize(a.size()); + res[0] = a[0] + b[0]; + for (size_t i = 1; i < a.size(); ++i) { + res[i] = res[i - 1] + a[i] - a[i - 1] + b[i] - b[i - 1]; + } + return res; +} + +inline auto make_offset_vector(size_t m, size_t n) { + std::vector> offsets; + //[0,m) + for (auto i = 0; i < m; ++i) { + // [0, n] + std::vector cur(n + 1, 0); + for (auto j = 0; j <= n; ++j) { + cur[j] = j; + } + offsets.emplace_back(std::move(cur)); + } + return offsets; +} + +// first n ele in tuple type + +template +struct first_n_impl; + +template +struct first_n_impl, Out...> { + typedef + typename first_n_impl, Out..., First>::type + type; // move first input to output. +}; + +// need First, Other...
here to resolve ambiguity on n = 0 +template +struct first_n_impl<0, std::tuple, Out...> { + typedef typename std::tuple type; // stop if no more elements needed +}; + +// explicit rule for empty tuple because of First, Other... in the previous +// rule. +// actually it's for n = size of tuple +template +struct first_n_impl<0, std::tuple<>, Out...> { + typedef typename std::tuple type; +}; + +// template +// using first_n = first_n_impl>; + +template +struct first_n; + +template +struct first_n> { + using type = typename first_n_impl>::type; +}; + +template +constexpr auto tuple_slice_impl(T&& t, std::index_sequence) { + return std::forward_as_tuple(std::get(std::forward(t))...); +} + +template +constexpr auto tuple_slice(T&& t) { + static_assert(r >= l, "invalid slice"); + static_assert(std::tuple_size>::value >= r, + "slice index out of bounds"); + return tuple_slice_impl(std::forward(t), + std::make_index_sequence{}); +} + +template = 0)>::type* = nullptr> +inline auto get_from_tuple(std::tuple& tuple) { + return std::get(tuple); +} + +template ::type* = nullptr> +inline auto get_from_tuple(std::tuple& tuple) { + static constexpr size_t num = sizeof...(T); + return std::get(tuple); +} +template = 0)>::type* = nullptr> +inline const auto& get_from_tuple(const std::tuple& tuple) { + return std::get(tuple); +} + +template ::type* = nullptr> +inline const auto& get_from_tuple(const std::tuple& tuple) { + static constexpr size_t num = sizeof...(T); + return std::get(tuple); +} + +// vertex/edge property associate with type +template ::type> +auto transform_array_impl(std::array&& array, FUNC_T&& func, + std::index_sequence) { + return std::array{std::move(func(std::move(array[Is])))...}; +} + +template +auto transform_array(std::array&& array, FUNC_T&& func) { + return transform_array_impl(std::move(array), std::move(func), + std::make_index_sequence()); +} + +template ::type> +auto transform_tuple_impl(const std::tuple&& tuple, FUNC_T&& func, + std::index_sequence) 
{ + return std::make_tuple( + std::move(func(Is, std::move(std::get(tuple))))...); +} + +template +auto transform_tuple(const std::tuple&& tuple, FUNC_T&& func) { + static constexpr size_t N = sizeof...(T); + return transform_tuple_impl(std::move(tuple), std::move(func), + std::make_index_sequence()); +} + +template +bool apply_on_tuple(const FUNC& func, const std::tuple& tuple) { + return apply_on_tuple_impl(func, tuple, + std::make_index_sequence()); +} + +template +bool apply_on_tuple_impl(const FUNC& func, const std::tuple& tuple, + std::index_sequence) { + return func(std::get(tuple)...); +} + +template ::type> +auto apply_array_impl(const std::array& array, FUNC_T&& func, + std::index_sequence) { + return std::array{std::move(func(array[Is]))...}; +} + +template +auto apply_array(const std::array& array, FUNC_T&& func) { + return apply_array_impl(array, std::move(func), + std::make_index_sequence()); +} + +template +void apply_tuple_impl(const std::tuple& tuple, const FUNC_T& func, + std::index_sequence, OTHER_ARGS&... other_args) { + ((func(std::get(tuple), std::forward(other_args)...)), ...); +} + +template +auto apply_tuple(const std::tuple& tuple, const FUNC_T& func, + OTHER_ARGS&... other_args) { + static constexpr size_t N = sizeof...(T); + return apply_tuple_impl(tuple, func, std::make_index_sequence(), + std::forward(other_args)...); +} + +template +constexpr auto make_array(Args&&... 
args) { + if constexpr (std::is_same::value) { + return std::array...>, + sizeof...(Args)>{{std::forward(args)...}}; + } else { + return std::array{{std::forward(args)...}}; + } +} + +template +using DataTupleT = typename T::data_tuple_t; + +// T must be tuple +template +using tuple_cat_t = decltype(std::tuple_cat(std::declval()...)); + +template +struct is_shared_ptr : std::false_type {}; + +template +struct is_shared_ptr> : std::true_type {}; + +template +struct is_vector : std::false_type {}; + +template +struct is_vector> : std::true_type {}; + +template +struct is_pair : std::false_type {}; + +template +struct is_pair> : std::true_type {}; + +// template +// struct CanToString : std::false_type {}; + +// template +// struct CanToString> : std::true_type {}; + +template +constexpr std::index_sequence add(std::index_sequence) { + return {}; +} + +template +struct NumberLarger { + static const bool value = (M >= N); +}; + +template +using make_index_range = + decltype(add(std::make_index_sequence())); + +template +struct TupleCatT { + using tuple_cat_t = + decltype(std::tuple_cat(std::declval(), std::declval())); +}; + +template +struct TupleCatT, T2> { + using tuple_cat_t = decltype(std::tuple_cat(std::declval())); +}; + +template +struct TupleCatT> { + using tuple_cat_t = decltype(std::tuple_cat(std::declval())); +}; + +template +auto make_getter_tuple(label_t label, std::tuple&& tuple, + std::index_sequence) { + return std::make_tuple(std::get(tuple).CreateGetter(label)...); +} + +template +using ValueTypeOf = typename T::value_type; + +template +using SharedPtrTypeOf = std::shared_ptr>; + +template +using GetterTypeOf = typename T::GetterType; + +template +using ElementTypeOf = typename GETTER_T::element_type; + +template +using DataOfColumnPtr = typename T::element_type::value_type; + +template +using IterOf = typename T::iterator; + +template +using PQ_T = std::priority_queue, CMP>; + +template +struct SingleColumn { + COL col_; +}; + +// Definition 
+template +struct ColumnAccessorImpl; + +// Empty Accessor. +template +struct ColumnAccessorImpl {}; + +// Recurvise +template +struct ColumnAccessorImpl + : public SingleColumn, + public ColumnAccessorImpl {}; + +// multiple single columns. + +// Obtain a reference +template +FIRST& Get(ColumnAccessorImpl& tuple) { + // Fully qualified name for the member, to find the right one + // (they are all called `value`). + return tuple.SingleColumn::col_; +} + +template +using ColumnAccessor = ColumnAccessorImpl<0, COLS...>; + +// Make COlumnAccessor like make tuple + +enum class OperatorType { + kAuxilia = 0, + kEdgeExpand = 1, + kGetV = 2, + kProject = 3, + kSink = 4, +}; + +enum class Cmp { + kEQ = 0, + kLT = 1, + kGT = 2, + kLE = 3, + kGE = 4, + kINSIDE = 5, + kOUTSIDE = 6, + kWITHIN = 7, + kWITHOUT = 8, +}; + +enum class SourceType { kVertex = 0, kEdge = 1 }; +enum class EntryType { + kVertexEntry = 0, + kEdgeEntry = 1, + kObjectEntry = 2, + kPathEntry = 3, + kProjectedVertexEntry = 4, + kProjectedEdgeEntry = 5, +}; + +template +static typename PRIORITY_QUEUE_T::container_type priority_queue_to_vec( + PRIORITY_QUEUE_T& pq, bool reversed = false) { + auto pq_size = pq.size(); + typename PRIORITY_QUEUE_T::container_type res; + res.reserve(pq_size); + for (int i = 0; i < pq_size; ++i) { + res.emplace_back(pq.top()); + pq.pop(); + } + return res; +} + +template +struct to_string_impl { + static std::string to_string(const T& t) { return t.to_string(); } +}; + +template +struct to_string_impl> { + static inline std::string to_string(const std::vector& vec) { + std::ostringstream ss; + // ss << "Vec["; + if (vec.size() > 0) { + for (int i = 0; i < vec.size() - 1; ++i) { + ss << to_string_impl::to_string(vec[i]) << ","; + } + ss << to_string_impl::to_string(vec[vec.size() - 1]); + } + // ss << "]"; + return ss.str(); + } +}; + +template +struct to_string_impl> { + static inline std::string to_string(const std::array& empty) { + std::stringstream ss; + for (auto i : empty) { 
+ ss << i << ","; + } + return ss.str(); + } +}; + +template <> +struct to_string_impl { + static inline std::string to_string(const AppendOpt& empty) { + if (empty == AppendOpt::Persist) { + return "Persist"; + } else if (empty == AppendOpt::Temp) { + return "Temp"; + } else { + throw std::runtime_error("Unknown AppendOpt"); + } + } +}; + +template <> +struct to_string_impl { + static inline std::string to_string(const Dist& empty) { + return std::to_string(empty.dist); + } +}; + +template <> +struct to_string_impl { + static inline std::string to_string(const Date& empty) { + return std::to_string(empty.milli_second); + } +}; + +template <> +struct to_string_impl { + static inline std::string to_string(const std::string_view& empty) { + return std::string(empty); + } +}; + +template <> +struct to_string_impl { + static inline std::string to_string(const grape::EmptyType& empty) { + return ""; + } +}; + +template <> +struct to_string_impl { + static inline std::string to_string(const uint8_t& empty) { + return std::to_string((int32_t) empty); + } +}; + +template <> +struct to_string_impl { + static inline std::string to_string(const int64_t& empty) { + return std::to_string(empty); + } +}; + +template <> +struct to_string_impl { + static inline std::string to_string(const bool& empty) { + return std::to_string(empty); + } +}; + +template <> +struct to_string_impl { + static inline std::string to_string(const unsigned long& empty) { + return std::to_string(empty); + } +}; + +template <> +struct to_string_impl { + static inline std::string to_string(const int32_t& empty) { + return std::to_string(empty); + } +}; + +template <> +struct to_string_impl { + static inline std::string to_string(const uint32_t& empty) { + return std::to_string(empty); + } +}; + +template <> +struct to_string_impl { + static inline std::string to_string(const double& empty) { + return std::to_string(empty); + } +}; + +template <> +struct to_string_impl { + static inline std::string 
to_string(const std::string& empty) { + return empty; + } +}; + +template <> +struct to_string_impl { + static inline std::string to_string(const Direction& opt) { + if (opt == Direction::In) { + return "In"; + } else if (opt == Direction::Out) { + return "Out"; + } else { + return "Both"; + } + } +}; + +template <> +struct to_string_impl { + static inline std::string to_string(const ResultOpt& result_opt) { + if (result_opt == ResultOpt::AllV) { + return "AllV"; + } else { + return "EndV"; + } + } +}; + +template <> +struct to_string_impl { + static inline std::string to_string(const PathOpt& result_opt) { + if (result_opt == PathOpt::Arbitrary) { + return "Arbitrary"; + } else { + return "Simple"; + } + } +}; + +template <> +struct to_string_impl { + static inline std::string to_string(const JoinKind& result_opt) { + if (result_opt == JoinKind::AntiJoin) { + return "AntiJoin"; + } else if (result_opt == JoinKind::Semi) { + return "Semi"; + } else { + return "InnerJoin"; + } + } +}; + +template <> +struct to_string_impl { + static inline std::string to_string(const gs::VOpt& opt) { + switch (opt) { + case gs::VOpt::Start: + return "Start"; + case gs::VOpt::End: + return "End"; + case gs::VOpt::Other: + return "Other"; + case gs::VOpt::Both_V: + return "Both"; + case gs::VOpt::Itself: + return "Itself"; + } + LOG(ERROR) << "Should not reach here"; + return ""; + } +}; + +template +struct to_string_impl> { + static inline std::string to_string(const std::tuple& t) { + std::string result; + result += "tuple<"; + std::apply( + [&result](const auto&... 
v) { + ((result += + (to_string_impl>>::to_string(v)) + + ","), + ...); + }, + t); + result += ">"; + return result; + } +}; + +template +struct to_string_impl> { + static inline std::string to_string(const std::pair& t) { + std::stringstream ss; + ss << "pair<" << to_string_impl::to_string(t.first) << "," + << to_string_impl::to_string(t.second) << ">"; + return ss.str(); + } +}; + +template +std::string to_string(const T& t) { + return to_string_impl::to_string(t); +} + +template +struct Edge { + VID_T src, dst; + const std::tuple& edata; + Edge(VID_T s, VID_T d, const std::tuple& data) + : src(s), dst(d), edata(data) {} + std::string to_string() const { + return std::to_string(src) + "->" + std::to_string(dst) + "(" + + gs::to_string(edata) + ")"; + } +}; + +template +struct Edge { + VID_T src, dst; + grape::EmptyType edata; + Edge(vid_t s, vid_t d) : src(s), dst(d) {} + std::string to_string() const { + return std::to_string(src) + "->" + std::to_string(dst) + "(" + ")"; + } +}; + +struct QPSError { + std::string message; + explicit QPSError(std::string msg) : message(std::move(msg)) {} + + std::string GetMessage() { return message; } +}; + +class QPSException : public std::exception { + public: + explicit QPSException(std::string&& error_msg) + : std::exception(), _err_msg(error_msg) {} + ~QPSException() override = default; + + const char* what() const noexcept override { return _err_msg.c_str(); } + + private: + std::string _err_msg; +}; + +template +struct function_traits : public function_traits {}; +// For generic types, directly use the result of the signature of its +// 'operator()' + +template +struct function_traits +// we specialize for pointers to member function +{ + enum { arity = sizeof...(Args) }; + // arity is the number of arguments. 
+ + typedef ReturnType result_type; + + template + struct arg { + typedef typename std::tuple_element>::type type; + // the i-th argument is equivalent to the i-th tuple element of a tuple + // composed of those arguments. + }; +}; + +} // namespace gs + +#endif // ENGINES_HQPS_ENGINE_HQPS_UTILS_H_ diff --git a/flex/engines/hqps_db/core/utils/keyed.h b/flex/engines/hqps_db/core/utils/keyed.h new file mode 100644 index 000000000000..5e1a041d1cef --- /dev/null +++ b/flex/engines/hqps_db/core/utils/keyed.h @@ -0,0 +1,438 @@ + +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef ENGINES_HQPS_ENGINE_KEYED_UTILS_H_ +#define ENGINES_HQPS_ENGINE_KEYED_UTILS_H_ + +#include "flex/engines/hqps_db/core/utils/props.h" +#include "flex/engines/hqps_db/structures/collection.h" +#include "flex/engines/hqps_db/structures/multi_edge_set/adj_edge_set.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/general_vertex_set.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/keyed_row_vertex_set.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/two_label_vertex_set.h" + +namespace gs { + +template +struct AggFirst; + +template +struct AggFirst> { + using result_t = Collection; +}; + +template +struct AggFirst> { + using result_t = RowVertexSetImpl; +}; + +template +struct AggFirst> { + using result_t = TwoLabelVertexSetImpl; +}; + +/// @brief Helper to get keyed set type +/// @tparam T +/// @tparam ValueT Keyed prop type +template +struct KeyedT; + +// group by the vertex set itself +template +struct KeyedT, + PropertySelector> { + using keyed_set_t = KeyedRowVertexSet; + // // The builder type. + using builder_t = KeyedRowVertexSetBuilder; +}; + +// group by the vertex set itself +template +struct KeyedT, + PropertySelector> { + using keyed_set_t = KeyedRowVertexSet; + // // The builder type. + using builder_t = + KeyedRowVertexSetBuilder; +}; + +// group by the vertex set' property +template +struct KeyedT, PropertySelector> { + using keyed_set_t = Collection; + // // The builder type. + using builder_t = KeyedCollectionBuilder; +}; + +// key on a keyed row vertex get us a unkeyed set. +template +struct KeyedT, + PropertySelector> { + using keyed_set_t = KeyedRowVertexSetImpl; + // // The builder type. + using builder_t = KeyedRowVertexSetBuilder; +}; + +// group by vertex set' id, for generate vertex set. +template +struct KeyedT, + PropertySelector> { + using keyed_set_t = KeyedRowVertexSet; + // // The builder type. 
+ using builder_t = + KeyedRowVertexSetBuilder; +}; + +template +struct KeyedT, PropertySelector> { + using keyed_set_t = Collection; + // // The builder type. + using builder_t = KeyedCollectionBuilder; +}; + +// when keyed with aggregation function, (which we currently only support +// collection) + +/// @brief Helper to get keyed set type with aggregation fnc +/// @tparam T +/// @tparam ValueT Keyed prop type +template +struct KeyedAggT; + +template +struct KeyedAggT, AggFunc::COUNT, + std::tuple, std::integer_sequence> { + using agg_res_t = Collection; + // build a counter array. + using aggregate_res_builder_t = CountBuilder; + + static aggregate_res_builder_t create_agg_builder( + const RowVertexSet& set, const GI& graph, + std::tuple>& selector) { + return CountBuilder(); + } +}; + +// aggregate count_dist +template +struct KeyedAggT, AggFunc::COUNT_DISTINCT, + std::tuple, + std::integer_sequence> { + using agg_res_t = Collection; + // build a counter array. + using aggregate_res_builder_t = DistinctCountBuilder<1, tag_id, VID_T>; + + static aggregate_res_builder_t create_agg_builder( + const RowVertexSet& set, const GI& graph, + std::tuple>& selectors) { + return aggregate_res_builder_t(set.GetVertices()); + } +}; + +template +struct KeyedAggT, AggFunc::COUNT, + std::tuple, std::integer_sequence> { + using agg_res_t = Collection; + // build a counter array. + using aggregate_res_builder_t = CountBuilder; + + static aggregate_res_builder_t create_agg_builder( + const TwoLabelVertexSet& set, const GI& graph, + std::tuple>& selectors) { + return CountBuilder(); + } +}; + +// count distinct for two_label set. +template +struct KeyedAggT, + AggFunc::COUNT_DISTINCT, std::tuple, + std::integer_sequence> { + using agg_res_t = Collection; + // build a counter array. 
+ using aggregate_res_builder_t = DistinctCountBuilder<2, tag_id, VID_T>; + + static aggregate_res_builder_t create_agg_builder( + const TwoLabelVertexSet& set, const GI& graph, + std::tuple>& selectors) { + return aggregate_res_builder_t(set.GetBitset(), set.GetVertices()); + } +}; + +// general vertex set to_count +template +struct KeyedAggT, AggFunc::COUNT, + std::tuple, std::integer_sequence> { + using agg_res_t = Collection; + // build a counter array. + using aggregate_res_builder_t = CountBuilder; + + static aggregate_res_builder_t create_agg_builder( + const GeneralVertexSet& set, const GI& graph, + std::tuple>& selectors) { + return CountBuilder(); + } +}; + +template +struct KeyedAggT, AggFunc::SUM, std::tuple, + std::integer_sequence> { + using agg_res_t = Collection; + // build a counter array. + using aggregate_res_builder_t = SumBuilder; + + static aggregate_res_builder_t create_agg_builder( + const Collection& set, const GI& graph, + std::tuple>& selectors) { + return aggregate_res_builder_t(); + } +}; + +template +struct KeyedAggT, AggFunc::TO_SET, + std::tuple, std::integer_sequence> { + using agg_res_t = CollectionOfVec; + using aggregate_res_builder_t = + CollectionOfSetBuilder, + tag_id>; + + static aggregate_res_builder_t create_agg_builder( + const RowVertexSet& set, const GI& graph, + std::tuple>& selectors) { + return CollectionOfSetBuilder, + tag_id>( + set, graph, std::array{std::get<0>(selectors).prop_name_}); + } +}; + +// to_vector +template +struct KeyedAggT, AggFunc::TO_LIST, std::tuple, + std::integer_sequence> { + using agg_res_t = CollectionOfVec; + using aggregate_res_builder_t = + CollectionOfVecBuilder, tag_id>; + + static aggregate_res_builder_t create_agg_builder( + const Collection& set, const GI& graph, + std::tuple>& selectors) { + return aggregate_res_builder_t( + graph, set, std::array{std::get<0>(selectors).prop_name_}); + } +}; + +template +struct KeyedAggT, AggFunc::TO_LIST, + std::tuple, std::integer_sequence> { + 
static_assert(!std::is_same_v, + "Aggregate to_list for vertex set it self is not allowed"); + using agg_res_t = CollectionOfVec; + using aggregate_res_builder_t = + CollectionOfVecBuilder, + tag_id>; + + static aggregate_res_builder_t create_agg_builder( + const RowVertexSet& set, const GI& graph, + std::tuple>& selectors) { + return aggregate_res_builder_t(set, graph, + {std::get<0>(selectors).prop_name_}); + } +}; + +// get min +template +struct KeyedAggT, AggFunc::MIN, std::tuple, + std::integer_sequence> { + using agg_res_t = Collection; + using aggregate_res_builder_t = MinBuilder; + + static aggregate_res_builder_t create_agg_builder( + const Collection& set, const GI& graph, + std::tuple>& selectors) { + return aggregate_res_builder_t(set, graph, + {std::get<0>(selectors).prop_name_}); + } +}; + +// get max +template +struct KeyedAggT, AggFunc::MAX, std::tuple, + std::integer_sequence> { + using agg_res_t = Collection; + using aggregate_res_builder_t = MaxBuilder; + + static aggregate_res_builder_t create_agg_builder( + const Collection& set, const GI& graph, + std::tuple>& selectors) { + return aggregate_res_builder_t( + set, graph, std::array{std::get<0>(selectors).prop_name_}); + } +}; + +template +struct KeyedAggT, AggFunc::FIRST, std::tuple, + std::integer_sequence> { + using agg_res_t = Collection; + using aggregate_res_builder_t = + FirstBuilder, PropT, tag_id>; + + static aggregate_res_builder_t create_agg_builder( + const Collection& set, const GI& graph, + std::tuple>& selectors) { + return aggregate_res_builder_t( + set, graph, std::array{std::get<0>(selectors).prop_name_}); + } +}; + +// Aggregate first for twolabel vertex set +template +struct KeyedAggT, AggFunc::FIRST, + std::tuple, + std::integer_sequence> { + using agg_res_t = TwoLabelVertexSetImpl; + using old_set_t = TwoLabelVertexSetImpl; + using aggregate_res_builder_t = + FirstBuilder, + grape::EmptyType, tag_id>; + + static aggregate_res_builder_t create_agg_builder( + const old_set_t& 
set, const GI& graph, + std::tuple>& selectors) { + auto labels = set.GetLabels(); + return aggregate_res_builder_t( + set, graph, std::array{std::get<0>(selectors).prop_name_}); + } +}; + +template +static inline auto insert_into_builder_v2_impl( + KeyedRowVertexSetBuilderImpl& builder, + const ELE& ele, const DATA& data) { + return builder.insert(ele, data); +} + +template +static inline auto insert_into_builder_v2_impl( + KeyedRowVertexSetBuilderImpl& + builder, + const ELE& ele, const DATA& data) { + return builder.Insert(ele); +} + +// insert_into_bulder_v2_impl +template < + typename BuilderT, typename ELE, typename DATA, + typename std::enable_if< + (BuilderT::is_row_vertex_set_builder && + std::is_same_v>)>::type* = nullptr> +static inline auto insert_into_builder_v2_impl(BuilderT& builder, + const ELE& ele, + const DATA& data) { + return builder.Insert(ele); +} + +// insert_into_bulder_v2_impl +template < + typename BuilderT, typename ELE, typename DATA, + typename std::enable_if< + (BuilderT::is_row_vertex_set_builder && + !std::is_same_v>)>::type* = nullptr> +static inline auto insert_into_builder_v2_impl(BuilderT& builder, + const ELE& ele, + const DATA& data) { + return builder.Insert(ele, data); +} + +template +static inline auto insert_into_builder_v2_impl( + FlatEdgeSetBuilder& builder, const ELE& ele, + const DATA& data) { + return builder.Insert(ele); +} + +template ::type* = nullptr> +static inline auto insert_into_builder_v2_impl(BuilderT& builder, + const ELE& ele, + const DATA& data) { + return builder.Insert(ele); +} + +template < + typename BuilderT, typename ELE, typename DATA, + typename std::enable_if< + (BuilderT::is_two_label_set_builder && + std::is_same_v>)>::type* = nullptr> +static inline auto insert_into_builder_v2_impl(BuilderT& builder, + const ELE& ele, + const DATA& data) { + return builder.Insert(ele); +} + +template < + typename BuilderT, typename ELE, typename DATA, + typename std::enable_if< + 
(BuilderT::is_two_label_set_builder && + !std::is_same_v>)>::type* = nullptr> +static inline auto insert_into_builder_v2_impl(BuilderT& builder, + const ELE& ele, + const DATA& data) { + return builder.Insert(ele, data); +} + +// insert for collectionBuilder +template < + typename BuilderT, typename ELE, typename DATA, + typename std::enable_if<(BuilderT::is_collection_builder)>::type* = nullptr> +static inline auto insert_into_builder_v2_impl(BuilderT& builder, + const ELE& ele, + const DATA& data) { + return builder.Insert(ele); +} + +// insert for adjEdgeSetBuilder +template +static inline auto insert_into_builder_v2_impl( + AdjEdgeSetBuilder& builder, const ELE& ele, + const DATA& data) { + return builder.Insert(ele); +} + +template +static inline auto insert_into_builder_v2_impl( + AdjEdgeSetBuilder& builder, + const ELE& ele, const DATA& data) { + return builder.Insert(ele); +} + +} // namespace gs + +#endif // ENGINES_HQPS_ENGINE_KEYED_UTILS_H_ diff --git a/flex/engines/hqps_db/core/utils/props.h b/flex/engines/hqps_db/core/utils/props.h new file mode 100644 index 000000000000..0154d9f3a600 --- /dev/null +++ b/flex/engines/hqps_db/core/utils/props.h @@ -0,0 +1,738 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#ifndef ENGINES_HQPS_ENGINE_OPERATOR_PROP_UTILS_H_
+#define ENGINES_HQPS_ENGINE_OPERATOR_PROP_UTILS_H_
+
+#include  // NOTE(review): the header names of these four #include lines are missing (apparently lost in extraction) - restore from upstream
+#include
+#include
+#include
+
+namespace gs {
+
+// forward declare context
+template  // NOTE(review): `template` parameter lists and most `<...>` argument lists are absent throughout this text - restore from upstream before compiling
+class Context;
+
+// forward declare flat_edge_set
+template
+class FlatEdgeSet;
+
+// forward declare general_edge_set
+template
+class GeneralEdgeSet;
+
+// forward declare keyed_row_vertex_set
+template
+class KeyedRowVertexSetImpl;
+
+// forward declare row_vertex_set
+template
+class RowVertexSetImpl;
+
+// forward declare two_label_vertex_set
+template
+class TwoLabelVertexSetImpl;
+
+template
+struct MultiPropGetterT;
+
+template
+struct MultiPropGetterT> {};
+
+template
+class Collection;
+// Copies the `name` member of every entry of named_property into prop_names, then asks the graph for a multi-prop getter for `label`.
+template
+static auto get_prop_getter_from_named_property(
+    const GRAPH_INTERFACE& graph, const LabelT& label,
+    const std::tuple...>& named_property) {
+  std::array prop_names;
+  int i = 0;  // next free slot in prop_names, advanced by the fold expression below
+  std::apply(
+      [&prop_names, &i](auto&... named_prop) {
+        ((prop_names[i++] = named_prop.name), ...);
+      },
+      named_property);
+  return graph.template GetMultiPropGetter(label, prop_names);
+}
+
+//// Get one property getter for one label; same as above but reads `prop_name_` from each selector instead of `name`.
+template
+static auto get_prop_getter_from_selectors(
+    const GRAPH_INTERFACE& graph, const LabelT& label,
+    const std::tuple...>& selectors) {
+  std::array prop_names;
+  int i = 0;  // next free slot in prop_names
+  std::apply(
+      [&prop_names, &i](auto&... named_prop) {
+        ((prop_names[i++] = named_prop.prop_name_), ...);
+      },
+      selectors);
+  return graph.template GetMultiPropGetter(label, prop_names);
+}
+/// get single property getter for one label, using selector.prop_name_ as the property name
+template
+static auto get_single_prop_getter_from_selector(
+    const GRAPH_INTERFACE& graph, const LabelT& label,
+    const PropertySelector& selector) {
+  auto prop_name = selector.prop_name_;
+  return graph.template GetSinglePropGetter(label, prop_name);
+}
+
+// get prop getters from multiple named properties: wraps the single label in a std::array and forwards.
+template
+static auto get_prop_getters_from_named_property(
+    const GRAPH_INTERFACE& graph, const LabelT& label,
+    std::tuple named_property) {
+  std::array labels = {label};
+  return get_prop_getters_from_named_property(graph, labels, named_property);  // NOTE(review): three arguments are passed, but the array-taking overload below also expects a std::index_sequence - this call appears to resolve back to this same template; verify against upstream
+}
+// Array-of-labels overload: builds one getter per index Is via get_prop_getter_from_named_property(graph, labels[Is], named_property).
+template
+static auto get_prop_getters_from_named_property(
+    const GRAPH_INTERFACE& graph, const std::array& labels,
+    std::tuple named_property, std::index_sequence) {
+  using prop_getter_t = typename GRAPH_INTERFACE::template multi_prop_getter_t<
+      typename NamedPropT::prop_t...>;
+  std::array prop_getter_array{
+      get_prop_getter_from_named_property(graph, labels[Is],
+                                          named_property)...};
+  return prop_getter_array;
+}
+
+// Get prop getters from Selector.
+template  // NOTE(review): `template` parameter lists and most `<...>` argument lists are absent throughout this text (apparently lost in extraction) - restore from upstream before compiling
+static auto get_prop_getters_from_selectors_impl(
+    const GRAPH_INTERFACE& graph, const std::array& labels,
+    std::tuple selectors, std::index_sequence) {
+  using prop_getter_t = typename GRAPH_INTERFACE::template multi_prop_getter_t<
+      typename SELECTOR::prop_t...>;
+  std::array prop_getter_array{  // one getter per index Is, each built from labels[Is] and the same selector tuple
+      get_prop_getter_from_selectors(graph, labels[Is], selectors)...};
+  return prop_getter_array;
+}
+// Entry point: forwards to get_prop_getters_from_selectors_impl, supplying a std::make_index_sequence.
+template
+static auto get_prop_getters_from_selectors(
+    const GRAPH_INTERFACE& graph, const std::array& labels,
+    std::tuple named_property) {
+  return get_prop_getters_from_selectors_impl(
+      graph, labels, named_property, std::make_index_sequence{});
+}
+
+///////////////////////// prop getter for vertex set
+//////////////////////////////
+// Inner-id getter: get_view returns the second field (std::get<1>) of the element tuple; `vids` is stored by reference, not copied.
+template
+class InnerIdGetter {
+ public:
+  InnerIdGetter(const std::vector& vids) : vids_(vids) {}
+
+  VID_T get_view(const std::tuple& ele) const {
+    return std::get<1>(ele);
+  }
+
+  template
+  inline auto get_from_all_element(const ALL_ELE_T& all_ele) const {
+    return gs::get_from_tuple(all_ele);
+  }
+
+ private:
+  const std::vector& vids_;  // not read by any member shown in this class
+};
+// Inner-id getter that also returns the first data column; both vectors are stored by reference, not copied.
+template
+class InnerIdDataGetter {
+ public:
+  InnerIdDataGetter(const std::vector& vids,
+                    const std::vector>& data)
+      : vids_(vids), data_(data) {}
+  // `ele` is read as (index, vid): CHECKs that vids_[index] == vid, then returns (vid, std::get<0>(data_[index])).
+  std::tuple> get_view(
+      const std::tuple& ele) const {
+    auto vid = std::get<1>(ele);
+    auto idx = std::get<0>(ele);
+    CHECK(vid == vids_[idx]);
+    return std::make_tuple(vid, std::get<0>(data_[idx]));
+  }
+
+  template
+  VID_T get_from_all_element(const TUPLE_T& tuple) const {
+    auto ind_ele = gs::get_from_tuple(tuple);
+    return ind_ele;
+  }
+
+ private:
+  const std::vector& vids_;
+  const std::vector>& data_;
+};
+// Stateless inner-id getter for edge sets: only projects this set's entry out of the all-element tuple.
+template
+class EdgeSetInnerIdGetter {
+ public:
+  EdgeSetInnerIdGetter() {}
+
+  template
+  inline auto get_from_all_element(const ALL_ELE_T& all_ele) const {
+    return gs::get_from_tuple(all_ele);
+  }
+};
+// Getter for Collection elements: the value is the second field (std::get<1>) of the element tuple.
+template
+class CollectionPropGetter {
+ public:
+  CollectionPropGetter() {}
+
+  inline auto get_view(const std::tuple& ele) const {
+    return std::get<1>(ele);
+  }
+  // Same as above, but reads the element previously cached by set_ind_ele.
+  inline auto get_view() const { return std::get<1>(ind_ele_); }
+
+  template
+  inline auto get_from_all_element(const ALL_ELE_T& all_ele) const {
+    return gs::get_from_tuple(all_ele);
+  }
+
+  template
+  inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) {
+    ind_ele_ = ind_ele;  // assigns the argument as-is; the edge/vertex getters below go through gs::get_from_tuple instead
+  }
+
+ private:
+  std::tuple ind_ele_;
+};
+
+// specialize for collection with only one column
+template
+class CollectionPropGetter> {
+ public:
+  CollectionPropGetter() {}
+
+  inline T get_view(const std::tuple& ele) const {
+    return std::get<1>(ele);
+  }
+
+  inline T get_view() const { return std::get<1>(ind_ele_); }
+
+  template
+  inline auto get_from_all_element(const ALL_ELE_T& all_ele) const {
+    return gs::get_from_tuple(all_ele);
+  }
+
+  template
+  inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) {
+    ind_ele_ = ind_ele;
+  }
+
+ private:
+  std::tuple ind_ele_;
+};
+// Getter for FlatEdgeSet elements: returns the first property, std::get<0>, of field 2 of the edge element.
+template
+class FlatEdgeSetPropGetter {
+ public:
+  FlatEdgeSetPropGetter() {}
+
+  inline auto get_view(const index_ele_tuple_t& ind_ele) const {
+    return std::get<0>(std::get<2>(std::get<1>(ind_ele)));
+  }
+
+  inline auto get_view()
+      const {  // const std::tuple& ind_ele
+    return std::get<0>(std::get<2>(std::get<1>(ind_ele_)));
+  }
+
+  template
+  inline auto get_from_all_element(const ALL_ELE_T& all_ele) const {
+    auto& my_ele = gs::get_from_tuple(all_ele);
+    return std::get<0>(std::get<2>(my_ele));
+  }
+
+  template
+  inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) {
+    ind_ele_ = gs::get_from_tuple(ind_ele);
+  }
+
+ private:
+  index_ele_tuple_t ind_ele_;
+};
+// Getter for GeneralEdgeSet elements: returns the first entry of the edge's properties() tuple.
+template
+class GeneralEdgeSetPropGetter {
+ public:
+  GeneralEdgeSetPropGetter() {}
+
+  inline auto get_view(const index_ele_tuple_t& ind_ele) const {
+    return std::get<0>(std::get<2>(ind_ele).properties());
+  }
+
+  inline auto get_view()
+      const {  // const std::tuple& ind_ele
+    return std::get<0>(std::get<2>(ind_ele_).properties());
+  }
+  // Note the element (without index) keeps the edge at field 1, whereas the indexed tuple above keeps it at field 2.
+  template
+  inline auto get_from_all_element(const ALL_ELE_T& all_ele) const {
+    auto& my_ele = gs::get_from_tuple(all_ele);
+    return std::get<0>(std::get<1>(my_ele).properties());
+  }
+
+  template
+  inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) {
+    ind_ele_ = gs::get_from_tuple(ind_ele);
+  }
+
+ private:
+  index_ele_tuple_t ind_ele_;
+};
+// Getter for two-label vertex sets: holds an array of per-label getters and picks one by an index stored in the element.
+template
+class TwoLabelVertexSetImplPropGetter {
+ public:
+  TwoLabelVertexSetImplPropGetter(std::array&& getters)
+      : getters_(std::move(getters)) {}
+  // Indexed element: field 1 selects the getter, field 2 is passed to that getter's get_view.
+  inline auto get_view(const IND_ELE_T& ind_ele) const {
+    return getters_[std::get<1>(ind_ele)].get_view(std::get<2>(ind_ele));
+  }
+
+  inline auto get_view()
+      const {  // const std::tuple& ind_ele
+    return getters_[std::get<1>(ind_ele_)].get_view(std::get<2>(ind_ele_));
+  }
+  // Plain element: field 0 selects the getter, field 1 is passed to that getter's get_view.
+  template
+  inline auto get_from_all_element(const ALL_ELE_T& all_ele) const {
+    auto& my_ele = gs::get_from_tuple(all_ele);
+    auto& getter = getters_[std::get<0>(my_ele)];
+    return getter.get_view(std::get<1>(my_ele));
+  }
+
+  template
+  inline auto get_from_element(const ELE_T& ele) const {
+    return getters_[std::get<0>(ele)].get_view(std::get<1>(ele));
+  }
+
+  template
+  inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) {
+    ind_ele_ = gs::get_from_tuple(ind_ele);
+  }
+
+ private:
+  IND_ELE_T ind_ele_;
+  std::array getters_;
+};
+// Getter for row vertex sets: wraps one PROP_GETTER_T and feeds it field 1 of the indexed element (or the element itself).
+template
+class RowVertexSetPropGetter {
+ public:
+  RowVertexSetPropGetter(PROP_GETTER_T&& getter) : getter_(std::move(getter)) {}
+
+  template
+  inline auto get_view(const std::tuple& ind_ele) const {
+    return getter_.get_view(std::get<1>(ind_ele));
+  }
+
+  inline auto get_view() const {
+    return getter_.get_view(std::get<1>(ind_ele_));
+  }
+
+  template
+  inline auto get_from_all_element(const ALL_ELE_T& all_ele) const {
+    auto& my_ele = gs::get_from_tuple(all_ele);
+    return getter_.get_view(my_ele);
+  }
+
+  // get from ele
+  template
+  inline auto get_from_element(const ELE_T& ele) const {
+    return getter_.get_view(ele);
+  }
+
+  template
+  inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) {
+    ind_ele_ = gs::get_from_tuple(ind_ele);
+  }
+
+ private:
+  IND_ELE_T ind_ele_;
+  PROP_GETTER_T getter_;
+};
+// Getter for keyed row vertex sets: same members as RowVertexSetPropGetter except that it has no get_from_element.
+template
+class KeyedRowVertexSetPropGetter {
+ public:
+  KeyedRowVertexSetPropGetter(PROP_GETTER_T&& getter)
+      : getter_(std::move(getter)) {}
+
+  template
+  inline auto get_view(const std::tuple& ind_ele) const {
+    return getter_.get_view(std::get<1>(ind_ele));
+  }
+
+  inline auto get_view() const {
+    return getter_.get_view(std::get<1>(ind_ele_));
+  }
+
+  template
+  inline auto get_from_all_element(const ALL_ELE_T& all_ele) const {
+    auto& my_ele = gs::get_from_tuple(all_ele);
+    return getter_.get_view(my_ele);
+  }
+
+  template
+  inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) {
+    ind_ele_ = gs::get_from_tuple(ind_ele);
+  }
+
+ private:
+  IND_ELE_T ind_ele_;
+  PROP_GETTER_T getter_;
+};
+// Getter over an owned vector of distances, looked up by field 0 of the indexed element.
+template
+class DistGetter {
+ public:
+  DistGetter(std::vector&& dist) : dist_(std::move(dist)) {}
+
+  template
+  inline auto get_view(const std::tuple& ind_ele) const {
+    return dist_[std::get<0>(ind_ele)];
+  }
+
+  inline auto get_view() const { return dist_[std::get<0>(ind_ele_)]; }
+
+  template
+  inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) {
+    ind_ele_ = gs::get_from_tuple(ind_ele);
+  }
+
+ private:
+  std::vector dist_;
+  IND_ELE_T ind_ele_;
+};
+
+///////////////////////Creating property getter/////////////////////////
+// Builds a DistGetter for a row vertex set: copies the column named "dist"/"Dist" out of set.GetDataVec(), or falls back to zeros.
+template
+static auto get_dist_prop_getter(
+    const RowVertexSetImpl& set,
+    const std::array& prop_names) {
+  if (prop_names[Is] == "dist" || prop_names[Is] == "Dist") {
+    std::vector dists;
+    auto& data_vec = set.GetDataVec();
+    dists.reserve(set.Size());
+    for (auto i = 0; i < data_vec.size(); ++i) {
+      dists.emplace_back(Dist(std::get(data_vec[i])));
+    }
+    return DistGetter::index_ele_tuple_t>(
+        std::move(dists));
+  }
+  if constexpr (Is + 1 >= sizeof...(T)) {  // last column reached without a match
+    LOG(WARNING) << "Property dist not found, using default 0";
+    std::vector dists;
+    auto set_size = set.Size();
+    dists.reserve(set_size);
+    for (auto i = 0; i < set_size; ++i) {
+      dists.emplace_back(0);
+    }
+    return DistGetter::index_ele_tuple_t>(
+        std::move(dists));
+  } else {
+    return get_dist_prop_getter(set, prop_names);  // presumably recurses with the next index; the template arguments are not visible here - TODO confirm
+  }
+}
+
+// getting dist prop for keyed row vertex set; same steps as the row vertex set version above
+template
+static auto get_dist_prop_getter(
+    const KeyedRowVertexSetImpl& set,
+    const std::array& prop_names) {
+  if (prop_names[Is] == "dist" || prop_names[Is] == "Dist") {
+    std::vector dists;
+    auto& data_vec = set.GetDataVec();
+    dists.reserve(set.Size());
+    for (auto i = 0; i < data_vec.size(); ++i) {
+      dists.emplace_back(Dist(std::get(data_vec[i])));
+    }
+    return DistGetter::index_ele_tuple_t>(
+        std::move(dists));
+  }
+  if constexpr (Is + 1 >= sizeof...(T)) {  // last column reached without a match
+    LOG(WARNING) << "Property dist not found, using default 0";
+    std::vector dists;
+    auto set_size = set.Size();
+    dists.reserve(set_size);
+    for (auto i = 0; i < set_size; ++i) {
+      dists.emplace_back(0);
+    }
+    return DistGetter::index_ele_tuple_t>(
+        std::move(dists));
+  } else {
+    return get_dist_prop_getter(set, prop_names);  // presumably recurses with the next index - TODO confirm
+  }
+}
+
+// get for common properties for row_vertex_set: wraps graph.GetSinglePropGetter(label, prop_name) in a RowVertexSetPropGetter
+template <
+    int tag_id, typename prop_t, typename GRAPH_INTERFACE, typename LabelT,
+    typename VID_T, typename... T,
+    typename std::enable_if>::type* = nullptr>
+static auto create_prop_getter_impl(
+    const RowVertexSetImpl& set,
+    const GRAPH_INTERFACE& graph, const std::string& prop_name) {
+  using prop_getter_t =
+      typename GRAPH_INTERFACE::template single_prop_getter_t;
+  // const std::array& labels = set.GetLabels();
+
+  auto label = set.GetLabel();
+  VLOG(10) << "getting getter for " << prop_name << " for label "
+           << gs::to_string(label);
+  auto getter = graph.template GetSinglePropGetter(label, prop_name);
+  return RowVertexSetPropGetter<
+      tag_id, prop_getter_t,
+      typename RowVertexSetImpl::index_ele_tuple_t>(
+      std::move(getter));
+}
+
+// get for dist property for row_vertex_set: CHECKs the name is "dist"/"Dist" and reads it from the set's own data, not from the graph
+template <
+    int tag_id, typename prop_t, typename GRAPH_INTERFACE, typename LabelT,
+    typename VID_T, typename... T,
+    typename std::enable_if>::type* = nullptr>
+static auto create_prop_getter_impl(
+    const RowVertexSetImpl& set,
+    const GRAPH_INTERFACE& graph, const std::string& prop_name) {
+  VLOG(10) << "Getting dist prop getter";
+  CHECK(prop_name == "dist" || prop_name == "Dist");
+  return get_dist_prop_getter(set, set.GetPropNames());
+}
+
+// get dist property for keyed vertex set: CHECKs the name is "dist"/"Dist" and reads it from the set's own data
+template <
+    int tag_id, typename prop_t, typename GRAPH_INTERFACE, typename LabelT,
+    typename KEY_T, typename VID_T, typename... T,
+    typename std::enable_if>::type* = nullptr>
+static auto create_prop_getter_impl(
+    const KeyedRowVertexSetImpl& set,
+    const GRAPH_INTERFACE& graph, const std::string& prop_name) {
+  VLOG(10) << "Getting dist prop getter";
+  CHECK(prop_name == "dist" || prop_name == "Dist");
+  return get_dist_prop_getter(set, set.GetPropNames());
+}
+
+// get for common properties for two_label_vertex_set: one single-prop getter per label (labels[0], labels[1])
+template
+static auto create_prop_getter_impl(
+    const TwoLabelVertexSetImpl& set,
+    const GRAPH_INTERFACE& graph, const std::string& prop_name) {
+  using prop_getter_t =
+      typename GRAPH_INTERFACE::template single_prop_getter_t;
+  auto& labels = set.GetLabels();
+  std::array names{prop_name};  // not read again in this function
+  VLOG(10) << "Getting prop labels for " << prop_name << " for labels "
+           << std::to_string(labels[0]) << ", " << std::to_string(labels[1]);
+  std::array prop_getter{
+      graph.template GetSinglePropGetter(labels[0], prop_name),
+      graph.template GetSinglePropGetter(labels[1], prop_name)};
+
+  return TwoLabelVertexSetImplPropGetter<
+      tag_id, prop_getter_t,
+      typename TwoLabelVertexSetImpl::index_ele_tuple_t>(
+      std::move(prop_getter));
+}
+
+// get for common properties for keyed_row_vertex_set: wraps graph.GetSinglePropGetter(label, prop_name) in a KeyedRowVertexSetPropGetter
+template <
+    int tag_id, typename prop_t, typename GRAPH_INTERFACE, typename LabelT,
+    typename KEY_T, typename VID_T, typename... T,
+    typename std::enable_if>::type* = nullptr>
+static auto create_prop_getter_impl(
+    const KeyedRowVertexSetImpl& set,
+    const GRAPH_INTERFACE& graph, const std::string& prop_name) {
+  using prop_getter_t =
+      typename GRAPH_INTERFACE::template single_prop_getter_t;
+  // const std::array& labels = set.GetLabels();
+  auto label = set.GetLabel();
+
+  auto getter = graph.template GetSinglePropGetter(label, prop_name);
+  return KeyedRowVertexSetPropGetter<
+      tag_id, prop_getter_t,
+      typename KeyedRowVertexSetImpl::index_ele_tuple_t>(
+      std::move(getter));
+}
+
+// get for common properties for FlatEdgeSet: returns a default-constructed getter; none of the arguments are read
+template
+static auto create_prop_getter_impl(
+    const FlatEdgeSet& set,
+    const GRAPH_INTERFACE& graph, const std::string& prop_name) {
+  return FlatEdgeSetPropGetter<
+      tag_id,
+      typename FlatEdgeSet::index_ele_tuple_t>();
+}
+
+// get for common properties for GeneralEdgeSet: returns a default-constructed getter; none of the arguments are read
+template
+static auto create_prop_getter_impl(
+    const GeneralEdgeSet& set,
+    const GI& graph, const std::string& prop_name) {
+  return GeneralEdgeSetPropGetter<
+      tag_id, typename GeneralEdgeSet::index_ele_tuple_t>();
+}
+
+// get for common properties for collection: the name must be "None", "none" or empty (CHECKed); set and graph are not read
+template
+static auto create_prop_getter_impl(const Collection& set, const GI& graph,
+                                    const std::string& prop_name) {
+  CHECK(prop_name == "None" || prop_name == "none" || prop_name == "");
+  return CollectionPropGetter();
+}
+
+// create inner id getter for row vertex set with props: built over set.GetVertices() and set.GetDataVec()
+template
+static auto create_prop_getter_from_prop_desc(
+    const GRAPH_INTERFACE& graph,
+    const RowVertexSetImpl& set,
+    const InnerIdProperty& inner_id_prop) {
+  return InnerIdDataGetter(set.GetVertices(), set.GetDataVec());
+}
+// create inner id getter for row vertex set without props (this comment previously said "keyed", but the parameter is a RowVertexSetImpl)
+template
+static auto create_prop_getter_from_prop_desc(
+    const GRAPH_INTERFACE& graph,
+    const RowVertexSetImpl& set,
+    const InnerIdProperty& inner_id_prop) {
+  return InnerIdGetter(
+      set.GetVertices());
+}
+// create inner_id getter for two label
vertex set
+template  // NOTE(review): `template` parameter lists and most `<...>` argument lists are absent in this text (apparently lost in extraction) - restore from upstream before compiling
+static auto create_prop_getter_from_prop_desc(
+    const GRAPH_INTERFACE& graph,
+    const TwoLabelVertexSetImpl& set,
+    const InnerIdProperty& inner_id_prop) {
+  return InnerIdGetter(  // built over set.GetVertices(); graph and inner_id_prop are not read
+      set.GetVertices());
+}
+
+// create inner id getter for collection: built over set.GetVector(); graph and inner_id_prop are not read.
+template
+static auto create_prop_getter_from_prop_desc(
+    const GRAPH_INTERFACE& graph, const Collection& set,
+    const InnerIdProperty& inner_id_prop) {
+  return InnerIdGetter(set.GetVector());
+}
+
+// create innerId getter for flat edge set: returns a default-constructed EdgeSetInnerIdGetter; none of the arguments are read.
+template
+static auto create_prop_getter_from_prop_desc(
+    const GRAPH_INTERFACE& graph,
+    const FlatEdgeSet& set,
+    const InnerIdProperty& inner_id_prop) {
+  return EdgeSetInnerIdGetter();
+}
+
+// get prop for inner id for idKey: fetches the node from the context, then dispatches to the set-specific overload
+template
+static auto create_prop_getter_from_prop_desc(
+    const GRAPH_INTERFACE& graph,
+    Context& ctx,
+    const InnerIdProperty& inner_id_prop) {
+  auto& set = ctx.template GetNode();  // presumably selected by the property's tag id; the template argument is not visible here - TODO confirm
+  return create_prop_getter_from_prop_desc(graph, set, inner_id_prop);
+}
+
+// get prop for common property.
+// return a single prop getter: fetches the node from ctx and delegates to create_prop_getter_impl with named_property.name
+template  // NOTE(review): `template` parameter lists and most `<...>` argument lists are absent in this text (apparently lost in extraction) - restore from upstream before compiling
+static auto create_prop_getter_from_prop_desc(
+    const GRAPH_INTERFACE& graph, CTX_T& ctx,
+    const NamedProperty& named_property) {
+  auto& set = ctx.template GetNode();
+  return create_prop_getter_impl(set, graph, named_property.name);
+}
+// Builds one getter per descriptor in prop_desc (pack expansion over Is) and returns them as a std::make_tuple.
+template
+static auto create_prop_getters_from_prop_desc(
+    const GI& graph, CTX_T& ctx, const std::tuple& prop_desc,
+    std::index_sequence) {
+  return std::make_tuple(create_prop_getter_from_prop_desc(
+      graph, ctx, std::get(prop_desc))...);
+}
+// Convenience overload: forwards to the overload above, supplying a std::make_index_sequence.
+template
+static auto create_prop_getters_from_prop_desc(
+    const GI& graph, CTX_T& ctx, const std::tuple& prop_desc) {
+  return create_prop_getters_from_prop_desc(
+      graph, ctx, prop_desc, std::make_index_sequence());
+}
+// Selector -> descriptor, inner-id case: returns a default-constructed InnerIdProperty; `selector` is not read.
+template
+auto create_prop_desc_from_selector(
+    const PropertySelector& selector) {
+  return InnerIdProperty();
+}
+// Selector -> descriptor, named-property case: returns a NamedProperty built from selector.prop_name_.
+template
+auto create_prop_desc_from_selector(const PropertySelector& selector) {
+  return NamedProperty(selector.prop_name_);
+}
+// Converts each selector in the tuple with create_prop_desc_from_selector and returns the descriptors as a tuple.
+template
+auto create_prop_descs_from_selectors(std::integer_sequence,
+                                      const std::tuple& selectors,
+                                      std::index_sequence) {
+  return std::make_tuple(
+      create_prop_desc_from_selector(std::get(selectors))...);
+}
+// Convenience overload: forwards to the overload above, supplying a std::integer_sequence and a std::make_index_sequence.
+template
+auto create_prop_descs_from_selectors(
+    const std::tuple& selectors) {
+  return create_prop_descs_from_selectors(
+      std::integer_sequence(), selectors,
+      std::make_index_sequence());
+}
+
+}  // namespace gs
+
+#endif  // ENGINES_HQPS_ENGINE_OPERATOR_PROP_UTILS_H_
\ No newline at end of file
diff --git a/flex/engines/hqps_db/database/adj_list.h b/flex/engines/hqps_db/database/adj_list.h
new file mode 100644
index 000000000000..19fe7ccbdc4c
--- /dev/null
+++ b/flex/engines/hqps_db/database/adj_list.h
@@ -0,0 +1,560 @@
+/** Copyright 2020 Alibaba Group Holding Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ENGINES_HQPS_DATABASE_ADJ_LIST_H_
+#define ENGINES_HQPS_DATABASE_ADJ_LIST_H_
+
+#include  // NOTE(review): the header names of these four #include lines are missing (apparently lost in extraction) - restore from upstream
+#include
+#include
+#include
+
+#include "flex/engines/hqps_db/core/null_record.h"
+
+namespace gs {
+
+namespace mutable_csr_graph_impl {
+// Reads one property column through a shared_ptr handle; get_view maps the NONE vertex id to a null record.
+template  // NOTE(review): the template parameter list and the `<...>` arguments below are absent in this text - restore from upstream
+class SinglePropGetter {
+ public:
+  using value_type = T;
+  SinglePropGetter() {}  // NOTE(review): leaves `column` empty; get_view would then dereference it for any vid != NONE - confirm callers always assign first
+  SinglePropGetter(std::shared_ptr> c) : column(c) {
+    CHECK(column.get() != nullptr);
+  }
+  // Returns NullRecordCreator::GetNull() when vid == NONE, otherwise the column's view of `vid`.
+  inline value_type get_view(vid_t vid) const {
+    if (vid == NONE) {
+      return NullRecordCreator::GetNull();
+    }
+    return column->get_view(vid);
+  }
+  // Copy assignment: copies the shared_ptr, so both getters refer to the same column object.
+  inline SinglePropGetter& operator=(const SinglePropGetter& d) {
+    column = d.column;
+    return *this;
+  }
+
+ private:
+  std::shared_ptr> column;
+};
+
+// Property getter holds the handle of the property column.
+ +template +class MultiPropGetter { + public: + using column_tuple_t = std::tuple>...>; + using result_tuple_t = std::tuple; + MultiPropGetter() {} + MultiPropGetter(column_tuple_t c) : column(c) {} + + inline result_tuple_t get_view(vid_t vid) const { + if (vid == NONE) { + return NullRecordCreator::GetNull(); + } + return get_view(vid, std::make_index_sequence()); + } + + template + inline result_tuple_t get_view(vid_t vid, std::index_sequence) const { + if (vid == NONE) { + return NullRecordCreator::GetNull(); + } + return std::make_tuple(std::get(column)->get_view(vid)...); + } + + inline MultiPropGetter& operator=(const MultiPropGetter& d) { + column = d.column; + return *this; + } + + private: + column_tuple_t column; +}; + +template +class Adj {}; + +template +class Adj { + public: + Adj() = default; + ~Adj() = default; + + Adj(const Adj& other) : neighbor_(other.neighbor_), prop_(other.prop_) {} + + Adj(Adj&& other) + : neighbor_(other.neighbor_), prop_(std::move(other.prop_)) {} + + inline Adj& operator=(const Adj& from) { + this->neighbor_ = from.neighbor_; + this->prop_ = from.prop_; + return *this; + } + + vid_t neighbor() const { return neighbor_; } + const std::tuple& properties() const { return prop_; } + + vid_t neighbor_; + std::tuple prop_; +}; + +template <> +class Adj<> { + public: + Adj() = default; + ~Adj() = default; + + Adj(const Adj<>& other) : neighbor_(other.neighbor_), prop_(other.prop_) {} + Adj(Adj<>&& other) + : neighbor_(other.neighbor_), prop_(std::move(other.prop_)) {} + + inline Adj<>& operator=(const Adj<>& from) { + this->neighbor_ = from.neighbor_; + this->prop_ = from.prop_; + return *this; + } + + vid_t neighbor() const { return neighbor_; } + const std::tuple<>& properties() const { return prop_; } + + vid_t neighbor_; + std::tuple<> prop_; +}; + +template +class AdjList {}; + +template +class AdjList { + using nbr_t = MutableNbr; + class Iterator { + public: + Iterator() + : cur_(), + begin0_(nullptr), + end0_(nullptr), + 
begin1_(nullptr), + end1_(nullptr) {} + Iterator(const nbr_t* begin0, const nbr_t* end0, const nbr_t* begin1, + const nbr_t* end1) + : cur_(), begin0_(begin0), end0_(end0), begin1_(begin1), end1_(end1) { + // probe for next; + probe_for_next(); + } + + void probe_for_next() { + if (begin0_ != end0_ && begin0_ != NULL) { + cur_.neighbor_ = begin0_->neighbor; + std::get<0>(cur_.prop_) = begin0_->data; + return; + } + // ptr= null is ok, since fast fail on neq + + if (begin1_ != end1_ && begin1_ != NULL) { + cur_.neighbor_ = begin1_->neighbor; + std::get<0>(cur_.prop_) = begin1_->data; + return; + } + } + + bool valid() const { return begin0_ != end0_ || begin1_ != end1_; } + const Adj& operator*() const { return cur_; } + const Adj* operator->() const { return &cur_; } + + vid_t neighbor() const { return cur_.neighbor(); } + const std::tuple& properties() const { return cur_.properties(); } + + std::string to_string() const { + std::stringstream ss; + ss << "(neighbor: " << cur_.neighbor_ + << ", prop: " << std::get<0>(cur_.prop_) << ")"; + return ss.str(); + } + + Iterator& operator++() { + if (begin0_ < end0_) { + ++begin0_; + } else if (begin1_ < end1_) { + ++begin1_; + } else { + return *this; + } + probe_for_next(); + return *this; + } + Iterator operator++(int) { + Iterator tmp = *this; + ++(*this); + return tmp; + } + + bool operator==(const Iterator& rhs) const { + return begin0_ == rhs.begin0_ && begin1_ == rhs.begin1_; + } + + bool operator!=(const Iterator& rhs) const { + return begin1_ != rhs.begin1_ || begin0_ != rhs.begin0_; + } + inline Iterator& operator=(const Iterator& from) { + this->cur_ = from.cur_; + this->begin0_ = from.begin0_; + this->end0_ = from.end0_; + this->begin1_ = from.begin1_; + this->end1_ = from.end1_; + return *this; + } + + private: + Adj cur_; + const nbr_t *begin0_, *begin1_; + const nbr_t *end0_, *end1_; + }; + + public: + using slice_t = MutableNbrSlice; + using iterator = Iterator; + AdjList() = default; + // copy 
constructor + AdjList(const AdjList& adj_list) + : slice0_(adj_list.slice0_), slice1_(adj_list.slice1_) {} + // with sinle slice provided. + AdjList(const slice_t& slice0) : slice0_(slice0), slice1_() {} + AdjList(const slice_t& slice0, const slice_t& slice1) + : slice0_(slice0), slice1_(slice1) {} + + AdjList(AdjList&& adj_list) + : slice0_(std::move(adj_list.slice0_)), + slice1_(std::move(adj_list.slice1_)) {} + + AdjList(AdjList& adj_list) + : slice0_(adj_list.slice0_), slice1_(adj_list.slice1_) {} + + Iterator begin() const { + return Iterator(slice0_.begin(), slice0_.end(), slice1_.begin(), + slice1_.end()); + } + Iterator end() const { + return Iterator(slice0_.end(), slice0_.end(), slice1_.end(), slice1_.end()); + } + size_t size() const { return slice0_.size() + slice1_.size(); } + + AdjList& operator=(const AdjList& other) { + slice0_ = other.slice0_; + slice1_ = other.slice1_; + return *this; + } + + const slice_t& slice0() const { return slice0_; } + const slice_t& slice1() const { return slice1_; } + + private: + slice_t slice0_, slice1_; +}; + +template <> +class AdjList<> { + using nbr_t = MutableNbr; + class Iterator { + public: + Iterator() + : cur_(), + begin0_(nullptr), + end0_(nullptr), + begin1_(nullptr), + end1_(nullptr) {} + Iterator(const nbr_t* begin0, const nbr_t* end0, const nbr_t* begin1, + const nbr_t* end1) + : cur_(), begin0_(begin0), end0_(end0), begin1_(begin1), end1_(end1) { + probe_for_next(); + } + + void probe_for_next() { + if (begin0_ != end0_ && begin0_ != NULL) { + cur_.neighbor_ = begin0_->neighbor; + return; + } + // ptr= null is ok, since fast fail on neq + + if (begin1_ != end1_ && begin1_ != NULL) { + cur_.neighbor_ = begin1_->neighbor; + return; + } + } + + vid_t neighbor() const { return cur_.neighbor(); } + + const Adj<>& operator*() const { return cur_; } + const Adj<>* operator->() const { return &cur_; } + + Iterator& operator++() { + if (begin0_ < end0_) { + ++begin0_; + } else if (begin1_ < end1_) { + ++begin1_; 
+ } else { + return *this; + } + probe_for_next(); + return *this; + } + Iterator operator++(int) { + Iterator tmp = *this; + ++(*this); + return tmp; + } + inline Iterator& operator=(const Iterator& from) { + this->cur_ = from.cur_; + this->begin0_ = from.begin0_; + this->end0_ = from.end0_; + this->begin1_ = from.begin1_; + this->end1_ = from.end1_; + return *this; + } + bool operator==(const Iterator& rhs) const { + return begin0_ == rhs.begin0_ && begin1_ == rhs.begin1_; + } + bool operator!=(const Iterator& rhs) const { + return begin1_ != rhs.begin1_ || begin0_ != rhs.begin0_; + } + + private: + Adj<> cur_; + const nbr_t *begin0_, *begin1_; + const nbr_t *end0_, *end1_; + }; + + public: + using iterator = Iterator; + using slice_t = MutableNbrSlice; + AdjList() = default; + AdjList(const slice_t& slice) : slice0_(slice), slice1_() {} + + AdjList(const slice_t& slice0, const slice_t& slice1) + : slice0_(slice0), slice1_(slice1) {} + + AdjList(AdjList<>&& adj_list) + : slice0_(std::move(adj_list.slice0_)), + slice1_(std::move(adj_list.slice1_)) {} + + AdjList(const AdjList<>& adj_list) + : slice0_(adj_list.slice0_), slice1_(adj_list.slice1_) {} + + Iterator begin() const { + return Iterator(slice0_.begin(), slice0_.end(), slice1_.begin(), + slice1_.end()); + } + Iterator end() const { + return Iterator(slice0_.end(), slice0_.end(), slice1_.end(), slice1_.end()); + } + size_t size() const { return slice0_.size() + slice1_.size(); } + + AdjList<>& operator=(const AdjList<>& other) { + slice0_ = other.slice0_; + slice1_ = other.slice1_; + return *this; + } + + // slice0_ getter + const slice_t& slice0() const { return slice0_; } + // slice1_ getter + const slice_t& slice1() const { return slice1_; } + + private: + slice_t slice0_, slice1_; +}; + +template +class AdjListArray {}; + +template +class AdjListArray { + public: + using csr_base_t = MutableCsrBase; + using typed_csr_base_t = MutableCsr; + using slice_t = MutableNbrSlice; + AdjListArray() = default; + 
AdjListArray(const csr_base_t* csr, const std::vector& vids) + : flag_(false) { + slices_.reserve(vids.size()); + const typed_csr_base_t* casted_csr = + dynamic_cast(csr); + for (auto v : vids) { + slices_.emplace_back(std::make_pair(casted_csr->get_edges(v), slice_t())); + } + } + AdjListArray(const csr_base_t* csr0, const csr_base_t* csr1, + const std::vector& vids) + : flag_(true) { + slices_.reserve(vids.size()); + + const typed_csr_base_t* casted_csr0 = + dynamic_cast(csr0); + const typed_csr_base_t* casted_csr1 = + dynamic_cast(csr1); + for (auto v : vids) { + slices_.emplace_back( + std::make_pair(casted_csr0->get_edges(v), casted_csr1->get_edges(v))); + } + } + + void resize(size_t new_size) { slices_.resize(new_size); } + + void set(size_t i, const AdjList& slice) { + slices_[i] = std::make_pair(slice.slice0(), slice.slice1()); + } + + AdjListArray(AdjListArray&& adj_list) + : slices_(std::move(adj_list.slices_)), flag_(adj_list.flag_) {} + + size_t size() const { return slices_.size(); } + + AdjList get(size_t i) const { + if (flag_) { + return AdjList(slices_[i].first, slices_[i].second); + } else { + return AdjList(slices_[i].first); + } + } + + void swap(AdjListArray& adj_list) { + this->slices_.swap(adj_list.slices_); + bool tmp_flag = flag_; + flag_ = adj_list.flag_; + adj_list.flag_ = tmp_flag; + } + + private: + std::vector> slices_; + bool flag_; +}; + +template <> +class AdjListArray<> { + public: + using csr_base_t = MutableCsrBase; + using typed_csr_base_t = MutableCsr; + using slice_t = MutableNbrSlice; + AdjListArray() = default; + AdjListArray(const csr_base_t* csr, const std::vector& vids) + : flag_(false) { + slices_.reserve(vids.size()); + const typed_csr_base_t* casted_csr = + dynamic_cast(csr); + for (auto v : vids) { + auto edges = casted_csr->get_edges(v); + slices_.emplace_back(std::make_pair(casted_csr->get_edges(v), slice_t())); + } + } + + AdjListArray(const csr_base_t* csr0, const csr_base_t* csr1, + const std::vector& vids) + : 
flag_(true) { + slices_.reserve(vids.size()); + const typed_csr_base_t* casted_csr0 = + dynamic_cast(csr0); + const typed_csr_base_t* casted_csr1 = + dynamic_cast(csr1); + + for (auto v : vids) { + slices_.emplace_back( + std::make_pair(casted_csr0->get_edges(v), casted_csr1->get_edges(v))); + } + } + // move constructor + AdjListArray(AdjListArray<>&& adj_list) + : slices_(std::move(adj_list.slices_)), flag_(adj_list.flag_) {} + + size_t size() const { return slices_.size(); } + + void resize(size_t new_size) { slices_.resize(new_size); } + + void set(size_t i, const AdjList<>& slice) { + slices_[i] = std::make_pair(slice.slice0(), slice.slice1()); + } + + AdjList<> get(size_t i) const { + if (flag_) { + return AdjList<>(slices_[i].first, slices_[i].second); + } else { + return AdjList<>(slices_[i].first); + } + } + + void swap(AdjListArray<>& adj_list) { + this->slices_.swap(adj_list.slices_); + bool tmp_flag = flag_; + flag_ = adj_list.flag_; + adj_list.flag_ = tmp_flag; + } + + private: + std::vector> slices_; + bool flag_; +}; + +class Nbr { + public: + Nbr() = default; + explicit Nbr(vid_t neighbor) : neighbor_(neighbor) {} + ~Nbr() = default; + + inline vid_t neighbor() const { return neighbor_; } + + private: + vid_t neighbor_; +}; + +class NbrList { + public: + NbrList(const Nbr* b, const Nbr* e) : begin_(b), end_(e) {} + ~NbrList() = default; + + const Nbr* begin() const { return begin_; } + const Nbr* end() const { return end_; } + inline size_t size() const { return end_ - begin_; } + + private: + const Nbr* begin_; + const Nbr* end_; +}; + +class NbrListArray { + public: + NbrListArray() {} + ~NbrListArray() = default; + + NbrList get(size_t index) const { + auto& list = nbr_lists_[index]; + return NbrList(list.data(), list.data() + list.size()); + } + + void put(std::vector&& list) { nbr_lists_.push_back(std::move(list)); } + + size_t size() const { return nbr_lists_.size(); } + + void resize(size_t size) { nbr_lists_.resize(size); } + + std::vector& 
get_vector(size_t index) { return nbr_lists_[index]; } + + private: + std::vector> nbr_lists_; +}; + +} // namespace mutable_csr_graph_impl +} // namespace gs + +#endif // ENGINES_HQPS_DATABASE_ADJ_LIST_H_ \ No newline at end of file diff --git a/flex/engines/hqps_db/database/mutable_csr_interface.h b/flex/engines/hqps_db/database/mutable_csr_interface.h new file mode 100644 index 000000000000..a236f938b1a4 --- /dev/null +++ b/flex/engines/hqps_db/database/mutable_csr_interface.h @@ -0,0 +1,862 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef ENGINES_HQPS_DATABASE_MUTABLE_CSR_INTERFACE_H_ +#define ENGINES_HQPS_DATABASE_MUTABLE_CSR_INTERFACE_H_ + +#include + +#include "flex/engines/graph_db/database/graph_db.h" +#include "flex/engines/graph_db/database/graph_db_session.h" +#include "flex/engines/hqps_db/core/null_record.h" +#include "flex/engines/hqps_db/core/params.h" + +#include "flex/engines/hqps_db/database/adj_list.h" +#include "grape/utils/bitset.h" + +#include "grape/util.h" + +namespace gs { + +template +void get_tuple_from_column_tuple( + size_t index, std::tuple& t, + const std::tuple>...>& columns) { + auto ptr = std::get(columns); + if (ptr) { + std::get(t) = ptr->get_view(index); + } + + if constexpr (I + 1 < sizeof...(T)) { + get_tuple_from_column_tuple(index, t, columns); + } +} + +template +void get_tuple_from_column_tuple(size_t index, std::tuple& t, + const std::tuple& columns) { + auto ptr = std::get(columns); + if (ptr) { + std::get(t) = ptr->get_view(index); + } + + if constexpr (I + 1 < sizeof...(T)) { + get_tuple_from_column_tuple(index, t, columns); + } +} + +/** + * @brief MutableCSRInterface is the interface for the mutable CSR graph + * implementation. 
+ * + */ +class MutableCSRInterface { + public: + const GraphDBSession& GetDBSession() { return db_session_; } + + using vertex_id_t = vid_t; + using outer_vertex_id_t = oid_t; + using label_id_t = uint8_t; + + using nbr_list_array_t = mutable_csr_graph_impl::NbrListArray; + + template + using adj_list_array_t = mutable_csr_graph_impl::AdjListArray; + + template + using adj_list_t = mutable_csr_graph_impl::AdjList; + + template + using adj_t = mutable_csr_graph_impl::Adj; + + using nbr_t = mutable_csr_graph_impl::Nbr; + + using nbr_list_t = mutable_csr_graph_impl::NbrList; + + template + using single_prop_getter_t = mutable_csr_graph_impl::SinglePropGetter; + + template + using multi_prop_getter_t = mutable_csr_graph_impl::MultiPropGetter; + + static constexpr bool is_grape = true; + + static MutableCSRInterface& get(); + + MutableCSRInterface(const GraphDBSession& session) : db_session_(session) {} + + /** + * @brief Get the Vertex Label id + * + * @param label + * @return label_id_t + */ + label_id_t GetVertexLabelId(const std::string& label) const { + return db_session_.schema().get_vertex_label_id(label); + } + + /** + * @brief Get the Edge Label id + * + * @param label + * @return label_id_t + */ + label_id_t GetEdgeLabelId(const std::string& label) const { + return db_session_.schema().get_edge_label_id(label); + } + + /** + * @brief ScanVertices scans all vertices with the given label and calls the + * given function on each vertex for filtering. + * @tparam FUNC_T + * @tparam SELECTOR + * @param label + * @param props + * @param func + */ + template + void ScanVertices(const std::string& label, + const std::tuple& props, + const FUNC_T& func) const { + auto label_id = db_session_.schema().get_vertex_label_id(label); + return ScanVertices(label_id, props, func); + } + + /** + * @brief ScanVertices scans all vertices with the given label and calls the + * given function on each vertex for filtering. 
+ * @tparam FUNC_T + * @tparam SELECTOR + * @param label_id + * @param props + * @param func + */ + template + void ScanVertices(const label_id_t& label_id, + const std::tuple& selectors, + const FUNC_T& func) const { + auto columns = + get_tuple_column_from_graph_with_property(label_id, selectors); + auto vnum = db_session_.graph().vertex_num(label_id); + std::tuple t; + for (auto v = 0; v != vnum; ++v) { + get_tuple_from_column_tuple(v, t, columns); + func(v, t); + } + } + + /** + * @brief ScanVertices scans all vertices with the given label with give + * original id. + * @param label + * @param oid + */ + vertex_id_t ScanVerticesWithOid(const std::string& label, + outer_vertex_id_t oid) const { + auto label_id = db_session_.schema().get_vertex_label_id(label); + vertex_id_t vid; + CHECK(db_session_.graph().get_lid(label_id, oid, vid)); + return vid; + } + + /** + * @brief ScanVertices scans all vertices with the given label with give + * original id. + * @param label_id + * @param oid + */ + vertex_id_t ScanVerticesWithOid(const label_id_t& label_id, + outer_vertex_id_t oid) const { + vertex_id_t vid; + CHECK(db_session_.graph().get_lid(label_id, oid, vid)); + return vid; + } + + /** + * @brief ScanVerticesWithoutProperty scans all vertices with the given label + * and calls the given function on each vertex for filtering. With no + * property. + * @tparam FUNC_T + * @param label + * @param func + */ + template + void ScanVerticesWithoutProperty(const std::string& label, + const FUNC_T& func) const { + auto label_id = db_session_.schema().get_vertex_label_id(label); + auto vnum = db_session_.graph().vertex_num(label_id); + for (auto v = 0; v != vnum; ++v) { + func(v); + } + } + + /** + * @brief GetVertexProps gets the properties of the given vertex. 
+ * @tparam T + * @param label + * @param vid + * @param prop_names + */ + template + std::pair, std::vector>> + GetVertexPropsFromOid( + const std::string& label, const std::vector oids, + const std::array>>& + prop_names) const { + auto label_id = db_session_.schema().get_vertex_label_id(label); + std::tuple*...> columns; + get_tuple_column_from_graph(label_id, prop_names, columns); + std::vector vids(oids.size()); + std::vector> props(oids.size()); + + for (size_t i = 0; i < oids.size(); ++i) { + db_session_.graph().get_lid(label_id, oids[i], vids[i]); + get_tuple_from_column_tuple(vids[i], props[i], columns); + } + + return std::make_pair(std::move(vids), std::move(props)); + } + + /** + * @brief GetVertexProps gets the properties of the given vertices. + * @tparam T + * @param label + * @param vids + * @param prop_names + */ + template + std::vector> GetVertexPropsFromVid( + const std::string& label, const std::vector& vids, + const std::array>>& + prop_names) const { + auto label_id = db_session_.schema().get_vertex_label_id(label); + std::tuple>...> columns; + get_tuple_column_from_graph(label_id, prop_names, columns); + std::vector> props(vids.size()); + fetch_properties_in_column(vids, props, columns); + return std::move(props); + } + + /** + * @brief GetVertexPropsFromVid gets the properties of the given vertices. + * @tparam T + * @param label_id + * @param vids + * @param prop_names + */ + template + std::vector> GetVertexPropsFromVid( + const label_id_t& label_id, const std::vector& vids, + const std::array>>& + prop_names) const { + // auto label_id = db_session_.schema().get_vertex_label_id(label); + CHECK(label_id < db_session_.schema().vertex_label_num()); + std::tuple>...> columns; + get_tuple_column_from_graph(label_id, prop_names, columns); + std::vector> props(vids.size()); + fetch_properties_in_column(vids, props, columns); + return std::move(props); + } + + /** + * @brief GetVertexPropsFromVid gets the properties of the given vertices. 
+ * Works for multiple labels. + * @tparam T + * @param vids + * @param label_ids + * @param vid_inds + * @param prop_names + */ + template + std::vector> GetVertexPropsFromVid( + const std::vector& vids, + const std::array& labels, + const std::array, num_labels>& vid_inds, + const std::array>>& + prop_names) const { + std::vector> props(vids.size()); + std::vector label_ids; + for (auto label : labels) { + label_ids.emplace_back(db_session_.schema().get_vertex_label_id(label)); + } + using column_tuple_t = std::tuple>...>; + std::vector columns; + columns.resize(label_ids.size()); + for (auto i = 0; i < label_ids.size(); ++i) { + get_tuple_column_from_graph(label_ids[i], prop_names, columns[i]); + } + + VLOG(10) << "start getting vertices's property"; + double t0 = -grape::GetCurrentTime(); + fetch_properties<0>(props, columns, vids, vid_inds); + t0 += grape::GetCurrentTime(); + VLOG(10) << "Finish getting vertices's property, cost: " << t0; + + return std::move(props); + } + + /** + * @brief GetVertexPropsFromVidV2 gets the properties of the given vertices. + * Works for 2 labels. + * @tparam T + * @param vids + * @param labels + * @param bitset + * @param prop_names + */ + template ::type* = nullptr> + std::vector> GetVertexPropsFromVidV2( + const std::vector& vids, + const std::array& labels, + const grape::Bitset& bitset, + const std::array>>& + prop_names) const { + size_t total_size = vids.size(); + std::vector> props(total_size); + std::vector label_ids; + for (auto label : labels) { + label_ids.emplace_back(db_session_.schema().get_vertex_label_id(label)); + } + using column_tuple_t = std::tuple>...>; + std::vector columns; + columns.resize(label_ids.size()); + for (auto i = 0; i < label_ids.size(); ++i) { + get_tuple_column_from_graph(label_ids[i], prop_names, columns[i]); + } + + fetch_propertiesV2<0>(props, columns, vids, bitset); + + return std::move(props); + } + + /** + * @brief GetVertexPropsFromVidV2 gets the properties of the given vertices. 
+ * Works for 2 labels. + * @tparam T + * @param vids + * @param labels + * @param bitset + * @param prop_names + */ + template ::type* = nullptr> + std::vector> GetVertexPropsFromVidV2( + const std::vector& vids, + const std::array& labels, + const grape::Bitset& bitset, + const std::array>>& + prop_names) const { + size_t total_size = vids.size(); + std::vector> props(total_size); + std::vector label_ids; + for (auto label : labels) { + CHECK(label < db_session_.schema().vertex_label_num()); + label_ids.emplace_back(label); + // label_ids.emplace_back(db_session_.schema().get_vertex_label_id(label)); + } + using column_tuple_t = std::tuple>...>; + std::vector columns; + columns.resize(label_ids.size()); + for (auto i = 0; i < label_ids.size(); ++i) { + get_tuple_column_from_graph(label_ids[i], prop_names, columns[i]); + } + + fetch_propertiesV2<0>(props, columns, vids, bitset); + + return std::move(props); + } + + template ::type* = nullptr> + void fetch_propertiesV2(std::vector>& props, + std::vector& columns, + const std::vector& vids, + const grape::Bitset& bitset) const { + // auto index_seq = std::make_index_sequence{}; + + { + auto& column_tuple0 = columns[0]; + auto& column_tuple1 = columns[1]; + auto ptr0 = std::get(column_tuple0); + auto ptr1 = std::get(column_tuple1); + if (ptr0 && ptr1) { + for (auto i = 0; i < vids.size(); ++i) { + if (bitset.get_bit(i)) { + std::get(props[i]) = ptr0->get_view(vids[i]); + } else { + std::get(props[i]) = ptr1->get_view(vids[i]); + } + } + } else if (ptr0) { + for (auto i = 0; i < vids.size(); ++i) { + if (bitset.get_bit(i)) { + std::get(props[i]) = ptr0->get_view(vids[i]); + } + } + } else if (ptr1) { + for (auto i = 0; i < vids.size(); ++i) { + if (!bitset.get_bit(i)) { + std::get(props[i]) = ptr1->get_view(vids[i]); + } + } + } else { + VLOG(10) << "skip for column " << Is; + } + } + fetch_propertiesV2(props, columns, vids, bitset); + } + + template + void fetch_properties_in_column(const std::vector& vids, + 
std::vector>& props, + column_tuple_t& column) const { + // auto index_seq = std::make_index_sequence{}; + + auto& cur_column = std::get(column); + if (cur_column) { + for (auto i = 0; i < vids.size(); ++i) { + std::get(props[i]) = cur_column->get_view(vids[i]); + } + } + + if constexpr (Is + 1 < sizeof...(T)) { + fetch_properties_in_column(vids, props, column); + } + } + + template = sizeof...(T))>::type* = nullptr> + void fetch_propertiesV2(std::vector>& props, + std::vector& columns, + const std::vector& vids, + const grape::Bitset& bitset) const {} + + template ::type* = nullptr> + void fetch_properties( + std::vector>& props, + std::vector& columns, + const std::vector& vids, + const std::array, num_labels>& vid_inds) const { + // auto index_seq = std::make_index_sequence{}; + + for (size_t i = 0; i < num_labels; ++i) { + auto column_tuple = columns[i]; + auto ptr = std::get(column_tuple); + if (ptr) { + for (auto j = 0; j < vid_inds[i].size(); ++j) { + auto vid_ind = vid_inds[i][j]; + auto vid = vids[vid_ind]; + std::get(props[vid_ind]) = ptr->get_view(vid); + } + } else { + VLOG(10) << "skip for column " << Is; + } + } + + fetch_properties(props, columns, vids, vid_inds); + } + + template = sizeof...(T))>::type* = nullptr> + void fetch_properties( + std::vector>& props, + std::vector& columns, + const std::vector& vids, + const std::array, num_labels>& vid_inds) const {} + + template ::type* = nullptr> + void visit_properties( + std::vector>& props, + std::vector& columns, + const std::vector& vids, + const std::array, num_labels>& vid_inds) const { + // auto index_seq = std::make_index_sequence{}; + + for (size_t i = 0; i < num_labels; ++i) { + auto column_tuple = columns[i]; + auto ptr = std::get(column_tuple); + if (ptr) { + std::tuple_element_t> tmp; + for (auto j = 0; j < vid_inds[i].size(); ++j) { + auto vid_ind = vid_inds[i][j]; + auto vid = vids[vid_ind]; + tmp = ptr->get_view(vid); + } + VLOG(10) << tmp; + } else { + VLOG(10) << "skip for column " 
<< Is; + } + } + + visit_properties(props, columns, vids, vid_inds); + } + + template = sizeof...(T))>::type* = nullptr> + void visit_properties( + std::vector>& props, + std::vector& columns, + const std::vector& vids, + const std::array, num_labels>& vid_inds) const {} + + template + mutable_csr_graph_impl::AdjListArray GetEdges( + const label_id_t& src_label_id, const label_id_t& dst_label_id, + const label_id_t& edge_label_id, const std::vector& vids, + const std::string& direction_str, size_t limit, + const std::array>>& + prop_names) const { + if (direction_str == "out" || direction_str == "Out" || + direction_str == "OUT") { + auto csr = db_session_.graph().get_oe_csr(src_label_id, dst_label_id, + edge_label_id); + return mutable_csr_graph_impl::AdjListArray(csr, vids); + } else if (direction_str == "in" || direction_str == "In" || + direction_str == "IN") { + auto csr = db_session_.graph().get_ie_csr(dst_label_id, src_label_id, + edge_label_id); + return mutable_csr_graph_impl::AdjListArray(csr, vids); + } else if (direction_str == "both" || direction_str == "Both" || + direction_str == "BOTH") { + auto csr0 = db_session_.graph().get_oe_csr(src_label_id, dst_label_id, + edge_label_id); + auto csr1 = db_session_.graph().get_ie_csr(dst_label_id, src_label_id, + edge_label_id); + CHECK(csr0); + CHECK(csr1); + return mutable_csr_graph_impl::AdjListArray(csr0, csr1, vids); + } else { + // LOG(FATAL) << "Not implemented - " << direction_str; + throw std::runtime_error("Not implemented - " + direction_str); + } + } + + template + mutable_csr_graph_impl::AdjListArray GetEdges( + const std::string& src_label, const std::string& dst_label, + const std::string& edge_label, const std::vector& vids, + const std::string& direction_str, size_t limit, + const std::array>>& + prop_names) const { + auto src_label_id = db_session_.schema().get_vertex_label_id(src_label); + auto dst_label_id = db_session_.schema().get_vertex_label_id(dst_label); + auto edge_label_id = 
db_session_.schema().get_edge_label_id(edge_label); + + return GetEdges(src_label_id, dst_label_id, edge_label_id, vids, + direction_str, limit, prop_names); + } + + std::pair, std::vector> GetOtherVerticesV2( + const std::string& src_label, const std::string& dst_label, + const std::string& edge_label, const std::vector& vids, + const std::string& direction_str, size_t limit) const { + auto src_label_id = db_session_.schema().get_vertex_label_id(src_label); + auto dst_label_id = db_session_.schema().get_vertex_label_id(dst_label); + auto edge_label_id = db_session_.schema().get_edge_label_id(edge_label); + + return GetOtherVerticesV2(src_label_id, dst_label_id, edge_label_id, vids, + direction_str, limit); + } + + // return the vids, and offset array. + std::pair, std::vector> GetOtherVerticesV2( + const label_id_t& src_label_id, const label_id_t& dst_label_id, + const label_id_t& edge_label_id, const std::vector& vids, + const std::string& direction_str, size_t limit) const { + std::vector ret_v; + std::vector ret_offset; + + if (direction_str == "out" || direction_str == "Out" || + direction_str == "OUT") { + auto csr = db_session_.graph().get_oe_csr(src_label_id, dst_label_id, + edge_label_id); + auto size = 0; + for (auto i = 0; i < vids.size(); ++i) { + auto v = vids[i]; + size += csr->edge_iter(v)->size(); + } + ret_v.reserve(size); + ret_offset.reserve(vids.size()); + ret_offset.emplace_back(0); + + for (auto i = 0; i < vids.size(); ++i) { + auto v = vids[i]; + auto iter = csr->edge_iter(v); + while (iter->is_valid()) { + ret_v.emplace_back(iter->get_neighbor()); + iter->next(); + } + ret_offset.emplace_back(ret_v.size()); + } + } else if (direction_str == "in" || direction_str == "In" || + direction_str == "IN") { + auto csr = db_session_.graph().get_ie_csr(dst_label_id, src_label_id, + edge_label_id); + auto size = 0; + for (auto i = 0; i < vids.size(); ++i) { + auto v = vids[i]; + size += csr->edge_iter(v)->size(); + } + ret_v.reserve(size); + 
ret_offset.reserve(vids.size()); + ret_offset.emplace_back(0); + + for (auto i = 0; i < vids.size(); ++i) { + auto v = vids[i]; + auto iter = csr->edge_iter(v); + while (iter->is_valid()) { + ret_v.emplace_back(iter->get_neighbor()); + iter->next(); + } + ret_offset.emplace_back(ret_v.size()); + } + } else if (direction_str == "both" || direction_str == "Both" || + direction_str == "BOTH") { + auto ie_csr = db_session_.graph().get_ie_csr(dst_label_id, src_label_id, + edge_label_id); + auto oe_csr = db_session_.graph().get_oe_csr(src_label_id, dst_label_id, + edge_label_id); + auto size = 0; + for (auto i = 0; i < vids.size(); ++i) { + auto v = vids[i]; + size += ie_csr->edge_iter(v)->size(); + size += oe_csr->edge_iter(v)->size(); + } + ret_v.reserve(size); + ret_offset.reserve(vids.size() + 1); + ret_offset.emplace_back(0); + for (auto i = 0; i < vids.size(); ++i) { + auto v = vids[i]; + { + auto iter = ie_csr->edge_iter(v); + while (iter->is_valid()) { + ret_v.emplace_back(iter->get_neighbor()); + iter->next(); + } + } + { + auto iter = oe_csr->edge_iter(v); + while (iter->is_valid()) { + ret_v.emplace_back(iter->get_neighbor()); + iter->next(); + } + } + ret_offset.emplace_back(ret_v.size()); + } + } else { + LOG(FATAL) << "Not implemented - " << direction_str; + } + return std::make_pair(std::move(ret_v), std::move(ret_offset)); + } + + mutable_csr_graph_impl::NbrListArray GetOtherVertices( + const std::string& src_label, const std::string& dst_label, + const std::string& edge_label, const std::vector& vids, + const std::string& direction_str, size_t limit) const { + auto src_label_id = db_session_.schema().get_vertex_label_id(src_label); + auto dst_label_id = db_session_.schema().get_vertex_label_id(dst_label); + auto edge_label_id = db_session_.schema().get_edge_label_id(edge_label); + return GetOtherVertices(src_label_id, dst_label_id, edge_label_id, vids, + direction_str, limit); + } + + mutable_csr_graph_impl::NbrListArray GetOtherVertices( + const 
label_id_t& src_label_id, const label_id_t& dst_label_id, + const label_id_t& edge_label_id, const std::vector& vids, + const std::string& direction_str, size_t limit) const { + mutable_csr_graph_impl::NbrListArray ret; + + if (direction_str == "out" || direction_str == "Out" || + direction_str == "OUT") { + auto csr = db_session_.graph().get_oe_csr(src_label_id, dst_label_id, + edge_label_id); + ret.resize(vids.size()); + for (size_t i = 0; i < vids.size(); ++i) { + auto v = vids[i]; + auto iter = csr->edge_iter(v); + auto& vec = ret.get_vector(i); + while (iter->is_valid()) { + vec.push_back(mutable_csr_graph_impl::Nbr(iter->get_neighbor())); + iter->next(); + } + } + } else if (direction_str == "in" || direction_str == "In" || + direction_str == "IN") { + auto csr = db_session_.graph().get_ie_csr(dst_label_id, src_label_id, + edge_label_id); + ret.resize(vids.size()); + for (size_t i = 0; i < vids.size(); ++i) { + auto v = vids[i]; + auto iter = csr->edge_iter(v); + auto& vec = ret.get_vector(i); + while (iter->is_valid()) { + vec.push_back(mutable_csr_graph_impl::Nbr(iter->get_neighbor())); + iter->next(); + } + } + } else if (direction_str == "both" || direction_str == "Both" || + direction_str == "BOTH") { + ret.resize(vids.size()); + auto ocsr = db_session_.graph().get_oe_csr(src_label_id, dst_label_id, + edge_label_id); + auto icsr = db_session_.graph().get_ie_csr(dst_label_id, src_label_id, + edge_label_id); + for (size_t i = 0; i < vids.size(); ++i) { + auto v = vids[i]; + auto& vec = ret.get_vector(i); + auto iter = ocsr->edge_iter(v); + while (iter->is_valid()) { + vec.push_back(mutable_csr_graph_impl::Nbr(iter->get_neighbor())); + iter->next(); + } + iter = icsr->edge_iter(v); + while (iter->is_valid()) { + vec.push_back(mutable_csr_graph_impl::Nbr(iter->get_neighbor())); + iter->next(); + } + } + } else { + LOG(FATAL) << "Not implemented - " << direction_str; + } + return ret; + } + + template + mutable_csr_graph_impl::MultiPropGetter 
GetMultiPropGetter( + const std::string& label, + const std::array& prop_names) const { + auto label_id = db_session_.schema().get_vertex_label_id(label); + return GetMultiPropGetter(label_id, prop_names); + } + + template + mutable_csr_graph_impl::MultiPropGetter GetMultiPropGetter( + const label_id_t& label_id, + const std::array& prop_names) const { + static constexpr auto ind_seq = std::make_index_sequence(); + using column_tuple_t = std::tuple>...>; + column_tuple_t columns; + get_tuple_column_from_graph(label_id, prop_names, columns); + return mutable_csr_graph_impl::MultiPropGetter(columns); + } + + template + mutable_csr_graph_impl::SinglePropGetter GetSinglePropGetter( + const std::string& label, const std::string& prop_name) const { + auto label_id = db_session_.schema().get_vertex_label_id(label); + return GetSinglePropGetter(label_id, prop_name); + } + + template + mutable_csr_graph_impl::SinglePropGetter GetSinglePropGetter( + const label_id_t& label_id, const std::string& prop_name) const { + using column_t = std::shared_ptr>; + column_t column = GetTypedRefColumn(label_id, prop_name); + return mutable_csr_graph_impl::SinglePropGetter(std::move(column)); + } + + // get the vertex property + template + std::shared_ptr> GetTypedRefColumn( + const label_t& label_id, const std::string& prop_name) const { + using column_t = std::shared_ptr>; + column_t column; + if (prop_name == "id" || prop_name == "ID" || prop_name == "Id") { + column = std::dynamic_pointer_cast>( + db_session_.get_vertex_id_column(label_id)); + } else { + auto ptr = db_session_.get_vertex_property_column(label_id, prop_name); + if (ptr) { + column = std::dynamic_pointer_cast>( + create_ref_column(ptr)); + } else { + return nullptr; + } + } + return column; + } + + private: + std::shared_ptr create_ref_column( + std::shared_ptr column) const { + auto type = column->type(); + if (type == PropertyType::kInt32) { + return std::make_shared>( + *std::dynamic_pointer_cast>(column)); + } else 
if (type == PropertyType::kInt64) { + return std::make_shared>( + *std::dynamic_pointer_cast>(column)); + } else if (type == PropertyType::kDate) { + return std::make_shared>( + *std::dynamic_pointer_cast>(column)); + } else if (type == PropertyType::kString) { + return std::make_shared>( + *std::dynamic_pointer_cast>(column)); + } else { + LOG(FATAL) << "unexpected type to create column, " + << static_cast(type); + return nullptr; + } + } + + template + auto get_single_column_from_graph_with_property( + label_t label, const PropertySelector& selector) const { + return GetTypedRefColumn(label, selector.prop_name_); + } + + template + auto get_tuple_column_from_graph_with_property_impl( + label_t label, const std::tuple& selectors, + std::index_sequence) const { + return std::make_tuple(get_single_column_from_graph_with_property( + label, std::get(selectors))...); + } + + template + inline auto get_tuple_column_from_graph_with_property( + label_t label, const std::tuple& selectors) const { + return get_tuple_column_from_graph_with_property_impl( + label, selectors, std::make_index_sequence()); + } + + template + void get_tuple_column_from_graph( + label_t label, + const std::array>>& + prop_names, + std::tuple>...>& columns) const { + // TODO: support label_property + using PT = std::tuple_element_t>; + std::get(columns) = std::dynamic_pointer_cast>( + GetTypedRefColumn(label, prop_names[I])); + if constexpr (I + 1 < sizeof...(T)) { + get_tuple_column_from_graph(label, prop_names, columns); + } + } + + const GraphDBSession& db_session_; + bool initialized_ = false; +}; + +} // namespace gs + +#endif // ENGINES_HQPS_DATABASE_MUTABLE_CSR_INTERFACE_H_ diff --git a/flex/engines/hqps_db/structures/README.md b/flex/engines/hqps_db/structures/README.md new file mode 100644 index 000000000000..78b6f2184ac5 --- /dev/null +++ b/flex/engines/hqps_db/structures/README.md @@ -0,0 +1,2 @@ +## Data Structure +This directory contains implementation of graph data structures needed 
by hqps_db. \ No newline at end of file diff --git a/flex/engines/hqps_db/structures/collection.h b/flex/engines/hqps_db/structures/collection.h new file mode 100644 index 000000000000..f0e3f3dfe71b --- /dev/null +++ b/flex/engines/hqps_db/structures/collection.h @@ -0,0 +1,908 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef ENGINES_HQPS_DS_COLLECTION_H_ +#define ENGINES_HQPS_DS_COLLECTION_H_ + +#include +#include + +#include "flex/engines/hqps_db/core/null_record.h" +#include "flex/engines/hqps_db/core/utils/hqps_utils.h" +#include "flex/engines/hqps_db/core/utils/props.h" +#include "flex/storages/rt_mutable_graph/types.h" +#include "grape/utils/bitset.h" + +#include +#include "grape/util.h" + +namespace gs { + +class EmptyCol { + public: + using value_type = grape::EmptyType; +}; + +// After operator like group, we need to extract the property or the count to +// separate column. +// We use collection to implemention this abstraction. +// Currently we may not use it like vertex_set/edge_set, i.e., no dedup, no +// flat, not subset on collection. 
+ +template +class RowVertexSetImpl; + +template +class RowVertexSetImplBuilder; + +template +class TwoLabelVertexSetImpl; + +template +class TwoLabelVertexSetImplBuilder; + +template +class Collection; + +template +class CollectionBuilder { + public: + static constexpr bool is_row_vertex_set_builder = false; + static constexpr bool is_flat_edge_set_builder = false; + static constexpr bool is_general_edge_set_builder = false; + static constexpr bool is_two_label_set_builder = false; + static constexpr bool is_collection_builder = true; + using result_t = Collection; + + CollectionBuilder() {} + + // insert tuple at index ind. + void Insert(T&& t) { vec_.emplace_back(std::move(t)); } + + void Insert(const T& t) { vec_.push_back(t); } + + // insert index ele tuple + void Insert(const std::tuple& t) { + vec_.emplace_back(std::get<1>(t)); + } + + Collection Build() { + // VLOG(10) << "Finish building counter" << gs::to_string(vec_); + return Collection(std::move(vec_)); + } + + private: + std::vector vec_; +}; + +// Building for collection which appears as the key in group by +template +class KeyedCollectionBuilder { + public: + static constexpr bool is_row_vertex_set_builder = false; + static constexpr bool is_flat_edge_set_builder = false; + static constexpr bool is_general_edge_set_builder = false; + static constexpr bool is_two_label_set_builder = false; + static constexpr bool is_collection_builder = true; + using result_t = Collection; + KeyedCollectionBuilder() {} + + KeyedCollectionBuilder(const Collection& old) { vec_.reserve(old.Size()); } + + template + KeyedCollectionBuilder( + const RowVertexSetImpl& row_vertex_set) { + vec_.reserve(row_vertex_set.Size()); + } + + // insert returning a unique index for the inserted element + size_t insert(const T& t) { + if (map_.find(t) == map_.end()) { + map_[t] = vec_.size(); + vec_.push_back(t); + return vec_.size() - 1; + } else { + return map_[t]; + } + } + + size_t insert(T&& t) { + if (map_.find(t) == map_.end()) { 
+ map_[t] = vec_.size(); + vec_.emplace_back(std::move(t)); + return vec_.size() - 1; + } else { + return map_[t]; + } + } + + size_t Insert(const std::tuple& t) { + return insert(std::get<1>(t)); + } + + Collection Build() { + // VLOG(10) << "Finish building counter" << gs::to_string(vec_); + return Collection(std::move(vec_)); + } + + private: + std::unordered_map> map_; + std::vector vec_; +}; + +template +class CollectionIter { + public: + using data_tuple_t = std::tuple; + using inner_iter_t = typename std::vector::const_iterator; + using self_type_t = CollectionIter; + using index_ele_tuple_t = std::tuple; + CollectionIter(const std::vector& vec, size_t ind) + : vec_(vec), ind_(ind) {} + + T GetElement() const { return vec_[ind_]; } + + index_ele_tuple_t GetIndexElement() const { + return std::make_tuple(ind_, vec_[ind_]); + } + + T GetData() const { return vec_[ind_]; } + + inline CollectionIter& operator++() { + ++ind_; + return *this; + }; + inline bool operator==(const self_type_t& rhs) const { + return ind_ == rhs.ind_; + } + inline bool operator!=(const self_type_t& rhs) const { + return ind_ != rhs.ind_; + } + inline bool operator<(const self_type_t& rhs) const { + return ind_ < rhs.ind_; + } + inline const self_type_t& operator*() const { return *this; } + inline const self_type_t* operator->() const { return this; } + + private: + size_t ind_; + const std::vector& vec_; +}; + +// specialization for T is tuple, and only contains one element. 
+template +class CollectionIter> { + public: + using ele_t = std::tuple; + using data_tuple_t = std::tuple; + using inner_iter_t = typename std::vector::const_iterator; + using self_type_t = CollectionIter; + using index_ele_tuple_t = std::tuple; + CollectionIter(const std::vector& vec, size_t ind) + : vec_(vec), ind_(ind) {} + + T GetElement() const { return std::get<0>(vec_[ind_]); } + + index_ele_tuple_t GetIndexElement() const { + return std::make_tuple(ind_, std::get<0>(vec_[ind_])); + } + + T GetData() const { return std::get<0>(vec_[ind_]); } + + inline CollectionIter>& operator++() { + ++ind_; + return *this; + }; + inline bool operator==(const self_type_t& rhs) const { + return ind_ == rhs.ind_; + } + inline bool operator!=(const self_type_t& rhs) const { + return ind_ != rhs.ind_; + } + inline bool operator<(const self_type_t& rhs) const { + return ind_ < rhs.ind_; + } + inline const self_type_t& operator*() const { return *this; } + inline const self_type_t* operator->() const { return this; } + + private: + size_t ind_; + const std::vector& vec_; +}; + +template +class Collection { + public: + using value_type = T; + using iterator = CollectionIter; + using data_tuple_t = typename iterator::data_tuple_t; + using index_ele_tuple_t = typename iterator::index_ele_tuple_t; + using flat_t = Collection; + using self_type_t = Collection; + using EntityValueType = T; + + using builder_t = CollectionBuilder; + + static constexpr bool is_collection = true; + static constexpr bool is_keyed = false; + static constexpr bool is_multi_label = false; + static constexpr bool is_vertex_set = false; + static constexpr bool is_edge_set = false; + static constexpr bool is_two_label_set = false; + static constexpr bool is_general_set = false; + static constexpr bool is_row_vertex_set = false; + + Collection() {} + Collection(size_t cap) { vec_.reserve(cap); } + Collection(std::vector&& vec) : vec_(std::move(vec)) {} + Collection(Collection&& other) : 
vec_(std::move(other.vec_)) {} + Collection(const Collection& other) : vec_(other.vec_) {} + + ~Collection() {} + size_t Size() const { return vec_.size(); } + + builder_t CreateBuilder() const { return builder_t(); } + + // Append empty entries to make length == the give args + void MakeUpTo(size_t dstLen) { + if (dstLen <= vec_.size()) { + return; + } + vec_.resize(dstLen); + } + + // For the input offset array, add default value for null entry. + std::pair, std::vector> apply( + const std::vector& offset) { + size_t new_size = offset.size() - 1; + + VLOG(10) << "Extend " << vec_.size() << " to size: " << new_size; + + std::vector new_vec; + std::vector new_offset; + new_offset.reserve(new_size + 1); + new_vec.reserve(new_size); + new_offset.emplace_back(0); + for (auto i = 0; i < new_size; ++i) { + if (offset[i] >= offset[i + 1]) { + new_vec.emplace_back(T()); + } else { + for (auto j = offset[i]; j < offset[i + 1]; ++j) { + new_vec.push_back(vec_[j]); + } + } + new_offset.emplace_back(new_vec.size()); + } + Collection new_set(std::move(new_vec)); + return std::make_pair(std::move(new_set), std::move(new_offset)); + } + + void Repeat(std::vector& cur_offset, + std::vector& repeat_vec) { + std::vector res; + res.reserve(repeat_vec.back()); + CHECK(repeat_vec.size() == cur_offset.size()) + << "repeat vec:" << gs::to_string(repeat_vec) + << ", cur offset: " << gs::to_string(cur_offset); + for (auto i = 0; i + 1 < cur_offset.size(); ++i) { + auto times_to_repeat = repeat_vec[i + 1] - repeat_vec[i]; + for (auto j = 0; j < times_to_repeat; ++j) { + for (auto k = cur_offset[i]; k < cur_offset[i + 1]; ++k) { + res.push_back(vec_[k]); + } + } + } + VLOG(10) << "new vids: " << gs::to_string(res); + vec_.swap(res); + } + + template + auto Flat(std::vector>& index_ele_tuples) { + std::vector res_vids; + res_vids.reserve(index_ele_tuples.size()); + for (auto ele : index_ele_tuples) { + auto& cur = std::get(ele); + //(ind, vid) + auto& ind = std::get<0>(cur); + CHECK(ind < 
vec_.size()); + res_vids.emplace_back(vec_[ind]); + } + return self_type_t(std::move(res_vids)); + } + + // project my self. + template ::type* = nullptr> + self_type_t ProjectWithRepeatArray(const std::vector& repeat_array, + KeyAlias& key_alias) const { + std::vector res; + for (auto i = 0; i < repeat_array.size(); ++i) { + for (auto j = 0; j < repeat_array[i]; ++j) { + // VLOG(10) << "Project: " << vids_[i]; + res.push_back(vec_[i]); + } + } + return self_type_t(std::move(res)); + } + + void SubSetWithIndices(std::vector& indices) { + std::vector res; + res.reserve(indices.size()); + for (auto ind : indices) { + res.emplace_back(vec_[ind]); + } + vec_.swap(res); + } + + template + void fillBuiltinProps(std::vector>& tuples, + PropNameArray& prop_names) { + LOG(WARNING) << " Not implemented"; + } + + iterator begin() const { return iterator(vec_, 0); } + + iterator end() const { return iterator(vec_, vec_.size()); } + + T Get(size_t ind) const { return vec_[ind]; } + + const std::vector GetVector() const { return vec_; } + + private: + std::vector vec_; +}; + +template +using CollectionOfVec = Collection>; + +// the tag is used when receiving tuple, and apply aggregate function on tuple. +template +class CountBuilder { + public: + CountBuilder() {} + + // insert tuple at index ind. + // if the ele_value equal to invalid_value, then do not insert. + template + void insert(size_t ind, const ELE_TUPLE& tuple, const DATA_TUPLE& data) { + // just count times. + while (vec_.size() <= ind) { + vec_.emplace_back(0); + } + using cur_ele_tuple = typename gs::tuple_element::type; + auto& cur_ele = gs::get_from_tuple(tuple); + // currenly we support vertex ele tupe and edge tuple. 
+ if constexpr (std::tuple_size::value == 2) { + auto& ele = std::get<1>(cur_ele); + using vid_t = typename std::tuple_element<1, cur_ele_tuple>::type; + if (ele != NullRecordCreator::GetNull()) { + ++vec_[ind]; + } else { + VLOG(10) << "ele is null"; + } + } else { + VLOG(10) << "inc:" << ind << ", " << gs::to_string(tuple); + ++vec_[ind]; + } + } + + Collection Build() { + // VLOG(10) << "Finish building counter" << gs::to_string(vec_); + return Collection(std::move(vec_)); + } + + private: + std::vector vec_; +}; + +template +class DistinctCountBuilder; + +// Counts the number of distinct received elements per group index, keeping one +// bitset per group over the value range [min_v, max_v] found at construction. +template +class DistinctCountBuilder<1, tag_id, T> { + public: + // vertices: only the minimum and maximum of these values are used, to size + // the per-group bitsets. + DistinctCountBuilder(const std::vector& vertices) { + // find out the range of vertices inside vector, and use a bitset to count + // Seed the bounds first: min_v/max_v are plain members and would otherwise + // be read uninitialized by the first std::min/std::max below. + min_v = std::numeric_limits<T>::max(); + max_v = std::numeric_limits<T>::lowest(); + for (auto v : vertices) { + min_v = std::min(min_v, v); + max_v = std::max(max_v, v); + } + // An empty input has no range; do not compute max_v - min_v on the seeds. + if (vertices.empty()) { + range_size = 0; + } else { + range_size = max_v - min_v + 1; + } + } + + // Marks std::get<1> of the element taken from tuple as seen for group ind. + // data is not used. + template + void insert(size_t ind, const ELE_TUPLE_T& tuple, const DATA_TUPLE& data) { + auto& cur_ind_ele = gs::get_from_tuple(tuple); + static_assert( + std::is_same_v< + std::tuple_element_t<1, std::remove_const_t>>, + T>, + "Type not match"); + // grow so that group ind has a bitset. + while (vec_.size() <= ind) { + vec_.emplace_back(grape::Bitset(range_size)); + } + auto& cur_bitset = vec_[ind]; + auto cur_v = std::get<1>(cur_ind_ele); + // bit position is the value's offset from the smallest value seen. + cur_bitset.set_bit(cur_v - min_v); + // VLOG(10) << "tag id: " << tag_id << "insert at ind: " << ind + // << ",value : " << cur_v << ", res: " << cur_bitset.count(); + } + + // Returns, for each group index, the number of bits set in its bitset. + Collection Build() { + std::vector res; + res.reserve(vec_.size()); + for (auto& bitset : vec_) { + res.emplace_back(bitset.count()); + } + return Collection(std::move(res)); + } + + private: + std::vector vec_; + T min_v, max_v, range_size; +}; + +// specialization for DistinctCountBuilder for num_labels=2 +template +class DistinctCountBuilder<2, tag_id, T> { + public: + DistinctCountBuilder(const grape::Bitset& bitset, + const std::vector& vids) { + // find out the 
range of vertices inside vector, and use a bitset to count + // Seed both labels' bounds first: the std::array members are otherwise + // indeterminate when std::min/std::max read them below. + min_v.fill(std::numeric_limits<T>::max()); + max_v.fill(std::numeric_limits<T>::lowest()); + // vids[i] is accounted to slot 0 when bit i of bitset is set, else slot 1. + for (auto i = 0; i < vids.size(); ++i) { + auto v = vids[i]; + if (bitset.get_bit(i)) { + min_v[0] = std::min(min_v[0], v); + max_v[0] = std::max(max_v[0], v); + } else { + min_v[1] = std::min(min_v[1], v); + max_v[1] = std::max(max_v[1], v); + } + } + // Each slot's range is measured from that slot's own minimum (slot 1 used + // to subtract min_v[0]). A slot that received no vertex gets an empty range + // instead of a value computed from the seeds. + for (auto k = 0; k < 2; ++k) { + if (max_v[k] >= min_v[k]) { + range_size[k] = max_v[k] - min_v[k] + 1; + } else { + range_size[k] = 0; + } + } + VLOG(10) << "Min: " << min_v[0] << ", range size: " << range_size[0]; + VLOG(10) << "Min: " << min_v[1] << ", range size: " << range_size[1]; + } + + // Marks std::get<2> of the element taken from tuple as seen for group ind, + // in the bitset of the slot given by std::get<1>. data is not used. + template + void insert(size_t ind, const ELE_TUPLE_T& tuple, const DATA_TUPLE& data) { + auto& cur_ind_ele = gs::get_from_tuple(tuple); + static_assert( + std::is_same_v< + std::tuple_element_t<2, std::remove_const_t>>, + T>, + "Type not match"); + auto label_ind = std::get<1>(cur_ind_ele); + while (vec_[label_ind].size() <= ind) { + vec_[label_ind].emplace_back(grape::Bitset(range_size[label_ind])); + } + + auto& cur_bitset = vec_[label_ind][ind]; + auto cur_v = std::get<2>(cur_ind_ele); + cur_bitset.set_bit(cur_v - min_v[label_ind]); + VLOG(10) << "tag id: " << tag_id << "insert at ind: " << ind + << ",value : " << cur_v << ", res: " << cur_bitset.count(); + } + + // Returns, for each group index, the sum of the set-bit counts of both slots. + Collection Build() { + std::vector res; + auto max_ind = std::max(vec_[0].size(), vec_[1].size()); + res.resize(max_ind, 0); + for (auto label_ind = 0; label_ind < 2; ++label_ind) { + for (auto i = 0; i < vec_[label_ind].size(); ++i) { + res[i] += vec_[label_ind][i].count(); + } + } + return Collection(std::move(res)); + } + + private: + std::array, 2> vec_; + std::array min_v, max_v, range_size; +}; + +template +class SumBuilder { + public: + SumBuilder() {} + SumBuilder(size_t cap) { vec_.resize(cap, (T) 0); } + + // insert tuple at index ind. + template + void insert(size_t ind, const IND_ELE_TUPLE& tuple, const DATA_TUPLE& data) { + const auto& cur_ind_ele = gs::get_from_tuple(tuple); + // just count times. 
+ while (vec_.size() <= ind) { + vec_.emplace_back((T) 0); + } + vec_[ind] += std::get<1>(cur_ind_ele); + } + + Collection Build() { + // VLOG(10) << "Finish building counter" << gs::to_string(vec_); + return Collection(std::move(vec_)); + } + + private: + std::vector vec_; +}; + +// Keeps, for each group index, the minimum of the values inserted for it. +// A slot that never receives a value holds std::numeric_limits<T>::max(). +template +class MinBuilder { + public: + MinBuilder(const Collection& set, const GI& graph, + PropNameArray prop_names) { + vec_.resize(set.Size(), std::numeric_limits::max()); + } + MinBuilder() {} + // Pre-sized slots must start at the identity of min(); starting them at 0 + // would cap every non-negative result at 0. + MinBuilder(size_t cap) { vec_.resize(cap, std::numeric_limits<T>::max()); } + + // Folds std::get<1> of the element taken from tuple into slot ind. + // data is not used. + template + void insert(size_t ind, const IND_ELE_TUPLE& tuple, const DATA_TUPLE& data) { + const auto& cur_ind_ele = gs::get_from_tuple(tuple); + // grow up to ind, new slots start at the identity of min(). + while (vec_.size() <= ind) { + vec_.emplace_back(std::numeric_limits::max()); + } + vec_[ind] = std::min(vec_[ind], std::get<1>(cur_ind_ele)); + } + + // Moves the per-group minima out into a Collection. + Collection Build() { + // VLOG(10) << "Finish building counter" << gs::to_string(vec_); + return Collection(std::move(vec_)); + } + + private: + std::vector vec_; +}; + +// Max-aggregation counterpart of MinBuilder. +template +class MaxBuilder { + public: + MaxBuilder(const Collection& set, const GI& graph, + PropNameArray prop_names) { + // lowest(), not min(): for floating-point T, min() is the smallest + // positive value, which is not a valid starting point for max(). + vec_.resize(set.Size(), std::numeric_limits<T>::lowest()); + } + MaxBuilder() {} + // Pre-sized slots start at lowest() so that negative inputs are not + // masked by an initial 0. + MaxBuilder(size_t cap) { vec_.resize(cap, std::numeric_limits<T>::lowest()); } + + // insert tuple at index ind. + template + void insert(size_t ind, const IND_ELE_TUPLE& tuple, + const DATA_TUPLE_T& data) { + const auto& cur_ind_ele = gs::get_from_tuple(tuple); + // just count times. 
+ while (vec_.size() <= ind) { + // A new slot must start at the lowest value: seeded with max(), + // std::max below could never replace it. + vec_.emplace_back(std::numeric_limits<T>::lowest()); + } + vec_[ind] = std::max(vec_[ind], std::get<1>(cur_ind_ele)); + } + + Collection Build() { + // VLOG(10) << "Finish building counter" << gs::to_string(vec_); + return Collection(std::move(vec_)); + } + + private: + std::vector vec_; +}; + +template +class FirstBuilder; + +// FirstBuilder for a Collection: keeps the first value inserted for each +// group index and ignores later ones. +template +class FirstBuilder, grape::EmptyType, tag_id> { + public: + FirstBuilder(const Collection& set, const GI& graph, + PropNameArray prop_names) { + CHECK(prop_names.size() == 1); + CHECK(prop_names[0] == "none" || prop_names[0] == "None" || + prop_names[0] == ""); + } + + // Group indices must arrive without gaps: ind == size appends, ind < size is + // a later element of an already-filled group, anything else is fatal. + template + void insert(size_t ind, const IND_ELE_T& tuple, + const DATA_ELE_T& data_tuple) { + if (ind < vec_.size()) { + return; + } else if (ind == vec_.size()) { + vec_.emplace_back(std::get<1>(gs::get_from_tuple(tuple))); + } else { + LOG(FATAL) << "Can not insert with ind: " << ind + << ", which cur size is : " << vec_.size(); + } + } + + Collection Build() { return Collection(std::move(vec_)); } + + private: + std::vector vec_; +}; + +// firstBuilder for vertex set, with data tuple +template +class FirstBuilder, + grape::EmptyType, tag_id> { + public: + using set_t = RowVertexSetImpl; + using builder_t = RowVertexSetImplBuilder; + FirstBuilder(const set_t& set, const GI& graph, + PropNameArray prop_names) + : builder_(set.GetLabel(), set.GetPropNames()) {} + + // Same first-wins rule as above, delegating storage to builder_. + template + void insert(size_t ind, const IND_ELE_T& tuple, + const DATA_TUPLE_T& data_tuple) { + if (ind < builder_.Size()) { + return; + } else if (ind == builder_.Size()) { + builder_.Insert(tuple, data_tuple); + } else { + // Size(), as in the checks above and in the sibling specializations. + LOG(FATAL) << "Can not insert with ind: " << ind + << ", which cur size is : " << builder_.Size(); + } + } + + set_t Build() { return builder_.Build(); } + + private: + builder_t builder_; +}; + +template +class FirstBuilder, + grape::EmptyType, tag_id> { + public: + using set_t = RowVertexSetImpl; + using builder_t = 
RowVertexSetImplBuilder; + FirstBuilder(const set_t& set, const GI& graph, + PropNameArray prop_names) + : builder_(set.GetLabel(), set.GetPropNames()) {} + + template + void insert(size_t ind, const IND_ELE_T& tuple, + const DATA_TUPLE_T& data_tuple) { + if (ind < builder_.Size()) { + return; + } else if (ind == builder_.Size()) { + builder_.Insert(tuple); + } else { + LOG(FATAL) << "Can not insert with ind: " << ind + << ", which cur size is : " << builder_.Size(); + } + } + + set_t Build() { return builder_.Build(); } + + private: + builder_t builder_; +}; + +// first builder for two label set +template +class FirstBuilder, + grape::EmptyType, tag_id> { + public: + using set_t = TwoLabelVertexSetImpl; + using builder_t = + TwoLabelVertexSetImplBuilder; + FirstBuilder(const set_t& set, const GI& graph, + PropNameArray prop_names) + : builder_(set.Size(), set.GetLabels()) {} + + template + void insert(size_t ind, const IND_ELE_T& tuple, + const DATA_TUPLE_T& data_tuple) { + if (ind < builder_.Size()) { + return; + } else if (ind == builder_.Size()) { + builder_.Insert(gs::get_from_tuple(tuple)); + } else { + LOG(FATAL) << "Can not insert with ind: " << ind + << ", which cur size is : " << builder_.Size(); + } + } + + set_t Build() { return builder_.Build(); } + + private: + builder_t builder_; +}; + +template +class FirstBuilder, + grape::EmptyType, tag_id> { + public: + using set_t = TwoLabelVertexSetImpl; + using builder_t = TwoLabelVertexSetImplBuilder; + FirstBuilder(const set_t& set, const GI& graph, + PropNameArray prop_names) + // we should use a size which indicate the context size + : builder_(set.Size(), set.GetLabels()) {} + + template + void insert(size_t ind, const IND_ELE_T& tuple, + const DATA_TUPLE_T& data_tuple) { + if (ind < builder_.Size()) { + return; + } else if (ind == builder_.Size()) { + builder_.Insert(gs::get_from_tuple(tuple), + gs::get_from_tuple(data_tuple)); + } else { + LOG(FATAL) << "Can not insert with ind: " << ind + << ", which 
cur size is : " << builder_.Size(); + } + } + + set_t Build() { return builder_.Build(); } + + private: + builder_t builder_; +}; + +template +class CollectionOfSetBuilder; + +template +class CollectionOfSetBuilder< + T, GRAPH_INTERFACE, RowVertexSetImpl, tag_id> { + public: + using set_t = RowVertexSetImpl; + using graph_prop_getter_t = + typename GRAPH_INTERFACE::template single_prop_getter_t; + using PROP_GETTER_T = + RowVertexSetPropGetter; + CollectionOfSetBuilder(const RowVertexSetImpl& set, + const GRAPH_INTERFACE& graph, + PropNameArray prop_names) + : prop_getter_( + create_prop_getter_impl(set, graph, prop_names[0])) {} + + // insert tuple at index ind. + template + void insert(size_t ind, IND_TUPLE& tuple) { + while (vec_.size() <= ind) { + vec_.emplace_back(std::vector()); + } + auto cur = gs::get_from_tuple(tuple); + using ele_t = typename set_t::index_ele_tuple_t; + if (NullRecordCreator::GetNull() == cur) { + return; + } + + vec_[ind].emplace_back(prop_getter_.get_view(cur)); + } + + template + void insert(size_t ind, const IND_TUPLE& tuple, const DATA_TUPLE& data) { + while (vec_.size() <= ind) { + vec_.emplace_back(std::vector()); + } + auto cur = gs::get_from_tuple(tuple); + using ele_t = typename set_t::index_ele_tuple_t; + if (NullRecordCreator::GetNull() == cur) { + return; + } + + vec_[ind].emplace_back(prop_getter_.get_view(cur)); + } + + CollectionOfVec Build() { + // Make it unique. 
+ for (auto i = 0; i < vec_.size(); ++i) { + sort(vec_[i].begin(), vec_[i].end()); + vec_[i].erase(unique(vec_[i].begin(), vec_[i].end()), vec_[i].end()); + } + // VLOG(10) << "Finish building counter" << gs::to_string(vec_); + return CollectionOfVec(std::move(vec_)); + } + + private: + std::vector> vec_; + PROP_GETTER_T prop_getter_; +}; + +// To vector +template +class CollectionOfVecBuilder; + +template +class CollectionOfVecBuilder, tag_id> { + public: + CollectionOfVecBuilder(const GI& graph, const Collection& set, + PropNameArray prop_names) {} + + // insert tuple at index ind. + template + void insert(size_t ind, IND_TUPLE& tuple) { + auto cur = std::get<1>(gs::get_from_tuple(tuple)); + // just count times. + while (vec_.size() <= ind) { + vec_.emplace_back(std::vector()); + } + using input_ele_t = typename std::remove_reference::type; + if (NullRecordCreator::GetNull() == cur) { + return; + } + // emplace the element to vector + vec_[ind].emplace_back(cur); + } + + template + void insert(size_t ind, const IND_TUPLE& tuple, const DATA_TUPLE& data) { + return insert(ind, tuple); + } + + CollectionOfVec Build() { return CollectionOfVec(std::move(vec_)); } + + private: + std::vector> vec_; +}; + +// organizing one property of vertex set to vector. +template +class CollectionOfVecBuilder, + tag_id> { + public: + using set_t = RowVertexSetImpl; + using graph_prop_getter_t = + typename GRAPH_INTERFACE::template single_prop_getter_t; + using PROP_GETTER_T = + RowVertexSetPropGetter; + CollectionOfVecBuilder(const set_t& set, const GRAPH_INTERFACE& graph, + PropNameArray& prop_names) + : prop_getter_(create_prop_getter_impl(set, graph, + prop_names[0])) {} + + // insert tuple at index ind. + template + void insert(size_t ind, IND_TUPLE& tuple) { + auto cur = gs::get_from_tuple(tuple); + // just count times. 
+ while (vec_.size() <= ind) { + vec_.emplace_back(std::vector()); + } + using input_ele_t = typename std::remove_reference::type; + if (NullRecordCreator::GetNull() == cur) { + return; + } + // emplace the element to vector + vec_[ind].emplace_back(prop_getter_.get_view(cur)); + } + + CollectionOfVec Build() { + return CollectionOfVec(std::move(vec_)); + } + + private: + std::vector> vec_; + PROP_GETTER_T prop_getter_; +}; + +} // namespace gs + +#endif // ENGINES_HQPS_DS_COLLECTION_H_ diff --git a/flex/engines/hqps_db/structures/multi_edge_set/README.md b/flex/engines/hqps_db/structures/multi_edge_set/README.md new file mode 100644 index 000000000000..52d495eb62c3 --- /dev/null +++ b/flex/engines/hqps_db/structures/multi_edge_set/README.md @@ -0,0 +1,6 @@ +# Edge MultiSet + +- SingleSrcAdjEdgeSet: Stores edges as references to adj_lists (fetched via the storage interface). Src vertices all share one label, and dst vertices all share one label. +- MultiSrcAdEdgeSet: Stores edges as references to adj_lists (fetched via the storage interface). Src vertices come from multiple labels, and dst vertices all share one label. +- FlatEdgeSet: Stores the edges as triplets. +- MultiLabelDstEdgeSet: Stores edges with multiple destination labels, stored separately. \ No newline at end of file diff --git a/flex/engines/hqps_db/structures/multi_edge_set/adj_edge_set.h b/flex/engines/hqps_db/structures/multi_edge_set/adj_edge_set.h new file mode 100644 index 000000000000..798a91d82bca --- /dev/null +++ b/flex/engines/hqps_db/structures/multi_edge_set/adj_edge_set.h @@ -0,0 +1,1019 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef ENGINES_HQPS_ENGINE_DS_EDGE_MULTI_EDGE_SET_ADJ_EDGE_SET_H_ +#define ENGINES_HQPS_ENGINE_DS_EDGE_MULTI_EDGE_SET_ADJ_EDGE_SET_H_ + +#include +#include +#include +#include + +#include "flex/engines/hqps_db/core/utils/hqps_utils.h" +#include "flex/engines/hqps_db/structures/multi_edge_set/flat_edge_set.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h" +#include "grape/types.h" + +namespace gs { + +/** + * @brief AdjEdgeSetBuilder Works for AdjEdgeSet, + * + * @tparam VID_T + * @tparam LabelT + * @tparam EDATA_T + */ +template +class AdjEdgeSetBuilder { + using index_ele_tuple_t = + std::tuple>; + using res_ele_tuple_t = std::tuple>; + using res_t = FlatEdgeSet; + + public: + static constexpr bool is_adj_edge_set_builder = true; + AdjEdgeSetBuilder(LabelT src_label, LabelT dst_label, LabelT edge_label, + std::array prop_names, + Direction direc) + : src_label_(src_label), + dst_label_(dst_label), + edge_label_(edge_label), + prop_names_(prop_names), + direction_(direc) {} + + void Insert(const index_ele_tuple_t& tuple) { + vec_.emplace_back(gs::remove_nth_element<0>(tuple)); + } + + res_t Build() { + std::vector label_vec(vec_.size()); + std::fill(label_vec.begin(), label_vec.end(), src_label_); + return res_t(std::move(vec_), edge_label_, {src_label_}, dst_label_, + prop_names_, std::move(label_vec), direction_); + } + + private: + std::vector vec_; + LabelT src_label_, dst_label_, edge_label_; + std::array prop_names_; + Direction direction_; +}; + +template +class AdjEdgeSetBuilder { + using 
index_ele_tuple_t = std::tuple; + using res_ele_tuple_t = std::tuple; + using res_t = FlatEdgeSet; + + public: + static constexpr bool is_adj_edge_set_builder = true; + AdjEdgeSetBuilder(LabelT src_label, LabelT dst_label, LabelT edge_label, + Direction direc) + : src_label_(src_label), + dst_label_(dst_label), + edge_label_(edge_label), + direction_(direc) {} + + void Insert(const index_ele_tuple_t& tuple) { + vec_.emplace_back(gs::remove_nth_element<0>(tuple)); + } + + res_t Build() { + std::vector label_vec(vec_.size()); + std::fill(label_vec.begin(), label_vec.end(), src_label_); + return res_t(std::move(vec_), edge_label_, {src_label_}, dst_label_, + std::move(label_vec), direction_); + } + + private: + std::vector vec_; + LabelT src_label_, dst_label_, edge_label_; + Direction direction_; +}; + +template +class AdjEdgeSetIter { + public: + using self_type_t = AdjEdgeSetIter; + using adj_list_t = typename GI::template adj_list_t; + using adj_list_iter_t = typename adj_list_t::iterator; + using adj_list_array_t = typename GI::template adj_list_array_t; + using ele_tuple_t = std::tuple>; + using index_ele_tuple_t = + std::tuple>; + AdjEdgeSetIter(const std::vector& vids, + const adj_list_array_t& adj_lists, size_t ind) + : vids_(vids), adj_lists_(adj_lists), ind_(ind) { + if (ind_ == vids_.size()) { + begin_ = adj_list_iter_t(); + end_ = adj_list_iter_t(); + } else { + while (ind_ < vids_.size()) { + auto cur_adj_list = adj_lists_.get(ind_); + begin_ = cur_adj_list.begin(); + end_ = cur_adj_list.end(); + if (begin_ != end_) { + break; + } + ind_ += 1; + } + if (ind_ < vids_.size()) { + VLOG(10) << "Found first valid edge at: " << ind_; + } else { + begin_ = adj_list_iter_t(); + end_ = adj_list_iter_t(); + } + } + } + + inline VID_T GetSrc() const { return vids_[ind_]; } + inline VID_T GetDst() const { return begin_.neighbor(); } + inline const std::tuple GetData() const { + return begin_.properties(); + } + + inline ele_tuple_t GetElement() const { + return 
std::tuple{GetSrc(), GetDst(), GetData()}; + } + + inline index_ele_tuple_t GetIndexElement() const { + return std::make_tuple(ind_, GetSrc(), GetDst(), GetData()); + } + + inline const self_type_t& operator++() { + ++begin_; + if (begin_ == end_) { + ind_ += 1; + while (ind_ < vids_.size()) { + auto cur_adj_list = adj_lists_.get(ind_); + begin_ = cur_adj_list.begin(); + end_ = cur_adj_list.end(); + if (begin_ != end_) { + break; + } + ind_ += 1; + } + if (ind_ >= vids_.size()) { + begin_ = adj_list_iter_t(); + } + } + return *this; + } + + inline bool operator==(const self_type_t& rhs) const { + return ind_ == rhs.ind_; + } + + inline bool operator!=(const self_type_t& rhs) const { + return ind_ != rhs.ind_; + } + + inline bool operator<(const self_type_t& rhs) const { + return ind_ < rhs.ind_; + } + + inline const self_type_t& operator*() const { return *this; } + + inline const self_type_t* operator->() const { return this; } + + private: + const std::vector& vids_; + const adj_list_array_t& adj_lists_; + size_t ind_; + adj_list_iter_t begin_, end_; +}; + +template +class AdjEdgeSetIter { + public: + using self_type_t = AdjEdgeSetIter; + using adj_t = typename GI::template adj_t<>; + using adj_list_array_t = typename GI::template adj_list_array_t<>; + using adj_list_t = typename GI::template adj_list_t<>; + using adj_list_iter_t = typename adj_list_t::iterator; + using ele_tuple_t = std::tuple; + using index_ele_tuple_t = std::tuple; + AdjEdgeSetIter(const std::vector& vids, + const adj_list_array_t& adj_lists, size_t ind) + : vids_(vids), adj_lists_(adj_lists), ind_(ind) { + if (ind_ == vids_.size()) { + // begin_ = end_ = nullptr; + begin_ = adj_list_iter_t(); + end_ = adj_list_iter_t(); + } else { + while (ind_ < vids_.size()) { + auto cur_adj_list = adj_lists_.get(ind_); + begin_ = cur_adj_list.begin(); + end_ = cur_adj_list.end(); + if (begin_ != end_) { + break; + } + ind_ += 1; + } + if (ind_ < vids_.size()) { + VLOG(10) << "Found first valid edge at: " 
<< ind_; + } else { + begin_ = adj_list_iter_t(); + end_ = adj_list_iter_t(); + } + } + } + + inline const self_type_t& operator++() { + ++begin_; + if (begin_ == end_) { + ind_ += 1; + while (ind_ < vids_.size()) { + auto cur_adj_list = adj_lists_.get(ind_); + begin_ = cur_adj_list.begin(); + end_ = cur_adj_list.end(); + if (begin_ != end_) { + break; + } + ind_ += 1; + } + if (ind_ >= vids_.size()) { + begin_ = adj_list_iter_t(); + } + } + return *this; + } + + inline VID_T GetSrc() const { return vids_[ind_]; } + inline VID_T GetDst() const { return begin_.neighbor(); } + inline grape::EmptyType GetData() const { return grape::EmptyType(); } + + inline ele_tuple_t GetElement() const { + return std::tuple{GetSrc(), GetDst(), GetData()}; + } + + inline index_ele_tuple_t GetIndexElement() const { + // TODO: consider direction. + return std::make_tuple(ind_, GetSrc(), GetDst(), GetData()); + } + + inline bool operator==(const self_type_t& rhs) const { + return ind_ == rhs.ind_; + } + + inline bool operator!=(const self_type_t& rhs) const { + return ind_ != rhs.ind_; + } + + inline bool operator<(const self_type_t& rhs) const { + return ind_ < rhs.ind_; + } + + inline const self_type_t& operator*() const { return *this; } + + inline const self_type_t* operator->() const { return this; } + + private: + const std::vector& vids_; + const adj_list_array_t& adj_lists_; + size_t ind_; + adj_list_iter_t begin_, end_; +}; + +/// @brief Multi label edge set iter. 
+/// @tparam VID_T +/// @tparam ...T +template +class MulLabelSrcGrootEdgeSetIter { + public: + using self_type_t = MulLabelSrcGrootEdgeSetIter; + // using adj_t = typename GI::template adj_t; + using adj_list_array_t = typename GI::template adj_list_array_t; + using adj_list_t = typename GI::template adj_list_t; + using adj_list_iter_t = typename adj_list_t::iterator; + using ele_tuple_t = std::tuple; + // set_ind, inner_ind, + using index_ele_tuple_t = std::tuple; + MulLabelSrcGrootEdgeSetIter(const std::array, N>& vids, + const std::array, N>& offsets, + const std::array& adj_lists, + size_t ind) + : vids_(vids), + offsets_(offsets), + adj_lists_(adj_lists), + cur_ind_(ind), + set_ind_(0) { + if (cur_ind_ == offsets[0].size() - 1) { + VLOG(10) << "end iter"; + // begin_ =end_ = nullptr; + // while(adj_lists_[cur_ind_].get) + } else { + for (auto i = 0; i < N; ++i) { + local_ind_[i] = 0; + } + // begin_ =end_ = nullptr; + VLOG(10) << "begin iter"; + probe_for_next(); + } + } + + inline const self_type_t& operator++() { + ++begin_; + // ++local_ind_[set_ind_]; + probe_for_next(); + return *this; + } + + void probe_for_next() { + bool flag = false; + if (begin_.valid() && end_.valid()) { + if (begin_ != end_) { + return; + } else if (begin_ == end_) { + ++local_ind_[set_ind_]; + } + } + + while (cur_ind_ < offsets_[0].size() - 1) { + while (set_ind_ < N) { + // VLOG(10) << "probe for next: " << cur_ind_ << ", set_ind : " << + // set_ind_ + // << "local ind: " << local_ind_[set_ind_] + // << ", range: " << offsets_[set_ind_][cur_ind_] << "," + // << offsets_[set_ind_][cur_ind_ + 1]; + if (local_ind_[set_ind_] >= offsets_[set_ind_][cur_ind_] && + local_ind_[set_ind_] < offsets_[set_ind_][cur_ind_ + 1]) { + begin_ = adj_lists_[set_ind_].get(local_ind_[set_ind_]).begin(); + end_ = adj_lists_[set_ind_].get(local_ind_[set_ind_]).end(); + if (begin_ != end_) { + flag = true; + break; + } else { + ++local_ind_[set_ind_]; + } + // set_ind_ += 1; + } else { + set_ind_ += 1; 
+ } + } + if (flag) { + break; + } + set_ind_ = 0; + cur_ind_ += 1; + } + if (cur_ind_ < offsets_[0].size() - 1) { + // VLOG(10) << "found next: " << cur_ind_ << ", set_ind : " << set_ind_ + // << "local ind: " << local_ind_[set_ind_] << ", " + // << begin_->neighbor() << ", " + // << vids_[set_ind_][local_ind_[set_ind_]]; + } else { + VLOG(10) << "reach end" << cur_ind_; + // begin_ = end_ = nullptr; + } + // int t; + // std::cin >> t; + } + + inline VID_T GetSrc() const { return vids_[set_ind_][local_ind_[set_ind_]]; } + inline VID_T GetDst() const { return begin_.neighbor(); } + inline const std::tuple& GetData() const { return begin_.properties(); } + + inline Edge GetElement() const { + return Edge(GetSrc(), GetDst(), GetData()); + } + + inline bool operator==(const self_type_t& rhs) const { + return cur_ind_ == rhs.cur_ind_; + } + + index_ele_tuple_t GetIndexElement() const { + return std::make_tuple(set_ind_, local_ind_[set_ind_], + std::make_tuple(GetSrc(), begin_)); + } + + inline bool operator!=(const self_type_t& rhs) const { + return cur_ind_ != rhs.cur_ind_; + } + + inline bool operator<(const self_type_t& rhs) const { + return cur_ind_ < rhs.cur_ind_; + } + + inline const self_type_t& operator*() const { return *this; } + + inline const self_type_t* operator->() const { return this; } + + private: + const std::array, N>& vids_; + const std::array, N>& offsets_; + const std::array& adj_lists_; + size_t set_ind_; + size_t cur_ind_; + std::array local_ind_; + adj_list_iter_t begin_, end_; +}; + +template +class MulLabelSrcGrootEdgeSetIter { + public: + using self_type_t = + MulLabelSrcGrootEdgeSetIter; + // using adj_t = typename GI::template adj_t<>; + using adj_list_array_t = typename GI::template adj_list_array_t<>; + using adj_list_t = typename GI::template adj_list_t<>; + using adj_list_iter_t = typename adj_list_t::iterator; + using ele_tuple_t = std::tuple; + // set_ind, inner_ind, + using index_ele_tuple_t = std::tuple; + 
MulLabelSrcGrootEdgeSetIter(const std::array, N>& vids, + const std::array, N>& offsets, + const std::array& adj_lists, + size_t ind) + : vids_(vids), + offsets_(offsets), + adj_lists_(adj_lists), + cur_ind_(ind), + set_ind_(0), + data_(std::make_tuple(grape::EmptyType())) { + if (cur_ind_ == offsets[0].size() - 1) { + VLOG(10) << "end iter"; + // while(adj_lists_[cur_ind_].get) + } else { + for (auto i = 0; i < N; ++i) { + local_ind_[i] = 0; + } + VLOG(10) << "begin iter"; + probe_for_next(); + } + } + + inline const self_type_t& operator++() { + CHECK(begin_); + ++begin_; + // ++local_ind_[set_ind_]; + probe_for_next(); + return *this; + } + + void probe_for_next() { + bool flag = false; + if (begin_.valid() && end_.valid()) { + if (begin_ != end_) { + return; + } else if (begin_ == end_) { + ++local_ind_[set_ind_]; + } + } + + while (cur_ind_ < offsets_[0].size() - 1) { + while (set_ind_ < N) { + if (local_ind_[set_ind_] >= offsets_[set_ind_][cur_ind_] && + local_ind_[set_ind_] < offsets_[set_ind_][cur_ind_ + 1]) { + begin_ = adj_lists_[set_ind_].get(local_ind_[set_ind_]).begin(); + end_ = adj_lists_[set_ind_].get(local_ind_[set_ind_]).end(); + if (begin_ != end_) { + flag = true; + break; + } else { + ++local_ind_[set_ind_]; + } + } else { + set_ind_ += 1; + } + } + if (flag) { + break; + } + set_ind_ = 0; + cur_ind_ += 1; + } + } + + inline VID_T GetSrc() const { return vids_[set_ind_][local_ind_[set_ind_]]; } + inline VID_T GetDst() const { return begin_.neighbor(); } + inline const std::tuple& GetData() const { return data_; } + + inline Edge GetElement() const { + return Edge(GetSrc(), GetDst(), GetData()); + } + + index_ele_tuple_t GetIndexElement() const { + return std::make_tuple(set_ind_, local_ind_[set_ind_], + std::make_tuple(GetSrc(), begin_)); + } + + inline bool operator==(const self_type_t& rhs) const { + return cur_ind_ == rhs.cur_ind_; + } + + inline bool operator!=(const self_type_t& rhs) const { + // VLOG(10) << "judge: " << cur_ind_ << " vs: 
" << rhs.cur_ind_; + return cur_ind_ != rhs.cur_ind_; + } + + inline bool operator<(const self_type_t& rhs) const { + return cur_ind_ < rhs.cur_ind_; + } + + inline const self_type_t& operator*() const { return *this; } + + inline const self_type_t* operator->() const { return this; } + + private: + const std::array, N>& vids_; + const std::array, N>& offsets_; + const std::array& adj_lists_; + size_t set_ind_; + size_t cur_ind_; + std::array local_ind_; + adj_list_iter_t begin_, end_; + std::tuple data_; +}; + +template +class AdjEdgeSet { + public: + using iterator = AdjEdgeSetIter; + using self_type_t = AdjEdgeSet; + using flat_t = FlatEdgeSet; + using data_tuple_t = std::tuple; + using adj_t = typename GI::template adj_t; + using adj_list_array_t = typename GI::template adj_list_array_t; + using adj_list_t = typename GI::template adj_list_t; + using adj_list_iter_t = typename adj_list_t::iterator; + using ele_tuple_t = std::tuple; + using index_ele_tuple_t = std::tuple; + using flat_ele_t = index_ele_tuple_t; + // a builder which can receive AdjEdgeSet's elements and build a flatEdgeSet. 
+ using builder_t = AdjEdgeSetBuilder; + + static constexpr bool is_edge_set = true; + static constexpr bool is_multi_label = false; + static constexpr bool is_collection = false; + static constexpr bool is_multi_dst_label = false; + + AdjEdgeSet(std::vector&& vids, adj_list_array_t&& adj_lists, + LabelT edge_label, LabelT src_label, LabelT dst_label, + std::array prop_names, Direction dir) + : vids_(std::move(vids)), + adj_lists_(std::move(adj_lists)), + edge_label_(edge_label), + src_label_(src_label), + dst_label_(dst_label), + prop_names_(prop_names), + dir_(dir) { + size_ = 0; + for (auto i = 0; i < adj_lists_.size(); ++i) { + size_ += adj_lists_.get(i).size(); + } + } + + builder_t CreateBuilder() const { + return builder_t(src_label_, dst_label_, edge_label_, prop_names_, dir_); + } + + iterator begin() const { return iterator(vids_, adj_lists_, 0); } + + iterator end() const { return iterator(vids_, adj_lists_, vids_.size()); } + + template + std::pair, std::vector> + GetVertices(VOpt v_opt, std::array& labels, + EXPR& expr) const { + if (dir_ == Direction::In) { + CHECK(v_opt == VOpt::Start || v_opt == VOpt::Other); + } else if (dir_ == Direction::Out) { + CHECK(v_opt == VOpt::End || v_opt == VOpt::Other); + } + std::vector offsets; + std::vector vids; + offsets.reserve(Size()); + vids.reserve(Size()); + offsets.emplace_back(0); + for (auto iter : *this) { + vids.emplace_back(iter.GetDst()); + offsets.emplace_back(vids.size()); + } + auto set = MakeDefaultRowVertexSet(std::move(vids), dst_label_); + return std::make_pair(std::move(set), std::move(offsets)); + } + + template + flat_t Flat( + std::vector>& index_ele_tuple) const { + std::vector>> res; + res.reserve(index_ele_tuple.size()); + for (auto i = 0; i < index_ele_tuple.size(); ++i) { + auto cur_ind_ele = std::get(index_ele_tuple[i]); + auto nbr = std::get<2>(cur_ind_ele); + // auto ele = std::get<2>(cur_ind_ele); + // auto nbr = std::get<1>(ele); + 
res.emplace_back(std::make_tuple(std::get<1>(cur_ind_ele), + nbr->neighbor(), nbr->properties())); + } + // TODO :better label vec + std::vector label_vec(res.size(), src_label_); + // FlatEdgeSet's constructor takes the edge direction as its last + // argument, so forward dir_ together with the labels. + return flat_t(std::move(res), edge_label_, {src_label_}, dst_label_, + prop_names_, std::move(label_vec), dir_); + } + + template + void fillBuiltinProps(std::vector>& tuples, + PropNameArray& prop_names, + std::vector& repeat_array) { + LOG(WARNING) << "No implemented"; + } + + // fill builtin props without repeat array. + template + void fillBuiltinProps(std::vector>& tuples, + PropNameArray& prop_names) { + LOG(WARNING) << "No implemented"; + } + + void Repeat(std::vector& cur_offset, + std::vector& repeat_vec) { + LOG(FATAL) << "not implemented"; + } + + size_t Size() const { return size_; } + + private: + size_t size_; + std::vector vids_; + LabelT edge_label_, src_label_, dst_label_; + adj_list_array_t adj_lists_; + std::array prop_names_; + Direction dir_; +}; + +template +class AdjEdgeSet { + public: + using iterator = AdjEdgeSetIter; + using self_type_t = AdjEdgeSet; + using data_tuple_t = std::tuple; + using flat_t = FlatEdgeSet; + using adj_list_array_t = typename GI::template adj_list_array_t<>; + using adj_list_t = typename GI::template adj_list_t<>; + using adj_list_iter_t = typename adj_list_t::iterator; + using ele_tuple_t = std::tuple; + using index_ele_tuple_t = std::tuple; + using flat_ele_t = index_ele_tuple_t; + using builder_t = AdjEdgeSetBuilder; + static constexpr bool is_multi_label = false; + static constexpr bool is_collection = false; + static constexpr bool is_edge_set = true; + static constexpr bool is_multi_dst_label = false; + AdjEdgeSet(std::vector&& vids, adj_list_array_t&& adj_lists, + LabelT edge_label, LabelT src_label, LabelT dst_label, + Direction dir) + : vids_(std::move(vids)), + adj_lists_(std::move(adj_lists)), + edge_label_(edge_label), + src_label_(src_label), + dst_label_(dst_label), + dir_(dir) { + size_ = 0; + for (auto i = 0; i < 
adj_lists_.size(); ++i) { + size_ += adj_lists_.get(i).size(); + } + } + + builder_t CreateBuilder() const { + return builder_t(src_label_, dst_label_, edge_label_, dir_); + } + + iterator begin() const { return iterator(vids_, adj_lists_, 0); } + + iterator end() const { return iterator(vids_, adj_lists_, vids_.size()); } + + size_t Size() const { return size_; } + + template + std::pair, std::vector> + GetVertices(VOpt v_opt, std::array& labels, + EXPR& expr) const { + if (dir_ == Direction::In) { + CHECK(v_opt == VOpt::Start || v_opt == VOpt::Other); + } else if (dir_ == Direction::Out) { + CHECK(v_opt == VOpt::End || v_opt == VOpt::Other); + } + std::vector offsets; + std::vector vids; + offsets.reserve(Size()); + offsets.emplace_back(0); + for (auto iter : *this) { + vids.emplace_back(iter.GetDst()); + offsets.emplace_back(vids.size()); + } + auto set = MakeDefaultRowVertexSet(std::move(vids), dst_label_); + return std::make_pair(std::move(set), std::move(offsets)); + } + + template + flat_t Flat( + std::vector>& index_ele_tuple) const { + std::vector> res; + res.reserve(index_ele_tuple.size()); + for (auto i = 0; i < index_ele_tuple.size(); ++i) { + auto cur_ind_ele = std::get(index_ele_tuple[i]); + auto iter = std::get<2>(cur_ind_ele); + auto src = std::get<1>(cur_ind_ele); + res.emplace_back( + std::make_tuple(src, iter->neighbor(), grape::EmptyType())); + } + // The property-less FlatEdgeSet has no vector-only constructor: it also + // needs the labels, one source label per edge, and the direction. + std::vector<LabelT> label_vec(res.size(), src_label_); + // That constructor takes Direction by non-const reference, so hand it a + // mutable copy from this const method. + Direction dir = dir_; + return flat_t(std::move(res), edge_label_, {src_label_}, dst_label_, + std::move(label_vec), dir); + } + + template + void fillBuiltinProps(std::vector>& tuples, + PropNameArray& prop_names, + std::vector& repeat_array) { + LOG(WARNING) << "No implemented"; + } + + // fill builtin props without repeat array. 
+ template + void fillBuiltinProps(std::vector>& tuples, + PropNameArray& prop_names) { + LOG(WARNING) << "No implemented"; + } + + void Repeat(std::vector& cur_offset, + std::vector& repeat_vec) { + LOG(FATAL) << "not implemented"; + } + + private: + size_t size_; + std::vector vids_; + LabelT edge_label_, src_label_, dst_label_; + adj_list_array_t adj_lists_; + Direction dir_; +}; + +template +class MulLabelSrcGrootEdgeSet { + public: + static constexpr size_t num_src_labels = N; + static constexpr bool is_edge_set = true; + static constexpr bool is_multi_label = false; + static constexpr bool is_collection = false; + static constexpr bool is_multi_src = true; + using iterator = MulLabelSrcGrootEdgeSetIter; + using self_type_t = MulLabelSrcGrootEdgeSet; + using data_tuple_t = std::tuple; + using adj_t = typename GI::template adj_t; + using adj_list_t = typename GI::template adj_list_t; + using adj_list_iter_t = typename adj_list_t::iterator; + using adj_list_array_t = typename GI::template adj_list_array_t; + // src, nbr. 
+ using ele_tuple_t = std::tuple; + // set_ind, inner_ind, + using index_ele_tuple_t = std::tuple; + using flat_t = FlatEdgeSet; + MulLabelSrcGrootEdgeSet( + std::array, N>&& vids, + std::array, N>&& offsets, + std::array&& adj_lists, + std::array prop_names, LabelT edge_label, + std::array src_labels, LabelT dst_label) + : vids_(std::move(vids)), + offsets_(std::move(offsets)), + adj_lists_(std::move(adj_lists)), + prop_names_(prop_names), + edge_label_(edge_label), + src_labels_(src_labels), + dst_label_(dst_label) { + VLOG(10) << "Finish construction"; + } + + iterator begin() const { + VLOG(10) << "create begin iter: 0 : " << offsets_[0].size() - 1; + return iterator(vids_, offsets_, adj_lists_, 0); + } + + iterator end() const { + // VLOG(10) << gs::to_string(offsets_[0]); + VLOG(10) << "create end iter: 0 : " << offsets_[0].size() - 1; + return iterator(vids_, offsets_, adj_lists_, offsets_[0].size() - 1); + } + + size_t Size() const { + size_t size = 0; + for (auto i = 0; i < N; ++i) { + for (auto j = 0; j < adj_lists_[i].size(); ++j) { + size += adj_lists_[i].get(j).size(); + } + } + return size; + } + + size_t NumEdgesFromSrc(size_t i) const { + CHECK(i < num_src_labels); + size_t size = 0; + for (auto j = 0; j < adj_lists_[i].size(); ++j) { + size += adj_lists_[i].get(j).size(); + } + return size; + } + + template + std::pair, std::vector> + GetVertices(VOpt v_opt, std::array& labels, + EXPR& expr) const { + CHECK(v_opt == VOpt::End); + std::vector offsets; + std::vector vids; + vids.reserve(Size()); + offsets.reserve(Size()); + offsets.emplace_back(0); + for (auto iter : *this) { + vids.emplace_back(iter.GetDst()); + offsets.emplace_back(vids.size()); + } + auto set = MakeDefaultRowVertexSet(std::move(vids), dst_label_); + return std::make_pair(std::move(set), std::move(offsets)); + } + + template < + size_t InnerIs, size_t Is, typename... 
PropT, + typename std::enable_if<(InnerIs == sizeof...(EDATA_T))>::type* = nullptr> + void fillBuiltinPropsImpl(std::vector>& tuples, + std::string& prop_name, + std::vector& repeat_array) {} + + // TODO: make use of repeat array. + template < + size_t InnerIs, size_t Is, typename... PropT, + typename std::enable_if<(InnerIs < sizeof...(EDATA_T))>::type* = nullptr> + void fillBuiltinPropsImpl(std::vector>& tuples, + std::string& prop_name, + std::vector& repeat_array) { + using inner_col_type = + typename std::tuple_element_t>; + if constexpr (std::is_same_v>, + inner_col_type>) { + if (prop_name == prop_names_[InnerIs]) { + VLOG(10) << "Found builin property" << prop_names_[InnerIs]; + CHECK(repeat_array.size() == Size()); + size_t ind = 0; + size_t prop_ind = 0; + for (auto iter : *this) { + auto repeat_times = repeat_array[ind]; + for (auto j = 0; j < repeat_times; ++j) { + CHECK(prop_ind < tuples.size()); + std::get(tuples[prop_ind]) = std::get(iter.GetData()); + prop_ind += 1; + } + ind += 1; + } + } + } else { + fillBuiltinPropsImpl(tuples, prop_name, + repeat_array); + } + } + + template + flat_t Flat( + std::vector>& index_ele_tuple) const { + std::vector>> res; + std::vector label_vec; + res.reserve(index_ele_tuple.size()); + label_vec.reserve(index_ele_tuple.size()); + + for (auto i = 0; i < index_ele_tuple.size(); ++i) { + auto cur_ind_ele = std::get(index_ele_tuple[i]); + auto ele = std::get<2>(cur_ind_ele); + auto nbr = std::get<1>(ele); + res.emplace_back(std::make_tuple(std::get<0>(ele), nbr->neighbor(), + nbr->properties())); + label_vec.emplace_back(src_labels_[std::get<0>(cur_ind_ele)]); + } + return FlatEdgeSet(std::move(res), edge_label_, src_labels_, dst_label_, + prop_names_, std::move(label_vec)); + } + + template + void fillBuiltinPropsImpl(std::vector>& tuples, + std::string& prop_name, + std::vector& repeat_array) { + fillBuiltinPropsImpl<0, Is, PropT...>(tuples, prop_name, repeat_array); + } + + template + void 
fillBuiltinPropsImpl(std::vector>& tuples, + PropNameArray& prop_names, + std::vector& repeat_array, + std::index_sequence) { + (fillBuiltinPropsImpl(tuples, std::get(prop_names), + repeat_array), + ...); + } + + template + void fillBuiltinProps(std::vector>& tuples, + PropNameArray& prop_names, + std::vector& repeat_array) { + fillBuiltinPropsImpl(tuples, prop_names, repeat_array, + std::make_index_sequence()); + } + + // fill builtin props without repeat array: every edge is emitted once. + // The implicit repeat array must hold one entry per edge (Size()), because + // fillBuiltinPropsImpl CHECKs repeat_array.size() == Size(); vids_.size() + // is only the number of source labels (N). + template + void fillBuiltinProps(std::vector>& tuples, + PropNameArray& prop_names) { + std::vector repeat_array(Size(), 1); + fillBuiltinPropsImpl(tuples, prop_names, repeat_array, + std::make_index_sequence()); + } + + private: + std::array, N> vids_; + std::array, N> offsets_; + LabelT edge_label_, dst_label_; + std::array src_labels_; + std::array adj_lists_; + std::array prop_names_; +}; + +template +class MulLabelSrcGrootEdgeSet { + public: + static constexpr size_t num_src_labels = N; + static constexpr bool is_edge_set = true; + static constexpr bool is_multi_label = false; + static constexpr bool is_collection = false; + static constexpr bool is_multi_dst_label = false; + using iterator = MulLabelSrcGrootEdgeSetIter; + using self_type_t = + MulLabelSrcGrootEdgeSet; + using flat_t = self_type_t; + using data_tuple_t = std::tuple; + using adj_t = typename GI::template adj_t<>; + using adj_list_array_t = typename GI::template adj_list_array_t<>; + + MulLabelSrcGrootEdgeSet(std::array, N>&& vids, + std::array, N>&& offsets, + adj_list_array_t&& adj_lists, LabelT edge_label, + std::array src_labels, LabelT dst_label) + : vids_(std::move(vids)), + offsets_(std::move(offsets)), + adj_lists_(std::move(adj_lists)), + edge_label_(edge_label), + src_labels_(src_labels), + dst_label_(dst_label) {} + + iterator begin() const { return iterator(vids_, offsets_, adj_lists_, 0); } + + iterator end() const { + return iterator(vids_, offsets_, adj_lists_, offsets_[0].size() - 1); + } + + size_t Size() const 
{ + size_t size = 0; + for (auto i = 0; i < N; ++i) { + for (auto j = 0; j < adj_lists_[i].size(); ++j) { + size += adj_lists_[i].get(j).size(); + } + } + return size; + } + + template + std::pair, std::vector> + GetVertices(VOpt v_opt, std::array& labels, + EXPR& expr) const { + CHECK(v_opt == VOpt::End); + std::vector offsets; + std::vector vids; + offsets.reserve(Size()); + offsets.emplace_back(0); + for (auto iter : *this) { + vids.emplace_back(iter.GetDst()); + offsets.emplace_back(vids.size()); + } + auto set = MakeDefaultRowVertexSet(std::move(vids), dst_label_); + return std::make_pair(std::move(set), std::move(offsets)); + } + + private: + std::array, N> vids_; + std::array, N> offsets_; + LabelT edge_label_, dst_label_; + std::array src_labels_; + std::array adj_lists_; +}; +} // namespace gs + +#endif // ENGINES_HQPS_ENGINE_DS_EDGE_MULTI_EDGE_SET_ADJ_EDGE_SET_H_ diff --git a/flex/engines/hqps_db/structures/multi_edge_set/flat_edge_set.h b/flex/engines/hqps_db/structures/multi_edge_set/flat_edge_set.h new file mode 100644 index 000000000000..fda447bde1cd --- /dev/null +++ b/flex/engines/hqps_db/structures/multi_edge_set/flat_edge_set.h @@ -0,0 +1,511 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef ENGINES_HQPS_ENGINE_DS_MULTI_EDGE_SET_FLAT_EDGE_SET_H_ +#define ENGINES_HQPS_ENGINE_DS_MULTI_EDGE_SET_FLAT_EDGE_SET_H_ + +#include +#include + +#include "flex/engines/hqps_db/core/params.h" +#include "flex/engines/hqps_db/core/utils/hqps_utils.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h" + +#include "glog/logging.h" + +namespace gs { + +template +class FlatEdgeSet; + +template +class FlatEdgeSetBuilder { + public: + using ele_tuple_t = std::tuple>; + using index_ele_tuple_t = std::tuple; + using result_t = FlatEdgeSet; + + static constexpr bool is_flat_edge_set_builder = true; + static constexpr bool is_row_vertex_set_builder = false; + static constexpr bool is_general_edge_set_builder = false; + static constexpr bool is_two_label_set_builder = false; + + FlatEdgeSetBuilder( + std::array src_labels, LabelT dst_label, LabelT edge_label, + std::array prop_names, + const std::vector& label_vec, // label_vec is needed to create + // new label_vec with index_ele + Direction direction) + : src_labels_(src_labels), + dst_label_(dst_label), + edge_label_(edge_label), + prop_names_(prop_names), + label_vec_(label_vec), + direction_(direction) {} + + // There could be null record. 
+ void Insert(const index_ele_tuple_t& tuple) { + vec_.push_back(std::get<1>(tuple)); + if (!IsNull(std::get<1>(tuple))) { + label_vec_new_.push_back(label_vec_[std::get<0>(tuple)]); + } else { + label_vec_new_.push_back(NullRecordCreator::GetNull()); + } + } + + result_t Build() { + return result_t(std::move(vec_), edge_label_, src_labels_, dst_label_, + prop_names_, std::move(label_vec_new_), direction_); + } + + private: + std::vector vec_; + std::array src_labels_; + LabelT dst_label_; + LabelT edge_label_; + std::array prop_names_; + std::vector label_vec_; + std::vector label_vec_new_; + Direction direction_; +}; + +template +class FlatEdgeSetIter { + public: + using ele_tuple_t = std::tuple>; + using self_type_t = FlatEdgeSetIter; + using index_ele_tuple_t = std::tuple; + using data_tuple_t = ele_tuple_t; + FlatEdgeSetIter(const std::vector& vec, size_t ind) + : vec_(vec), ind_(ind) {} + + ele_tuple_t GetElement() const { return vec_[ind_]; } + + index_ele_tuple_t GetIndexElement() const { + return std::make_tuple(ind_, GetElement()); + } + + VID_T GetSrc() const { return std::get<0>(vec_[ind_]); } + + VID_T GetDst() const { return std::get<1>(vec_[ind_]); } + + const std::tuple& GetData() const { + return std::get<2>(vec_[ind_]); + } + + size_t GetIndex() const { return ind_; } + + inline const self_type_t& operator++() { + ++ind_; + return *this; + } + + inline bool operator==(const self_type_t& rhs) const { + return ind_ == rhs.ind_; + } + + inline bool operator!=(const self_type_t& rhs) const { + return ind_ != rhs.ind_; + } + + inline bool operator<(const self_type_t& rhs) const { + return ind_ < rhs.ind_; + } + + inline const self_type_t& operator*() const { return *this; } + + inline const self_type_t* operator->() const { return this; } + + private: + const std::vector& vec_; + size_t ind_; +}; +template +class FlatEdgeSetIter { + public: + using ele_tuple_t = std::tuple; + using self_type_t = FlatEdgeSetIter; + using index_ele_tuple_t = std::tuple; 
+ using data_tuple_t = ele_tuple_t; + FlatEdgeSetIter(const std::vector& vec, size_t ind) + : vec_(vec), ind_(ind) {} + + ele_tuple_t GetElement() const { return vec_[ind_]; } + + index_ele_tuple_t GetIndexElement() const { + return std::make_tuple(ind_, GetElement()); + } + + VID_T GetSrc() const { return std::get<0>(vec_[ind_]); } + + VID_T GetDst() const { return std::get<1>(vec_[ind_]); } + + const std::tuple& GetData() const { + return std::get<2>(vec_[ind_]); + } + + size_t GetIndex() const { return ind_; } + + inline const self_type_t& operator++() { + ++ind_; + return *this; + } + + inline bool operator==(const self_type_t& rhs) const { + return ind_ == rhs.ind_; + } + + inline bool operator!=(const self_type_t& rhs) const { + return ind_ != rhs.ind_; + } + + inline bool operator<(const self_type_t& rhs) const { + return ind_ < rhs.ind_; + } + + inline const self_type_t& operator*() const { return *this; } + + inline const self_type_t* operator->() const { return this; } + + private: + const std::vector& vec_; + size_t ind_; +}; + +template +class FlatEdgeSet { + public: + using ele_tuple_t = std::tuple>; + using index_ele_tuple_t = std::tuple; + using iterator = FlatEdgeSetIter; + using self_type_t = FlatEdgeSet; + using flat_t = self_type_t; + using data_tuple_t = ele_tuple_t; + using builder_t = FlatEdgeSetBuilder; + + static constexpr bool is_multi_label = false; + static constexpr bool is_collection = false; + static constexpr bool is_edge_set = true; + static constexpr bool is_multi_src = false; + static constexpr bool is_multi_dst_label = false; + + FlatEdgeSet(std::vector&& vec, LabelT edge_label, + std::array src_labels, LabelT dst_label, + std::array prop_names, + std::vector&& label_vec, Direction direction) + : vec_(std::move(vec)), + edge_label_(edge_label), + src_labels_(src_labels), + dst_label_(dst_label), + prop_names_(prop_names), + label_vec_(std::move(label_vec)), + direction_(direction) { + CHECK(label_vec_.size() == vec_.size()); + } + 
+ iterator begin() const { return iterator(vec_, 0); } + + iterator end() const { return iterator(vec_, vec_.size()); } + + template + flat_t Flat( + std::vector>& index_ele_tuple) const { + std::vector>> res; + std::vector label_vec; + res.reserve(index_ele_tuple.size()); + label_vec.reserve(index_ele_tuple.size()); + for (auto i = 0; i < index_ele_tuple.size(); ++i) { + auto cur_ind_ele = std::get(index_ele_tuple[i]); + res.emplace_back(std::get<1>(cur_ind_ele)); + label_vec.emplace_back(label_vec_[std::get<0>(cur_ind_ele)]); + } + return FlatEdgeSet(std::move(res), edge_label_, src_labels_, dst_label_, + prop_names_, std::move(label_vec), direction_); + } + + template < + size_t InnerIs, size_t Is, typename... PropT, + typename std::enable_if<(InnerIs == sizeof...(EDATA_T))>::type* = nullptr> + void fillBuiltinPropsImpl(std::vector>& tuples, + const std::string& prop_name, + const std::vector& repeat_array) {} + + template < + size_t InnerIs, size_t Is, typename... PropT, + typename std::enable_if<(InnerIs < sizeof...(EDATA_T))>::type* = nullptr> + void fillBuiltinPropsImpl(std::vector>& tuples, + const std::string& prop_name, + const std::vector& repeat_array) { + using inner_col_type = + typename std::tuple_element_t>; + if constexpr (std::is_same_v>, + inner_col_type>) { + if (prop_name == prop_names_[InnerIs]) { + VLOG(10) << "Found builin property" << prop_names_[InnerIs]; + CHECK(repeat_array.size() == Size()); + size_t prop_ind = 0; + for (auto i = 0; i < vec_.size(); ++i) { + auto repeat_times = repeat_array[i]; + for (auto j = 0; j < repeat_times; ++j) { + CHECK(prop_ind < tuples.size()); + std::get(tuples[prop_ind]) = + std::get(std::get<2>(vec_[i])); + prop_ind += 1; + } + } + } + } else { + fillBuiltinPropsImpl(tuples, prop_name, + repeat_array); + } + } + + template + void fillBuiltinPropsImpl(std::vector>& tuples, + const std::string& prop_name, + const std::vector& repeat_array) { + fillBuiltinPropsImpl<0, Is, PropT...>(tuples, prop_name, 
repeat_array); + } + + template + void fillBuiltinPropsImpl(std::vector>& tuples, + const PropNameArray& prop_names, + const std::vector& repeat_array, + std::index_sequence) { + (fillBuiltinPropsImpl(tuples, std::get(prop_names), + repeat_array), + ...); + } + + template + void fillBuiltinProps(std::vector>& tuples, + const PropNameArray& prop_names, + const std::vector& repeat_array) { + fillBuiltinPropsImpl(tuples, prop_names, repeat_array, + std::make_index_sequence()); + } + + // fill builtin props without repeat array. + template + void fillBuiltinProps(std::vector>& tuples, + const PropNameArray& prop_names) { + std::vector repeat_array(vec_.size(), 1); + fillBuiltinPropsImpl(tuples, prop_names, repeat_array, + std::make_index_sequence()); + } + + size_t Size() const { return vec_.size(); } + + template + std::pair, std::vector> + GetVertices(VOpt v_opt, std::array& labels, + EXPR& expr) const { + CHECK(check_edge_dir_consist_vopt(direction_, v_opt)); + std::vector offsets; + std::vector vids; + offsets.reserve(Size()); + offsets.emplace_back(0); + // TODO: check labels. 
+ bool flag = false; + for (auto l : labels) { + if (l == dst_label_) { + flag = true; + } + } + if (flag) { + for (auto iter : *this) { + vids.emplace_back(iter.GetDst()); + offsets.emplace_back(vids.size()); + } + } else { + size_t size = Size(); + for (auto i = 0; i < size; ++i) { + offsets.emplace_back(0); + } + } + auto set = MakeDefaultRowVertexSet(std::move(vids), dst_label_); + return std::make_pair(std::move(set), std::move(offsets)); + } + + // implement ProjectWithRepeatArray + template ::type* = nullptr> + self_type_t ProjectWithRepeatArray(const std::vector& repeat_array, + KeyAlias& key_alias) const { + std::vector new_vec; + std::vector new_label_vec; + size_t next_size = 0; + for (auto i = 0; i < repeat_array.size(); ++i) { + next_size += repeat_array[i]; + } + VLOG(10) << "[FlatEdgeSet] size: " << Size() + << " Project self, next size: " << next_size; + + new_vec.reserve(next_size); + new_label_vec.reserve(next_size); + + for (auto i = 0; i < repeat_array.size(); ++i) { + for (auto j = 0; j < repeat_array[i]; ++j) { + new_vec.emplace_back(vec_[i]); + new_label_vec.emplace_back(label_vec_[i]); + } + } + + return self_type_t(std::move(new_vec), edge_label_, src_labels_, dst_label_, + prop_names_, std::move(new_label_vec), direction_); + } + + void Repeat(std::vector& cur_offset, + std::vector& repeat_vec) { + CHECK(cur_offset.size() == repeat_vec.size()); + std::vector res_vec; + std::vector res_label_vec; + res_vec.reserve(repeat_vec.back()); + res_label_vec.reserve(repeat_vec.back()); + for (auto i = 0; i + 1 < cur_offset.size(); ++i) { + auto times_to_repeat = repeat_vec[i + 1] - repeat_vec[i]; + for (auto j = 0; j < times_to_repeat; ++j) { + for (auto k = cur_offset[i]; k < cur_offset[i + 1]; ++k) { + res_vec.emplace_back(vec_[k]); + res_label_vec.emplace_back(label_vec_[k]); + } + } + } + vec_.swap(res_vec); + label_vec_.swap(res_label_vec); + } + + builder_t CreateBuilder() const { + return builder_t(src_labels_, dst_label_, edge_label_, 
prop_names_, + label_vec_, direction_); + } + + private: + std::vector vec_; + std::array src_labels_; + LabelT dst_label_, edge_label_; + std::array prop_names_; + std::vector label_vec_; + Direction direction_; +}; + +template +class FlatEdgeSet { + public: + // TODO: use std::tuple is enough + using ele_tuple_t = std::tuple; + using index_ele_tuple_t = std::tuple; + using iterator = FlatEdgeSetIter; + using self_type_t = FlatEdgeSet; + using flat_t = self_type_t; + using data_tuple_t = ele_tuple_t; + + static constexpr bool is_multi_label = false; + static constexpr bool is_collection = false; + static constexpr bool is_edge_set = true; + static constexpr bool is_multi_src = false; + static constexpr bool is_multi_dst_label = false; + + // Direction is taken by value, as in the primary template, so const + // members and temporaries can be passed; label_vec must hold one label + // per element of vec. + FlatEdgeSet(std::vector&& vec, LabelT edge_label, + std::array src_labels, LabelT dst_label, + std::vector&& label_vec, Direction dire) + : vec_(std::move(vec)), + edge_label_(edge_label), + src_labels_(src_labels), + dst_label_(dst_label), + label_vec_(std::move(label_vec)), + direction_(dire) { + CHECK(label_vec_.size() == vec_.size()); + } + + iterator begin() const { return iterator(vec_, 0); } + + iterator end() const { return iterator(vec_, vec_.size()); } + + template + flat_t Flat( + std::vector>& index_ele_tuple) const { + std::vector> res; + std::vector label_vec; + res.reserve(index_ele_tuple.size()); + label_vec.reserve(index_ele_tuple.size()); + for (auto i = 0; i < index_ele_tuple.size(); ++i) { + auto cur_ind_ele = std::get(index_ele_tuple[i]); + res.push_back(std::get<1>(cur_ind_ele)); + label_vec.push_back(label_vec_[std::get<0>(cur_ind_ele)]); + } + // The constructor requires a direction; propagate this set's own. + return FlatEdgeSet(std::move(res), edge_label_, src_labels_, dst_label_, + std::move(label_vec), direction_); + } + template + void fillBuiltinProps(std::vector>& tuples, + PropNameArray& prop_names, + std::vector& repeat_array) { + fillBuiltinPropsImpl(tuples, prop_names, repeat_array, + std::make_index_sequence()); + } + + // fill builtin props without repeat array. 
+ template + void fillBuiltinProps(std::vector>& tuples, + PropNameArray& prop_names) { + std::vector repeat_array(vec_.size(), 1); + fillBuiltinPropsImpl(tuples, prop_names, repeat_array, + std::make_index_sequence()); + } + + template + std::pair, std::vector> + GetVertices(VOpt v_opt, std::array& labels, + EXPR& expr) const { + // We only contains one label for dst vertices. + CHECK(v_opt == VOpt::End); + std::vector offsets; + std::vector vids; + offsets.reserve(Size()); + offsets.emplace_back(0); + // TODO: check labels. + bool flag = false; + for (auto l : labels) { + if (l == dst_label_) { + flag = true; + } + } + if (flag) { + for (auto iter : *this) { + vids.emplace_back(iter.GetDst()); + offsets.emplace_back(vids.size()); + } + } else { + size_t size = Size(); + for (auto i = 0; i < size; ++i) { + offsets.emplace_back(0); + } + } + auto set = MakeDefaultRowVertexSet(std::move(vids), dst_label_); + return std::make_pair(std::move(set), std::move(offsets)); + } + + size_t Size() const { return vec_.size(); } + + private: + std::vector vec_; + std::array src_labels_; + LabelT dst_label_, edge_label_; + std::vector label_vec_; + Direction direction_; +}; +} // namespace gs + +#endif // ENGINES_HQPS_ENGINE_DS_MULTI_EDGE_SET_FLAT_EDGE_SET_H_ \ No newline at end of file diff --git a/flex/engines/hqps_db/structures/multi_edge_set/general_edge_set.h b/flex/engines/hqps_db/structures/multi_edge_set/general_edge_set.h new file mode 100644 index 000000000000..6b7206e1fe6a --- /dev/null +++ b/flex/engines/hqps_db/structures/multi_edge_set/general_edge_set.h @@ -0,0 +1,638 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef ENGINES_HQPS_ENGINE_DS_EDGE_MULTISET_GENERAL_EDGE_SET_H_ +#define ENGINES_HQPS_ENGINE_DS_EDGE_MULTISET_GENERAL_EDGE_SET_H_ + +#include +#include +#include "flex/engines/hqps_db/core/utils/hqps_utils.h" +#include "flex/engines/hqps_db/structures/multi_edge_set/flat_edge_set.h" +namespace gs { + +template +class GeneralEdgeSet; + +template +class GeneralEdgeSetBuilder {}; + +template +class GeneralEdgeSetBuilder<2, GI, VID_T, LabelT, T...> { + public: + using adj_list_array_t = typename GI::template adj_list_array_t; + using adj_list_t = typename GI::template adj_list_t; + using adj_list_iterator = typename adj_list_t::iterator; + using ele_tuple_t = std::tuple>; + using index_ele_tuple_t = std::tuple; + using res_t = FlatEdgeSet; + + static constexpr bool is_row_vertex_set_builder = false; + static constexpr bool is_flat_edge_set_builder = false; + static constexpr bool is_general_edge_set_builder = true; + static constexpr bool is_two_label_set_builder = false; + + static constexpr size_t num_props = sizeof...(T); + GeneralEdgeSetBuilder(size_t edge_size, const grape::Bitset& bitset, + std::array prop_names, + LabelT edge_label, std::array src_labels, + LabelT dst_label, Direction dir) + : bitset_(bitset), + prop_names_(prop_names), + edge_label_(edge_label), + src_labels_(src_labels), + dst_label_(dst_label), + direction_(dir) { + vec_.reserve(edge_size); + } + + void Insert(const index_ele_tuple_t& tuple) { + // TODO: support inserting null record. 
+ auto ind = std::get<0>(tuple); + auto src = std::get<1>(tuple); + auto adj_iter = std::get<2>(tuple); + auto dst = adj_iter.neighbor(); + auto props = adj_iter.properties(); + vec_.emplace_back(src, dst, props); + if (bitset_.get_bit(ind)) { + label_vec_.emplace_back(src_labels_[0]); + } else { + label_vec_.emplace_back(src_labels_[1]); + } + } + + res_t Build() { + return res_t(std::move(vec_), edge_label_, src_labels_, dst_label_, + prop_names_, std::move(label_vec_), direction_); + } + + private: + std::vector vec_; + std::vector label_vec_; + std::array prop_names_; + LabelT edge_label_; + std::array src_labels_; + LabelT dst_label_; + const grape::Bitset& bitset_; + Direction direction_; +}; + +template +class GeneralEdgeSetBuilder<2, GI, VID_T, LabelT, grape::EmptyType> { + public: + using adj_list_array_t = typename GI::template adj_list_array_t<>; + using adj_list_t = typename GI::template adj_list_t<>; + using adj_list_iterator = typename adj_list_t::iterator; + using ele_tuple_t = std::tuple; + using index_ele_tuple_t = std::tuple; + using res_t = FlatEdgeSet; + + static constexpr bool is_row_vertex_set_builder = false; + static constexpr bool is_flat_edge_set_builder = false; + static constexpr bool is_general_edge_set_builder = true; + static constexpr bool is_two_label_set_builder = false; + + GeneralEdgeSetBuilder(size_t edge_size, const grape::Bitset& bitset, + LabelT edge_label, std::array src_labels, + LabelT dst_label, Direction dir) + : bitset_(bitset), + edge_label_(edge_label), + src_labels_(src_labels), + dst_label_(dst_label), + direction_(dir) { + vec_.reserve(edge_size); + } + + void Insert(const index_ele_tuple_t& tuple) { + // TODO: support inserting null record. 
+ auto ind = std::get<0>(tuple); + auto src = std::get<1>(tuple); + auto adj_iter = std::get<2>(tuple); + auto dst = adj_iter.neighbor(); + auto props = adj_iter.properties(); + vec_.emplace_back(src, dst, props); + if (bitset_.get_bit(ind)) { + label_vec_.emplace_back(src_labels_[0]); + } else { + label_vec_.emplace_back(src_labels_[1]); + } + } + + res_t Build() { + return res_t(std::move(vec_), edge_label_, src_labels_, dst_label_, + std::move(label_vec_), direction_); + } + + private: + std::vector vec_; + std::vector label_vec_; + LabelT edge_label_; + std::array src_labels_; + LabelT dst_label_; + const grape::Bitset& bitset_; + Direction direction_; +}; +template +class GeneralEdgeSetIter { + public: + using adj_list_array_t = typename GI::template adj_list_array_t; + using adj_list_t = typename GI::template adj_list_t; + using adj_list_iterator = typename adj_list_t::iterator; + using ele_tuple_t = std::tuple; + using data_tuple_t = ele_tuple_t; + using index_ele_tuple_t = std::tuple; + using self_type_t = GeneralEdgeSetIter; + + GeneralEdgeSetIter(const std::vector& vids, + const adj_list_array_t& adj_lists, size_t ind) + : vids_(vids), adj_lists_(adj_lists), ind_(ind) { + if (ind_ == 0) { + probe_next_valid_adj(); + } + } + + // copy constructor + GeneralEdgeSetIter(const self_type_t& other) + : vids_(other.vids_), + adj_lists_(other.adj_lists_), + ind_(other.ind_), + cur_adj_list_(other.cur_adj_list_), + begin_(other.begin_), + end_(other.end_) {} + + inline VID_T GetSrc() const { return vids_[ind_]; } + inline VID_T GetDst() const { return begin_.neighbor(); } + inline const std::tuple& GetData() const { return begin_.properties(); } + + ele_tuple_t GetElement() const { return ele_tuple_t(GetSrc(), begin_); } + + index_ele_tuple_t GetIndexElement() const { + return std::make_tuple(ind_, GetSrc(), begin_); + } + + VID_T GetVertex() const { return vids_[ind_]; } + + inline const self_type_t& operator++() { + if (ind_ < vids_.size()) { + ++begin_; + if 
(begin_ == end_) { + ++ind_; + probe_next_valid_adj(); + } + } + return *this; + } + + void probe_next_valid_adj() { + while (ind_ < vids_.size()) { + cur_adj_list_ = adj_lists_.get(ind_); + begin_ = cur_adj_list_.begin(); + end_ = cur_adj_list_.end(); + if (begin_ != end_) { + break; + } + ind_ += 1; + } + } + + // We may never compare to other kind of iterators + inline bool operator==(const self_type_t& rhs) const { + return ind_ == rhs.ind_; + } + + inline bool operator!=(const self_type_t& rhs) const { + return ind_ != rhs.ind_; + } + + inline bool operator<(const self_type_t& rhs) const { + return ind_ < rhs.ind_; + } + + inline const self_type_t& operator*() const { return *this; } + + inline const self_type_t* operator->() const { return this; } + + private: + const std::vector& vids_; + const adj_list_array_t& adj_lists_; + adj_list_t cur_adj_list_; + adj_list_iterator begin_, end_; + size_t ind_; +}; + +template +class GeneralEdgeSetIter { + public: + using adj_list_array_t = typename GI::template adj_list_array_t<>; + using adj_list_t = typename GI::template adj_list_t<>; + using adj_list_iterator = typename adj_list_t::iterator; + using ele_tuple_t = std::tuple; + using data_tuple_t = ele_tuple_t; + using index_ele_tuple_t = std::tuple; + using self_type_t = GeneralEdgeSetIter; + + GeneralEdgeSetIter(const std::vector& vids, + const adj_list_array_t& adj_lists, size_t ind) + : vids_(vids), adj_lists_(adj_lists), ind_(ind) { + if (ind_ == 0) { + probe_next_valid_adj(); + } + } + + // copy constructor + GeneralEdgeSetIter(const self_type_t& other) + : vids_(other.vids_), + adj_lists_(other.adj_lists_), + ind_(other.ind_), + cur_adj_list_(other.cur_adj_list_), + begin_(other.begin_), + end_(other.end_) {} + + inline VID_T GetSrc() const { return vids_[ind_]; } + inline VID_T GetDst() const { return begin_.neighbor(); } + inline const std::tuple GetData() const { + return std::make_tuple(grape::EmptyType()); + } + + ele_tuple_t GetElement() const { return 
ele_tuple_t(GetSrc(), begin_); } + + index_ele_tuple_t GetIndexElement() const { + return std::make_tuple(ind_, GetSrc(), begin_); + } + + VID_T GetVertex() const { return vids_[ind_]; } + + inline const self_type_t& operator++() { + if (ind_ < vids_.size()) { + ++begin_; + if (begin_ == end_) { + ++ind_; + probe_next_valid_adj(); + } + } + return *this; + } + + void probe_next_valid_adj() { + while (ind_ < vids_.size()) { + cur_adj_list_ = adj_lists_.get(ind_); + begin_ = cur_adj_list_.begin(); + end_ = cur_adj_list_.end(); + if (begin_ != end_) { + break; + } + ind_ += 1; + } + } + + // We may never compare to other kind of iterators + inline bool operator==(const self_type_t& rhs) const { + return ind_ == rhs.ind_; + } + + inline bool operator!=(const self_type_t& rhs) const { + return ind_ != rhs.ind_; + } + + inline bool operator<(const self_type_t& rhs) const { + return ind_ < rhs.ind_; + } + + inline const self_type_t& operator*() const { return *this; } + + inline const self_type_t* operator->() const { return this; } + + private: + const std::vector& vids_; + const adj_list_array_t& adj_lists_; + adj_list_t cur_adj_list_; + adj_list_iterator begin_, end_; + size_t ind_; +}; + +template +class GeneralEdgeSet { + public: + static constexpr size_t num_src_labels = N; + static constexpr size_t num_props = sizeof...(T); + static constexpr bool is_edge_set = true; + static constexpr bool is_multi_dst_label = false; + using lid_t = VID_T; + + using adj_list_array_t = typename GI::template adj_list_array_t; + + using iterator = GeneralEdgeSetIter; + GeneralEdgeSet(std::vector&& vids, adj_list_array_t&& adj_lists, + std::array&& bitsets, + const std::array& prop_names, + LabelT edge_label, + std::array&& src_labels, + LabelT dst_label) + : vids_(std::move(vids)), + adj_lists_(std::move(adj_lists)), + prop_names_(prop_names), + edge_label_(edge_label), + src_labels_(std::move(src_labels)), + dst_label_(dst_label) { + bitsets_.swap(bitsets); + } + + 
GeneralEdgeSet(GeneralEdgeSet&& other) + : vids_(std::move(other.vids_)), + adj_lists_(std::move(other.adj_lists_)), + bitsets_(std::move(other.bitsets_)), + prop_names_(other.prop_names_), + edge_label_(other.edge_label_), + src_labels_(std::move(other.src_labels_)), + dst_label_(other.dst_label_) {} + + iterator begin() const { return iterator(vids_, adj_lists_, 0); } + + iterator end() const { return iterator(vids_, adj_lists_, vids_.size()); } + + private: + LabelT edge_label_, dst_label_; + std::array src_labels_; + + std::array prop_names_; + + std::vector vids_; + adj_list_array_t adj_lists_; + std::array bitsets_; +}; + +// general edge set stores multi src labels but only one dst label +// Which stores the nbr ptrs rather than edge. +template +class GeneralEdgeSet<2, GI, VID_T, LabelT, T...> { + public: + static constexpr size_t num_src_labels = 2; + static constexpr size_t num_props = sizeof...(T); + static constexpr bool is_edge_set = true; + static constexpr bool is_multi_dst_label = false; + using lid_t = VID_T; + using flat_t = FlatEdgeSet; + + using adj_list_t = typename GI::template adj_list_t; + using adj_list_array_t = typename GI::template adj_list_array_t; + using adj_list_iter_t = typename adj_list_t::iterator; + using ele_tuple_t = std::tuple; + using data_tuple_t = ele_tuple_t; + using index_ele_tuple_t = std::tuple; + + using iterator = GeneralEdgeSetIter; + using builder_t = GeneralEdgeSetBuilder<2, GI, VID_T, LabelT, T...>; + GeneralEdgeSet(std::vector&& vids, adj_list_array_t&& adj_lists, + grape::Bitset&& bitsets, + const std::array& prop_names, + LabelT edge_label, std::array src_labels, + LabelT dst_label, Direction dir) + : vids_(std::move(vids)), + adj_lists_(std::move(adj_lists)), + prop_names_(prop_names), + edge_label_(edge_label), + src_labels_(src_labels), + dst_label_(dst_label), + size_(0), + dir_(dir) { + bitsets_.swap(bitsets); + } + + GeneralEdgeSet(GeneralEdgeSet<2, GI, VID_T, LabelT, T...>&& other) + : 
vids_(std::move(other.vids_)), + adj_lists_(std::move(other.adj_lists_)), + prop_names_(other.prop_names_), + edge_label_(other.edge_label_), + src_labels_(std::move(other.src_labels_)), + dst_label_(other.dst_label_), + size_(0), + dir_(other.dir_) { + bitsets_.swap(other.bitsets_); + } + + iterator begin() const { return iterator(vids_, adj_lists_, 0); } + + iterator end() const { return iterator(vids_, adj_lists_, vids_.size()); } + + size_t Size() const { + if (size_ == 0) { + for (auto i = 0; i < adj_lists_.size(); ++i) { + auto adj = adj_lists_.get(i); + size_ += adj.size(); + } + } + return size_; + } + + builder_t CreateBuilder() const { + return builder_t(Size(), bitsets_, prop_names_, edge_label_, src_labels_, + dst_label_, dir_); + } + + template + flat_t Flat( + std::vector>& index_ele_tuple) const { + std::vector>> res; + res.reserve(index_ele_tuple.size()); + std::vector label_vec(index_ele_tuple.size(), (LabelT) 0); + for (auto i = 0; i < index_ele_tuple.size(); ++i) { + auto cur_ind_ele = std::get(index_ele_tuple[i]); + auto ind = std::get<0>(cur_ind_ele); + auto nbr = std::get<2>(cur_ind_ele); + res.emplace_back(std::make_tuple(std::get<1>(cur_ind_ele), + nbr->neighbor(), nbr->properties())); + if (!bitsets_.get_bit(ind)) { + // label_vec[i] = 1; + label_vec[i] = src_labels_[1]; + } else { + label_vec[i] = src_labels_[0]; + } + } + return flat_t(std::move(res), edge_label_, src_labels_, dst_label_, + prop_names_, std::move(label_vec), dir_); + } + + template + std::pair, std::vector> + GetVertices(VOpt v_opt, std::array& labels, + EXPR& expr) const { + if (dir_ == Direction::In) { + CHECK(v_opt == VOpt::Start || v_opt == VOpt::Other); + } else if (dir_ == Direction::Out) { + CHECK(v_opt == VOpt::End || v_opt == VOpt::Other); + } + std::vector offsets; + std::vector vids; + offsets.reserve(Size()); + vids.reserve(Size()); + offsets.emplace_back(0); + for (auto iter : *this) { + vids.emplace_back(iter.GetDst()); + offsets.emplace_back(vids.size()); + 
} + auto set = MakeDefaultRowVertexSet(std::move(vids), dst_label_); + return std::make_pair(std::move(set), std::move(offsets)); + } + + void Repeat(std::vector& cur_offset, + std::vector& repeat_vec) { + LOG(FATAL) << "not implemented"; + } + + private: + mutable size_t size_; + LabelT edge_label_, dst_label_; + std::array src_labels_; + + std::array prop_names_; + + std::vector vids_; + adj_list_array_t adj_lists_; + grape::Bitset bitsets_; // bitset of src vertices. + Direction dir_; +}; + +// general edge set stores multi src labels but only one dst label +template +class GeneralEdgeSet<2, GI, VID_T, LabelT, grape::EmptyType> { + public: + static constexpr size_t num_src_labels = 2; + static constexpr size_t num_props = 0; + static constexpr bool is_edge_set = true; + static constexpr bool is_multi_dst_label = false; + using lid_t = VID_T; + using flat_t = FlatEdgeSet; + + using adj_list_t = typename GI::template adj_list_t<>; + using adj_list_array_t = typename GI::template adj_list_array_t<>; + using adj_list_iter_t = typename adj_list_t::iterator; + using ele_tuple_t = std::tuple; + using data_tuple_t = ele_tuple_t; + using index_ele_tuple_t = std::tuple; + + using iterator = GeneralEdgeSetIter; + using builder_t = + GeneralEdgeSetBuilder<2, GI, VID_T, LabelT, grape::EmptyType>; + GeneralEdgeSet(std::vector&& vids, adj_list_array_t&& adj_lists, + grape::Bitset&& bitsets, LabelT edge_label, + std::array src_labels, LabelT dst_label, + Direction dir) + : vids_(std::move(vids)), + adj_lists_(std::move(adj_lists)), + edge_label_(edge_label), + src_labels_(src_labels), + dst_label_(dst_label), + size_(0), + dir_(dir) { + bitsets_.swap(bitsets); + } + + GeneralEdgeSet(GeneralEdgeSet<2, GI, VID_T, LabelT, grape::EmptyType>&& other) + : vids_(std::move(other.vids_)), + adj_lists_(std::move(other.adj_lists_)), + edge_label_(other.edge_label_), + src_labels_(std::move(other.src_labels_)), + dst_label_(other.dst_label_), + size_(0), + dir_(other.dir_) { + 
bitsets_.swap(other.bitsets_); + } + + iterator begin() const { return iterator(vids_, adj_lists_, 0); } + + iterator end() const { return iterator(vids_, adj_lists_, vids_.size()); } + + size_t Size() const { + if (size_ == 0) { + for (auto i = 0; i < adj_lists_.size(); ++i) { + auto adj = adj_lists_.get(i); + size_ += adj.size(); + } + } + return size_; + } + + builder_t CreateBuilder() const { + return builder_t(Size(), bitsets_, edge_label_, src_labels_, dst_label_, + dir_); + } + + template + flat_t Flat( + std::vector>& index_ele_tuple) const { + std::vector> res; + res.reserve(index_ele_tuple.size()); + std::vector label_vec(index_ele_tuple.size(), (LabelT) 0); + for (auto i = 0; i < index_ele_tuple.size(); ++i) { + auto cur_ind_ele = std::get(index_ele_tuple[i]); + auto ind = std::get<0>(cur_ind_ele); + auto nbr = std::get<2>(cur_ind_ele); + res.emplace_back(std::make_tuple(std::get<1>(cur_ind_ele), + nbr->neighbor(), grape::EmptyType())); + if (!bitsets_.get_bit(ind)) { + // label_vec[i] = 1; + label_vec[i] = src_labels_[1]; + } else { + label_vec[i] = src_labels_[0]; + } + } + // TODO :better label vec + // std::vector label_vec(res.size(), 0); + // for (auto i = 0; i < bitsets_.size(); ++i) { + // if (!bitsets_.get_bit(i)) { + // label_vec[i] = 1; + // } + // } + return flat_t(std::move(res), edge_label_, src_labels_, dst_label_, + std::move(label_vec)); + } + + template + std::pair, std::vector> + GetVertices(VOpt v_opt, std::array& labels, + EXPR& expr) const { + if (dir_ == Direction::In) { + CHECK(v_opt == VOpt::Start || v_opt == VOpt::Other); + } else if (dir_ == Direction::Out) { + CHECK(v_opt == VOpt::End || v_opt == VOpt::Other); + } + std::vector offsets; + std::vector vids; + offsets.reserve(Size()); + vids.reserve(Size()); + offsets.emplace_back(0); + for (auto iter : *this) { + vids.emplace_back(iter.GetDst()); + offsets.emplace_back(vids.size()); + } + auto set = MakeDefaultRowVertexSet(std::move(vids), dst_label_); + return 
std::make_pair(std::move(set), std::move(offsets)); + } + + void Repeat(std::vector& cur_offset, + std::vector& repeat_vec) { + LOG(FATAL) << "not implemented"; + } + + private: + mutable size_t size_; + LabelT edge_label_, dst_label_; + std::array src_labels_; + + std::vector vids_; + adj_list_array_t adj_lists_; + grape::Bitset bitsets_; // bitset of src vertices. + Direction dir_; +}; +} // namespace gs +#endif // ENGINES_HQPS_ENGINE_DS_EDGE_MULTISET_GENERAL_EDGE_SET_H_ \ No newline at end of file diff --git a/flex/engines/hqps_db/structures/multi_edge_set/multi_label_dst_edge_set.h b/flex/engines/hqps_db/structures/multi_edge_set/multi_label_dst_edge_set.h new file mode 100644 index 000000000000..625fa25d7e18 --- /dev/null +++ b/flex/engines/hqps_db/structures/multi_edge_set/multi_label_dst_edge_set.h @@ -0,0 +1,488 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef ENGINES_HQPS_ENGINE_DS_EDGE_MULTISET_LABEL_DST_EDGE_SET_H_ +#define ENGINES_HQPS_ENGINE_DS_EDGE_MULTISET_LABEL_DST_EDGE_SET_H_ + +#include +#include +#include + +#include "flex/engines/hqps_db/structures/multi_edge_set/flat_edge_set.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/two_label_vertex_set.h" + +#include "grape/types.h" + +namespace gs { + +template +class MultiLabelDstEdgeSetIter { + public: + using label_id_t = typename GRAPH_T::label_id_t; + using vertex_id_t = typename GRAPH_T::vertex_id_t; + using adj_list_t = typename GRAPH_T::template adj_list_t; + using adj_list_iter_t = typename adj_list_t::iterator; + using adj_list_array_t = typename GRAPH_T::template adj_list_array_t; + using index_ele_tuple_t = std::tuple; + using self_type_t = MultiLabelDstEdgeSetIter; + + MultiLabelDstEdgeSetIter( + const std::vector& src_vertices, + const std::array& adj_lists, size_t ind) + : src_vertices_(src_vertices), + adj_lists_(adj_lists), + ind_(ind), + cur_label_ind_(0) { + // Mark every label valid before probing: probe_for_next() reads valid_labels_. + for (size_t i = 0; i < num_labels; i++) { + valid_labels_[i] = true; + } + if (ind_ == src_vertices_.size()) { + begin_ = adj_list_iter_t(); + end_ = adj_list_iter_t(); + } else { + probe_for_next(); + } + } + + MultiLabelDstEdgeSetIter( + const std::vector& src_vertices, + const std::array& adj_lists, size_t ind, + std::array& labels) + : src_vertices_(src_vertices), + adj_lists_(adj_lists), + ind_(ind), + cur_label_ind_(0), + valid_labels_(labels) { + if (ind_ == src_vertices_.size()) { + begin_ = adj_list_iter_t(); + end_ = adj_list_iter_t(); + } else { + probe_for_next(); + } + } + + void probe_for_next() { + while (ind_ < src_vertices_.size()) { + while (cur_label_ind_ < num_labels) { + if (valid_labels_[cur_label_ind_]) { + auto cur_adj_list = adj_lists_[cur_label_ind_].get(ind_); + begin_ = cur_adj_list.begin(); + end_ = cur_adj_list.end(); +
if (begin_ != end_) { + break; + } + } + cur_label_ind_++; + } + if (cur_label_ind_ < num_labels) { + break; + } else { + ind_++; + // may be optimized + cur_label_ind_ = 0; + } + } + } + + inline vertex_id_t GetSrc() const { return src_vertices_[ind_]; } + inline vertex_id_t GetDst() const { return begin_.neighbor(); } + inline size_t GetLabelInd() const { return cur_label_ind_; } + inline const std::tuple GetData() const { return begin_.properties(); } + + inline Edge GetElement() const { + return Edge(GetSrc(), GetDst(), GetData()); + } + + inline index_ele_tuple_t GetIndexElement() const { + return std::make_tuple(ind_, GetSrc(), begin_); + } + + inline const self_type_t& operator++() { + ++begin_; + if (begin_ == end_) { + ++cur_label_ind_; + if (cur_label_ind_ >= num_labels) { + ++ind_; + cur_label_ind_ = 0; + } + probe_for_next(); // reposition begin_/end_ on the next non-empty adjacency list + } + return *this; + } + + inline bool operator==(const self_type_t& rhs) const { + return ind_ == rhs.ind_; + } + + inline bool operator!=(const self_type_t& rhs) const { + return ind_ != rhs.ind_; + } + + inline bool operator<(const self_type_t& rhs) const { + return ind_ < rhs.ind_; + } + + inline const self_type_t& operator*() const { return *this; } + + inline const self_type_t* operator->() const { return this; } + + private: + const std::vector& src_vertices_; + const std::array& adj_lists_; + size_t ind_; + size_t cur_label_ind_; + adj_list_iter_t begin_, end_; + std::array valid_labels_; +}; + +// specialization for MultiLabelDstEdgeSetIter with grape::EmptyType +template +class MultiLabelDstEdgeSetIter { + public: + using label_id_t = typename GRAPH_T::label_id_t; + using vertex_id_t = typename GRAPH_T::vertex_id_t; + using adj_list_t = typename GRAPH_T::template adj_list_t<>; + using adj_list_iter_t = typename adj_list_t::iterator; + using adj_list_array_t = typename GRAPH_T::template adj_list_array_t<>; + using index_ele_tuple_t = std::tuple; + using self_type_t = + MultiLabelDstEdgeSetIter; + + MultiLabelDstEdgeSetIter( + const
std::vector& src_vertices, + const std::array& adj_lists, size_t ind) + : src_vertices_(src_vertices), + adj_lists_(adj_lists), + ind_(ind), + cur_label_ind_(0) { + // Mark every label valid before probing: probe_for_next() reads valid_labels_. + for (size_t i = 0; i < num_labels; i++) { + valid_labels_[i] = true; + } + if (ind_ == src_vertices_.size()) { + begin_ = adj_list_iter_t(); + end_ = adj_list_iter_t(); + } else { + probe_for_next(); + } + } + MultiLabelDstEdgeSetIter( + const std::vector& src_vertices, + const std::array& adj_lists, size_t ind, + std::array valid_labels) + : src_vertices_(src_vertices), + adj_lists_(adj_lists), + ind_(ind), + cur_label_ind_(0), + valid_labels_(valid_labels) { + if (ind_ == src_vertices_.size()) { + begin_ = adj_list_iter_t(); + end_ = adj_list_iter_t(); + } else { + probe_for_next(); + } + } + + void probe_for_next() { + while (ind_ < src_vertices_.size()) { + while (cur_label_ind_ < num_labels) { + if (valid_labels_[cur_label_ind_]) { + auto cur_adj_list = adj_lists_[cur_label_ind_].get(ind_); + begin_ = cur_adj_list.begin(); + end_ = cur_adj_list.end(); + + if (begin_ != end_) { + break; + } + } + cur_label_ind_++; + } + if (cur_label_ind_ < num_labels) { + break; + } else { + ind_++; + cur_label_ind_ = 0; + } + } + VLOG(10) << "after probe for next: " << ind_ << " " << cur_label_ind_; + } + + inline vertex_id_t GetSrc() const { return src_vertices_[ind_]; } + inline vertex_id_t GetDst() const { return begin_.neighbor(); } + inline size_t GetLabelInd() const { return cur_label_ind_; } + inline const std::tuple GetData() const { + return std::make_tuple(grape::EmptyType()); + } + + inline Edge GetElement() const { + return Edge(GetSrc(), GetDst(), GetData()); + } + + inline index_ele_tuple_t GetIndexElement() const { + return std::make_tuple(ind_, GetSrc(), begin_); + } + + inline const self_type_t& operator++() { + ++begin_; + if (begin_ == end_) { + ++cur_label_ind_; + + if (cur_label_ind_ >= num_labels) { + ++ind_; + cur_label_ind_ = 0; + } + probe_for_next();
+ } + return *this; + } + + inline bool operator==(const self_type_t& rhs) const { + return ind_ == rhs.ind_; + } + + inline bool operator!=(const self_type_t& rhs) const { + return ind_ != rhs.ind_; + } + + inline bool operator<(const self_type_t& rhs) const { + return ind_ < rhs.ind_; + } + + inline const self_type_t& operator*() const { return *this; } + + inline const self_type_t* operator->() const { return this; } + + private: + const std::vector& src_vertices_; + const std::array& adj_lists_; + size_t ind_; + size_t cur_label_ind_; + adj_list_iter_t begin_, end_; + std::array valid_labels_; // label_inds +}; + +// Multiple label dst edge set +template +class MultiLabelDstEdgeSet { + public: + using label_id_t = typename GRAPH_T::label_id_t; + using vertex_id_t = typename GRAPH_T::vertex_id_t; + using adj_list_array_t = typename GRAPH_T::template adj_list_array_t; + using iterator = MultiLabelDstEdgeSetIter; + using index_ele_tuple_t = typename iterator::index_ele_tuple_t; + using self_type_t = MultiLabelDstEdgeSet; + static constexpr bool is_multi_label = false; + static constexpr bool is_collection = false; + static constexpr bool is_edge_set = true; + static constexpr bool is_multi_dst_label = true; + + MultiLabelDstEdgeSet(std::vector&& src_vertices, + std::array&& adj_lists, + label_id_t edge_label, label_id_t src_label, + std::array other_label, + Direction dir) + : src_vertices_(std::move(src_vertices)), + adj_lists_(std::move(adj_lists)), + edge_label_(edge_label), + src_label_(src_label), + other_label_(other_label), + dir_(dir) { + size_ = 0; + for (auto i = 0; i < num_labels; ++i) { + for (auto j = 0; j < src_vertices_.size(); ++j) { + size_ += adj_lists_[i].get(j).size(); + } + } + } + + iterator begin() const { return iterator(src_vertices_, adj_lists_, 0); } + iterator end() const { + return iterator(src_vertices_, adj_lists_, src_vertices_.size()); + } + + size_t size() const { return size_; } + + private: + std::vector src_vertices_; + 
std::array adj_lists_; + label_id_t edge_label_, src_label_; + std::array other_label_; + size_t size_; + Direction dir_; +}; + +template +class MultiLabelDstEdgeSet { + public: + using label_id_t = typename GRAPH_T::label_id_t; + using vertex_id_t = typename GRAPH_T::vertex_id_t; + using adj_list_array_t = typename GRAPH_T::template adj_list_array_t<>; + using iterator = + MultiLabelDstEdgeSetIter; + using flat_t = + FlatEdgeSet; + using index_ele_tuple_t = typename iterator::index_ele_tuple_t; + using self_type_t = + MultiLabelDstEdgeSet; + static constexpr bool is_multi_label = false; + static constexpr bool is_collection = false; + static constexpr bool is_edge_set = true; + static constexpr bool is_multi_dst_label = true; + + MultiLabelDstEdgeSet(std::vector&& src_vertices, + std::array&& adj_lists, + label_id_t edge_label, label_id_t src_label, + std::array other_label, + Direction dir) + : src_vertices_(std::move(src_vertices)), + adj_lists_(std::move(adj_lists)), + edge_label_(edge_label), + src_label_(src_label), + other_label_(other_label), + dir_(dir) { + size_ = 0; + for (auto i = 0; i < num_labels; ++i) { + for (auto j = 0; j < src_vertices_.size(); ++j) { + size_ += adj_lists_[i].get(j).size(); + } + } + } + + iterator begin() const { return iterator(src_vertices_, adj_lists_, 0); } + iterator end() const { + return iterator(src_vertices_, adj_lists_, src_vertices_.size()); + } + + size_t Size() const { return size_; } + + // get vertices of only one kind label + template ::value) && + (num_query_labels == 1)>::type* = nullptr> + std::pair, + std::vector> + GetVertices(VOpt v_opt, std::array& labels, + EXPR& expr) const { + if (dir_ == Direction::In) { + CHECK(v_opt == VOpt::Start || v_opt == VOpt::Other); + } else if (dir_ == Direction::Out) { + CHECK(v_opt == VOpt::End || v_opt == VOpt::Other); + } + std::vector offsets; + std::vector vids; + offsets.reserve(Size()); + offsets.emplace_back(0); + std::array is_valid; + { + std::unordered_set 
tmp_set{labels.begin(), labels.end()}; + for (auto i = 0; i < num_labels; ++i) { + if (tmp_set.find(other_label_[i]) != tmp_set.end()) { + is_valid[i] = true; + } else { + is_valid[i] = false; + } + } + } + auto iter = iterator(src_vertices_, adj_lists_, 0, is_valid); + auto end = iterator(src_vertices_, adj_lists_, src_vertices_.size()); + while (iter != end) { + vids.emplace_back(iter.GetDst()); + offsets.emplace_back(vids.size()); + ++iter; + } + auto set = MakeDefaultRowVertexSet(std::move(vids), labels[0]); + return std::make_pair(std::move(set), std::move(offsets)); + } + + // get vertices of two kinds of labels. + template ::value) && + (num_query_labels == 2)>::type* = nullptr> + std::pair, + std::vector> + GetVertices(VOpt v_opt, std::array& labels, + EXPR& expr) const { + LOG(INFO) << "Get vertices from edgeset " << Size() + << "with labels: " << gs::to_string(labels); + if (dir_ == Direction::In) { + CHECK(v_opt == VOpt::Start || v_opt == VOpt::Other); + } else if (dir_ == Direction::Out) { + CHECK(v_opt == VOpt::End || v_opt == VOpt::Other); + } + std::vector offsets; + std::vector vids; + offsets.reserve(Size()); + offsets.emplace_back(0); + std::array is_valid; + { + std::unordered_set tmp_set{labels.begin(), labels.end()}; + for (auto i = 0; i < num_labels; ++i) { + if (tmp_set.find(other_label_[i]) != tmp_set.end()) { + LOG(INFO) << "ind : " << i << ",valid"; + is_valid[i] = true; + } else { + is_valid[i] = false; + } + } + } + auto iter = iterator(src_vertices_, adj_lists_, 0, is_valid); + auto end = iterator(src_vertices_, adj_lists_, src_vertices_.size()); + grape::Bitset bitset; + // size_ counts the edges of every label, so it bounds the number of vertices emitted below. + bitset.init(size_); + while (iter != end) { + vids.emplace_back(iter.GetDst()); + offsets.emplace_back(vids.size()); + auto label_ind = iter.GetLabelInd(); + if (label_ind == 0) { // NOTE(review): assumes other_label_[0] pairs with labels[0] - confirm + bitset.set_bit(vids.size() - 1); // flag the vertex just appended; set_bit(label_ind) only ever set bit 0 + } + ++iter; + } + LOG(INFO) << "vids size: " << vids.size(); + auto copied_label_ids = labels; + auto set =
make_two_label_set(std::move(vids), std::move(copied_label_ids), + std::move(bitset)); + return std::make_pair(std::move(set), std::move(offsets)); + } + + template + flat_t Flat( + std::vector>& index_ele_tuple) const { + std::vector> res; + res.reserve(index_ele_tuple.size()); + for (auto i = 0; i < index_ele_tuple.size(); ++i) { + auto cur_ind_ele = std::get(index_ele_tuple[i]); + auto iter = std::get<2>(cur_ind_ele); + auto src = std::get<1>(cur_ind_ele); + res.emplace_back( + std::make_tuple(src, iter->neighbor(), grape::EmptyType())); + } + return flat_t(std::move(res)); + } + + private: + std::vector src_vertices_; + std::array adj_lists_; + label_id_t edge_label_, src_label_; + std::array other_label_; + size_t size_; + Direction dir_; +}; + +} // namespace gs + +#endif // ENGINES_HQPS_ENGINE_DS_EDGE_MULTISET_LABEL_DST_EDGE_SET_H_ \ No newline at end of file diff --git a/flex/engines/hqps_db/structures/multi_vertex_set/README.md b/flex/engines/hqps_db/structures/multi_vertex_set/README.md new file mode 100644 index 000000000000..ee39ccd4c73e --- /dev/null +++ b/flex/engines/hqps_db/structures/multi_vertex_set/README.md @@ -0,0 +1,9 @@ +# Vertex MultiSet + +- RowVertexSet: A simple Vertex Multi-set that stores vertex handles of one label; it may contain duplicate vertices. +- TwoLabelVertexSet: Stores vertex handles of vertices from two labels. +- MultiLabelVertexSet: Stores vertex handles of vertices from multiple labels. Vertices from different labels are stored separately. +- GeneralVertexSet: Stores vertex handles of vertices from multiple labels. +Vertices from different labels are stored in a mixed manner.
+ + diff --git a/flex/engines/hqps_db/structures/multi_vertex_set/general_vertex_set.h b/flex/engines/hqps_db/structures/multi_vertex_set/general_vertex_set.h new file mode 100644 index 000000000000..cbd644b52504 --- /dev/null +++ b/flex/engines/hqps_db/structures/multi_vertex_set/general_vertex_set.h @@ -0,0 +1,472 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef ENGINES_HQPS_DS_MULTI_VERTEX_SET_GENERAL_VERTEX_SET_H_ +#define ENGINES_HQPS_DS_MULTI_VERTEX_SET_GENERAL_VERTEX_SET_H_ + +#include +#include +#include +#include +#include "grape/types.h" +#include "grape/util.h" +#include "grape/utils/bitset.h" + +#include "flex/engines/hqps_db/core/utils/hqps_type.h" +#include "flex/engines/hqps_db/core/utils/hqps_utils.h" + +namespace gs { + +// return the old labels, that are active in filter. +template +auto general_project_vertices_impl( + const std::vector& old_vec, + const std::array& old_bit_sets, + const std::array& old_labels, + const std::array& filter_labels, + const EXPR& expr, + const std::array& prop_getters) { + std::vector res_vec; + std::array res_bitsets; + // reserve enough size for bitset. 
+ for (auto i = 0; i < old_num_labels; ++i) { + res_bitsets[i].init(old_vec.size()); + } + std::vector select_label_id; + if constexpr (filter_num_labels == 0) { + for (auto i = 0; i < old_labels.size(); ++i) { + select_label_id.emplace_back(i); + } + } else { + std::unordered_set set; + for (auto l : filter_labels) { + set.insert(l); + } + for (auto i = 0; i < old_labels.size(); ++i) { + if (set.find(old_labels[i]) != set.end()) { + select_label_id.emplace_back(i); + } + } + } + VLOG(10) << "selected label ids: " << gs::to_string(select_label_id) + << ", out of size: " << old_labels.size(); + std::vector offset; + + offset.emplace_back(0); + for (auto i = 0; i < old_vec.size(); ++i) { + for (auto label_id : select_label_id) { + if (old_bit_sets[label_id].get_bit(i)) { + auto eles = prop_getters[label_id].get_view(old_vec[i]); + if (expr(eles)) { + res_bitsets[label_id].set_bit(res_vec.size()); + res_vec.push_back(old_vec[i]); + break; + } + } + } + offset.emplace_back(res_vec.size()); + } + for (auto i = 0; i < res_vec.size(); ++i) { + bool flag = false; + for (auto j = 0; j < old_num_labels; ++j) { + flag |= res_bitsets[j].get_bit(i); + } + CHECK(flag) << "check fail at ind: " << i; + } + // resize bitset. 
+ for (auto i = 0; i < old_num_labels; ++i) { + res_bitsets[i].resize(res_vec.size()); + } + for (auto i = 0; i < res_vec.size(); ++i) { + bool flag = false; + for (auto j = 0; j < old_num_labels; ++j) { + flag |= res_bitsets[j].get_bit(i); + } + CHECK(flag) << "check fail at ind: " << i; + } + return std::make_tuple(std::move(res_vec), std::move(res_bitsets), + std::move(offset)); +} + +template +auto general_project_with_repeat_array_impl( + const KeyAlias& key_alias, + const std::vector& repeat_array, + const std::vector& old_lids) { + using res_t = std::vector< + std::tuple>::type...>>; + + res_t res_vec; + for (auto i = 0; i < repeat_array.size(); ++i) { + for (auto j = 0; j < repeat_array[i]; ++j) { + auto tuple = std::make_tuple(old_lids[i]); + res_vec.emplace_back(std::make_tuple(gs::get_from_tuple(tuple)...)); + } + } + return res_vec; +} + +template +auto generalSetFlatImpl( + std::vector>& index_ele_tuples, + const std::vector& origin_vids, + const std::array& origin_bitsets) { + size_t dst_size = index_ele_tuples.size(); + std::vector res_vids; + std::array res_bitsets; + res_vids.reserve(dst_size); + for (auto i = 0; i < N; ++i) { + res_bitsets[i].init(dst_size); + } + for (auto ele : index_ele_tuples) { + auto& cur = std::get(ele); + //(ind, vid) + auto ind = std::get<0>(cur); + CHECK(ind < origin_vids.size()); + + for (auto i = 0; i < N; ++i) { + if (origin_bitsets[i].get_bit(ind)) { + res_bitsets[i].set_bit(res_vids.size()); + break; + } + } + res_vids.emplace_back(origin_vids[ind]); + } + return std::make_pair(std::move(res_vids), std::move(res_bitsets)); +} + +template +class GeneralVertexSetIter { + public: + using lid_t = VID_T; + using self_type_t = GeneralVertexSetIter; + using index_ele_tuple_t = std::tuple; + using data_tuple_t = std::tuple; + + GeneralVertexSetIter(const std::vector& vec, + const std::array& bitsets, size_t ind) + : vec_(vec), bitsets_(bitsets), ind_(ind) {} + + lid_t GetElement() const { return vec_[ind_]; } + + 
data_tuple_t GetData() const { return vec_[ind_]; } + + index_ele_tuple_t GetIndexElement() const { + return std::make_tuple(ind_, vec_[ind_]); + } + + lid_t GetVertex() const { return vec_[ind_]; } + + inline const self_type_t& operator++() { + ++ind_; + return *this; + } + + inline self_type_t operator++(int) { + self_type_t ret(*this); + ++ind_; + return ret; + } + + // We may never compare to other kind of iterators + inline bool operator==(const self_type_t& rhs) const { + return ind_ == rhs.ind_; + } + + inline bool operator!=(const self_type_t& rhs) const { + return ind_ != rhs.ind_; + } + + inline bool operator<(const self_type_t& rhs) const { + return ind_ < rhs.ind_; + } + + inline const self_type_t& operator*() const { return *this; } + + inline const self_type_t* operator->() const { return this; } + + private: + const std::vector& vec_; + const std::array& bitsets_; + size_t ind_; +}; + +/// @brief GeneralVertexSet are designed for the case we need to store multiple +/// label vertex in a mixed manner +/// @tparam VID_T +/// @tparam LabelT +/// @tparam N +template +class GeneralVertexSet { + public: + using lid_t = VID_T; + using self_type_t = GeneralVertexSet; + using iterator = GeneralVertexSetIter; + using index_ele_tuple_t = std::tuple; + using data_tuple_t = std::tuple; + using flat_t = self_type_t; + using EntityValueType = VID_T; + + static constexpr bool is_vertex_set = true; + static constexpr bool is_two_label_set = false; + static constexpr bool is_general_set = true; + static constexpr size_t num_labels = N; + static constexpr bool is_collection = false; + static constexpr bool is_multi_label = false; + GeneralVertexSet(std::vector&& vec, + std::array&& label_names, + std::array&& bitsets) + : vec_(std::move(vec)), label_names_(std::move(label_names)) { + for (auto i = 0; i < N; ++i) { + bitsets_[i].swap(bitsets[i]); + } + VLOG(10) << "[GeneralVertexSet], size: " << vec_.size() + << ", bitset size: " << bitsets_[0].cardinality(); + } + + 
GeneralVertexSet(GeneralVertexSet&& other) + : vec_(std::move(other.vec_)), + label_names_(std::move(other.label_names_)) { + for (auto i = 0; i < N; ++i) { + bitsets_[i].swap(other.bitsets_[i]); + } + VLOG(10) << "[GeneralVertexSet], size: " << vec_.size() + << ", bitset size: " << bitsets_[0].cardinality(); + } + + GeneralVertexSet(const GeneralVertexSet& other) + : vec_(other.vec_), label_names_(other.label_names_) { + for (auto i = 0; i < N; ++i) { + bitsets_[i].copy(other.bitsets_[i]); + } + VLOG(10) << "[GeneralVertexSet], size: " << vec_.size() + << ", bitset size: " << bitsets_[0].cardinality(); + } + + iterator begin() const { return iterator(vec_, bitsets_, 0); } + + iterator end() const { return iterator(vec_, bitsets_, vec_.size()); } + + template >> + RES_T project_vertices(std::array& filter_labels, + EXPRESSION& exprs, + std::array& prop_getter) const { + // TODO: vector-based cols should be able to be selected with + // certain rows. + + auto tuple = general_project_vertices_impl( + vec_, bitsets_, label_names_, filter_labels, exprs, prop_getter); + auto copied_label_names(label_names_); + auto set = self_type_t(std::move(std::get<0>(tuple)), + std::move(copied_label_names), + std::move(std::get<1>(tuple))); + return std::make_pair(std::move(set), std::move(std::get<2>(tuple))); + } + + const std::array& GetLabels() const { return label_names_; } + + LabelT GetLabel(size_t i) const { return label_names_[i]; } + + const std::array& GetBitsets() const { return bitsets_; } + + const std::vector& GetVertices() const { return vec_; } + + std::pair, std::vector> GetVertices( + size_t ind) const { + CHECK(ind < N); + std::vector res; + std::vector active_ind; + size_t cnt = bitsets_[ind].count(); + res.reserve(cnt); + active_ind.reserve(cnt); + for (auto i = 0; i < bitsets_[ind].cardinality(); ++i) { + if (bitsets_[ind].get_bit(i)) { + res.push_back(vec_[i]); + active_ind.push_back(i); + } + } + VLOG(10) << "Got vertices of tag: " << ind + << ", res 
vertices: " << res.size() + << ", active_ind: " << active_ind.size(); + return std::make_pair(std::move(res), std::move(active_ind)); + } + + template ::type* = nullptr> + self_type_t ProjectWithRepeatArray(const std::vector& repeat_array, + KeyAlias& key_alias) const { + std::vector next_vids; + size_t next_size = 0; + for (auto i = 0; i < repeat_array.size(); ++i) { + next_size += repeat_array[i]; + } + VLOG(10) << "[GeneralVertexSet] size: " << Size() + << " Project self, next size: " << next_size; + + next_vids.reserve(next_size); + std::array next_sets; + for (auto& i : next_sets) { + i.init(next_size); + } + VLOG(10) << "after init"; + for (auto i = 0; i < repeat_array.size(); ++i) { + size_t ind = 0; + while (ind < N) { + if (bitsets_[ind].get_bit(i)) { + break; + } + ind += 1; + } + CHECK(ind < N); + for (auto j = 0; j < repeat_array[i]; ++j) { + // VLOG(10) << "Project: " << vids_[i]; + next_sets[ind].set_bit(next_vids.size()); + next_vids.push_back(vec_[i]); + } + } + + auto copied_label_names(label_names_); + return self_type_t(std::move(next_vids), std::move(copied_label_names), + std::move(next_sets)); + } + + // Usually after sort. 
+ template + flat_t Flat(std::vector>& index_ele_tuple) { + static_assert(col_ind < + std::tuple_size_v>); + auto res_vids_and_data_tuples = + generalSetFlatImpl(index_ele_tuple, vec_, bitsets_); + auto labels_copied(label_names_); + return self_type_t(std::move(res_vids_and_data_tuples.first), + std::move(labels_copied), + std::move(res_vids_and_data_tuples.second)); + } + + template + void fillBuiltinPropsImpl(std::vector>& tuples, + std::string& prop_name, + std::vector& repeat_array) const { + if constexpr (std::is_same_v>, + Dist>) { + if (prop_name == "dist") { + LOG(FATAL) << "Not supported"; + } + } + } + + template + void fillBuiltinPropsImpl(std::vector>& tuples, + PropNameArray& prop_names, + std::vector& repeat_array, + std::index_sequence) const { + (fillBuiltinPropsImpl(tuples, std::get(prop_names), + repeat_array), + ...); + } + + template + void fillBuiltinProps(std::vector>& tuples, + PropNameArray& prop_names, + std::vector& repeat_array) const { + fillBuiltinPropsImpl(tuples, prop_names, repeat_array, + std::make_index_sequence()); + } + + // No repeat array is not provided + template + void fillBuiltinProps(std::vector>& tuples, + PropNameArray& prop_names) { + LOG(WARNING) << "not supported"; + } + + size_t Size() const { return vec_.size(); } + + private: + std::vector vec_; + std::array label_names_; + std::array bitsets_; +}; + +template +auto make_general_set(std::vector&& vec, + std::array&& label_names, + std::array&& bitsets) { + return GeneralVertexSet( + std::move(vec), std::move(label_names), std::move(bitsets)); +} + +template +static std::array, num_labels> bitsets_to_vids_inds( + const std::array& bitset) { + std::array, num_labels> res; + auto limit_size = bitset[0].size(); + VLOG(10) << "old bitset limit size: " << limit_size; + for (auto i = 0; i < num_labels; ++i) { + auto count = bitset[i].count(); + res[i].reserve(count); + for (auto j = 0; j < limit_size; ++j) { + if (bitset[i].get_bit(j)) { + res[i].emplace_back(j); + } + } 
+ } + { + size_t cnt = 0; + for (auto& a : res) { + cnt += a.size(); + } + CHECK(cnt == limit_size) << " check failed: " << cnt << ", " << limit_size; + } + return res; +} + +template +static auto get_property_tuple_general( + const GRAPH_INTERFACE& graph, + const GeneralVertexSet& general_set, + const std::array& prop_names) { + auto label_array = general_set.GetLabels(); + auto vids_inds = bitsets_to_vids_inds(general_set.GetBitsets()); + + auto data_tuples = graph.template GetVertexPropsFromVid( + general_set.GetVertices(), label_array, vids_inds, prop_names); + + return data_tuples; +} + +template +static auto get_property_tuple_general( + const GRAPH_INTERFACE& graph, + const GeneralVertexSet& general_set, + const std::tuple...>& named_prop) { + std::array prop_names; + int ind = 0; + std::apply([&prop_names, + &ind](auto&&... args) { ((prop_names[ind++] = args.name), ...); }, + named_prop); + return get_property_tuple_general(graph, general_set, prop_names); +} + +} // namespace gs + +#endif // ENGINES_HQPS_DS_MULTI_VERTEX_SET_GENERAL_VERTEX_SET_H_ diff --git a/flex/engines/hqps_db/structures/multi_vertex_set/keyed_row_vertex_set.h b/flex/engines/hqps_db/structures/multi_vertex_set/keyed_row_vertex_set.h new file mode 100644 index 000000000000..8b060fc5abbb --- /dev/null +++ b/flex/engines/hqps_db/structures/multi_vertex_set/keyed_row_vertex_set.h @@ -0,0 +1,1026 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef ENGINES_HQPS_DS_MULTI_VERTEX_SET_KEYED_ROW_VERTEX_SET_H_ +#define ENGINES_HQPS_DS_MULTI_VERTEX_SET_KEYED_ROW_VERTEX_SET_H_ + +#include "glog/logging.h" + +#include +#include +#include +#include + +#include "flex/engines/hqps_db/core/params.h" +#include "flex/engines/hqps_db/core/utils/hqps_utils.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h" +#include "flex/storages/rt_mutable_graph/types.h" +#include "grape/util.h" + +namespace gs { + +//////////////////////////VertexIter/////////////////////// + +template +class KeyedRowVertexSetBuilderImpl; + +// 0. Keyed Vector-base vertex set iterator. +template +class KeyedRowVertexSetIter { + public: + using key_t = KEY_T; + using lid_t = VID_T; + using data_tuple_t = typename std::tuple; + using self_type_t = KeyedRowVertexSetIter; + using index_ele_tuple_t = std::tuple; + // from this tuple, we can reconstruct the partial set. 
+ using flat_ele_tuple_t = std::tuple>; + + static constexpr VID_T NULL_VID = std::numeric_limits::max(); + + KeyedRowVertexSetIter(const std::vector& keys, + const std::vector& vids, + const std::vector& datas, LabelT v_label, + size_t ind) + : keys_(keys), vids_(vids), datas_(datas), v_label_(v_label), ind_(ind) {} + + KeyedRowVertexSetIter(const self_type_t& other) + : keys_(other.keys_), + vids_(other.vids_), + datas_(other.datas_), + v_label_(other.v_label_), + ind_(other.ind_) {} + ~KeyedRowVertexSetIter() {} + + lid_t GetElement() const { return vids_[ind_]; } + + index_ele_tuple_t GetIndexElement() const { + return std::make_tuple(ind_, vids_[ind_]); + } + + flat_ele_tuple_t GetFlatElement() const { + return std::make_tuple(ind_, vids_[ind_], GetData()); + } + + key_t GetKey() const { return keys_[ind_]; } + + lid_t GetVertex() const { return vids_[ind_]; } + + data_tuple_t GetData() const { return datas_[ind_]; } + + inline const self_type_t& operator++() { + ++ind_; + return *this; + } + + inline self_type_t operator++(int) { + self_type_t ret(*this); + ++ind_; + return ret; + } + + inline self_type_t& operator=(const self_type_t& rhs) { + if (*this == rhs) + return *this; + ind_ == rhs.ind_; + vids_ = rhs.vids_; + keys_ = rhs.keys_; + datas_ = rhs.datas_; + return *this; + } + // We may never compare to other kind of iterators + inline bool operator==(const self_type_t& rhs) const { + return ind_ == rhs.ind_; + } + + inline bool operator!=(const self_type_t& rhs) const { + return ind_ != rhs.ind_; + } + + inline bool operator<(const self_type_t& rhs) const { + return ind_ < rhs.ind_; + } + + inline const self_type_t& operator*() const { return *this; } + + inline const self_type_t* operator->() const { return this; } + + protected: + const std::vector& keys_; + const std::vector& vids_; + const std::vector& datas_; + LabelT v_label_; + size_t ind_; // index for keys_. +}; + +//////////////////////////VertexIter/////////////////////// + +// 0. 
Keyed Vector-base vertex set iterator.
+// This variant carries no per-vertex data: GetData() returns a tuple holding
+// a grape::EmptyType.
+template
+class KeyedRowVertexSetIter {
+ public:
+  using key_t = KEY_T;
+  using lid_t = VID_T;
+  using data_tuple_t = typename std::tuple;
+  using self_type_t =
+      KeyedRowVertexSetIter;
+  using index_ele_tuple_t = std::tuple;
+
+  // from this tuple, we can reconstruct the partial set.
+  using flat_ele_tuple_t = std::tuple;
+
+  static constexpr VID_T NULL_VID = std::numeric_limits::max();
+
+  // Binds the iterator to the key and vertex-id columns (kept by reference,
+  // not copied) and positions it at index `ind`.
+  KeyedRowVertexSetIter(const std::vector& keys,
+                        const std::vector& vids, LabelT v_label,
+                        size_t ind)
+      : keys_(keys), vids_(vids), v_label_(v_label), ind_(ind) {}
+
+  // Copy: refers to the same columns and starts at the same position.
+  KeyedRowVertexSetIter(const self_type_t& other)
+      : keys_(other.keys_),
+        vids_(other.vids_),
+        v_label_(other.v_label_),
+        ind_(other.ind_) {}
+  ~KeyedRowVertexSetIter() {}
+
+  // Vertex id at the current position.
+  lid_t GetElement() const { return vids_[ind_]; }
+
+  // (current index, vertex id).
+  index_ele_tuple_t GetIndexElement() const {
+    return std::make_tuple(ind_, vids_[ind_]);
+  }
+
+  // (current index, vertex id); there is no data component in this variant.
+  flat_ele_tuple_t GetFlatElement() const {
+    return std::make_tuple(ind_, vids_[ind_]);
+  }
+
+  key_t GetKey() const { return keys_[ind_]; }
+
+  lid_t GetVertex() const { return vids_[ind_]; }
+
+  data_tuple_t GetData() const { return std::make_tuple(grape::EmptyType()); }
+
+  inline const self_type_t& operator++() {
+    ++ind_;
+    return *this;
+  }
+
+  inline self_type_t operator++(int) {
+    self_type_t ret(*this);
+    ++ind_;
+    return ret;
+  }
+
+  // Copy-assigns the cursor position.
+  // keys_ and vids_ are const references: they cannot be rebound and the
+  // vectors behind them cannot be assigned through, so only ind_ is copied.
+  // Assignment is therefore only meaningful between iterators that walk the
+  // same set.
+  // The guard compares addresses; operator== only compares indices and must
+  // not be used to detect self-assignment.
+  inline self_type_t& operator=(const self_type_t& rhs) {
+    if (this != &rhs) {
+      ind_ = rhs.ind_;
+    }
+    return *this;
+  }
+  // We may never compare to other kind of iterators
+  inline bool operator==(const self_type_t& rhs) const {
+    return ind_ == rhs.ind_;
+  }
+
+  inline bool operator!=(const self_type_t& rhs) const {
+    return ind_ != rhs.ind_;
+  }
+
+  inline bool operator<(const self_type_t& rhs) const {
+    return ind_ < rhs.ind_;
+  }
+
+  // Dereferencing yields the iterator itself; callers read values through
+  // the Get* accessors.
+  inline const self_type_t& operator*() const { return *this; }
+
+  inline const self_type_t* operator->() const { return
this; }
+
+ protected:
+  const std::vector& keys_;
+  const std::vector& vids_;
+  LabelT v_label_;
+  size_t ind_;  // index for keys_.
+};
+
+// Gathers the rows selected by `index_ele_tuples` from the key, vertex-id and
+// data columns. For every tuple, the selected element's first component is
+// the row index into the origin columns (checked against
+// origin_vids.size()). Returns (keys, vids, datas), in that order.
+template
+std::tuple, std::vector, std::vector>
+keyedRowFlatImpl(
+    std::vector>& index_ele_tuples,
+    std::vector& origin_keys, std::vector& origin_vids,
+    std::vector& origin_datas) {
+  std::vector res_vids;
+  std::vector res_keys;
+  std::vector res_datas;
+  res_vids.reserve(index_ele_tuples.size());
+  res_keys.reserve(index_ele_tuples.size());
+  res_datas.reserve(index_ele_tuples.size());
+  // Iterate by const reference: the tuples are only read, no need to copy
+  // each one.
+  for (const auto& ele : index_ele_tuples) {
+    const auto& cur = std::get(ele);
+    //(ind, vid)
+    const auto& ind = std::get<0>(cur);
+    CHECK(ind < origin_vids.size());
+    res_vids.emplace_back(origin_vids[ind]);
+    res_keys.emplace_back(origin_keys[ind]);
+    res_datas.emplace_back(origin_datas[ind]);
+  }
+  // Move the local vectors into the result instead of deep-copying them.
+  return std::make_tuple(std::move(res_keys), std::move(res_vids),
+                         std::move(res_datas));
+}
+
+// Same as above for sets without a data column. Returns (keys, vids).
+template
+std::tuple, std::vector> keyedRowFlatImpl(
+    std::vector>& index_ele_tuples,
+    std::vector& origin_keys, std::vector& origin_vids) {
+  std::vector res_vids;
+  std::vector res_keys;
+  res_vids.reserve(index_ele_tuples.size());
+  res_keys.reserve(index_ele_tuples.size());
+  // Iterate by const reference: the tuples are only read, no need to copy
+  // each one.
+  for (const auto& ele : index_ele_tuples) {
+    const auto& cur = std::get(ele);
+    //(ind, vid)
+    const auto& ind = std::get<0>(cur);
+    CHECK(ind < origin_vids.size());
+    res_vids.emplace_back(origin_vids[ind]);
+    res_keys.emplace_back(origin_keys[ind]);
+  }
+  // Move the local vectors into the result instead of deep-copying them.
+  return std::make_tuple(std::move(res_keys), std::move(res_vids));
+}
+
+template
+std::tuple, std::vector, std::vector,
+           std::vector>
+keyed_row_filter_with_indices_impl(std::vector& indices,
+                                   std::vector& vids,
+                                   std::vector& keys,
+                                   std::vector& datas,
+                                   JoinKind join_kind) {
+  std::vector res_offset;
+  std::vector res_vids;
+  std::vector res_keys;
+  std::vector res_datas;
+
+  res_offset.reserve(vids.size() + 1);
+
+  size_t indices_ind = 0;
+  if (join_kind == JoinKind::InnerJoin) {
+    res_vids.reserve(indices.size());
+    res_keys.reserve(indices.size());
+    res_datas.reserve(indices.size());
+    size_t vid_ind
= 0; + res_offset.emplace_back(res_vids.size()); + for (; vid_ind < vids.size(); ++vid_ind) { + while (indices_ind < indices.size() && indices[indices_ind] < vid_ind) { + indices_ind++; + } + if (indices_ind < indices.size()) { + if (indices[indices_ind] == vid_ind) { + res_vids.emplace_back(vids[vid_ind]); + res_keys.emplace_back(keys[vid_ind]); + res_datas.emplace_back(datas[vid_ind]); + } + } + res_offset.emplace_back(res_vids.size()); + } + CHECK(res_vids.size() == indices.size()); + } else { + res_vids.reserve(vids.size() - indices.size()); + res_keys.reserve(vids.size() - indices.size()); + res_datas.reserve(vids.size() - indices.size()); + size_t vid_ind = 0; + res_offset.emplace_back(res_vids.size()); + while (vid_ind < vids.size()) { + while (indices_ind < indices.size() && indices[indices_ind] < vid_ind) { + indices_ind += 1; + } + if (indices_ind < indices.size()) { + if (indices[indices_ind] != vid_ind) { + res_vids.emplace_back(vids[vid_ind]); + res_keys.emplace_back(keys[vid_ind]); + res_datas.emplace_back(datas[vid_ind]); + } + } else { + res_vids.emplace_back(vids[vid_ind]); + res_keys.emplace_back(keys[vid_ind]); + res_datas.emplace_back(datas[vid_ind]); + } + vid_ind += 1; + res_offset.emplace_back(res_vids.size()); + } + CHECK(res_vids.size() == vids.size() - indices.size()); + } + CHECK(res_offset.size() == vids.size() + 1); + VLOG(10) << "res offset: " << gs::to_string(res_offset); + VLOG(10) << "res vids: " << gs::to_string(res_vids); + VLOG(10) << "res keys: " << gs::to_string(res_keys); + return std::make_tuple(std::move(res_vids), std::move(res_keys), + std::move(res_datas), std::move(res_offset)); +} +template +std::tuple, std::vector, std::vector> +keyed_row_filter_with_indices_impl(std::vector& indices, + std::vector& vids, + std::vector& keys, + JoinKind join_kind) { + std::vector res_offset; + std::vector res_vids; + std::vector res_keys; + + res_offset.reserve(vids.size() + 1); + + size_t indices_ind = 0; + if (join_kind == 
JoinKind::InnerJoin) { + res_vids.reserve(indices.size()); + res_keys.reserve(indices.size()); + size_t vid_ind = 0; + res_offset.emplace_back(res_vids.size()); + for (; vid_ind < vids.size(); ++vid_ind) { + while (indices_ind < indices.size() && indices[indices_ind] < vid_ind) { + indices_ind++; + } + if (indices_ind < indices.size()) { + if (indices[indices_ind] == vid_ind) { + res_vids.emplace_back(vids[vid_ind]); + res_keys.emplace_back(keys[vid_ind]); + } + } + res_offset.emplace_back(res_vids.size()); + } + CHECK(res_vids.size() == indices.size()); + } else { + res_vids.reserve(vids.size() - indices.size()); + res_keys.reserve(vids.size() - indices.size()); + + size_t vid_ind = 0; + res_offset.emplace_back(res_vids.size()); + while (vid_ind < vids.size()) { + while (indices_ind < indices.size() && indices[indices_ind] < vid_ind) { + indices_ind += 1; + } + if (indices_ind < indices.size()) { + if (indices[indices_ind] != vid_ind) { + res_vids.emplace_back(vids[vid_ind]); + res_keys.emplace_back(keys[vid_ind]); + } + } else { + res_vids.emplace_back(vids[vid_ind]); + res_keys.emplace_back(keys[vid_ind]); + } + vid_ind += 1; + res_offset.emplace_back(res_vids.size()); + } + CHECK(res_vids.size() == vids.size() - indices.size()); + } + CHECK(res_offset.size() == vids.size() + 1); + VLOG(10) << "res offset: " << gs::to_string(res_offset); + VLOG(10) << "res vids: " << gs::to_string(res_vids); + VLOG(10) << "res keys: " << gs::to_string(res_keys); + return std::make_tuple(std::move(res_vids), std::move(res_keys), + std::move(res_offset)); +} + +template , std::vector, + std::vector, std::vector>> +RES_T keyed_row_project_vertices_impl( + const std::vector& keys, const std::vector& lids, + const std::vector& datas, LabelT cur_label, + std::array& labels, EXPRESSION& expr, + std::vector& eles) { // temporary property + // TODO: vector-based cols should be able to be selected with certain rows. 
+ std::vector offsets; + std::vector new_keys; + std::vector new_lids; + std::vector new_datas; + size_t cnt = 0; + offsets.reserve(lids.size() + 1); + int label_ind = -1; + if constexpr (num_labels == 0) { + label_ind = 0; // neq -1 + } else { + // FIXME: no repeated labels. + for (auto i = 0; i < num_labels; ++i) { + if (cur_label == labels[i]) + label_ind = i; + } + } + if (label_ind == -1) { + VLOG(10) << "No label found in query params"; + // for current set, we don't need. + auto size = lids.size(); + for (auto i = 0; i < size; ++i) { + offsets.emplace_back(cnt); + } + } else { + VLOG(10) << "Found label in query params"; + for (auto i = 0; i < lids.size(); ++i) { + offsets.emplace_back(cnt); + if (expr(eles[i])) { + new_keys.emplace_back(keys[i]); + new_lids.emplace_back(lids[i]); + new_datas.emplace_back(datas[i]); + cnt += 1; + } + } + offsets.emplace_back(cnt); + } + return std::make_tuple(std::move(new_keys), std::move(new_lids), + std::move(new_datas), std::move(offsets)); +} + +template , std::vector, + std::vector>> +RES_T keyed_row_project_vertices_impl( + const std::vector& keys, const std::vector& lids, + LabelT cur_label, std::array& labels, EXPRESSION& expr, + std::vector& eles) { // temporary property + // TODO: vector-based cols should be able to be selected with certain rows. + std::vector offsets; + std::vector new_keys; + std::vector new_lids; + size_t cnt = 0; + offsets.reserve(lids.size() + 1); + int label_ind = -1; + if constexpr (num_labels == 0) { + label_ind = 0; // neq -1 + } else { + // FIXME: no repeated labels. + for (auto i = 0; i < num_labels; ++i) { + if (cur_label == labels[i]) + label_ind = i; + } + } + // FIXME: no repeated labels. + for (auto i = 0; i < num_labels; ++i) { + if (cur_label == labels[i]) + label_ind = i; + } + if (label_ind == -1) { + VLOG(10) << "No label found in query params"; + // for current set, we don't need. 
+ auto size = lids.size(); + for (auto i = 0; i < size; ++i) { + offsets.emplace_back(cnt); + } + } else { + VLOG(10) << "Found label in query params"; + for (auto i = 0; i < lids.size(); ++i) { + offsets.emplace_back(cnt); + if (expr(eles[i])) { + new_keys.emplace_back(keys[i]); + new_lids.emplace_back(lids[i]); + cnt += 1; + } + } + offsets.emplace_back(cnt); + } + return std::make_tuple(std::move(new_keys), std::move(new_lids), + std::move(offsets)); +} + +/////////////////Keyed vertex set impl//////////////////// +template +class KeyedRowVertexSetImpl { + public: + using key_t = KEY_T; + using self_type_t = KeyedRowVertexSetImpl; + using lid_t = VID_T; + using data_tuple_t = std::tuple; + using flat_t = self_type_t; + + using iterator = KeyedRowVertexSetIter; + using filtered_vertex_set = self_type_t; + using ground_vertex_set_t = RowVertexSet; + using index_ele_tuple_t = std::tuple; + // from this tuple, we can reconstruct the partial set. + using flat_ele_tuple_t = std::tuple>; + using EntityValueType = VID_T; + + template + using with_data_t = KeyedRowVertexSetImpl; + + using builder_t = RowVertexSetBuilder; + + static constexpr VID_T NULL_VID = std::numeric_limits::max(); + + static constexpr bool is_keyed = true; + static constexpr bool is_vertex_set = true; + static constexpr bool is_edge_set = false; + static constexpr bool is_multi_label = false; + static constexpr bool is_collection = false; + static constexpr bool is_general_set = false; + static constexpr bool is_two_label_set = false; + + explicit KeyedRowVertexSetImpl(std::vector&& keys, + std::vector&& vids, + std::vector&& datas, + LabelT v_label, + std::array names) + : keys_(std::move(keys)), + vids_(std::move(vids)), + datas_(std::move(datas)), + v_label_(v_label), + prop_names_(names) { + // check_col_len(); + } + + KeyedRowVertexSetImpl(self_type_t&& other) noexcept + : keys_(std::move(other.keys_)), + vids_(std::move(other.vids_)), + datas_(std::move(other.datas_)), + 
v_label_(other.v_label_), + prop_names_(other.prop_names_) {} + + KeyedRowVertexSetImpl(const self_type_t& other) noexcept + : keys_(other.keys_), + vids_(other.vids_), + datas_(other.datas_), + v_label_(other.v_label_), + prop_names_(other.prop_names_) {} + + builder_t CreateBuilder() const { return builder_t(v_label_, prop_names_); } + + iterator begin() const { return iterator(keys_, vids_, datas_, v_label_, 0); } + + iterator end() const { + return iterator(keys_, vids_, datas_, v_label_, keys_.size()); + } + + const std::vector& GetDataVec() const { return datas_; } + + size_t Size() const { return keys_.size(); } + + LabelT GetLabel() const { return v_label_; } + + const std::array& GetPropNames() const { + return prop_names_; + } + + const std::vector& GetVertices() const { return vids_; } + + void Repeat(std::vector& cur_offset, + std::vector& repeat_vec) { + LOG(FATAL) << "not implemented"; + } + + // Unwrap keyed vertex set to unkeyed vertex set. + ground_vertex_set_t ToGround(JoinKind&& join_kind) { + std::vector vec; + std::vector datas; + if (join_kind == JoinKind::InnerJoin) { + vec.reserve(keys_.size()); + datas.reserve(keys_.size()); + for (auto ind : keys_) { + vec.push_back(vids_[ind]); + datas.push_back(datas_[ind]); + // For cols check if it need to apply keys. + } + } else { // anti join. 
+ vec.reserve(vids_.size() - keys_.size()); + datas.reserve(vids_.size() - keys_.size()); + size_t key_ind = 0; + size_t vid_ind = 0; + while (vid_ind < vids_.size() && key_ind < keys_.size()) { + if (vid_ind < keys_[key_ind]) { + vec.push_back(vids_[vid_ind]); + datas.push_back(datas_[vid_ind]); + } else { + while (key_ind < keys_.size() && vid_ind >= keys_[key_ind]) { + key_ind += 1; + } + } + vid_ind += 1; + } + while (vid_ind < vids_.size()) { + vec.push_back(vids_[vid_ind]); + datas.push_back(datas_[vid_ind]); + vid_ind += 1; + } + } + return ground_vertex_set_t(std::move(vec), v_label_, std::move(datas), + std::move(prop_names_)); + } + + template + flat_t Flat(std::vector>& index_ele_tuple) { + static_assert(col_ind < + std::tuple_size_v>); + auto res_keys_vids = + keyedRowFlatImpl(index_ele_tuple, keys_, vids_, datas_); + return self_type_t(std::move(std::get<0>(res_keys_vids)), + std::move(std::get<1>(res_keys_vids)), + std::move(std::get<2>(res_keys_vids)), v_label_, + std::move(prop_names_)); + } + + std::vector FilterWithIndices(std::vector& indices, + JoinKind join_kind) { + auto tuple = keyed_row_filter_with_indices_impl(indices, vids_, keys_, + datas_, join_kind); + vids_.swap(std::get<0>(tuple)); + keys_.swap(std::get<1>(tuple)); + datas_.swap(std::get<2>(tuple)); + return std::get<3>(tuple); + } + + template >> + RES_T project_vertices(std::array& labels, + EXPRESSION& expr, std::vector& eles) const { + // TODO: vector-based cols should be able to be selected with certain rows. 
+ + auto new_lids_datas_and_offset = keyed_row_project_vertices_impl( + keys_, vids_, datas_, v_label_, labels, expr, eles); + self_type_t res_set(std::move(std::get<0>(new_lids_datas_and_offset)), + std::move(std::get<1>(new_lids_datas_and_offset)), + std::move(std::get<2>(new_lids_datas_and_offset)), + v_label_, std::move(prop_names_)); + + return std::make_pair(std::move(res_set), + std::move(std::get<3>(new_lids_datas_and_offset))); + } + + // projectwithRepeatArray, projecting myself + template * = nullptr> + self_type_t ProjectWithRepeatArray(const std::vector& repeat_array, + KeyAlias& key_alias) { + std::vector new_keys; + std::vector new_vids; + std::vector new_datas; + + for (auto i = 0; i < repeat_array.size(); ++i) { + for (auto j = 0; j < repeat_array[i]; ++j) { + new_keys.push_back(keys_[i]); + new_vids.push_back(vids_[i]); + new_datas.push_back(datas_[i]); + } + } + return self_type_t(std::move(new_keys), std::move(new_vids), + std::move(new_datas), v_label_, std::move(prop_names_)); + } + + template + KeyedRowVertexSetImpl WithNewData( + std::vector>&& new_datas) { + CHECK(vids_.size() == new_datas.size()); + return KeyedRowVertexSetImpl( + std::move(keys_), std::move(vids_), std::move(new_datas), v_label_, + std::move(prop_names_)); + } + + template + void fillBuiltinProps(std::vector>& tuples, + const PropNameArray& prop_names) { + std::vector repeat_array(vids_.size(), 1); + fillBuiltinPropsImpl(datas_, prop_names_, tuples, prop_names, repeat_array); + } + + template + void fillBuiltinProps(std::vector>& tuples, + const PropNameArray& prop_names, + const std::vector& repeat_array) { + fillBuiltinPropsImpl(datas_, prop_names_, tuples, prop_names, repeat_array); + } + + protected: + std::vector keys_; + std::vector vids_; + std::vector datas_; + LabelT v_label_; + std::array prop_names_; +}; + +//////////////////////////////////// + +template +class KeyedRowVertexSetImpl { + public: + using key_t = KEY_T; + using self_type_t = + 
KeyedRowVertexSetImpl; + using lid_t = VID_T; + using data_tuple_t = std::tuple; + using flat_t = self_type_t; + + using iterator = + KeyedRowVertexSetIter; + using filtered_vertex_set = self_type_t; + using ground_vertex_set_t = RowVertexSet; + using index_ele_tuple_t = std::tuple; + // from this tuple, we can reconstruct the partial set. + using flat_ele_tuple_t = std::tuple; + using EntityValueType = VID_T; + + template + using with_data_t = KeyedRowVertexSetImpl; + + using builder_t = RowVertexSetBuilder; + + static constexpr VID_T NULL_VID = std::numeric_limits::max(); + + static constexpr bool is_keyed = true; + static constexpr bool is_vertex_set = true; + static constexpr bool is_edge_set = false; + static constexpr bool is_multi_label = false; + static constexpr bool is_collection = false; + static constexpr bool is_general_set = false; + static constexpr bool is_two_label_set = false; + + explicit KeyedRowVertexSetImpl(std::vector&& keys, + std::vector&& vids, LabelT v_label) + : keys_(std::move(keys)), vids_(std::move(vids)), v_label_(v_label) { + // check_col_len(); + } + + KeyedRowVertexSetImpl(self_type_t&& other) noexcept + : keys_(std::move(other.keys_)), + vids_(std::move(other.vids_)), + v_label_(other.v_label_) {} + + KeyedRowVertexSetImpl(const self_type_t& other) noexcept + : keys_(other.keys_), vids_(other.vids_), v_label_(other.v_label_) {} + + iterator begin() const { return iterator(keys_, vids_, v_label_, 0); } + + iterator end() const { + return iterator(keys_, vids_, v_label_, keys_.size()); + } + + size_t Size() const { return keys_.size(); } + + LabelT GetLabel() const { return v_label_; } + + const std::vector& GetVertices() const { return vids_; } + + builder_t CreateBuilder() const { return builder_t(v_label_); } + + void Repeat(std::vector& cur_offset, + std::vector& repeat_vec) { + LOG(FATAL) << "not implemented"; + } + + // Unwrap keyed vertex set to unkeyed vertex set. 
+ ground_vertex_set_t ToGround(JoinKind&& join_kind) { + std::vector vec; + if (join_kind == JoinKind::InnerJoin) { + vec.reserve(keys_.size()); + for (auto ind : keys_) { + vec.push_back(vids_[ind]); + // For cols check if it need to apply keys. + } + } else { // anti join. + vec.reserve(vids_.size() - keys_.size()); + size_t key_ind = 0; + size_t vid_ind = 0; + while (vid_ind < vids_.size() && key_ind < keys_.size()) { + if (vid_ind < keys_[key_ind]) { + vec.push_back(vids_[vid_ind]); + } else { + while (key_ind < keys_.size() && vid_ind >= keys_[key_ind]) { + key_ind += 1; + } + } + vid_ind += 1; + } + while (vid_ind < vids_.size()) { + vec.push_back(vids_[vid_ind]); + vid_ind += 1; + } + } + return ground_vertex_set_t(std::move(vec), v_label_); + } + + template + flat_t Flat(std::vector>& index_ele_tuple) { + static_assert(col_ind < + std::tuple_size_v>); + auto res_keys_vids = + keyedRowFlatImpl(index_ele_tuple, keys_, vids_); + return self_type_t(std::move(std::get<0>(res_keys_vids)), + std::move(std::get<1>(res_keys_vids)), v_label_); + } + + std::vector FilterWithIndices(std::vector& indices, + JoinKind join_kind) { + auto tuple = + keyed_row_filter_with_indices_impl(indices, vids_, keys_, join_kind); + vids_.swap(std::get<0>(tuple)); + keys_.swap(std::get<1>(tuple)); + return std::get<2>(tuple); + } + + template >> + RES_T project_vertices(std::array& labels, + EXPRESSION& expr, std::vector& eles) const { + // TODO: vector-based cols should be able to be selected with certain rows. 
+ + auto new_lids_datas_and_offset = keyed_row_project_vertices_impl( + keys_, vids_, v_label_, labels, expr, eles); + self_type_t res_set(std::move(std::get<0>(new_lids_datas_and_offset)), + std::move(std::get<1>(new_lids_datas_and_offset)), + v_label_); + + return std::make_pair(std::move(res_set), + std::move(std::get<2>(new_lids_datas_and_offset))); + } + + // projectwithRepeatArray, projecting myself + template * = nullptr> + self_type_t ProjectWithRepeatArray(const std::vector& repeat_array, + KeyAlias& key_alias) { + std::vector new_keys; + std::vector new_vids; + + for (auto i = 0; i < repeat_array.size(); ++i) { + for (auto j = 0; j < repeat_array[i]; ++j) { + new_keys.push_back(keys_[i]); + new_vids.push_back(vids_[i]); + } + } + return self_type_t(std::move(new_keys), std::move(new_vids), v_label_); + } + + template + KeyedRowVertexSetImpl WithNewData( + std::vector>&& new_datas) { + CHECK(vids_.size() == new_datas.size()); + return KeyedRowVertexSetImpl( + std::move(keys_), std::move(vids_), std::move(new_datas), v_label_); + } + + template + void fillBuiltinProps(std::vector>& tuples, + const PropNameArray& prop_names, + const std::vector& repeat_array) const { + LOG(WARNING) << "not implemented"; + } + + template + void fillBuiltinProps(std::vector>& tuples, + const PropNameArray& prop_names) const { + LOG(WARNING) << "not implemented"; + } + + protected: + std::vector keys_; + std::vector vids_; + LabelT v_label_; +}; + +template +using KeyedRowVertexSet = KeyedRowVertexSetImpl; + +template +using DefaultKeyedRowVertexSet = + KeyedRowVertexSetImpl; + +template +auto MakeKeyedRowVertexSet(std::vector&& keys, std::vector&& vec, + std::vector>&& datas, + LabelT label) { + return KeyedRowVertexSet( + std::move(keys), std::move(vec), std::move(datas), label); +} + +//////////////////////////////KeyedRowVertexSetBuilder/////////////////////// + +template +class KeyedRowVertexSetBuilderImpl { + public: + using key_t = KEY_T; + using lid_t = VID_T; + using 
data_tuple_t = std::tuple; + using build_res_t = KeyedRowVertexSetImpl; + + KeyedRowVertexSetBuilderImpl(LabelT label, + std::array prop_names) + : label_(label), prop_names_(prop_names), ind_(0) {} + + KeyedRowVertexSetBuilderImpl(const RowVertexSet& old_set) + : label_(old_set.GetLabel()), + prop_names_(old_set.GetPropNames()), + ind_(0) {} + + size_t insert(std::tuple ele_tuple, data_tuple_t data_tuple) { + auto key = std::get<1>(ele_tuple); + if (prop2ind_.find(key) != prop2ind_.end()) { + return prop2ind_[key]; + } else { + prop2ind_[key] = ind_; + keys_.emplace_back(key); + vids_.emplace_back(key); + datas_.emplace_back(data_tuple); + return ind_++; + } + } + + size_t insert(const VID_T& key, data_tuple_t data_tuple) { + if (prop2ind_.find(key) != prop2ind_.end()) { + return prop2ind_[key]; + } else { + prop2ind_[key] = ind_; + keys_.emplace_back(key); + vids_.emplace_back(key); + datas_.emplace_back(data_tuple); + return ind_++; + } + } + + size_t insert(const std::tuple& ele_tuple) { + return insert(std::get<0>(ele_tuple), std::get<1>(ele_tuple)); + } + + build_res_t Build() { + return build_res_t(std::move(keys_), std::move(vids_), std::move(datas_), + label_, std::move(prop_names_)); + } + + private: + LabelT label_; + // Keep the mapping from lid to ind. So we can directly make the lids + // array when building. 
+ std::unordered_map prop2ind_; + std::vector keys_; + std::vector vids_; + std::vector datas_; + size_t ind_; + std::array prop_names_; +}; + +template +class KeyedRowVertexSetBuilderImpl { + public: + using key_t = KEY_T; + using lid_t = VID_T; + using build_res_t = + KeyedRowVertexSetImpl; + + KeyedRowVertexSetBuilderImpl(LabelT label) : label_(label), ind_(0) {} + + KeyedRowVertexSetBuilderImpl( + const RowVertexSet& old_set) + : label_(old_set.GetLabel()), ind_(0) {} + + size_t insert(std::tuple ele_tuple) { + auto key = std::get<1>(ele_tuple); + if (prop2ind_.find(key) != prop2ind_.end()) { + return prop2ind_[key]; + } else { + prop2ind_[key] = ind_; + keys_.emplace_back(key); + vids_.emplace_back(key); + return ind_++; + } + } + + size_t insert(const VID_T& key) { + if (prop2ind_.find(key) != prop2ind_.end()) { + return prop2ind_[key]; + } else { + prop2ind_[key] = ind_; + keys_.emplace_back(key); + vids_.emplace_back(key); + return ind_++; + } + } + + build_res_t Build() { + return build_res_t(std::move(keys_), std::move(vids_), label_); + } + + private: + LabelT label_; + // Keep the mapping from lid to ind. So we can directly make the lids + // array when building. + std::unordered_map prop2ind_; + std::vector keys_; + std::vector vids_; + size_t ind_; +}; + +template +using KeyedRowVertexSetBuilder = + KeyedRowVertexSetBuilderImpl; + +} // namespace gs + +#endif // ENGINES_HQPS_DS_MULTI_VERTEX_SET_KEYED_ROW_VERTEX_SET_H_ diff --git a/flex/engines/hqps_db/structures/multi_vertex_set/multi_label_vertex_set.h b/flex/engines/hqps_db/structures/multi_vertex_set/multi_label_vertex_set.h new file mode 100644 index 000000000000..65e9a2d5a3a1 --- /dev/null +++ b/flex/engines/hqps_db/structures/multi_vertex_set/multi_label_vertex_set.h @@ -0,0 +1,457 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef ENGINES_DS_MULTI_VERTEX_SET_MULTI_LABEL_VERTEX_SET_H_ +#define ENGINES_DS_MULTI_VERTEX_SET_MULTI_LABEL_VERTEX_SET_H_ + +#include +#include +#include "flex/engines/hqps_db/core/utils/hqps_utils.h" +#include "grape/util.h" +#include "grape/utils/bitset.h" + +namespace gs { + +template +class MultiLabelVertexSetIter { + public: + using lid_t = typename VERTEX_SET_T::lid_t; + using self_type_t = MultiLabelVertexSetIter; + using inner_iter_t = typename VERTEX_SET_T::iterator; + using data_tuple_t = typename inner_iter_t::data_tuple_t; + using index_ele_tuple_t = + std::tuple; + + MultiLabelVertexSetIter( + const std::array& set_array, + const std::array, N>& offset_array, + std::array&& begin_iters, + std::array&& end_iters, size_t ind) + : set_array_(set_array), + offset_array_(offset_array), + begin_iters_(std::move(begin_iters)), + end_iters_(std::move(end_iters)), + ind_(ind), + cur_label_(0), + limit_(offset_array_[0].size() - 1), + safe_eles(0) { + for (auto i = 0; i < N; ++i) { + local_ind_[i] = 0; + } + probe_for_next(); + } + + lid_t GetVertex() const { return begin_iters_[cur_label_].GetVertex(); } + + lid_t GetElement() const { return GetVertex(); } + + data_tuple_t GetData() const { return begin_iters_[cur_label_].GetData(); } + + // Get the current ind of which set we are using. 
+  size_t GetCurInd() const { return cur_label_; }
+
+  // Position of the current element inside the set selected by GetCurInd().
+  // It is incremented together with that set's inner iterator in operator++.
+  size_t GetCurSetInnerInd() const { return local_ind_[cur_label_]; }
+
+  // Advances to the next element. The inner iterator and inner index of the
+  // current set are always stepped; if the current set still has elements for
+  // the current outer index (safe_eles > 0) nothing else changes, otherwise
+  // the search continues from the next set via probe_for_next().
+  inline const self_type_t& operator++() {
+    ++begin_iters_[cur_label_];
+    ++local_ind_[cur_label_];
+    if (safe_eles > 0) {
+      safe_eles -= 1;
+    } else {
+      cur_label_ = cur_label_ + 1;
+      probe_for_next();
+    }
+
+    return *this;
+  }
+
+  // Returns (set index, (inner index in that set, vertex id)) for the current
+  // element.
+  index_ele_tuple_t GetIndexElement() const {
+    return std::make_tuple(
+        cur_label_, std::make_tuple(local_ind_[cur_label_],
+                                    begin_iters_[cur_label_].GetVertex()));
+  }
+
+  // Comparisons look at the outer index (ind_) only; the current set and the
+  // inner positions are not taken into account.
+  inline bool operator==(const self_type_t& rhs) const {
+    return ind_ == rhs.ind_;
+  }
+
+  inline bool operator!=(const self_type_t& rhs) const {
+    return ind_ != rhs.ind_;
+  }
+
+  inline bool operator<(const self_type_t& rhs) const {
+    return ind_ < rhs.ind_;
+  }
+
+  // Dereferencing yields the iterator itself; callers use its Get* accessors.
+  inline const self_type_t& operator*() const { return *this; }
+
+  inline const self_type_t* operator->() const { return this; }
+
+  // Moves (ind_, cur_label_) forward to the next position that still has an
+  // unvisited element. For a given outer index, sets are tried in increasing
+  // order; a set is skipped when its inner index has already reached the end
+  // offset recorded for that outer index. When all N sets are exhausted the
+  // outer index is advanced and the search restarts from set 0. On success,
+  // safe_eles is set to the number of elements of the chosen set that belong
+  // to this outer index, minus one (the current element).
+  inline void probe_for_next() {
+    while (ind_ < limit_) {
+      while (cur_label_ < N &&
+             local_ind_[cur_label_] >= offset_array_[cur_label_][ind_ + 1]) {
+        cur_label_ += 1;
+      }
+      if (cur_label_ < N) {
+        break;
+      }
+      cur_label_ = 0;
+      ind_ += 1;
+    }
+    if (ind_ < limit_) {
+      safe_eles = offset_array_[cur_label_][ind_ + 1] -
+                  offset_array_[cur_label_][ind_] - 1;
+    }
+  }
+
+ private:
+  // Current outer index; iteration ends when it reaches limit_.
+  size_t ind_;
+  // Index of the set the iterator currently points into.
+  size_t cur_label_;
+  // Referenced, not owned.
+  const std::array& set_array_;
+  // Declared without '&', i.e. held by value.
+  const std::array, N> offset_array_;
+  std::array begin_iters_;
+  std::array end_iters_;
+  // Per-set count of elements already stepped over.
+  std::array local_ind_;
+  // Remaining elements of the current set for the current outer index that
+  // can be consumed without calling probe_for_next() again.
+  size_t safe_eles;
+  // offset_array_[0].size() - 1, set in the constructor.
+  size_t limit_;
+};
+
+// The vertex sets can be of different labels.
+// But share the same vertex set type.
+// For every set k, offset_array_[k] partitions the set by outer index: the
+// elements [offset_array_[k][i], offset_array_[k][i + 1]) of set k belong to
+// outer index i (this is how MultiLabelVertexSetIter walks the sets).
+template
+class MultiLabelVertexSet {
+ public:
+  using inner_iter = typename VERTEX_SET_T::iterator;
+  using lid_t = typename VERTEX_SET_T::lid_t;
+  using iterator = MultiLabelVertexSetIter;
+  using data_tuple_t = typename VERTEX_SET_T::data_tuple_t;
+  using index_ele_tuple_t =
+      std::tuple;
+  using self_type_t = MultiLabelVertexSet;
+  using flat_t = self_type_t;
+  using EntityValueType = typename VERTEX_SET_T::EntityValueType;
+  using label_id_t = typename VERTEX_SET_T::label_id_t;
+  static constexpr auto ind_seq = std::make_index_sequence{};
+  // Compile-time tags describing what kind of set this is.
+  static constexpr bool is_keyed = false;
+  static constexpr bool is_vertex_set = true;
+  static constexpr bool is_two_label_set = false;
+  static constexpr bool is_edge_set = false;
+  static constexpr bool is_multi_label = true;
+  static constexpr bool is_collection = false;
+  static constexpr bool is_general_set = false;
+  static constexpr size_t num_labels = N;
+
+  // Takes ownership of the N sets and of their N offset arrays.
+  MultiLabelVertexSet(std::array&& set_array,
+                      std::array, N>&& offset_array)
+      : set_array_(std::move(set_array)),
+        offset_array_(std::move(offset_array)) {}
+
+  // Iterator positioned at outer index 0.
+  iterator begin() const {
+    auto begin_iters = create_begin_array(ind_seq);
+    auto end_iters = create_end_array(ind_seq);
+    return iterator(set_array_, offset_array_, std::move(begin_iters),
+                    std::move(end_iters), 0);
+  }
+
+  // Past-the-end iterator: its outer index is the number of offset entries
+  // of the first set minus one.
+  iterator end() const {
+    auto begin_iters = create_begin_array(ind_seq);
+    auto end_iters = create_end_array(ind_seq);
+    return iterator(set_array_, offset_array_, std::move(begin_iters),
+                    std::move(end_iters),
+                    std::get<0>(offset_array_).size() - 1);
+  }
+
+  // Array holding begin() of every inner set.
+  template
+  auto create_begin_array(std::index_sequence) const {
+    return gs::make_array(set_array_[Is].begin()...);
+  }
+
+  // Array holding end() of every inner set.
+  template
+  auto create_end_array(std::index_sequence) const {
+    return gs::make_array(set_array_[Is].end()...);
+  }
+
+  // Total number of elements, summed over all inner sets.
+  size_t Size() const {
+    size_t res = 0;
+    for (auto s : set_array_) {
+      res += s.Size();
+    }
+    return res;
+  }
+
+  // Label of every inner set, in set order.
+  std::array GetLabels() const {
+    std::array labels;
+    for (auto i = 0; i < N; ++i) {
+      labels[i] = set_array_[i].GetLabel();
+    }
+    return labels;
+  }
+
+  // subset inplace.
+  // Keeps only the elements whose position in iteration order (the order
+  // produced by begin()/end()) appears in select_indices, and rewrites the
+  // offset arrays to describe the shrunken sets. select_indices is consumed
+  // with a single forward cursor, so it is presumably sorted ascending --
+  // TODO confirm against callers.
+  void SubSetWithIndices(std::vector& select_indices) {
+    std::vector> indices_vec(N);
+    std::vector> local_offsets(N);
+    size_t cur_cnt = 0;
+    size_t select_indices_ind = 0;
+    for (auto i = 0; i < N; ++i) {
+      local_offsets[i].emplace_back(0);
+    }
+    for (auto iter : *this) {
+      auto set_ind = iter.GetCurInd();
+      auto set_inner_ind = iter.GetCurSetInnerInd();
+      // if (active_label.get_bit(array_[set_ind].first.GetLabel())) {
+      while (select_indices_ind < select_indices.size() &&
+             select_indices[select_indices_ind] < cur_cnt) {
+        select_indices_ind++;
+      }
+      // Keep iterating even after select_indices is exhausted: every element
+      // must contribute one entry to local_offsets[set_ind], otherwise the
+      // lookup below would index past the end of that table.
+      if (select_indices_ind < select_indices.size() &&
+          select_indices[select_indices_ind] == cur_cnt) {
+        indices_vec[set_ind].emplace_back(set_inner_ind);
+      }
+      local_offsets[set_ind].emplace_back(indices_vec[set_ind].size());
+      cur_cnt += 1;
+    }
+
+    // for (auto i = 0; i < N; ++i) {
+    //   VLOG(10) << "sub set: " << i
+    //            << ", offset: " << gs::to_string(local_offsets[i]);
+    //   VLOG(10) << "sub set: " << i
+    //            << ", indices: " << gs::to_string(indices_vec[i]);
+    // }
+    // regard offset array
+    // local_offsets[i][p] is the number of kept elements among the first p
+    // elements of set i. It is used purely as a lookup table here; the old
+    // offsets are translated directly inside offset_array_, so the table is
+    // never overwritten while it is still being read and the result keeps
+    // exactly one entry per existing offset.
+    for (auto i = 0; i < N; ++i) {
+      for (auto j = 0; j < offset_array_[i].size(); ++j) {
+        offset_array_[i][j] = local_offsets[i][offset_array_[i][j]];
+      }
+    }
+    // for (auto i = 0; i < N; ++i) {
+    //   VLOG(10) << "sub set: " << i
+    //            << ", res offset: " << gs::to_string(offset_array_[i]);
+    // }
+    for (auto i = 0; i < N; ++i) {
+      set_array_[i].SubSetWithIndices(indices_vec[i]);
+    }
+  }
+
+  // project self.
+  // Builds a new multi-label set in which the element at iteration position
+  // p is repeated repeat_array[p] times (elements with a zero count are
+  // dropped). repeat_array must have exactly Size() entries. After every
+  // element with a non-zero count, one offset entry is appended for each of
+  // the N sets.
+  template ::type* = nullptr>
+  self_type_t ProjectWithRepeatArray(const std::vector& repeat_array,
+                                     KeyAlias& key_alias) const {
+    std::vector> indices_vec(N);
+    std::array, N> local_offsets;
+
+    size_t cur_cnt = 0;
+    for (auto i = 0; i < N; ++i) {
+      local_offsets[i].emplace_back(0);
+    }
+
+    size_t cur_ind = 0;
+    CHECK(Size() == repeat_array.size());
+    for (auto iter : *this) {
+      auto set_ind = iter.GetCurInd();
+      auto set_inner_ind = iter.GetCurSetInnerInd();
+      if (repeat_array[cur_ind] > 0) {
+        for (auto j = 0; j < repeat_array[cur_ind]; ++j) {
+          indices_vec[set_ind].emplace_back(set_inner_ind);
+          // local_offsets[set_ind].emplace_back(indices_vec[set_ind].size());
+        }
+        for (auto j = 0; j < N; ++j) {
+          local_offsets[j].emplace_back(indices_vec[j].size());
+        }
+      }
+      cur_ind += 1;
+    }
+
+    auto res_set_array = make_set_offset_pair_array_pair(
+        std::move(indices_vec), std::make_index_sequence());
+    MultiLabelVertexSet res_set(
+        std::move(res_set_array), std::move(std::move(local_offsets)));
+    return res_set;
+  }
+
+  // Rebuilds this set in place from a list of index/element tuples and
+  // returns it by move, so the object must not be used afterwards. For each
+  // tuple, the entry at col_ind names a set and an inner index in that set;
+  // one offset entry per set is appended after every tuple.
+  template
+  self_type_t Flat(
+      std::vector>& index_ele_tuple) {
+    // Each selected entry is read below as (set index, (inner index, ...)).
+    std::array, N> indices;
+    for (auto i = 0; i < N; ++i) {
+      offset_array_[i].clear();
+    }
+
+    // update offsets.
+    std::array local_ind;
+    for (auto i = 0; i < N; ++i) {
+      local_ind[i] = 0;
+      offset_array_[i].emplace_back(local_ind[i]);
+    }
+    for (auto i = 0; i < index_ele_tuple.size(); ++i) {
+      auto cur_index_ele = std::get(index_ele_tuple[i]);
+      VLOG(10) << "MultiLabel: got index ele: " << gs::to_string(cur_index_ele);
+      auto label = std::get<0>(cur_index_ele);
+      auto inner_ind = std::get<0>(std::get<1>(cur_index_ele));
+      local_ind[label] += 1;
+
+      indices[label].emplace_back(inner_ind);
+      // NOTE: this loop variable shadows the outer 'i'.
+      for (auto i = 0; i < N; ++i) {
+        offset_array_[i].emplace_back(local_ind[i]);
+      }
+    }
+    for (auto i = 0; i < N; ++i) {
+      set_array_[i].SubSetWithIndices(indices[i]);
+      // array_[i].second.emplace_back(local_ind[i]++);
+      VLOG(10) << "offset for: " << i << ",is"
+               << gs::to_string(offset_array_[i]);
+    }
+
+    for (auto i = 0; i < N; ++i) {
+      VLOG(10) << "Multi label finish flat: " << local_ind[i];
+    }
+    VLOG(10) << "size: " << Size();
+    return std::move(*this);
+  }
+
+  // Filter vertex sets with expression and labels.
+  // An element is kept when the label of its set is listed in `labels` and
+  // expr(eles[set index][inner index]) is true. Returns the filtered set
+  // together with a global offset array that has one leading 0 plus one
+  // entry per visited element (the running count of kept elements).
+  template ,
+            typename RES_T = std::pair>>
+  RES_T project_vertices(std::array& labels,
+                         EXPRESSION& expr,
+                         std::vector>& eles) const {
+    // TODO: vector-based cols should be able to be selected with certain rows.
+
+    std::unordered_set active_label;
+    for (auto l : labels) {
+      active_label.insert(l);
+    }
+    VLOG(10) << "finish set active label";
+
+    std::vector> indices_vec(N);
+    std::vector global_offset;
+    std::vector> local_offsets(N);
+    size_t cur_cnt = 0;
+    global_offset.emplace_back(0);
+    for (auto i = 0; i < N; ++i) {
+      local_offsets[i].emplace_back(0);
+    }
+    for (auto iter : *this) {
+      auto set_ind = iter.GetCurInd();
+      auto set_inner_ind = iter.GetCurSetInnerInd();
+      if (active_label.find(set_array_[set_ind].GetLabel()) !=
+          active_label.end()) {
+        // check filter
+        if (expr(eles[set_ind][set_inner_ind])) {
+          indices_vec[set_ind].emplace_back(set_inner_ind);
+          cur_cnt += 1;
+        }
+      }
+      local_offsets[set_ind].emplace_back(indices_vec[set_ind].size());
+      global_offset.emplace_back(cur_cnt);
+    }
+    // build global offset from local offset.
+    // local_offsets[i] maps an old inner position of set i to the number of
+    // kept elements before it; every old offset is translated through it
+    // into a separate array so the lookup table is not modified while read.
+
+    std::array, N> new_offset;
+    for (auto i = 0; i < N; ++i) {
+      new_offset[i].reserve(offset_array_[i].size());
+      for (auto j = 0; j < offset_array_[i].size(); ++j) {
+        new_offset[i].emplace_back(local_offsets[i][offset_array_[i][j]]);
+        // local_offsets[i][j] = local_offsets[i][offset_array_[i][j]];
+      }
+    }
+
+    auto res_set_array = make_set_offset_pair_array_pair(
+        std::move(indices_vec), std::make_index_sequence());
+    MultiLabelVertexSet res_set(
+        std::move(res_set_array), std::move(std::move(new_offset)));
+
+    return std::make_pair(std::move(res_set), std::move(global_offset));
+  }
+
+  // Applies WithIndices(indices[k]) to every inner set k and collects the
+  // results into an array. `indices` must have exactly N entries.
+  template
+  auto make_set_offset_pair_array_pair(
+      std::vector>&& indices,
+      std::index_sequence) const {
+    CHECK(indices.size() == N);
+    return std::array{
+        std::move(std::get(set_array_).WithIndices(indices[Is]))...};
+  }
+
+  // Not implemented for multi-label sets; aborts via LOG(FATAL).
+  void Repeat(std::vector& cur_offset,
+              std::vector& repeat_vec) {
+    LOG(FATAL) << "Not implemented";
+  }
+
+  // Read-only access to the inner set at a runtime index.
+  const VERTEX_SET_T& GetSet(size_t ind) const { return set_array_[ind]; }
+
+  // Mutable access to the inner set at a compile-time index.
+  template
+  VERTEX_SET_T& GetSet() {
+    return set_array_[Is];
+  }
+
+  // Mutable access to the offset array at a compile-time index.
+  template
+  std::vector& GetOffset() {
+    return offset_array_[Is];
+  }
+ + std::vector& GetOffset(size_t Is) { return offset_array_[Is]; } + + template + void fillBuiltinPropsImpl( + std::vector>>& tuples, + std::string& prop_name, std::vector& repeat_array) const { + if constexpr (std::is_same_v>, + Dist>) { + if (prop_name == "dist") { + LOG(FATAL) << "Not supported"; + } + } + } + + template + void fillBuiltinPropsImpl( + std::vector>>& tuples, + PropNameArray& prop_names, std::vector& repeat_array, + std::index_sequence) const { + (fillBuiltinPropsImpl(tuples, std::get(prop_names), + repeat_array), + ...); + } + + template + void fillBuiltinProps(std::vector>>& tuples, + PropNameArray& prop_names, + std::vector& repeat_array) const { + fillBuiltinPropsImpl(tuples, prop_names, repeat_array, + std::make_index_sequence()); + } + + // No repeat array is not provided + template + void fillBuiltinProps(std::vector>>& tuples, + PropNameArray& prop_names) { + LOG(FATAL) << "not supported"; + } + + private: + std::array set_array_; + std::array, N> offset_array_; +}; +} // namespace gs + +#endif // ENGINES_DS_MULTI_VERTEX_SET_MULTI_LABEL_VERTEX_SET_H_ diff --git a/flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h b/flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h new file mode 100644 index 000000000000..3eb601dbaffd --- /dev/null +++ b/flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h @@ -0,0 +1,1451 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef ENGINES_HQPS_DS_MULTI_VERTEX_SET_ROW_VERTEX_SET_H_ +#define ENGINES_HQPS_DS_MULTI_VERTEX_SET_ROW_VERTEX_SET_H_ + +#include +#include +#include + +#include "flex/engines/hqps_db/structures/collection.h" +#include "grape/util.h" + +// Vertex set in with data in rows. +namespace gs { + +namespace internal { + +template +void fillBuiltinPropsImpl( + const std::vector>& datas, + const std::array& set_prop_names, + std::vector>& tuples, const std::string& prop_name, + const std::vector& repeat_array) { + using cur_prop = std::tuple_element_t>; + using my_prop = std::tuple_element_t>; + VLOG(10) << "ProjId: " << Proj_Is << ", MyId: " << My_Is << ", " + << " input prop_name: " << prop_name << ", " + << typeid(cur_prop).name() << ", " << typeid(my_prop).name() + << ",prop_name " << set_prop_names[My_Is] + << ", eq: " << gs::to_string(std::is_same_v); + if constexpr (std::is_same_v) { + if (prop_name == set_prop_names[My_Is]) { + VLOG(10) << "Found builin property " << prop_name; + CHECK(repeat_array.size() == datas.size()); + size_t ind = 0; + for (auto i = 0; i < repeat_array.size(); ++i) { + for (auto j = 0; j < repeat_array[i]; ++j) { + std::get(tuples[ind]) = std::get<0>(datas[i]); + ind += 1; + } + } + } + } +} + +template +void fillBuiltinPropsImpl( + const std::vector>& datas, + const std::array& set_prop_names, + std::vector>& tuples, const std::string& prop_name, + const std::vector& repeat_array, std::index_sequence) { + (fillBuiltinPropsImpl(datas, set_prop_names, tuples, prop_name, + repeat_array), + ...); +} + +template +void fillBuiltinPropsImpl( + const std::vector>& datas, + const std::array& set_prop_names, + std::vector>& tuples, + const PropNameArray& prop_names, + const std::vector& repeat_array, std::index_sequence) { + (fillBuiltinPropsImpl(datas, set_prop_names, tuples, + std::get(prop_names), repeat_array, + 
std::make_index_sequence()), + ...); +} +template +void fillBuiltinPropsImpl( + const std::vector>& datas, + const std::array& set_prop_names, + std::vector>& tuples, + const PropNameArray& prop_names, + const std::vector& repeat_array) { + return gs::internal::fillBuiltinPropsImpl( + datas, set_prop_names, tuples, prop_names, repeat_array, + std::make_index_sequence()); +} +} // namespace internal + +template +void fillBuiltinPropsImpl( + const std::vector>& datas, + const std::array& set_prop_names, + std::vector>& tuples, + const PropNameArray& prop_names, + const std::vector& repeat_array) { + return gs::internal::fillBuiltinPropsImpl(datas, set_prop_names, tuples, + prop_names, repeat_array); +} + +template +class RowVertexSetImpl; + +// RowSetBuilder +template +class RowVertexSetImplBuilder { + public: + using result_t = RowVertexSetImpl; + using data_tuple_t = std::tuple; + static constexpr bool is_row_vertex_set_builder = true; + static constexpr bool is_flat_edge_set_builder = false; + static constexpr bool is_general_edge_set_builder = false; + static constexpr bool is_two_label_set_builder = false; + + RowVertexSetImplBuilder(LabelT v_label, + std::array prop_names) + : v_label_(v_label), prop_names_(prop_names) {} + + RowVertexSetImplBuilder( + const RowVertexSetImplBuilder& other) { + v_label_ = other.v_label_; + prop_names_ = other.prop_names_; + } + + void Insert(VID_T&& vid, data_tuple_t&& data) { + vids_.emplace_back(std::move(vid)); + datas_.emplace_back(std::move(data)); + } + + void Insert(const VID_T& vid, const data_tuple_t& data) { + vids_.push_back(vid); + datas_.push_back(data); + } + + void Insert(const std::tuple& ind_ele, + const data_tuple_t& data) { + vids_.push_back(std::get<1>(ind_ele)); + datas_.push_back(data); + } + + void Insert(const std::tuple>& flat_eles) { + vids_.push_back(std::get<1>(flat_eles)); + datas_.push_back(std::get<2>(flat_eles)); + } + + result_t Build() { + return result_t(std::move(vids_), v_label_, 
std::move(datas_), + std::move(prop_names_)); + } + + size_t Size() const { return vids_.size(); } + + private: + std::vector vids_; + std::vector datas_; + LabelT v_label_; + std::array prop_names_; +}; + +// RowSetBuilder +template +class RowVertexSetImplBuilder { + public: + using result_t = RowVertexSetImpl; + + static constexpr bool is_row_vertex_set_builder = true; + static constexpr bool is_flat_edge_set_builder = false; + static constexpr bool is_general_edge_set_builder = false; + static constexpr bool is_two_label_set_builder = false; + + RowVertexSetImplBuilder(LabelT v_label) : v_label_(v_label) {} + + RowVertexSetImplBuilder(const RowVertexSetImplBuilder& rhs) + : vids_(rhs.vids_), v_label_(rhs.v_label_) {} + + void Insert(VID_T&& vid) { vids_.emplace_back(vid); } + + void Insert(const VID_T& vid) { vids_.push_back(vid); } + + void Insert(const std::tuple& flat_eles) { + vids_.push_back(std::get<1>(flat_eles)); + } + + result_t Build() { return result_t(std::move(vids_), v_label_); } + + size_t Size() const { return vids_.size(); } + + private: + std::vector vids_; + LabelT v_label_; +}; + +template +using RowVertexSetBuilder = RowVertexSetImplBuilder; + +template +class RowVertexSetIter { + public: + using lid_t = VID_T; + using index_ele_tuple_t = std::tuple; + + using data_tuple_t = std::tuple; + using self_type_t = RowVertexSetIter; + + // from this tuple, we can reconstruct the partial set. 
+ using flat_ele_tuple_t = std::tuple>; + static constexpr VID_T NULL_VID = std::numeric_limits::max(); + + RowVertexSetIter(const std::vector& vids, + const std::vector& datas, size_t ind) + : vids_(vids), datas_(datas), cur_ind_(ind) {} + + lid_t GetElement() const { return vids_[cur_ind_]; } + + index_ele_tuple_t GetIndexElement() const { + return std::make_tuple(cur_ind_, vids_[cur_ind_]); + } + + flat_ele_tuple_t GetFlatElement() const { + return std::make_tuple(cur_ind_, vids_[cur_ind_], GetData()); + } + + lid_t GetVertex() const { return vids_[cur_ind_]; } + + data_tuple_t GetData() const { return datas_[cur_ind_]; } + + template + auto GetCol() const { + // TODO: fixme + return std::get(datas_[cur_ind_]); + } + + inline const self_type_t& operator++() { + ++cur_ind_; + return *this; + } + + inline self_type_t operator++(int) { + self_type_t ret(*this); + ++cur_ind_; + return ret; + } + + // We may never compare to other kind of iterators + inline bool operator==(const self_type_t& rhs) const { + return cur_ind_ == rhs.cur_ind_; + } + + inline bool operator!=(const self_type_t& rhs) const { + return cur_ind_ != rhs.cur_ind_; + } + + inline bool operator<(const self_type_t& rhs) const { + return cur_ind_ < rhs.cur_ind_; + } + + inline const self_type_t& operator*() const { return *this; } + + inline const self_type_t* operator->() const { return this; } + + private: + const std::vector& vids_; + const std::vector& datas_; + size_t cur_ind_; +}; + +template +class RowVertexSetIter { + public: + using lid_t = VID_T; + using index_ele_tuple_t = std::tuple; + using data_tuple_t = std::tuple; + using self_type_t = RowVertexSetIter; + // from this tuple, we can reconstruct the partial set. 
+ using flat_ele_tuple_t = std::tuple; + static constexpr VID_T NULL_VID = std::numeric_limits::max(); + + RowVertexSetIter(const std::vector& vids, size_t ind) + : vids_(vids), cur_ind_(ind) {} + + lid_t GetElement() const { return vids_[cur_ind_]; } + + index_ele_tuple_t GetIndexElement() const { + return std::make_tuple(cur_ind_, vids_[cur_ind_]); + } + + flat_ele_tuple_t GetFlatElement() const { + return std::make_tuple(cur_ind_, vids_[cur_ind_]); + } + + lid_t GetVertex() const { return vids_[cur_ind_]; } + + data_tuple_t GetData() const { return grape::EmptyType(); } + + inline const self_type_t& operator++() { + ++cur_ind_; + return *this; + } + + inline self_type_t operator++(int) { + self_type_t ret(*this); + ++cur_ind_; + return ret; + } + + // We may never compare to other kind of iterators + inline bool operator==(const self_type_t& rhs) const { + return cur_ind_ == rhs.cur_ind_; + } + + inline bool operator!=(const self_type_t& rhs) const { + return cur_ind_ != rhs.cur_ind_; + } + + inline bool operator<(const self_type_t& rhs) const { + return cur_ind_ < rhs.cur_ind_; + } + + inline const self_type_t& operator*() const { return *this; } + + inline const self_type_t* operator->() const { return this; } + + private: + const std::vector& vids_; + size_t cur_ind_; +}; + +template +static std::pair, std::vector> +RowSetSubSetImpl(const std::vector& old_vids, + const std::vector& old_datas, + std::vector& indices) { + VLOG(10) << "RowSetSubSetImple"; + std::vector new_vids(indices.size(), 0); + std::vector new_datas; + new_datas.reserve(indices.size()); + for (auto i = 0; i < indices.size(); ++i) { + new_vids[i] = old_vids[indices[i]]; + new_datas.emplace_back(old_datas[indices[i]]); + } + return std::make_pair(std::move(new_vids), std::move(new_datas)); +} + +template +static std::vector RowSetSubSetImpl(const std::vector& old_vids, + std::vector& indices) { + VLOG(10) << "RowSetSubSetImple"; + std::vector new_vids(indices.size(), 0); + for (auto i = 0; i 
< indices.size(); ++i) { + new_vids[i] = old_vids[indices[i]]; + } + return new_vids; +} + +template +std::vector RowSetDedupImpl( + const std::vector& ori_lids, + const std::vector& ori_datas, std::vector& res_lids, + std::vector& res_datas) { + std::vector offsets; + VLOG(10) << "lid size" << ori_lids.size(); + offsets.reserve(ori_lids.size()); + + // TODO: replace with bitset. + std::unordered_map v2lid; + size_t cnt = 0; + for (auto i = 0; i < ori_lids.size(); ++i) { + offsets.emplace_back(cnt); + auto ret = v2lid.insert({ori_lids[i], i}); + if (ret.second == true) { + cnt += 1; + res_lids.emplace_back(ori_lids[i]); + res_datas.emplace_back(ori_datas[i]); + } + } + offsets.emplace_back(cnt); + + // VLOG(10) << "in dedup: offsets: " << gs::to_string(offsets); + // VLOG(10) << "in dedup: vids : " << gs::to_string(res_lids); + return offsets; +} + +template +std::vector RowSetDedupImpl(const std::vector& ori_lids, + std::vector& res_lids) { + std::vector offsets; + VLOG(10) << "lid size" << ori_lids.size(); + offsets.reserve(ori_lids.size()); + + // TODO: replace with bitset. 
+ std::unordered_map v2lid; + size_t cnt = 0; + for (auto i = 0; i < ori_lids.size(); ++i) { + offsets.emplace_back(cnt); + auto ret = v2lid.insert({ori_lids[i], i}); + if (ret.second == true) { + cnt += 1; + res_lids.emplace_back(ori_lids[i]); + } + } + offsets.emplace_back(cnt); + + return offsets; +} + +template +auto rowSetFlatImpl( + std::vector>& index_ele_tuples, + std::vector& origin_vids, std::vector& origin_datas) { + std::vector res_vids; + std::vector res_data_tuple; + res_vids.reserve(index_ele_tuples.size()); + for (auto ele : index_ele_tuples) { + auto& cur = std::get(ele); + //(ind, vid) + auto& ind = std::get<0>(cur); + CHECK(ind < origin_vids.size()); + res_vids.emplace_back(origin_vids[ind]); + res_data_tuple.emplace_back(origin_datas[ind]); + } + return std::make_pair(std::move(res_vids), std::move(res_data_tuple)); +} + +// if num_labels == 0, we deem it as take all labels. +template , std::vector>> +RES_T row_project_vertices_impl(const std::vector& lids, + LabelT cur_label, + std::array& labels, + EXPRESSION& expr, + std::array& prop_getters) { + std::vector offsets; + std::vector new_lids; + size_t cnt = 0; + offsets.reserve(lids.size() + 1); + int label_ind = -1; + + // FIXME: no repeated labels. + if constexpr (num_labels == 0) { + VLOG(10) << "take all labels"; + label_ind = 0; // whatever greater than -1. + } else { + for (auto i = 0; i < num_labels; ++i) { + if (cur_label == labels[i]) + label_ind = i; + } + } + if (label_ind == -1) { + VLOG(10) << "No label found in query params"; + // for current set, we don't need. 
+ auto size = lids.size(); + for (auto i = 0; i < size; ++i) { + offsets.emplace_back(cnt); + } + } else { + // VLOG(10) << "Found label in query params"; + auto& cur_prop_getter = prop_getters[0]; + for (auto i = 0; i < lids.size(); ++i) { + offsets.emplace_back(cnt); + auto cur_lid = lids[i]; + auto prop = cur_prop_getter.get_view(cur_lid); + if (std::apply(expr, prop)) { + // if (expr(eles[i])) { + new_lids.emplace_back(cur_lid); + cnt += 1; + } + } + offsets.emplace_back(cnt); + } + VLOG(10) << "Project vertices, new lids" << new_lids.size() + << ", offset size: " << offsets.size(); + return std::make_pair(std::move(new_lids), std::move(offsets)); +} +template < + typename lid_t, typename EXPRESSION, size_t num_labels, typename LabelT, + typename data_tuple_t, typename PROP_GETTER, + typename RES_T = std::tuple, std::vector, + std::vector>> +RES_T row_project_vertices_impl( + const std::vector& lids, const std::vector& datas, + LabelT cur_label, std::array& labels, EXPRESSION& expr, + std::array& prop_getters) { // temporary property + std::vector offsets; + std::vector new_lids; + std::vector new_datas; + size_t cnt = 0; + offsets.reserve(lids.size() + 1); + int label_ind = -1; + if constexpr (num_labels == 0) { + VLOG(10) << "num_labels == 0"; + label_ind = 0; // whatever greater than -1. + } else { + // FIXME: no repeated labels. + for (auto i = 0; i < num_labels; ++i) { + if (cur_label == labels[i]) + label_ind = i; + } + } + if (label_ind == -1) { + VLOG(10) << "No label found in query params"; + // for current set, we don't need. 
+ auto size = lids.size(); + for (auto i = 0; i < size; ++i) { + offsets.emplace_back(cnt); + } + } else { + VLOG(10) << "Found label in query params"; + auto& cur_prop_getter = prop_getters[0]; + for (auto i = 0; i < lids.size(); ++i) { + offsets.emplace_back(cnt); + auto cur_lid = lids[i]; + auto prop = cur_prop_getter.get_view(cur_lid); + if (std::apply(expr, prop)) { + new_lids.emplace_back(lids[i]); + new_datas.emplace_back(datas[i]); + cnt += 1; + } + } + offsets.emplace_back(cnt); + } + return std::make_tuple(std::move(new_lids), std::move(new_datas), + std::move(offsets)); +} + +// select certain labels from set +template , std::vector>> +RES_T select_labels(const std::vector& lids, LabelT cur_label, + std::array& labels) { + std::vector offsets; + size_t cnt = 0; + offsets.reserve(lids.size() + 1); + int label_ind = -1; + std::vector new_lids; + if constexpr (num_labels == 0) { + label_ind = 0; + } else { + // FIXME: no repeated labels. + for (auto i = 0; i < num_labels; ++i) { + if (cur_label == labels[i]) + label_ind = i; + } + } + if (label_ind == -1) { + VLOG(10) << "No label found in query params"; + // for current set, we don't need. + auto size = lids.size(); + for (auto i = 0; i < size; ++i) { + offsets.emplace_back(cnt); + } + } else { + for (auto i = 0; i < lids.size(); ++i) { + offsets.emplace_back(i); + } + offsets.emplace_back(lids.size()); + new_lids = lids; // copy the vids. + } + return std::make_pair(std::move(new_lids), std::move(offsets)); +} + +template < + typename lid_t, typename data_tuple_t, size_t num_labels, typename LabelT, + typename RES_T = std::tuple, std::vector, + std::vector>> +RES_T select_labels(const std::vector& lids, + const std::vector& data_tuples, + LabelT cur_label, std::array& labels) { + std::vector offsets; + size_t cnt = 0; + offsets.reserve(lids.size() + 1); + int label_ind = -1; + std::vector new_lids; + std::vector new_data_tuples; + // FIXME: no repeated labels. 
+ if constexpr (num_labels == 0) { + label_ind = 0; + } else { + for (auto i = 0; i < num_labels; ++i) { + if (cur_label == labels[i]) + label_ind = i; + } + } + if (label_ind == -1) { + VLOG(10) << "No label found in query params"; + // for current set, we don't need. + auto size = lids.size(); + for (auto i = 0; i < size; ++i) { + offsets.emplace_back(cnt); + } + } else { + for (auto i = 0; i < lids.size(); ++i) { + offsets.emplace_back(i); + } + offsets.emplace_back(lids.size()); + new_lids = lids; // copy the vids. + new_data_tuples = data_tuples; + } + return std::make_tuple(std::move(new_lids), std::move(new_data_tuples), + std::move(offsets)); +} + +template +auto rowSetFlatImpl( + std::vector>& index_ele_tuples, + std::vector& origin_vids) { + std::vector res_vids; + res_vids.reserve(index_ele_tuples.size()); + for (auto ele : index_ele_tuples) { + auto& cur = std::get(ele); + //(ind, vid) + auto& ind = std::get<0>(cur); + CHECK(ind < origin_vids.size()); + res_vids.emplace_back(origin_vids[ind]); + } + return res_vids; +} + +template +auto row_project_with_repeat_array_impl( + const KeyAlias& key_alias, + const std::vector& repeat_array, + const std::vector& old_lids) { + using res_t = std::vector< + std::tuple>::type...>>; + + res_t res_vec; + for (auto i = 0; i < repeat_array.size(); ++i) { + for (auto j = 0; j < repeat_array[i]; ++j) { + auto tuple = std::make_tuple(old_lids[i]); + res_vec.emplace_back(std::make_tuple(gs::get_from_tuple(tuple)...)); + } + } + return res_vec; +} + +template +auto row_project_with_repeat_array_impl( + const KeyAlias& key_alias, + const std::vector& repeat_array, const std::vector& old_lids, + const std::vector>& old_datas) { + using res_t = std::vector>::type...>>; + + res_t res_vec; + for (auto i = 0; i < repeat_array.size(); ++i) { + for (auto j = 0; j < repeat_array[i]; ++j) { + auto tuple = std::tuple_cat(old_datas[i], std::make_tuple(old_lids[i])); + 
res_vec.emplace_back(std::make_tuple(gs::get_from_tuple(tuple)...)); + } + } + return res_vec; +} + +// We assume the indices are in order, no dup +template +std::tuple, std::vector, std::vector> +row_filter_with_indices_impl(std::vector& indices, + std::vector& vids, + std::vector& datas, + JoinKind join_kind) { + std::vector res_offset; + std::vector res_vids; + std::vector res_datas; + + res_offset.reserve(vids.size() + 1); + + size_t indices_ind = 0; + if (join_kind == JoinKind::InnerJoin) { + res_vids.reserve(indices.size()); + res_datas.reserve(indices.size()); + size_t vid_ind = 0; + res_offset.emplace_back(0); + for (; vid_ind < vids.size(); ++vid_ind) { + while (indices_ind < indices.size() && indices[indices_ind] < vid_ind) { + indices_ind++; + } + if (indices_ind < indices.size()) { + if (indices[indices_ind] == vid_ind) { + res_vids.push_back(vids[vid_ind]); + res_datas.push_back(datas[vid_ind]); + } + } + res_offset.emplace_back(res_vids.size()); + } + CHECK(res_vids.size() == indices.size()); + CHECK(res_datas.size() == indices.size()); + CHECK(res_offset.size() == vids.size() + 1); + } else { + res_vids.reserve(vids.size() - indices.size()); + res_datas.reserve(vids.size() - indices.size()); + res_offset.emplace_back(res_vids.size()); + size_t vid_ind = 0; + while (vid_ind < vids.size()) { + while (indices_ind < indices.size() && indices[indices_ind] < vid_ind) { + indices_ind += 1; + } + if (indices_ind < indices.size()) { + if (indices[indices_ind] != vid_ind) { + res_vids.emplace_back(vids[vid_ind]); + res_datas.push_back(datas[vid_ind]); + } + } else { + res_vids.emplace_back(vids[vid_ind]); + res_datas.push_back(datas[vid_ind]); + } + vid_ind += 1; + res_offset.emplace_back(res_vids.size()); + } + CHECK(res_vids.size() == vids.size() - indices.size()); + CHECK(res_datas.size() == vids.size() - indices.size()); + CHECK(res_offset.size() == vids.size() + 1); + } + // VLOG(10) << "res offset: " << gs::to_string(res_offset); + // VLOG(10) << "res 
vids: " << gs::to_string(res_vids); + return std::make_tuple(std::move(res_vids), std::move(res_datas), + std::move(res_offset)); +} + +// We assume the indices are in order, no dup +template +std::pair, std::vector> +row_filter_with_indices_impl(std::vector& indices, + std::vector& vids, JoinKind join_kind) { + std::vector res_offset; + std::vector res_vids; + + res_offset.reserve(vids.size() + 1); + + size_t indices_ind = 0; + if (join_kind == JoinKind::InnerJoin) { + res_vids.reserve(indices.size()); + size_t vid_ind = 0; + res_offset.emplace_back(0); + for (; vid_ind < vids.size(); ++vid_ind) { + while (indices_ind < indices.size() && indices[indices_ind] < vid_ind) { + indices_ind++; + } + if (indices_ind < indices.size()) { + if (indices[indices_ind] == vid_ind) { + res_vids.push_back(vids[vid_ind]); + } + } + res_offset.emplace_back(res_vids.size()); + } + CHECK(res_vids.size() == indices.size()); + CHECK(res_offset.size() == vids.size() + 1); + } else { + res_vids.reserve(vids.size() - indices.size()); + res_offset.emplace_back(res_vids.size()); + size_t vid_ind = 0; + while (vid_ind < vids.size()) { + while (indices_ind < indices.size() && indices[indices_ind] < vid_ind) { + indices_ind += 1; + } + if (indices_ind < indices.size()) { + if (indices[indices_ind] != vid_ind) { + res_vids.emplace_back(vids[vid_ind]); + } + } else { + res_vids.emplace_back(vids[vid_ind]); + } + vid_ind += 1; + res_offset.emplace_back(res_vids.size()); + } + CHECK(res_vids.size() == vids.size() - indices.size()); + CHECK(res_offset.size() == vids.size() + 1); + } + // VLOG(10) << "res offset: " << gs::to_string(res_offset); + // VLOG(10) << "res vids: " << gs::to_string(res_vids); + return std::make_pair(std::move(res_vids), std::move(res_offset)); +} + +template +std::pair, std::vector> +subSetWithRemovedIndicesImpl(std::vector& removed_indices, + std::vector& indices_range, + std::vector& old_vids) { + CHECK(old_vids.size() == indices_range.back()); + std::vector res_vids; + 
std::vector res_indices_range; + size_t res_ind_left = 0; + // TODO: we can know the size exactly. + res_vids.reserve(old_vids.size()); + size_t removed_ind = 0; + for (auto ind = 0; ind < indices_range.size() - 1; ++ind) { + if (removed_ind >= removed_indices.size() || + ind < removed_indices[removed_ind]) { + res_indices_range.emplace_back(res_ind_left); + int left = indices_range[ind]; + int right = indices_range[ind + 1]; + res_ind_left += (right - left); + for (auto j = left; j < right; ++j) { + res_vids.emplace_back(old_vids[j]); + } + } else if (ind == removed_indices[removed_ind]) { + removed_ind += 1; + } else { + LOG(FATAL) << "not possible" << ind << ", " << removed_ind + << ", :" << gs::to_string(removed_indices) << ", " + << gs::to_string(indices_range); + } + } + res_indices_range.emplace_back(res_ind_left); + return std::make_pair(std::move(res_vids), std::move(res_indices_range)); +} + +template +std::tuple, std::vector, std::vector> +subSetWithRemovedIndicesImpl(std::vector& removed_indices, + std::vector& indices_range, + std::vector& old_vids, + std::vector& old_data) { + CHECK(old_vids.size() == indices_range.back()); + std::vector res_vids; + std::vector res_datas; + std::vector res_indices_range; + size_t res_ind_left = 0; + // TODO: how can we know the size exactly. 
+ // res_vids.reserve(old_vids.size()); + // old_data.reserve() + size_t removed_ind = 0; + for (auto ind = 0; ind < indices_range.size() - 1; ++ind) { + if (removed_ind >= removed_indices.size() || + ind < removed_indices[removed_ind]) { + res_indices_range.emplace_back(res_ind_left); + int left = indices_range[ind]; + int right = indices_range[ind + 1]; + res_ind_left += (right - left); + for (auto j = left; j < right; ++j) { + res_vids.emplace_back(old_vids[j]); + res_datas.emplace_back(old_data[j]); + } + } else if (ind == removed_indices[removed_ind]) { + removed_ind += 1; + } else { + LOG(FATAL) << "not possible" << ind << ", " << removed_ind + << ", :" << gs::to_string(removed_indices) << ", " + << gs::to_string(indices_range); + } + } + res_indices_range.emplace_back(res_ind_left); + return std::make_tuple(std::move(res_vids), std::move(res_datas), + std::move(res_indices_range)); +} + +template +class RowVertexSetImpl { + public: + using lid_t = VID_T; + using data_tuple_t = std::tuple; + using index_ele_tuple_t = std::tuple; + // from this tuple, we can reconstruct the partial set. 
+ using flat_ele_tuple_t = std::tuple>; + using flat_t = RowVertexSetImpl; + using iterator = RowVertexSetIter; + using self_type_t = RowVertexSetImpl; + using EntityValueType = VID_T; + using builder_t = RowVertexSetImplBuilder; + + template + using with_data_t = RowVertexSetImpl; + static constexpr VID_T NULL_VID = std::numeric_limits::max(); + + static constexpr bool is_keyed = false; + static constexpr bool is_vertex_set = true; + static constexpr bool is_row_vertex_set = true; + static constexpr bool is_two_label_set = false; + static constexpr bool is_edge_set = false; + static constexpr bool is_multi_label = false; + static constexpr bool is_general_set = false; + static constexpr bool is_collection = false; + + static constexpr size_t num_props = sizeof...(T); + + explicit RowVertexSetImpl(std::vector&& vids, LabelT v_label, + std::vector&& data_tuples, + std::array&& prop_names) + : vids_(std::move(vids)), + v_label_(v_label), + data_tuples_(std::move(data_tuples)), + prop_names_(std::move(prop_names)) { + CHECK(vids_.size() == data_tuples_.size()); // compare the members: vids/data_tuples were moved from in the initializer list + } + + explicit RowVertexSetImpl( + std::vector&& vids, LabelT v_label, + std::vector&& data_tuples, + const std::array& prop_names) + : vids_(std::move(vids)), + v_label_(v_label), + data_tuples_(std::move(data_tuples)), + prop_names_(prop_names) { + CHECK(vids_.size() == data_tuples_.size()); // compare the members: vids/data_tuples were moved from in the initializer list + } + + RowVertexSetImpl(self_type_t&& other) noexcept + : vids_(std::move(other.vids_)), + v_label_(other.v_label_), + data_tuples_(std::move(other.data_tuples_)), + prop_names_(std::move(other.prop_names_)) {} + + RowVertexSetImpl(const self_type_t& other) noexcept + : vids_(other.vids_), + v_label_(other.v_label_), + data_tuples_(other.data_tuples_), + prop_names_(other.prop_names_) {} + + iterator begin() const { return iterator(vids_, data_tuples_, 0); } + + iterator end() const { return iterator(vids_, data_tuples_, vids_.size()); } + + size_t Size() const { return vids_.size(); } + + builder_t CreateBuilder() const { 
return builder_t(v_label_, prop_names_); } + + void Repeat(std::vector& cur_offset, + std::vector& repeat_vec) { + CHECK(cur_offset.size() == repeat_vec.size()); + std::vector res_vec; + std::vector res_datas; + res_vec.reserve(repeat_vec.back()); + for (auto i = 0; i + 1 < cur_offset.size(); ++i) { + auto times_to_repeat = repeat_vec[i + 1] - repeat_vec[i]; + for (auto j = 0; j < times_to_repeat; ++j) { + for (auto k = cur_offset[i]; k < cur_offset[i + 1]; ++k) { + res_vec.emplace_back(vids_[k]); + res_datas.emplace_back(data_tuples_[k]); + } + } + } + vids_.swap(res_vec); + data_tuples_.swap(res_datas); + } + + // create a copy of the vertices, data tuples, label and property names + self_type_t CreateCopy() const { + std::vector copied_lids = vids_; + std::vector copied_data = data_tuples_; + return self_type_t(std::move(copied_lids), v_label_, + std::move(copied_data), prop_names_); // the class only declares constructors that take prop_names + } + + const LabelT& GetLabel() const { return v_label_; } + + const std::vector& GetVertices() const { return vids_; } + + std::vector& GetMutableVertices() { return vids_; } + + const auto& GetDataVec() const { return data_tuples_; } + + const std::array& GetPropNames() const { + return prop_names_; + } + std::vector&& MoveVertices() { return std::move(vids_); } + + std::vector GenerateKeys() const { + std::vector res; + res.reserve(vids_.size()); + for (auto i = 0; i < vids_.size(); ++i) { + res.emplace_back(i); + } + return res; + } + + // According to the given indices, filter in place. 
+ void SubSetWithIndices(std::vector& indices) { + // VLOG(10) << "subset with " << gs::to_string(indices); + auto vids_and_tuples = RowSetSubSetImpl(vids_, data_tuples_, indices); + vids_.swap(vids_and_tuples.first); + data_tuples_.swap(vids_and_tuples.second); + // VLOG(10) << "after subset: " << vids_.size(); + } + + auto WithIndices(std::vector& indices) { + auto vids_and_tuples = RowSetSubSetImpl(vids_, data_tuples_, indices); + return self_type_t(std::move(vids_and_tuples.first), v_label_, + std::move(vids_and_tuples.second), prop_names_); + } + + // all dedup are done inplace + std::vector Dedup() { + std::vector vids; + std::vector data_tuples; + auto offset = RowSetDedupImpl(vids_, data_tuples_, vids, data_tuples); + vids_.swap(vids); + data_tuples_.swap(data_tuples); + return offset; + } + + // Filter current vertices with expression. + template + std::pair> Filter(EXPR&& expr) { + // Expression contains the property name, we extract vertex store here. + static constexpr size_t num_args = EXPR::num_args; + static_assert(num_args == sizeof...(T)); + + size_t cur = 0; + std::vector offset; + std::vector res_lids; + std::vector res_data_tuples; + offset.reserve(Size() + 1); + for (auto iter : *this) { + offset.emplace_back(cur); + if (expr(iter.GetVertex())) { + res_lids.emplace_back(iter.GetVertex()); + res_data_tuples.emplace_back(iter.GetData()); + cur += 1; + } + } + offset.emplace_back(cur); + auto new_set = self_type_t(std::move(res_lids), v_label_, + std::move(res_data_tuples), prop_names_); + return std::make_pair(std::move(new_set), std::move(offset)); + } + + // Usually after + template + flat_t Flat(std::vector>& index_ele_tuple) { + static_assert(col_ind < + std::tuple_size_v>); + auto res_vids_and_data_tuples = + rowSetFlatImpl(index_ele_tuple, vids_, data_tuples_); + return self_type_t(std::move(res_vids_and_data_tuples.first), v_label_, + std::move(res_vids_and_data_tuples.second), prop_names_); + } + + // size_t... 
Is denotes the ind of data array need to project. + //-1 denote it self. + template < + int tag_id, int Fs, int... Is, + typename std::enable_if<(sizeof...(Is) > 0)>::type* = nullptr, + typename res_t = Collection>::type, + typename gs::tuple_element>::type...>>> + res_t ProjectWithRepeatArray(const std::vector& repeat_array, + KeyAlias& key_alias) const { + auto res_vec = row_project_with_repeat_array_impl(key_alias, repeat_array, + vids_, data_tuples_); + return res_t(std::move(res_vec)); + } + + template ::type* = nullptr, + typename res_t = Collection>::type>>> + res_t ProjectWithRepeatArray(const std::vector& repeat_array, + KeyAlias& key_alias) const { + auto res_vec = row_project_with_repeat_array_impl(key_alias, repeat_array, + vids_, data_tuples_); + return res_t(std::move(res_vec)); + } + + // project my self. + template ::type* = nullptr> + self_type_t ProjectWithRepeatArray(const std::vector& repeat_array, + KeyAlias& key_alias) const { + std::vector vids; + std::vector data_tuples; + for (auto i = 0; i < repeat_array.size(); ++i) { + for (auto j = 0; j < repeat_array[i]; ++j) { + // VLOG(10) << "Project: " << vids_[i]; + vids.push_back(vids_[i]); + data_tuples.push_back(data_tuples_[i]); + } + } + + return self_type_t(std::move(vids), v_label_, std::move(data_tuples), + std::move(prop_names_)); + } + + // project vertices when expression udf + template )>::type* = nullptr, + typename RES_SET_T = self_type_t, + typename RES_T = std::pair>> + RES_T project_vertices(std::array& labels, + EXPRESSION& expr, + std::array& prop_getters) const { + auto new_lids_datas_and_offset = row_project_vertices_impl( + vids_, data_tuples_, v_label_, labels, expr, prop_getters); + self_type_t res_set( + std::move(std::get<0>(new_lids_datas_and_offset)), v_label_, + std::move(std::get<1>(new_lids_datas_and_offset)), prop_names_); + + return std::make_pair(std::move(res_set), + std::move(std::get<2>(new_lids_datas_and_offset))); + } + + // only project certain labels, 
without any expression. + template >> + RES_T project_vertices(std::array& labels) const { + auto new_lids_datas_and_offset = + select_labels(vids_, data_tuples_, v_label_, labels); + self_type_t res_set( + std::move(std::get<0>(new_lids_datas_and_offset)), v_label_, + std::move(std::get<1>(new_lids_datas_and_offset)), prop_names_); + + return std::make_pair(std::move(res_set), + std::move(std::get<2>(new_lids_datas_and_offset))); + } + + std::vector FilterWithIndices(std::vector& offset, + JoinKind join_kind) { + auto tuple = + row_filter_with_indices_impl(offset, vids_, data_tuples_, join_kind); + vids_.swap(std::get<0>(tuple)); + data_tuples_.swap(std::get<1>(tuple)); + return std::get<2>(tuple); + } + + template + void fillBuiltinProps(std::vector>& tuples, + const PropNameArray& prop_names, + const std::vector& repeat_array) { + fillBuiltinPropsImpl(data_tuples_, prop_names_, tuples, prop_names, + repeat_array); + } + + // fill builtin props without repeat array (every vertex is repeated once). + template + void fillBuiltinProps(std::vector>& tuples, + const PropNameArray& prop_names) { + std::vector repeat_array(vids_.size(), 1); + fillBuiltinPropsImpl(data_tuples_, prop_names_, tuples, prop_names, + repeat_array); + } + + // In place: vids_, data_tuples_ and prop_names_ are moved out of this set into the returned one. + template + RowVertexSetImpl WithNewData( + std::vector>&& new_datas) { + CHECK(vids_.size() == new_datas.size()); // the loop below indexes new_datas[i] for every i < vids_.size() + std::vector> new_data_tuples; + new_data_tuples.reserve(vids_.size()); + for (auto i = 0; i < vids_.size(); ++i) { + new_data_tuples.emplace_back( + std::tuple_cat(std::move(data_tuples_[i]), std::move(new_datas[i]))); + } + return RowVertexSetImpl( + std::move(vids_), v_label_, std::move(new_data_tuples), + std::move(prop_names_)); + } + + std::vector SubSetWithRemovedIndices( + std::vector& removed_indices, + std::vector& indices_range) { + auto vids_and_new_offset_range = subSetWithRemovedIndicesImpl( + removed_indices, indices_range, vids_, data_tuples_); + vids_.swap(std::get<0>(vids_and_new_offset_range)); + 
data_tuples_.swap(std::get<1>(vids_and_new_offset_range)); + return std::get<2>(vids_and_new_offset_range); + } + + private: + std::vector vids_; + std::vector data_tuples_; + LabelT v_label_; + std::array prop_names_; +}; + +template +class RowVertexSetImpl { + public: + using lid_t = VID_T; + using data_tuple_t = std::tuple; + // from this tuple, we can reconstruct the partial set. + using flat_ele_tuple_t = std::tuple; + + using index_ele_tuple_t = std::tuple; + using flat_t = RowVertexSetImpl; + using iterator = RowVertexSetIter; + using self_type_t = RowVertexSetImpl; + using EntityValueType = VID_T; + using builder_t = RowVertexSetImplBuilder; + + template + using with_data_t = RowVertexSetImpl; + + static constexpr VID_T NULL_VID = std::numeric_limits::max(); + + static constexpr bool is_keyed = false; + static constexpr bool is_vertex_set = true; + static constexpr bool is_row_vertex_set = true; + static constexpr bool is_two_label_set = false; + static constexpr bool is_edge_set = false; + static constexpr bool is_multi_label = false; + static constexpr bool is_general_set = false; + static constexpr bool is_collection = false; + + static constexpr size_t num_props = 0; + + explicit RowVertexSetImpl(std::vector&& vids, LabelT v_label) + : vids_(std::move(vids)), v_label_(v_label) {} + + RowVertexSetImpl(self_type_t&& other) noexcept + : vids_(std::move(other.vids_)), v_label_(other.v_label_) {} + + RowVertexSetImpl(const self_type_t& other) noexcept + : vids_(other.vids_), v_label_(other.v_label_) {} + + iterator begin() const { return iterator(vids_, 0); } + + iterator end() const { return iterator(vids_, vids_.size()); } + + size_t Size() const { return vids_.size(); } + + void Repeat(std::vector& cur_offset, + std::vector& repeat_vec) { + CHECK(cur_offset.size() == repeat_vec.size()); + std::vector res_vec; + res_vec.reserve(repeat_vec.back()); + for (auto i = 0; i + 1 < cur_offset.size(); ++i) { + auto times_to_repeat = repeat_vec[i + 1] - 
repeat_vec[i]; + for (auto j = 0; j < times_to_repeat; ++j) { + for (auto k = cur_offset[i]; k < cur_offset[i + 1]; ++k) { + res_vec.emplace_back(vids_[k]); + // VLOG(10) << "j: "< copied_lids = vids_; + return self_type_t(std::move(copied_lids), v_label_); + } + + const LabelT& GetLabel() const { return v_label_; } + + const std::vector& GetVertices() const { return vids_; } + std::vector& GetMutableVertices() { return vids_; } + std::vector&& MoveVertices() { return std::move(vids_); } + + std::vector GenerateKeys() const { + std::vector res; + res.reserve(vids_.size()); + for (auto i = 0; i < vids_.size(); ++i) { + res.emplace_back(i); + } + return res; + } + + // According to the given indices, filter in place. + void SubSetWithIndices(std::vector& indices) { + auto vids = RowSetSubSetImpl(vids_, indices); + vids_.swap(vids); + VLOG(10) << "after subset: " << vids_.size(); + } + + auto WithIndices(std::vector& indices) const { + auto vids = RowSetSubSetImpl(vids_, indices); + return self_type_t(std::move(vids), v_label_); + } + + // all dedup are done inplace + std::vector Dedup() { + std::vector vids; + auto offset = RowSetDedupImpl(vids_, vids); + vids_.swap(vids); + return offset; + } + + // Filter current vertices with expression. + template + std::pair> Filter(EXPR&& expr) { + // Expression contains the property name, we extract vertex store here. 
+ static constexpr size_t num_args = EXPR::num_args; + + size_t cur = 0; + std::vector offset; + std::vector res_lids; + offset.reserve(Size() + 1); + for (auto iter : *this) { + offset.emplace_back(cur); + if (expr(iter.GetVertex())) { + res_lids.emplace_back(iter.GetVertex()); + cur += 1; + } + } + offset.emplace_back(cur); + auto new_set = self_type_t(std::move(res_lids), v_label_); + return std::make_pair(std::move(new_set), std::move(offset)); + } + + // Usually after + template + flat_t Flat(std::vector>& index_ele_tuple) { + static_assert(col_ind < + std::tuple_size_v>); + auto res_vids = rowSetFlatImpl(index_ele_tuple, vids_); + return self_type_t(std::move(res_vids), v_label_); + } + + // size_t... Is denotes the ind of data array need to project. + //-1 denote it self. + template 0)>::type* = nullptr, + typename res_t = Collection>::type, + typename gs::tuple_element>::type...>>> + res_t ProjectWithRepeatArray(const std::vector& repeat_array, + KeyAlias& key_alias) const { + auto res_vec = + row_project_with_repeat_array_impl(key_alias, repeat_array, vids_); + return res_t(std::move(res_vec)); + } + + template < + int tag_id, int Fs, typename std::enable_if::type* = nullptr, + typename res_t = Collection< + std::tuple>::type>>> + res_t ProjectWithRepeatArray(const std::vector& repeat_array, + KeyAlias& key_alias) const { + auto res_vec = + row_project_with_repeat_array_impl(key_alias, repeat_array, vids_); + return res_t(std::move(res_vec)); + } + + // project my self. 
+ template ::type* = nullptr> + self_type_t ProjectWithRepeatArray(const std::vector& repeat_array, + KeyAlias& key_alias) const { + std::vector vids; + for (auto i = 0; i < repeat_array.size(); ++i) { + for (auto j = 0; j < repeat_array[i]; ++j) { + // VLOG(10) << "Project: " << vids_[i]; + vids.push_back(vids_[i]); + } + } + return self_type_t(std::move(vids), v_label_); + } + + template )>::type* = nullptr, + typename RES_SET_T = self_type_t, + typename RES_T = std::pair>> + RES_T project_vertices(std::array& labels, + EXPRESSION& exprs, + std::array& prop_getter) const { + // TODO: vector-based cols should be able to be selected with certain rows. + + auto new_lids_and_offsets = + row_project_vertices_impl(vids_, v_label_, labels, exprs, prop_getter); + self_type_t res_set(std::move(new_lids_and_offsets.first), v_label_); + + return std::make_pair(std::move(res_set), + std::move(new_lids_and_offsets.second)); + } + + // only project certain labels, without any expression. + template >> + RES_T project_vertices(std::array& labels) const { + // TODO: vector-based cols should be able to be selected with certain rows. 
+ + auto new_lids_datas_and_offset = select_labels(vids_, v_label_, labels); + self_type_t res_set(std::move(new_lids_datas_and_offset.first), v_label_); + + return std::make_pair(std::move(res_set), + std::move(new_lids_datas_and_offset.second)); + } + + std::vector FilterWithIndices(std::vector& offset, + JoinKind join_kind) { + auto pair = row_filter_with_indices_impl(offset, vids_, join_kind); + vids_.swap(pair.first); + return pair.second; + } + + template + void fillBuiltinProps(std::vector>& tuples, + const PropNameArray& prop_names, + const std::vector& repeat_array) const { + VLOG(10) << "Skip filling bulitin props for empty prop row vertex set"; + } + + template + void fillBuiltinProps(std::vector>& tuples, + const PropNameArray& prop_names) const { + VLOG(10) << "Skip filling bulitin props for empty prop row vertex set"; + } + + // In places + template + RowVertexSetImpl WithNewData( + std::vector>&& new_datas) { + CHECK(vids_.size() == new_datas.size()); + return RowVertexSetImpl(std::move(vids_), v_label_, + std::move(new_datas)); + } + + // Removed_indices is not repest to current set's indices. + // It refer to the indices_range's index. 
+ // removed = [1] + // indices_range = [0, 3, 5, 8] + // Then we should remove eles in [3,5) + // indices became + // [0, 3, 6], + // num _elemenst 8 -> 6 + // return the new offset range + std::vector SubSetWithRemovedIndices( + std::vector& removed_indices, + std::vector& indices_range) { + auto vids_and_new_offset_range = + subSetWithRemovedIndicesImpl(removed_indices, indices_range, vids_); + vids_.swap(vids_and_new_offset_range.first); + return vids_and_new_offset_range.second; + } + + private: + std::vector vids_; + LabelT v_label_; +}; + +template +using RowVertexSet = RowVertexSetImpl; + +template +using DefaultRowVertexSet = RowVertexSet; + +template +auto make_row_vertex_set(std::vector&& lids, LabelT label, + std::vector>&& data_tuples, + std::array&& prop_strs) { + return RowVertexSet( + std::move(lids), label, std::move(data_tuples), std::move(prop_strs)); +} + +template +auto MakeDefaultRowVertexSet(std::vector&& lids, LabelT label) { + return DefaultRowVertexSet(std::move(lids), label); +} + +} // namespace gs + +#endif // ENGINES_HQPS_DS_MULTI_VERTEX_SET_ROW_VERTEX_SET_H_ diff --git a/flex/engines/hqps_db/structures/multi_vertex_set/two_label_vertex_set.h b/flex/engines/hqps_db/structures/multi_vertex_set/two_label_vertex_set.h new file mode 100644 index 000000000000..d5c008f3cac2 --- /dev/null +++ b/flex/engines/hqps_db/structures/multi_vertex_set/two_label_vertex_set.h @@ -0,0 +1,1315 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef ENGINES_HQPS_DS_MULTI_VERTEX_SET_TWO_LABEL_VERTEX_SET_H_ +#define ENGINES_HQPS_DS_MULTI_VERTEX_SET_TWO_LABEL_VERTEX_SET_H_ + +#include +#include +#include +#include + +#include "grape/util.h" +#include "grape/utils/bitset.h" + +namespace gs { + +template +class TwoLabelVertexSetImpl; + +template +class TwoLabelVertexSetImplBuilder { + public: + using res_t = TwoLabelVertexSetImpl; + using ele_tuple_t = std::tuple; + using data_tuple_t = std::tuple; + using index_ele_tuple_t = std::tuple; + + static constexpr bool is_row_vertex_set_builder = false; + static constexpr bool is_flat_edge_set_builder = false; + static constexpr bool is_general_edge_set_builder = false; + static constexpr bool is_two_label_set_builder = true; + static constexpr bool is_collection_builder = false; + + TwoLabelVertexSetImplBuilder( + size_t size, const std::array& labels, + const std::array& props) + : labels_(labels), props_(props) { + vec_.reserve(size); + data_.reserve(size); + bitset_.init(size); + } + + TwoLabelVertexSetImplBuilder( + const TwoLabelVertexSetImplBuilder& other) { + vec_.reserve(other.vec_.capacity()); // NOTE(review): only capacity is reserved; elements of other.vec_/other.data_ are not copied although bitset_ is - confirm this is intended + data_.reserve(other.data_.capacity()); + bitset_.copy(other.bitset_); + labels_ = other.labels_; props_ = other.props_; // props_ must be copied too, otherwise Build() on a copy yields default-constructed property names + } + + void Insert(const index_ele_tuple_t& tuple, const data_tuple_t& data) { + vec_.emplace_back(std::get<2>(tuple)); + data_.emplace_back(data); + if (std::get<1>(tuple) == 0) { // the bit is set only for vertices whose label index is 0 + auto new_size = vec_.size(); + while (new_size > bitset_.cardinality()) { + bitset_.resize(bitset_.cardinality() * 2); + } + bitset_.set_bit(vec_.size() - 1); + } + } + + res_t Build() { + VLOG(10) << "Try to resize from " << bitset_.cardinality() << ", to " + << vec_.size(); + bitset_.resize(vec_.size()); + return res_t(std::move(vec_), std::move(data_), std::move(labels_), + std::move(props_), std::move(bitset_)); + } + + size_t Size() const { return vec_.size(); } + + private: + 
std::vector vec_; + std::vector data_; + std::array labels_; + std::array props_; + grape::Bitset bitset_; +}; + +template +class TwoLabelVertexSetImplBuilder { + public: + using res_t = TwoLabelVertexSetImpl; + using ele_tuple_t = std::tuple; + using index_ele_tuple_t = std::tuple; + + static constexpr bool is_row_vertex_set_builder = false; + static constexpr bool is_flat_edge_set_builder = false; + static constexpr bool is_general_edge_set_builder = false; + static constexpr bool is_two_label_set_builder = true; + static constexpr bool is_collection_builder = false; + + TwoLabelVertexSetImplBuilder(size_t size, const std::array& labels) + : labels_(labels) { + VLOG(10) << "two label set:" << std::to_string(labels[0]) << " " + << std::to_string(labels[1]); + vec_.reserve(size); + bitset_.init(size); + } + + TwoLabelVertexSetImplBuilder( + const TwoLabelVertexSetImplBuilder& + other) { + vec_ = other.vec_; + bitset_.copy(other.bitset_); + labels_ = other.labels_; + } + + void Insert(const index_ele_tuple_t& tuple) { + //(ind, label_ind, vid) + vec_.emplace_back(std::get<2>(tuple)); + if (std::get<1>(tuple) == 0) { + auto new_size = vec_.size(); + while (new_size > bitset_.cardinality()) { + bitset_.resize(bitset_.cardinality() * 2); + } + bitset_.set_bit(vec_.size() - 1); + } + } + + res_t Build() { + VLOG(10) << "Try to resize from " << bitset_.cardinality() << ", to " + << vec_.size(); + bitset_.resize(vec_.size()); + return res_t(std::move(vec_), std::move(labels_), std::move(bitset_)); + } + + size_t Size() const { return vec_.size(); } + + private: + std::vector vec_; + std::array labels_; + grape::Bitset bitset_; +}; +template +std::vector filter_labels( + const std::array& filter_labels, + const std::array& old_labels) { + std::vector label_flag(2, false); + if constexpr (filter_label_num == 0) { + // set label_flat to all true + label_flag[0] = true; + label_flag[1] = true; + } else { + std::unordered_set set; + for (auto l : filter_labels) { + 
set.insert(l); + } + for (auto i = 0; i < old_labels.size(); ++i) { + if (set.find(old_labels[i]) != set.end()) { + label_flag[i] = true; + } + } + } + return label_flag; +} + +// return the old labels, that are active in filter. +template +auto two_label_project_vertices_impl( + const std::vector& old_vec, const grape::Bitset& old_bit_set, + const std::array& old_labels, + const std::array& filtering_labels, + const EXPR& expr, const std::array& prop_getters) { + std::vector res_vec; + grape::Bitset res_bitset; + // reserve enough size for bitset. + res_bitset.init(old_vec.size()); + + std::vector label_flag = filter_labels(filtering_labels, old_labels); + std::vector offset; + res_vec.reserve(old_vec.size()); + offset.reserve(old_vec.size() + 1); + + offset.emplace_back(0); + double t0 = -grape::GetCurrentTime(); + for (auto i = 0; i < old_vec.size(); ++i) { + if (old_bit_set.get_bit(i) && label_flag[0]) { + auto vid = old_vec[i]; + if (std::apply(expr, prop_getters[0].get_view(vid))) { + res_bitset.set_bit(res_vec.size()); + res_vec.emplace_back(old_vec[i]); + } + } else if (label_flag[1] && + std::apply(expr, prop_getters[1].get_view(old_vec[i]))) { + res_vec.emplace_back(old_vec[i]); + } + + offset.emplace_back(res_vec.size()); + } + t0 += grape::GetCurrentTime(); + VLOG(10) << "expr + copy cost: " << t0; + + res_bitset.resize(res_vec.size()); + + return std::make_tuple(std::move(res_vec), std::move(res_bitset), + std::move(offset)); +} + +// filter with labels. +template +auto two_label_project_vertices_impl( + const std::vector& old_vec, const grape::Bitset& old_bit_set, + const std::array& old_labels, + const std::array& filtering_labels) { + std::vector res_vec; + grape::Bitset res_bitset; + // reserve enough size for bitset. 
+ res_bitset.init(old_vec.size()); + + auto label_flag = filter_labels(filtering_labels, old_labels); + + std::vector offset; + res_vec.reserve(old_vec.size()); + offset.reserve(old_vec.size() + 1); + + offset.emplace_back(0); + double t0 = -grape::GetCurrentTime(); + for (auto i = 0; i < old_vec.size(); ++i) { + if (old_bit_set.get_bit(i) && label_flag[0]) { + res_bitset.set_bit(res_vec.size()); + res_vec.emplace_back(old_vec[i]); + } else if (label_flag[1]) { + res_vec.emplace_back(old_vec[i]); + } + offset.emplace_back(res_vec.size()); + } + t0 += grape::GetCurrentTime(); + VLOG(10) << "expr + copy cost: " << t0; + + res_bitset.resize(res_vec.size()); + + return std::make_tuple(std::move(res_vec), std::move(res_bitset), + std::move(offset)); +} + +template +auto two_label_project_vertices_impl( + const std::vector& old_vec, + const std::vector& old_data, const grape::Bitset& old_bit_set, + const std::array& old_labels, + const std::array& filtering_labels, + const EXPR& expr, const std::array& prop_getters) { + std::vector res_vec; + std::vector res_data; + grape::Bitset res_bitset; + // reserve enough size for bitset. + res_bitset.init(old_vec.size()); + + std::vector label_flag = filter_labels(filtering_labels, old_labels); + std::vector offset; + + offset.emplace_back(0); + for (auto i = 0; i < old_vec.size(); ++i) { + if (old_bit_set.get_bit(i) && label_flag[0]) { + auto vid = old_vec[i]; + if (std::apply(expr, prop_getters[0](vid))) { + res_bitset.set_bit(res_vec.size()); + res_vec.emplace_back(old_vec[i]); + res_data.emplace_back(old_data[i]); + } + } else if (label_flag[1] && std::apply(expr, prop_getters[1](old_vec[i]))) { + res_vec.emplace_back(old_vec[i]); + res_data.emplace_back(old_data[i]); + } + + offset.emplace_back(res_vec.size()); + } + + res_bitset.resize(res_vec.size()); + + return std::make_tuple(std::move(res_vec), std::move(res_data), + std::move(res_bitset), std::move(offset)); +} + +// filter with labels. 
+template +auto two_label_project_vertices_impl( + const std::vector& old_vec, + const std::vector& old_data, const grape::Bitset& old_bit_set, + const std::array& old_labels, + const std::array& filtering_labels) { + std::vector res_vec; + std::vector res_data; + grape::Bitset res_bitset; + // reserve enough size for bitset. + res_bitset.init(old_vec.size()); + + std::vector label_flag = filter_labels(filtering_labels, old_labels); + std::vector offset; + + offset.emplace_back(0); + for (auto i = 0; i < old_vec.size(); ++i) { + if (old_bit_set.get_bit(i) && label_flag[0]) { + auto vid = old_vec[i]; + res_bitset.set_bit(res_vec.size()); + res_vec.emplace_back(old_vec[i]); + res_data.emplace_back(old_data[i]); + } else if (label_flag[1]) { + res_vec.emplace_back(old_vec[i]); + res_data.emplace_back(old_data[i]); + } + + offset.emplace_back(res_vec.size()); + } + + res_bitset.resize(res_vec.size()); + + return std::make_tuple(std::move(res_vec), std::move(res_data), + std::move(res_bitset), std::move(offset)); +} + +template ::type* = nullptr> +void filter_with_select_prop( + const std::vector& label_flag, const std::vector& old_vec, + const std::vector>& old_data, + const grape::Bitset& old_bitset, std::vector& res_vec, + std::vector>& res_data, grape::Bitset& res_bitset, + std::vector& offset, const NamedProperty& query_prop, + const std::array& my_prop, EXPR& expr) { + using indexed_prop_t = std::tuple_element_t>; + if constexpr (std::is_same_v) { + if (query_prop.names[0] == my_prop.names[Is]) { + VLOG(10) << "Found satisfied prop: " << query_prop.names[0] + << " at index: " << Is; + offset.emplace_back(0); + res_vec.reserve(old_vec.size() / 2); + res_data.reserve(old_vec.size() / 2); + + for (auto i = 0; i < old_vec.size(); ++i) { + auto& data = old_data[i]; + if (expr(std::get(data))) { + if (old_bitset.get_bit(i) && label_flag[0]) { + res_bitset.set_bit(res_vec.size()); + res_vec.emplace_back(old_vec[i]); + res_data.emplace_back(old_data[i]); + } else if 
(label_flag[1]) { + res_vec.emplace_back(old_vec[i]); + res_data.emplace_back(old_data[i]); + } + } + offset.emplace_back(res_vec.size()); + } + return; + } + } else { + filter_with_select_prop(label_flag, old_vec, old_data, old_bitset, + res_vec, res_data, res_bitset, offset, + query_prop, my_prop, expr); + } +} + +template = sizeof...(T))>::type* = nullptr> +void filter_with_select_prop( + const std::vector& label_flag, const std::vector& old_vec, + const std::vector>& old_data, + const grape::Bitset& old_bitset, std::vector& res_vec, + std::vector>& res_data, grape::Bitset& res_bitset, + std::vector& offset, const NamedProperty& query_prop, + const std::array& my_prop, EXPR& expr) { + LOG(FATAL) << "Query property: " << gs::to_string(query_prop.names) + << "not found in :" << gs::to_string(my_prop.names); +} + +// Implementation for the required data is already in data vector. +template +auto two_label_project_vertices_internal_impl( + const std::vector& old_vec, + const std::vector>& old_data, + const grape::Bitset& old_bitset, const std::array& old_labels, + const std::array& filter_labels, + const std::array& old_prop_names, + const EXPR& expr) { + std::vector res_vec; + std::vector> res_data; + grape::Bitset res_bitset; + // reserve enough size for bitset. + res_bitset.init(old_vec.size()); + + std::vector label_flag(2, false); + if constexpr (filter_num_labels == 0) { + // set label_flat to all true + label_flag[0] = true; + label_flag[1] = true; + } else { + std::unordered_set set; + for (auto l : filter_labels) { + set.insert(l); + } + for (auto i = 0; i < old_labels.size(); ++i) { + if (set.find(old_labels[i]) != set.end()) { + label_flag[i] = true; + } + } + } + VLOG(10) << "selected label ids: " + << ", out of size: " << old_labels.size(); + + // check the required property. 
+ std::vector offset; + auto expr_prop = expr.Properties(); + filter_with_select_prop<0>(label_flag, old_vec, old_data, old_bitset, res_vec, + res_data, res_bitset, offset, expr_prop, + old_prop_names, expr); + + res_bitset.resize(res_vec.size()); + VLOG(10) << "filter " << res_vec.size() + << " from old set of size: " << old_vec.size() + << ", label0 cnt: " << res_bitset.count() << "/" + << res_bitset.cardinality(); + + return std::make_tuple(std::move(res_vec), std::move(res_data), + std::move(res_bitset), std::move(offset)); +} + +template +auto twoLabelSetFlatImpl( + std::vector>& index_ele_tuples, + const std::vector& origin_vids, const grape::Bitset& origin_bitset) { + size_t dst_size = index_ele_tuples.size(); + std::vector res_vids; + grape::Bitset res_bitset; + res_vids.reserve(dst_size); + res_bitset.init(dst_size); + + for (auto ele : index_ele_tuples) { + auto& cur = std::get(ele); + //(ind, vid) + auto ind = std::get<0>(cur); + CHECK(ind < origin_vids.size()); + + if (origin_bitset.get_bit(ind)) { + res_bitset.set_bit(res_vids.size()); + } + + res_vids.emplace_back(origin_vids[ind]); + } + return std::make_pair(std::move(res_vids), std::move(res_bitset)); +} + +template +auto twoLabelSetFlatImpl( + std::vector>& index_ele_tuples, + const std::vector& origin_vids, + const std::vector& origin_data, + const grape::Bitset& origin_bitset) { + size_t dst_size = index_ele_tuples.size(); + std::vector res_vids; + std::vector res_data; + grape::Bitset res_bitset; + res_vids.reserve(dst_size); + res_data.reserve(dst_size); + res_bitset.init(dst_size); + + for (auto ele : index_ele_tuples) { + auto& cur = std::get(ele); + //(ind, vid) + auto ind = std::get<0>(cur); + CHECK(ind < origin_vids.size()); + + if (origin_bitset.get_bit(ind)) { + res_bitset.set_bit(res_vids.size()); + } + + res_vids.emplace_back(origin_vids[ind]); + res_data.emplace_back(origin_data[ind]); + } + return std::make_tuple(std::move(res_vids), std::move(res_data), + std::move(res_bitset)); +} 
+ +template +class TwoLabelVertexSetIter { + public: + using lid_t = VID_T; + using self_type_t = TwoLabelVertexSetIter; + using ele_tuple_t = std::pair; + using index_ele_tuple_t = std::tuple; + + using data_tuple_t = std::tuple; + + TwoLabelVertexSetIter(const std::vector& vec, + const std::vector& data, + const grape::Bitset& bitset, size_t ind) + : vec_(vec), data_(data), bitset_(bitset), ind_(ind) {} + + ele_tuple_t GetElement() const { + if (bitset_.get_bit(ind_)) { + return std::make_pair(0, vec_[ind_]); + } else { + return std::make_pair(1, vec_[ind_]); + } + } + + data_tuple_t GetData() const { return data_[ind_]; } + + index_ele_tuple_t GetIndexElement() const { + if (bitset_.get_bit(ind_)) { + return std::make_tuple(ind_, 0, vec_[ind_]); + } else { + return std::make_tuple(ind_, 1, vec_[ind_]); + } + } + + lid_t GetVertex() const { return vec_[ind_]; } + + inline const self_type_t& operator++() { + ++ind_; + return *this; + } + + inline self_type_t operator++(int) { + self_type_t ret(*this); + ++ind_; + return ret; + } + + // We may never compare to other kind of iterators + inline bool operator==(const self_type_t& rhs) const { + return ind_ == rhs.ind_; + } + + inline bool operator!=(const self_type_t& rhs) const { + return ind_ != rhs.ind_; + } + + inline bool operator<(const self_type_t& rhs) const { + return ind_ < rhs.ind_; + } + + inline const self_type_t& operator*() const { return *this; } + + inline const self_type_t* operator->() const { return this; } + + private: + const std::vector& vec_; + const std::vector& data_; + const grape::Bitset& bitset_; + size_t ind_; +}; + +template +class TwoLabelVertexSetIter { + public: + using lid_t = VID_T; + using self_type_t = TwoLabelVertexSetIter; + using ele_tuple_t = std::pair; + using index_ele_tuple_t = std::tuple; + + using data_tuple_t = std::tuple; + + TwoLabelVertexSetIter(const std::vector& vec, + const grape::Bitset& bitset, size_t ind) + : vec_(vec), bitset_(bitset), ind_(ind) {} + + 
ele_tuple_t GetElement() const { + if (bitset_.get_bit(ind_)) { + return std::make_pair(0, vec_[ind_]); + } else { + return std::make_pair(1, vec_[ind_]); + } + } + + data_tuple_t GetData() const { return std::make_tuple(grape::EmptyType()); } + + index_ele_tuple_t GetIndexElement() const { + if (bitset_.get_bit(ind_)) { + return std::make_tuple(ind_, 0, vec_[ind_]); + } else { + return std::make_tuple(ind_, 1, vec_[ind_]); + } + } + + lid_t GetVertex() const { return vec_[ind_]; } + + inline const self_type_t& operator++() { + ++ind_; + return *this; + } + + inline self_type_t operator++(int) { + self_type_t ret(*this); + ++ind_; + return ret; + } + + // We may never compare to other kind of iterators + inline bool operator==(const self_type_t& rhs) const { + return ind_ == rhs.ind_; + } + + inline bool operator!=(const self_type_t& rhs) const { + return ind_ != rhs.ind_; + } + + inline bool operator<(const self_type_t& rhs) const { + return ind_ < rhs.ind_; + } + + inline const self_type_t& operator*() const { return *this; } + + inline const self_type_t* operator->() const { return this; } + + private: + const std::vector& vec_; + const grape::Bitset& bitset_; + size_t ind_; +}; + +/* General vertex set, can contain multiple label vertices. 
+ */ +template +class TwoLabelVertexSetImpl { + public: + using lid_t = VID_T; + using self_type_t = TwoLabelVertexSetImpl; + using iterator = TwoLabelVertexSetIter; + using index_ele_tuple_t = std::tuple; + using data_tuple_t = std::tuple; + using flat_t = self_type_t; + using EntityValueType = VID_T; + using builder_t = TwoLabelVertexSetImplBuilder; + + static constexpr bool is_vertex_set = true; + static constexpr bool is_general_set = false; + static constexpr bool is_two_label_set = true; + static constexpr size_t num_labels = 2; + static constexpr size_t num_props = sizeof...(T); + static constexpr bool is_collection = false; + static constexpr bool is_multi_label = false; + TwoLabelVertexSetImpl(std::vector&& vec, + std::vector>&& data_tuple, + std::array&& label_names, + std::array&& named_property, + grape::Bitset&& bitset) + : vec_(std::move(vec)), + data_tuple_(std::move(data_tuple)), + label_names_(std::move(label_names)), + named_property_(std::move(named_property)) { + CHECK(vec.size() == data_tuple.size()); + bitset_.swap(bitset); + } + + TwoLabelVertexSetImpl(TwoLabelVertexSetImpl&& other) + : vec_(std::move(other.vec_)), + data_tuple_(std::move(other.data_tuple_)), + label_names_(std::move(other.label_names_)), + named_property_(std::move(other.named_property_)) { + bitset_.swap(other.bitset_); + } + + TwoLabelVertexSetImpl(const TwoLabelVertexSetImpl& other) + : vec_(other.vec_), + data_tuple_(other.data_tuple_), + label_names_(other.label_names_), + named_property_(other.named_property_) { + bitset_.copy(other.bitset_); + } + + builder_t CreateBuilder() const { + return builder_t(vec_.size(), label_names_, named_property_); + } + + iterator begin() const { return iterator(vec_, data_tuple_, bitset_, 0); } + + iterator end() const { + return iterator(vec_, data_tuple_, bitset_, vec_.size()); + } + + template >> + RES_T project_vertices(std::array& filter_labels, + EXPRESSION& exprs, + std::array& prop_getters) const { + // TODO: vector-based cols 
should be able to be selected with + // certain rows. + + auto tuple = two_label_project_vertices_impl(vec_, data_tuple_, bitset_, + label_names_, filter_labels, + exprs, prop_getters); + auto copied_label_names(label_names_); + auto copied_prop_names(named_property_); + auto set = self_type_t( + std::move(std::get<0>(tuple)), std::move(std::get<1>(tuple)), + std::move(copied_label_names), std::move(copied_prop_names), + std::move(std::get<2>(tuple))); + return std::make_pair(std::move(set), std::move(std::get<3>(tuple))); + } + + // project vertices with only labels filtering + template >> + RES_T project_vertices(std::array& filter_labels) const { + auto tuple = two_label_project_vertices_impl(vec_, data_tuple_, bitset_, + label_names_, filter_labels); + auto copied_label_names(label_names_); + auto copied_prop_names(named_property_); + auto set = self_type_t( + std::move(std::get<0>(tuple)), std::move(std::get<1>(tuple)), + std::move(copied_label_names), std::move(copied_prop_names), + std::move(std::get<2>(tuple))); + return std::make_pair(std::move(set), std::move(std::get<3>(tuple))); + } + + template >> + RES_T project_vertices_internal(std::array& filter_labels, + EXPRESSION& exprs) const { + // We assume expr only applies on one column + auto tuple = two_label_project_vertices_internal_impl( + vec_, data_tuple_, bitset_, label_names_, filter_labels, + named_property_, exprs); + auto copied_label_names(label_names_); + auto copied_prop_names(named_property_); + auto set = self_type_t( + std::move(std::get<0>(tuple)), std::move(std::get<1>(tuple)), + std::move(copied_label_names), std::move(copied_prop_names), + std::move(std::get<2>(tuple))); + return std::make_pair(std::move(set), std::move(std::get<3>(tuple))); + } + + const std::array& GetLabels() const { return label_names_; } + + LabelT GetLabel(size_t i) const { return label_names_[i]; } + + const grape::Bitset& GetBitset() const { return bitset_; } + + grape::Bitset& GetMutableBitset() { return 
bitset_; } + + const std::vector& GetVertices() const { return vec_; } + + const std::vector& GetDataVec() const { return data_tuple_; } + + const std::array& GetPropNames() const { + return named_property_; + } + + std::pair, std::vector> GetVertices( + size_t ind) const { + CHECK(ind < 2); + CHECK(bitset_.cardinality() == vec_.size()); + std::vector res; + std::vector active_ind; + // 0 denotes label0, 1 denotes label1. + size_t cnt; + if (ind == 0) { + cnt = bitset_.count(); + } else { + cnt = bitset_.cardinality() - bitset_.count(); + } + res.reserve(cnt); + active_ind.reserve(cnt); + if (ind == 0) { + for (auto i = 0; i < bitset_.cardinality(); ++i) { + if (bitset_.get_bit(i)) { + res.emplace_back(vec_[i]); + active_ind.emplace_back(i); + } + } + } else { + for (auto i = 0; i < bitset_.cardinality(); ++i) { + if (!bitset_.get_bit(i)) { + res.emplace_back(vec_[i]); + active_ind.emplace_back(i); + } + } + } + + VLOG(10) << "Got vertices of tag: " << ind + << ", res vertices: " << res.size() + << ", active_ind size: " << active_ind.size(); + return std::make_pair(std::move(res), std::move(active_ind)); + } + + template ::type* = nullptr> + self_type_t ProjectWithRepeatArray(const std::vector& repeat_array, + KeyAlias& key_alias) const { + std::vector next_vids; + std::vector> next_datas; + size_t next_size = 0; + for (auto i = 0; i < repeat_array.size(); ++i) { + next_size += repeat_array[i]; + } + VLOG(10) << "[TwoLabelVertexSetImpl] size: " << Size() + << " Project self, next size: " << next_size; + + next_vids.reserve(next_size); + next_datas.reserve(next_size); + grape::Bitset next_set; + next_set.init(next_size); + for (auto i = 0; i < repeat_array.size(); ++i) { + if (bitset_.get_bit(i)) { + for (auto j = 0; j < repeat_array[i]; ++j) { + // VLOG(10) << "Project: " << vids_[i]; + next_set.set_bit(next_vids.size()); + next_vids.push_back(vec_[i]); + next_datas.push_back(data_tuple_[i]); + } + } else { + for (auto j = 0; j < repeat_array[i]; ++j) { + // 
VLOG(10) << "Project: " << vids_[i]; + // next_set.set_bit(next_vids.size()); + next_vids.push_back(vec_[i]); + next_datas.push_back(data_tuple_[i]); + } + } + } + + auto copied_label_names(label_names_); + auto copied_named_prop(named_property_); + return self_type_t(std::move(next_vids), std::move(next_datas), + std::move(copied_label_names), + std::move(copied_named_prop), std::move(next_set)); + } + + // Usually after sort. + template + flat_t Flat(std::vector>& index_ele_tuple) { + static_assert(col_ind < + std::tuple_size_v>); + auto res_vids_and_data_tuples = twoLabelSetFlatImpl( + index_ele_tuple, vec_, data_tuple_, bitset_); + auto labels_copied(label_names_); + auto copied_named_prop(named_property_); + return self_type_t(std::move(std::get<0>(res_vids_and_data_tuples)), + std::move(std::get<1>(res_vids_and_data_tuples)), + std::move(labels_copied), std::move(copied_named_prop), + std::move(std::get<2>(res_vids_and_data_tuples))); + } + + template + void fillBuiltinPropsImpl(std::vector>& tuples, + std::string& prop_name, + std::vector& repeat_array) const { + if constexpr (std::is_same_v>, + Dist>) { + if (prop_name == "dist") { + LOG(FATAL) << "Not supported"; + } + } + } + + template + void fillBuiltinPropsImpl(std::vector>& tuples, + PropNameArray& prop_names, + std::vector& repeat_array, + std::index_sequence) const { + (fillBuiltinPropsImpl(tuples, std::get(prop_names), + repeat_array), + ...); + } + + template + void fillBuiltinProps(std::vector>& tuples, + PropNameArray& prop_names, + std::vector& repeat_array) const { + fillBuiltinPropsImpl(tuples, prop_names, repeat_array, + std::make_index_sequence()); + } + + // No repeat array is not provided + template + void fillBuiltinProps(std::vector>& tuples, + PropNameArray& prop_names) const { + LOG(WARNING) << "not supported"; + } + + template + void fillBuiltinProps(std::vector& tuples, + PropNameArray& prop_names) const { + LOG(WARNING) << "not supported"; + } + + void Repeat(std::vector& 
cur_offset, + std::vector& repeat_vec) { + LOG(FATAL) << "Not implemented"; + } + + size_t Size() const { return vec_.size(); } + + private: + std::vector vec_; + std::vector> data_tuple_; + std::array label_names_; + std::array named_property_; + grape::Bitset bitset_; +}; + +/// @brief //////////////////////Specialization for empty data type. +/// @tparam VID_T +/// @tparam LabelT +/// @tparam ...T +template +class TwoLabelVertexSetImpl { + public: + using lid_t = VID_T; + using self_type_t = TwoLabelVertexSetImpl; + using iterator = TwoLabelVertexSetIter; + using index_ele_tuple_t = std::tuple; + using data_tuple_t = std::tuple; + using flat_t = self_type_t; + using EntityValueType = VID_T; + using builder_t = + TwoLabelVertexSetImplBuilder; + + static constexpr bool is_vertex_set = true; + static constexpr bool is_general_set = false; + static constexpr bool is_two_label_set = true; + static constexpr size_t num_labels = 2; + static constexpr size_t num_props = 0; + static constexpr bool is_collection = false; + static constexpr bool is_multi_label = false; + TwoLabelVertexSetImpl(std::vector&& vec, + std::array&& label_names, + grape::Bitset&& bitset) + : vec_(std::move(vec)), label_names_(std::move(label_names)) { + bitset_.swap(bitset); + } + + TwoLabelVertexSetImpl(TwoLabelVertexSetImpl&& other) + : vec_(std::move(other.vec_)), + label_names_(std::move(other.label_names_)) { + bitset_.swap(other.bitset_); + } + + TwoLabelVertexSetImpl(const TwoLabelVertexSetImpl& other) + : vec_(other.vec_), label_names_(other.label_names_) { + bitset_.copy(other.bitset_); + } + + builder_t CreateBuilder() const { + return builder_t(bitset_.cardinality(), label_names_); + } + + template + TwoLabelVertexSetImpl WithData( + std::vector>&& data, + std::array&& named_prop) const { + auto copied_vec(vec_); + grape::Bitset copied_bitset; + copied_bitset.copy(bitset_); + auto copied_label_names(label_names_); + return TwoLabelVertexSetImpl( + std::move(copied_vec), 
std::move(data), std::move(copied_label_names), + std::move(named_prop), std::move(copied_bitset)); + } + + iterator begin() const { return iterator(vec_, bitset_, 0); } + + iterator end() const { return iterator(vec_, bitset_, vec_.size()); } + + template >> + RES_T project_vertices(std::array& filter_labels, + EXPRESSION& exprs, + std::array& prop_getters) const { + // TODO: vector-based cols should be able to be selected with + // certain rows. + + auto tuple = two_label_project_vertices_impl( + vec_, bitset_, label_names_, filter_labels, exprs, prop_getters); + auto copied_label_names(label_names_); + auto set = self_type_t(std::move(std::get<0>(tuple)), + std::move(copied_label_names), + std::move(std::get<1>(tuple))); + return std::make_pair(std::move(set), std::move(std::get<2>(tuple))); + } + + // project vertices with only filter labels + template >> + RES_T project_vertices(std::array& filter_labels) const { + auto tuple = two_label_project_vertices_impl(vec_, bitset_, label_names_, + filter_labels); + auto copied_label_names(label_names_); + auto set = self_type_t(std::move(std::get<0>(tuple)), + std::move(copied_label_names), + std::move(std::get<1>(tuple))); + return std::make_pair(std::move(set), std::move(std::get<2>(tuple))); + } + + const std::array& GetLabels() const { return label_names_; } + + LabelT GetLabel(size_t i) const { return label_names_[i]; } + + const grape::Bitset& GetBitset() const { return bitset_; } + + grape::Bitset& GetMutableBitset() { return bitset_; } + + const std::vector& GetVertices() const { return vec_; } + + std::vector& GetMutableVertices() { return vec_; } + + std::pair, std::vector> GetVertices( + size_t ind) const { + CHECK(ind < 2); + CHECK(bitset_.cardinality() == vec_.size()); + std::vector res; + std::vector active_ind; + // 0 denotes label0, 1 denotes label1. 
+ size_t cnt; + if (ind == 0) { + cnt = bitset_.count(); + } else { + cnt = bitset_.cardinality() - bitset_.count(); + } + res.reserve(cnt); + active_ind.reserve(cnt); + if (ind == 0) { + for (auto i = 0; i < bitset_.cardinality(); ++i) { + if (bitset_.get_bit(i)) { + res.push_back(vec_[i]); + active_ind.push_back(i); + } + } + } else { + for (auto i = 0; i < bitset_.cardinality(); ++i) { + if (!bitset_.get_bit(i)) { + res.push_back(vec_[i]); + active_ind.push_back(i); + } + } + } + + VLOG(10) << "Got vertices of tag: " << ind + << ", res vertices: " << res.size() + << ", active_ind size: " << active_ind.size(); + return std::make_pair(std::move(res), std::move(active_ind)); + } + + template ::type* = nullptr> + self_type_t ProjectWithRepeatArray(const std::vector& repeat_array, + KeyAlias& key_alias) const { + std::vector next_vids; + size_t next_size = 0; + for (auto i = 0; i < repeat_array.size(); ++i) { + next_size += repeat_array[i]; + } + VLOG(10) << "[TwoLabelVertexSetImpl] size: " << Size() + << " Project self, next size: " << next_size; + + next_vids.reserve(next_size); + grape::Bitset next_set; + next_set.init(next_size); + for (auto i = 0; i < repeat_array.size(); ++i) { + if (bitset_.get_bit(i)) { + for (auto j = 0; j < repeat_array[i]; ++j) { + next_set.set_bit(next_vids.size()); + next_vids.push_back(vec_[i]); + } + } else { + for (auto j = 0; j < repeat_array[i]; ++j) { + next_vids.push_back(vec_[i]); + } + } + } + + auto copied_label_names(label_names_); + return self_type_t(std::move(next_vids), std::move(copied_label_names), + std::move(next_set)); + } + + // Usually after sort. 
+ template + flat_t Flat(std::vector>& index_ele_tuple) { + static_assert(col_ind < + std::tuple_size_v>); + auto res_vids_and_data_tuples = + twoLabelSetFlatImpl(index_ele_tuple, vec_, bitset_); + auto labels_copied(label_names_); + return self_type_t(std::move(res_vids_and_data_tuples.first), + std::move(labels_copied), + std::move(res_vids_and_data_tuples.second)); + } + + template + void fillBuiltinPropsImpl(std::vector>& tuples, + std::string& prop_name, + std::vector& repeat_array) { + if constexpr (std::is_same_v>, + Dist>) { + if (prop_name == "dist") { + LOG(FATAL) << "Not supported"; + } + } + } + + template + void fillBuiltinPropsImpl(std::vector>& tuples, + PropNameArray& prop_names, + std::vector& repeat_array, + std::index_sequence) { + (fillBuiltinPropsImpl(tuples, std::get(prop_names), + repeat_array), + ...); + } + + template + void fillBuiltinProps(std::vector>& tuples, + PropNameArray& prop_names, + std::vector& repeat_array) { + fillBuiltinPropsImpl(tuples, prop_names, repeat_array, + std::make_index_sequence()); + } + + // No repeat array is not provided + template + void fillBuiltinProps(std::vector>& tuples, + PropNameArray& prop_names) { + LOG(WARNING) << "not supported"; + } + + void SubSetWithIndices(std::vector& indices) { + std::vector new_vec; + grape::Bitset new_bitset; + new_vec.reserve(indices.size()); + new_bitset.init(indices.size()); + { + size_t i = 0; + for (auto& index : indices) { + new_vec.emplace_back(vec_[index]); + if (bitset_.get_bit(index)) { + new_bitset.set_bit(i); + } + ++i; + } + } + vec_.swap(new_vec); + // safe? 
+ bitset_.swap(new_bitset); + VLOG(10) << "after subset: " << vec_.size() + << ",count: " << bitset_.count(); + ; + } + + void Repeat(std::vector& cur_offset, + std::vector& repeat_vec) { + std::vector new_vec; + grape::Bitset new_bitset; + new_vec.reserve(repeat_vec.back()); + // estimate size + { + size_t tmp_size = 0; + for (auto i = 0; i + 1 < cur_offset.size(); ++i) { + auto times_to_repeat = repeat_vec[i + 1] - repeat_vec[i]; + tmp_size += (cur_offset[i + 1] - cur_offset[i]) * times_to_repeat; + } + new_bitset.init(tmp_size); + } + + for (auto i = 0; i + 1 < cur_offset.size(); ++i) { + auto times_to_repeat = repeat_vec[i + 1] - repeat_vec[i]; + for (auto j = 0; j < times_to_repeat; ++j) { + for (auto k = cur_offset[i]; k < cur_offset[i + 1]; ++k) { + new_vec.emplace_back(vec_[k]); + if (bitset_.get_bit(k)) { + new_bitset.set_bit(new_vec.size() - 1); + } + } + } + } + vec_.swap(new_vec); + bitset_.swap(new_bitset); + VLOG(10) << "Finish repeat on two label set"; + } + + size_t Size() const { return vec_.size(); } + + private: + std::vector vec_; + std::array label_names_; + grape::Bitset bitset_; +}; + +template +using TwoLabelVertexSet = TwoLabelVertexSetImpl; + +template +auto make_two_label_set(std::vector&& vec, + std::array&& label_names, + grape::Bitset&& bitset) { + return TwoLabelVertexSet( + std::move(vec), std::move(label_names), std::move(bitset)); +} + +template +auto make_two_label_set(std::vector&& vec, + const std::array& label_names, + grape::Bitset&& bitset) { + auto copied(label_names); + return TwoLabelVertexSet( + std::move(vec), std::move(copied), std::move(bitset)); +} + +template +auto make_two_label_set(std::vector&& vec, + std::vector>&& data, + std::array&& label_names, + std::array&& prop_names, + grape::Bitset&& bitset) { + return TwoLabelVertexSet( + std::move(vec), std::move(data), std::move(label_names), + std::move(prop_names), std::move(bitset)); +} + +static std::array, 2> two_label_bitset_to_vids_inds( + const grape::Bitset& 
bitset) { + std::array, 2> res; + auto limit_size = bitset.cardinality(); + VLOG(10) << "old bitset limit size: " << limit_size; + auto label0_cnt = bitset.count(); + res[0].reserve(label0_cnt); + res[1].reserve(limit_size - label0_cnt); + for (auto i = 0; i < limit_size; ++i) { + if (bitset.get_bit(i)) { + res[0].emplace_back(i); + } else { + res[1].emplace_back(i); + } + } + return res; +} + +template +static std::pair, 2>, + std::array, 2>> +two_label_bitset_to_vids_indsV2(const grape::Bitset& bitset, + const std::vector& old_vids) { + std::array, 2> res; + std::array, 2> res_vids; + auto limit_size = bitset.cardinality(); + VLOG(10) << "old bitset limit size: " << limit_size; + auto label0_cnt = bitset.count(); + res[0].reserve(label0_cnt); + res_vids[0].reserve(label0_cnt); + res[1].reserve(limit_size - label0_cnt); + res_vids[1].reserve(limit_size - label0_cnt); + for (auto i = 0; i < limit_size; ++i) { + if (bitset.get_bit(i)) { + res[0].emplace_back(i); + res_vids[0].emplace_back(old_vids[i]); + } else { + res[1].emplace_back(i); + res_vids[1].emplace_back(old_vids[i]); + } + } + return std::make_pair(std::move(res_vids), std::move(res)); +} + +template +static auto get_property_tuple_two_label( + const GRAPH_INTERFACE& graph, + const TwoLabelVertexSet& general_set, + const std::array& prop_names) { + double t0 = -grape::GetCurrentTime(); + auto& label_array = general_set.GetLabels(); + t0 += grape::GetCurrentTime(); + + // Get data for multilabel vertices, mixed + // double t1 = -grape::GetCurrentTime(); + auto data_tuples = graph.template GetVertexPropsFromVidV2( + general_set.GetVertices(), label_array, general_set.GetBitset(), + prop_names); + + return data_tuples; +} + +template +static auto get_property_tuple_two_label( + const GRAPH_INTERFACE& graph, + const TwoLabelVertexSetImpl& general_set, + const std::tuple& named_prop) { + using data_tuple_t = std::tuple; + std::array prop_names; + size_t ind = 0; + std::apply([&prop_names, + &ind](auto&... 
args) { ((prop_names[ind++] = args.name), ...); }, + named_prop); + return get_property_tuple_two_label( + graph, general_set, prop_names); +} + +} // namespace gs + +#endif // ENGINES_HQPS_DS_MULTI_VERTEX_SET_TWO_LABEL_VERTEX_SET_H_ diff --git a/flex/engines/hqps_db/structures/path.h b/flex/engines/hqps_db/structures/path.h new file mode 100644 index 000000000000..23fb07ec4172 --- /dev/null +++ b/flex/engines/hqps_db/structures/path.h @@ -0,0 +1,136 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef GRAPHSCOPE_DS_PATH_H_ +#define GRAPHSCOPE_DS_PATH_H_ + +#include +#include +#include + +namespace gs { + +// Path Set contains all vertices and edges along each path. +// Note that path doesn't have to be full length. +// i.e. 
:[1], [1,2], [1,2,3] + +// Assumes all path share same label +template +struct Path { + std::vector vids_; + std::vector label_ids_; + + Path(std::vector&& vids, std::vector&& label_ids) + : vids_(std::move(vids)), label_ids_(std::move(label_ids)) {} + + size_t length() const { return vids_.size() - 1; } + + const std::vector& GetVertices() const { return vids_; } + + std::string to_string() const { + std::stringstream ss; + for (auto i = 0; i < vids_.size() - 1; ++i) { + ss << vids_[i] << "->"; + } + ss << vids_[vids_.size() - 1]; + return ss.str(); + } +}; + +template +class PathSetIter { + public: + using self_type_t = PathSetIter; + using index_ele_tuple_t = std::pair&>; + + PathSetIter(const std::vector>& paths, size_t ind) + : paths_(paths), ind_(ind) {} + + const Path& GetElement() const { return paths_[ind_]; } + + std::tuple> GetData() const { + return std::make_tuple(paths_[ind_]); + } + + index_ele_tuple_t GetIndexElement() const { + return std::make_tuple(ind_, paths_[ind_]); + } + + inline const self_type_t& operator++() { + ++ind_; + return *this; + } + + // We may never compare to other kind of iterators + inline bool operator==(const self_type_t& rhs) const { + return ind_ == rhs.ind_; + } + + inline bool operator!=(const self_type_t& rhs) const { + return ind_ != rhs.ind_; + } + + inline bool operator<(const self_type_t& rhs) const { + return ind_ < rhs.ind_; + } + + inline const self_type_t& operator*() const { return *this; } + + inline const self_type_t* operator->() const { return this; } + + private: + const std::vector>& paths_; + size_t ind_; +}; + +template +class PathSet { + public: + using flat_t = PathSet; + using self_type_t = PathSet; + using iterator = PathSetIter; + using data_tuple_t = std::tuple>; + using index_ele_tuple_t = std::pair&>; + PathSet(std::vector&& labels) : labels_(std::move(labels)){}; + + PathSet(std::vector>&& paths, std::vector&& labels) + : paths_(std::move(paths)), labels_(std::move(labels)) {} + + void 
EmplacePath(Path&& path) { paths_.emplace_back(std::move(path)); } + + const Path& get(size_t i) const { + CHECK(i < paths_.size()); + return paths_[i]; + } + + size_t Size() const { return paths_.size(); } + + iterator begin() const { return iterator(paths_, 0); } + + iterator end() const { return iterator(paths_, paths_.size()); } + + private: + std::vector labels_; + std::vector> paths_; +}; + +template +auto make_empty_path_set(std::vector&& labels) { + return PathSet(std::move(labels)); +} + +} // namespace gs + +#endif // GRAPHSCOPE_DS_PATH_H_ diff --git a/flex/engines/http_server/CMakeLists.txt b/flex/engines/http_server/CMakeLists.txt new file mode 100644 index 000000000000..0ddd2a13bff4 --- /dev/null +++ b/flex/engines/http_server/CMakeLists.txt @@ -0,0 +1,24 @@ +find_package (Hiactor) +if (Hiactor_FOUND) + include (${Hiactor_CODEGEN_CMAKE_FILE}) + + hiactor_codegen (server_actor_autogen server_actor_autogen_files + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/ + INCLUDE_PATHS ${Hiactor_INCLUDE_DIR},${CMAKE_CURRENT_SOURCE_DIR}/../../../) + + file(GLOB_RECURSE SERVER_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.cc") + + add_library(flex_server STATIC ${SERVER_FILES} ${server_actor_autogen_files}) + add_dependencies(flex_server server_actor_autogen) + target_compile_options (flex_server + PUBLIC + -Wno-attributes) + target_link_libraries(flex_server Hiactor::hiactor hqps_plan_proto) + target_include_directories(flex_server PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/../hqps/) + + + install(TARGETS flex_server + RUNTIME DESTINATION bin + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib) +endif () \ No newline at end of file diff --git a/flex/engines/graph_db/server/actor_system.cc b/flex/engines/http_server/actor_system.cc similarity index 97% rename from flex/engines/graph_db/server/actor_system.cc rename to flex/engines/http_server/actor_system.cc index d7405cffa85b..8196b8b129ef 100644 --- a/flex/engines/graph_db/server/actor_system.cc +++ 
b/flex/engines/http_server/actor_system.cc @@ -13,7 +13,7 @@ * limitations under the License. */ -#include "flex/engines/graph_db/server/actor_system.h" +#include "flex/engines/http_server/actor_system.h" #include #include diff --git a/flex/engines/graph_db/server/actor_system.h b/flex/engines/http_server/actor_system.h similarity index 89% rename from flex/engines/graph_db/server/actor_system.h rename to flex/engines/http_server/actor_system.h index e774f5a70b22..83858d8e6c5d 100644 --- a/flex/engines/graph_db/server/actor_system.h +++ b/flex/engines/http_server/actor_system.h @@ -13,8 +13,8 @@ * limitations under the License. */ -#ifndef SERVER_ACTOR_SYSTEM_H_ -#define SERVER_ACTOR_SYSTEM_H_ +#ifndef ENGINES_HTTP_SERVER_ACTOR_SYSTEM_H_ +#define ENGINES_HTTP_SERVER_ACTOR_SYSTEM_H_ #include #include @@ -46,4 +46,4 @@ class actor_system { } // namespace server -#endif // SERVER_ACTOR_SYSTEM_H_ +#endif // ENGINES_HTTP_SERVER_ACTOR_SYSTEM_H_ diff --git a/interactive_engine/proto/ingest_progress_service.proto b/flex/engines/http_server/codegen_proxy.cc similarity index 52% rename from interactive_engine/proto/ingest_progress_service.proto rename to flex/engines/http_server/codegen_proxy.cc index baefde0644c1..a074acc3bc0f 100644 --- a/interactive_engine/proto/ingest_progress_service.proto +++ b/flex/engines/http_server/codegen_proxy.cc @@ -1,31 +1,23 @@ -/** - * Copyright 2020 Alibaba Group Holding Limited. - * +/** Copyright 2020 Alibaba Group Holding Limited. + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * + * + * http://www.apache.org/licenses/LICENSE-2.0 + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ -syntax = "proto3"; +#include "flex/engines/http_server/codegen_proxy.h" -option java_package = "com.alibaba.graphscope.proto.groot"; -option java_multiple_files = true; - -service IngestProgress { - rpc getTailOffsets(GetTailOffsetsRequest) returns(GetTailOffsetsResponse); -} - -message GetTailOffsetsRequest { - repeated int32 queueId = 1; +namespace server { +CodegenProxy& CodegenProxy::get() { + static CodegenProxy instance; + return instance; } -message GetTailOffsetsResponse { - repeated int64 offsets = 1; -} +} // namespace server diff --git a/flex/engines/http_server/codegen_proxy.h b/flex/engines/http_server/codegen_proxy.h new file mode 100644 index 000000000000..36ddfa1e33ff --- /dev/null +++ b/flex/engines/http_server/codegen_proxy.h @@ -0,0 +1,158 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef ENGINES_HQPS_SERVER_CODEGEN_PROXY_H_ +#define ENGINES_HQPS_SERVER_CODEGEN_PROXY_H_ + +#include +#include +#include +#include + +#include "glog/logging.h" + +#include "proto_generated_gie/job_service.pb.h" +#include "proto_generated_gie/physical.pb.h" + +namespace server { + +// Manages the codegen runner, process the incoming adhoc query, and output to +// the desired directory + +class CodegenProxy { + public: + static CodegenProxy& get(); + CodegenProxy() : initialized_(false){}; + + ~CodegenProxy() = default; + + bool Initialized() { return initialized_; } + + void Init(std::string working_dir, std::string codegen_bin, + std::string db_home) { + working_directory_ = working_dir; + codegen_bin_ = codegen_bin; + db_home_ = db_home; + initialized_ = true; + LOG(INFO) << "CodegenProxy working dir: " << working_directory_ + << ",codegen bin " << codegen_bin_ << ", db_home: " << db_home_; + } + + // Do gen + std::optional> do_gen( + const physical::PhysicalPlan& plan) { + LOG(INFO) << "Start generating for query: "; + auto next_job_id = getNextJobId(); + auto work_dir = get_work_directory(next_job_id); + auto query_name = "query_" + std::to_string(next_job_id); + std::string plan_path = prepare_next_job_dir(work_dir, query_name, plan); + if (plan_path.empty()) { + return {}; + } + + std::string res_lib_path = + call_codegen_cmd(plan_path, query_name, work_dir); + + // check res_lib_path exists + if (!std::filesystem::exists(res_lib_path)) { + LOG(ERROR) << "res lib path " << res_lib_path << " not exists"; + return {}; + } + return std::make_pair(next_job_id, res_lib_path); + } + + std::string call_codegen_cmd(const std::string& plan_path, + const std::string& query_name, + const std::string& work_dir) { + // TODO: different suffix for different platform + std::string res_lib_path = work_dir + "/lib" + query_name + ".so"; + std::string cmd = codegen_bin_ + " -i=" + plan_path + " -w=" + work_dir + + " --db_home=" + db_home_; + LOG(INFO) << "Start call 
codegen cmd: " << cmd; + auto res = std::system(cmd.c_str()); + if (res != 0) { + LOG(ERROR) << "call codegen cmd failed: " << cmd; + return ""; + } + return res_lib_path; + } + + private: + int32_t getNextJobId() { return next_job_id_.fetch_add(1); } + + std::string get_work_directory(int32_t job_id) { + std::string work_dir = working_directory_ + "/" + std::to_string(job_id); + ensure_dir_exists(work_dir); + return work_dir; + } + + void ensure_dir_exists(const std::string& working_dir) { + LOG(INFO) << "Ensuring [" << working_dir << "] exists "; + std::filesystem::path path = working_dir; + if (!std::filesystem::exists(path)) { + LOG(INFO) << path << " not exists"; + auto res = std::filesystem::create_directories(path); + if (!res) { + LOG(WARNING) << "create " << path << " failed"; + } else { + LOG(INFO) << "create " << path << " success"; + } + } else { + LOG(INFO) << working_dir << " already exists"; + } + } + + void clear_dir(const std::string& working_dir) { + LOG(INFO) << "[Cleaning]" << working_dir; + std::filesystem::path path = working_dir; + if (std::filesystem::exists(path)) { + size_t num = 0; + for (const auto& entry : + std::filesystem::directory_iterator(working_dir)) { + std::filesystem::remove_all(entry.path()); + num += 1; + } + LOG(INFO) << "remove " << num << "files under " << path; + } + } + + std::string prepare_next_job_dir(const std::string& plan_work_dir, + const std::string& query_name, + const physical::PhysicalPlan& plan) { + // clear directory; + clear_dir(plan_work_dir); + + // dump plan to file + std::string plan_path = plan_work_dir + "/" + query_name + ".pb"; + std::ofstream ofs(plan_path, std::ios::binary); + auto ret = plan.SerializeToOstream(&ofs); + LOG(INFO) << "Dump plan to: " << plan_path + << ", ret: " << std::to_string(ret); + if (!ret) { + return ""; + } + + return plan_path; + } + + std::string working_directory_; + std::string codegen_bin_; + std::string db_home_; + std::atomic next_job_id_{0}; + bool initialized_; 
+}; + +} // namespace server + +#endif // ENGINES_HQPS_SERVER_CODEGEN_PROXY_H_ diff --git a/flex/engines/http_server/executor.act.cc b/flex/engines/http_server/executor.act.cc new file mode 100644 index 000000000000..93962aecbdf6 --- /dev/null +++ b/flex/engines/http_server/executor.act.cc @@ -0,0 +1,127 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "flex/engines/http_server/executor.act.h" + +#include "flex/engines/graph_db/database/graph_db.h" +#include "flex/engines/graph_db/database/graph_db_session.h" +#include "flex/engines/http_server/codegen_proxy.h" +#include "flex/engines/http_server/stored_procedure.h" + +#include + +namespace server { + +executor::~executor() { + // finalization + // ... +} + +executor::executor(hiactor::actor_base* exec_ctx, const hiactor::byte_t* addr) + : hiactor::actor(exec_ctx, addr) { + set_max_concurrency(1); // set max concurrency for task reentrancy (stateful) + // initialization + // ... 
+} + +seastar::future executor::run_graph_db_query( + query_param&& param) { + auto ret = gs::GraphDB::get() + .GetSession(hiactor::local_shard_id()) + .Eval(param.content); + seastar::sstring content(ret.data(), ret.size()); + return seastar::make_ready_future(std::move(content)); +} + +// run_query_for stored_procedure +seastar::future executor::run_hqps_procedure_query( + query_param&& param) { + auto& str = param.content; + const char* str_data = str.data(); + size_t str_length = str.size(); + LOG(INFO) << "Receive pay load: " << str_length << " bytes"; + + query::Query cur_query; + { + CHECK(cur_query.ParseFromArray(str.data(), str.size())); + LOG(INFO) << "Parse query: " << cur_query.DebugString(); + } + auto& store_procedure_manager = server::StoredProcedureManager::get(); + return store_procedure_manager.Query(cur_query).then( + [&cur_query](results::CollectiveResults&& hqps_result) { + LOG(INFO) << "Finish running query: " << cur_query.DebugString(); + LOG(INFO) << "Query results" << hqps_result.DebugString(); + + auto tem_str = hqps_result.SerializeAsString(); + + seastar::sstring content(tem_str.data(), tem_str.size()); + return seastar::make_ready_future(std::move(content)); + }); +} + +seastar::future executor::run_hqps_adhoc_query( + query_param&& param) { + LOG(INFO) << "Run adhoc query"; + // The received query's pay load shoud be able to deserialze to physical plan + auto& str = param.content; + if (str.size() <= 0) { + LOG(INFO) << "Empty query"; + return seastar::make_exception_future( + std::runtime_error("Empty query string")); + } + + const char* str_data = str.data(); + size_t str_length = str.size(); + LOG(INFO) << "Deserialize physical job request" << str_length; + + physical::PhysicalPlan plan; + bool ret = plan.ParseFromArray(str_data, str_length); + if (ret) { + LOG(INFO) << "Parse physical plan: " << plan.DebugString(); + } else { + LOG(ERROR) << "Fail to parse physical plan"; + return seastar::make_exception_future( + 
std::runtime_error("Fail to parse physical plan")); + } + + // 0. do codegen gen. + std::string lib_path = ""; + int32_t job_id = -1; + auto& codegen_proxy = server::CodegenProxy::get(); + if (codegen_proxy.Initialized()) { + auto ret = codegen_proxy.do_gen(plan); + if (ret.has_value()) { + auto& v = ret.value(); + job_id = v.first; + lib_path = v.second; + } + } else { + return seastar::make_exception_future( + std::runtime_error("Codegen proxy not initialized")); + } + if (job_id == -1) { + return seastar::make_exception_future( + std::runtime_error("Fail to parse job id from codegen proxy")); + } + // 1. load and run. + LOG(INFO) << "Okay, try to run the query of lib path: " << lib_path + << ", job id: " << job_id; + + seastar::sstring content = + server::load_and_run(job_id, lib_path, hiactor::local_shard_id()); + return seastar::make_ready_future(std::move(content)); +} + +} // namespace server diff --git a/flex/engines/graph_db/server/executor.act.h b/flex/engines/http_server/executor.act.h similarity index 68% rename from flex/engines/graph_db/server/executor.act.h rename to flex/engines/http_server/executor.act.h index 534b5cf8a1fb..cf722586f2cb 100644 --- a/flex/engines/graph_db/server/executor.act.h +++ b/flex/engines/http_server/executor.act.h @@ -13,10 +13,10 @@ * limitations under the License. 
*/ -#ifndef SERVER_EXECUTOR_ACT_H_ -#define SERVER_EXECUTOR_ACT_H_ +#ifndef ENGINES_HTTP_SERVER_EXECUTOR_ACT_H_ +#define ENGINES_HTTP_SERVER_EXECUTOR_ACT_H_ -#include "flex/engines/graph_db/server/types.h" +#include "flex/engines/http_server/types.h" #include #include @@ -28,7 +28,11 @@ class ANNOTATION(actor:impl) executor : public hiactor::actor { executor(hiactor::actor_base* exec_ctx, const hiactor::byte_t* addr); ~executor() override; - seastar::future ANNOTATION(actor:method) run_query(query_param&& param); + seastar::future ANNOTATION(actor:method) run_graph_db_query(query_param&& param); + + seastar::future ANNOTATION(actor:method) run_hqps_procedure_query(query_param&& param); + + seastar::future ANNOTATION(actor:method) run_hqps_adhoc_query(query_param&& param); // DECLARE_RUN_QUERYS; /// Declare `do_work` func here, no need to implement. @@ -40,4 +44,4 @@ class ANNOTATION(actor:impl) executor : public hiactor::actor { } // namespace server -#endif // SERVER_EXECUTOR_ACT_H_ +#endif // ENGINES_HTTP_SERVER_EXECUTOR_ACT_H_ diff --git a/flex/engines/graph_db/server/executor_group.actg.h b/flex/engines/http_server/executor_group.actg.h similarity index 86% rename from flex/engines/graph_db/server/executor_group.actg.h rename to flex/engines/http_server/executor_group.actg.h index 1380a988528a..17fd91ccd547 100644 --- a/flex/engines/graph_db/server/executor_group.actg.h +++ b/flex/engines/http_server/executor_group.actg.h @@ -13,16 +13,15 @@ * limitations under the License. 
*/ -#ifndef SERVER_EXECUTOR_GROUP_ACTG_H_ -#define SERVER_EXECUTOR_GROUP_ACTG_H_ +#ifndef ENGINES_HTTP_SERVER_EXECUTOR_GROUP_ACTG_H_ +#define ENGINES_HTTP_SERVER_EXECUTOR_GROUP_ACTG_H_ #include - namespace server { class ANNOTATION(actor:group) executor_group : public hiactor::schedulable_actor_group { -public: + public: executor_group(hiactor::actor_base* exec_ctx, const hiactor::byte_t* addr) : hiactor::schedulable_actor_group(exec_ctx, addr) {} @@ -34,4 +33,4 @@ class ANNOTATION(actor:group) executor_group : public hiactor::schedulable_actor } // namespace server -#endif // SERVER_EXECUTOR_GROUP_ACTG_H_ +#endif // ENGINES_HTTP_SERVER_EXECUTOR_GROUP_ACTG_H_ diff --git a/flex/engines/graph_db/server/http_handler.cc b/flex/engines/http_server/graph_db_http_handler.cc similarity index 71% rename from flex/engines/graph_db/server/http_handler.cc rename to flex/engines/http_server/graph_db_http_handler.cc index 0e549aebd95a..0514160aac43 100644 --- a/flex/engines/graph_db/server/http_handler.cc +++ b/flex/engines/http_server/graph_db_http_handler.cc @@ -13,21 +13,21 @@ * limitations under the License. 
*/ -#include "flex/engines/graph_db/server/executor_group.actg.h" -#include "flex/engines/graph_db/server/options.h" -#include "flex/engines/graph_db/server/service.h" +#include "flex/engines/http_server/executor_group.actg.h" +#include "flex/engines/http_server/graph_db_service.h" +#include "flex/engines/http_server/options.h" #include #include #include -#include "flex/engines/graph_db/server/generated/executor_ref.act.autogen.h" -#include "flex/engines/graph_db/server/types.h" +#include "flex/engines/http_server/generated/executor_ref.act.autogen.h" +#include "flex/engines/http_server/types.h" namespace server { -class ic_handler : public seastar::httpd::handler_base { +class graph_db_ic_handler : public seastar::httpd::handler_base { public: - ic_handler(uint32_t group_id, uint32_t shard_concurrency) + graph_db_ic_handler(uint32_t group_id, uint32_t shard_concurrency) : shard_concurrency_(shard_concurrency), executor_idx_(0) { executor_refs_.reserve(shard_concurrency_); hiactor::scope_builder builder; @@ -38,7 +38,7 @@ class ic_handler : public seastar::httpd::handler_base { executor_refs_.emplace_back(builder.build_ref(i)); } } - ~ic_handler() override = default; + ~graph_db_ic_handler() override = default; seastar::future> handle( const seastar::sstring& path, @@ -48,7 +48,7 @@ class ic_handler : public seastar::httpd::handler_base { executor_idx_ = (executor_idx_ + 1) % shard_concurrency_; return executor_refs_[dst_executor] - .run_query(query_param{std::move(req->content)}) + .run_graph_db_query(query_param{std::move(req->content)}) .then_wrapped([rep = std::move(rep)]( seastar::future&& fut) mutable { if (__builtin_expect(fut.failed(), false)) { @@ -69,13 +69,13 @@ class ic_handler : public seastar::httpd::handler_base { std::vector executor_refs_; }; -class exit_handler : public seastar::httpd::handler_base { +class graph_db_exit_handler : public seastar::httpd::handler_base { public: seastar::future> handle( const seastar::sstring& path, std::unique_ptr 
req, std::unique_ptr rep) override { - service::get().set_exit_state(); + GraphDBService::get().set_exit_state(); rep->write_body("bin", seastar::sstring{"The graph_db server is exiting ..."}); return seastar::make_ready_future>( @@ -83,9 +83,10 @@ class exit_handler : public seastar::httpd::handler_base { } }; -http_handler::http_handler(uint16_t http_port) : http_port_(http_port) {} +graph_db_http_handler::graph_db_http_handler(uint16_t http_port) + : http_port_(http_port) {} -void http_handler::start() { +void graph_db_http_handler::start() { auto fut = seastar::alien::submit_to( *seastar::alien::internal::default_instance, 0, [this] { return server_.start() @@ -99,26 +100,29 @@ void http_handler::start() { fut.wait(); } -void http_handler::stop() { +void graph_db_http_handler::stop() { auto fut = seastar::alien::submit_to(*seastar::alien::internal::default_instance, 0, [this] { return server_.stop(); }); fut.wait(); } -seastar::future<> http_handler::set_routes() { +seastar::future<> graph_db_http_handler::set_routes() { return server_.set_routes([this](seastar::httpd::routes& r) { r.add(seastar::httpd::operation_type::POST, seastar::httpd::url("/interactive/query"), - new ic_handler(ic_query_group_id, shard_query_concurrency)); + new graph_db_ic_handler(ic_query_group_id, shard_query_concurrency)); + r.add( + seastar::httpd::operation_type::POST, + seastar::httpd::url("/interactive/update"), + new graph_db_ic_handler(ic_update_group_id, shard_update_concurrency)); + r.add( + seastar::httpd::operation_type::POST, + seastar::httpd::url("/interactive/app"), + new graph_db_ic_handler(ic_update_group_id, shard_update_concurrency)); r.add(seastar::httpd::operation_type::POST, - seastar::httpd::url("/interactive/update"), - new ic_handler(ic_update_group_id, shard_update_concurrency)); - r.add(seastar::httpd::operation_type::POST, - seastar::httpd::url("/interactive/app"), - new ic_handler(ic_update_group_id, shard_update_concurrency)); - 
r.add(seastar::httpd::operation_type::POST, - seastar::httpd::url("/interactive/exit"), new exit_handler()); + seastar::httpd::url("/interactive/exit"), + new graph_db_exit_handler()); return seastar::make_ready_future<>(); }); } diff --git a/flex/engines/http_server/graph_db_http_handler.h b/flex/engines/http_server/graph_db_http_handler.h new file mode 100644 index 000000000000..0f854f51c48d --- /dev/null +++ b/flex/engines/http_server/graph_db_http_handler.h @@ -0,0 +1,40 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ENGINES_HTTP_SERVER_GRAPH_DB_HTTP_HANDLER_H_ +#define ENGINES_HTTP_SERVER_GRAPH_DB_HTTP_HANDLER_H_ + +#include + +namespace server { + +class graph_db_http_handler { + public: + graph_db_http_handler(uint16_t http_port); + + void start(); + void stop(); + + private: + seastar::future<> set_routes(); + + private: + const uint16_t http_port_; + seastar::httpd::http_server_control server_; +}; + +} // namespace server + +#endif // ENGINES_HTTP_SERVER_GRAPH_DB_HTTP_HANDLER_H_ diff --git a/flex/engines/graph_db/server/service.cc b/flex/engines/http_server/graph_db_service.cc similarity index 68% rename from flex/engines/graph_db/server/service.cc rename to flex/engines/http_server/graph_db_service.cc index 2b183770b0e2..c0335c62b482 100644 --- a/flex/engines/graph_db/server/service.cc +++ b/flex/engines/http_server/graph_db_service.cc @@ -13,18 +13,19 @@ * limitations under the License. */ -#include "flex/engines/graph_db/server/service.h" -#include "flex/engines/graph_db/server/options.h" +#include "flex/engines/http_server/graph_db_service.h" +#include "flex/engines/http_server/options.h" namespace server { -void service::init(uint32_t num_shards, uint16_t http_port, bool dpdk_mode) { +void GraphDBService::init(uint32_t num_shards, uint16_t http_port, + bool dpdk_mode) { actor_sys_ = std::make_unique(num_shards, dpdk_mode); - http_hdl_ = std::make_unique(http_port); + http_hdl_ = std::make_unique(http_port); } -void service::run_and_wait_for_exit() { +void GraphDBService::run_and_wait_for_exit() { if (!actor_sys_ || !http_hdl_) { - std::cerr << "Service has not been inited!" << std::endl; + std::cerr << "GraphDB service has not been inited!" 
<< std::endl; return; } actor_sys_->launch(); @@ -37,6 +38,6 @@ void service::run_and_wait_for_exit() { actor_sys_->terminate(); } -void service::set_exit_state() { running_.store(false); } +void GraphDBService::set_exit_state() { running_.store(false); } } // namespace server diff --git a/flex/engines/graph_db/server/service.h b/flex/engines/http_server/graph_db_service.h similarity index 66% rename from flex/engines/graph_db/server/service.h rename to flex/engines/http_server/graph_db_service.h index 5fc0240e2869..c26cfdafde44 100644 --- a/flex/engines/graph_db/server/service.h +++ b/flex/engines/http_server/graph_db_service.h @@ -13,35 +13,35 @@ * limitations under the License. */ -#ifndef SERVER_SERVICE_H_ -#define SERVER_SERVICE_H_ +#ifndef ENGINES_HTTP_SERVER_GRAPH_DB_SERVICE_H_ +#define ENGINES_HTTP_SERVER_GRAPH_DB_SERVICE_H_ -#include "flex/engines/graph_db/server/actor_system.h" -#include "flex/engines/graph_db/server/http_handler.h" +#include "flex/engines/http_server/actor_system.h" +#include "flex/engines/http_server/graph_db_http_handler.h" namespace server { -class service { +class GraphDBService { public: - static service& get() { - static service instance; + static GraphDBService& get() { + static GraphDBService instance; return instance; } - ~service() = default; + ~GraphDBService() = default; void init(uint32_t num_shards, uint16_t http_port, bool dpdk_mode); void run_and_wait_for_exit(); void set_exit_state(); private: - service() = default; + GraphDBService() = default; private: std::unique_ptr actor_sys_; - std::unique_ptr http_hdl_; + std::unique_ptr http_hdl_; std::atomic running_{false}; }; } // namespace server -#endif // SERVER_SERVICE_H_ +#endif // ENGINES_HTTP_SERVER_GRAPH_DB_SERVICE_H_ diff --git a/flex/engines/http_server/hqps_http_handler.cc b/flex/engines/http_server/hqps_http_handler.cc new file mode 100644 index 000000000000..9a6f375d2fed --- /dev/null +++ b/flex/engines/http_server/hqps_http_handler.cc @@ -0,0 +1,188 @@ +/** 
Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "flex/engines/http_server/executor_group.actg.h" +#include "flex/engines/http_server/hqps_service.h" +#include "flex/engines/http_server/options.h" + +#include +#include +#include +#include "flex/engines/http_server/generated/executor_ref.act.autogen.h" +#include "flex/engines/http_server/types.h" + +namespace server { + +class hqps_ic_handler : public seastar::httpd::handler_base { + public: + hqps_ic_handler(uint32_t group_id, uint32_t shard_concurrency) + : shard_concurrency_(shard_concurrency), executor_idx_(0) { + executor_refs_.reserve(shard_concurrency_); + hiactor::scope_builder builder; + builder.set_shard(hiactor::local_shard_id()) + .enter_sub_scope(hiactor::scope(0)) + .enter_sub_scope(hiactor::scope(group_id)); + for (unsigned i = 0; i < shard_concurrency_; ++i) { + executor_refs_.emplace_back(builder.build_ref(i)); + } + } + ~hqps_ic_handler() override = default; + + seastar::future> handle( + const seastar::sstring& path, + std::unique_ptr req, + std::unique_ptr rep) override { + auto dst_executor = executor_idx_; + executor_idx_ = (executor_idx_ + 1) % shard_concurrency_; + + return executor_refs_[dst_executor] + .run_hqps_procedure_query(query_param{std::move(req->content)}) + .then_wrapped([rep = std::move(rep)]( + seastar::future&& fut) mutable { + if (__builtin_expect(fut.failed(), false)) { + rep->set_status( + 
seastar::httpd::reply::status_type::internal_server_error); + try { + std::rethrow_exception(fut.get_exception()); + } catch (std::exception& e) { + rep->write_body("bin", seastar::sstring(e.what())); + } + rep->done(); + return seastar::make_ready_future< + std::unique_ptr>(std::move(rep)); + } + auto result = fut.get0(); + rep->write_body("bin", std::move(result.content)); + rep->done(); + return seastar::make_ready_future< + std::unique_ptr>(std::move(rep)); + }); + } + + private: + const uint32_t shard_concurrency_; + uint32_t executor_idx_; + std::vector executor_refs_; +}; + +// a handler for handl adhoc query. +class hqps_adhoc_query_handler : public seastar::httpd::handler_base { + public: + hqps_adhoc_query_handler(uint32_t group_id, uint32_t shard_concurrency) + : shard_concurrency_(shard_concurrency), executor_idx_(0) { + executor_refs_.reserve(shard_concurrency_); + hiactor::scope_builder builder; + builder.set_shard(hiactor::local_shard_id()) + .enter_sub_scope(hiactor::scope(0)) + .enter_sub_scope(hiactor::scope(group_id)); + for (unsigned i = 0; i < shard_concurrency_; ++i) { + executor_refs_.emplace_back(builder.build_ref(i)); + } + } + ~hqps_adhoc_query_handler() override = default; + + seastar::future> handle( + const seastar::sstring& path, + std::unique_ptr req, + std::unique_ptr rep) override { + auto dst_executor = executor_idx_; + executor_idx_ = (executor_idx_ + 1) % shard_concurrency_; + + return executor_refs_[dst_executor] + .run_hqps_adhoc_query(query_param{std::move(req->content)}) + .then_wrapped([rep = std::move(rep)]( + seastar::future&& fut) mutable { + if (__builtin_expect(fut.failed(), false)) { + rep->set_status( + seastar::httpd::reply::status_type::internal_server_error); + try { + std::rethrow_exception(fut.get_exception()); + } catch (std::exception& e) { + rep->write_body("bin", seastar::sstring(e.what())); + } + rep->done(); + return seastar::make_ready_future< + std::unique_ptr>(std::move(rep)); + } + auto result = 
fut.get0(); + rep->write_body("bin", std::move(result.content)); + rep->done(); + return seastar::make_ready_future< + std::unique_ptr>(std::move(rep)); + }); + } + + private: + const uint32_t shard_concurrency_; + uint32_t executor_idx_; + std::vector executor_refs_; +}; + +class hqps_exit_handler : public seastar::httpd::handler_base { + public: + seastar::future> handle( + const seastar::sstring& path, + std::unique_ptr req, + std::unique_ptr rep) override { + HQPSService::get().set_exit_state(); + rep->write_body( + "bin", + seastar::sstring{"The ldbc snb interactive service is exiting ..."}); + return seastar::make_ready_future>( + std::move(rep)); + } +}; + +hqps_http_handler::hqps_http_handler(uint16_t http_port) + : http_port_(http_port) {} + +void hqps_http_handler::start() { + auto fut = seastar::alien::submit_to( + *seastar::alien::internal::default_instance, 0, [this] { + return server_.start() + .then([this] { return set_routes(); }) + .then([this] { return server_.listen(http_port_); }) + .then([this] { + fmt::print( + "Ldbc snb interactive http handler is listening on port {} " + "...\n", + http_port_); + }); + }); + fut.wait(); +} + +void hqps_http_handler::stop() { + auto fut = + seastar::alien::submit_to(*seastar::alien::internal::default_instance, 0, + [this] { return server_.stop(); }); + fut.wait(); +} + +seastar::future<> hqps_http_handler::set_routes() { + return server_.set_routes([this](seastar::httpd::routes& r) { + r.add(seastar::httpd::operation_type::POST, + seastar::httpd::url("/interactive/query"), + new hqps_ic_handler(ic_query_group_id, shard_query_concurrency)); + r.add(seastar::httpd::operation_type::POST, + seastar::httpd::url("/interactive/adhoc_query"), + new hqps_adhoc_query_handler(ic_adhoc_group_id, + shard_adhoc_concurrency)); + r.add(seastar::httpd::operation_type::POST, + seastar::httpd::url("/interactive/exit"), new hqps_exit_handler()); + return seastar::make_ready_future<>(); + }); +} + +} // namespace server diff 
--git a/flex/engines/graph_db/server/http_handler.h b/flex/engines/http_server/hqps_http_handler.h similarity index 80% rename from flex/engines/graph_db/server/http_handler.h rename to flex/engines/http_server/hqps_http_handler.h index 85667a43cd48..ce2ea7eeb1a2 100644 --- a/flex/engines/graph_db/server/http_handler.h +++ b/flex/engines/http_server/hqps_http_handler.h @@ -12,17 +12,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -#ifndef SERVER_HTTP_HANDLER_H_ -#define SERVER_HTTP_HANDLER_H_ +#ifndef ENGINES_HTTP_SERVER_HQPS_HTTP_HANDLER_H_ +#define ENGINES_HTTP_SERVER_HQPS_HTTP_HANDLER_H_ #include namespace server { -class http_handler { +class hqps_http_handler { public: - http_handler(uint16_t http_port); + hqps_http_handler(uint16_t http_port); void start(); void stop(); @@ -37,4 +36,4 @@ class http_handler { } // namespace server -#endif // SERVER_HTTP_HANDLER_H_ +#endif // ENGINES_HTTP_SERVER_HQPS_HTTP_HANDLER_H_ \ No newline at end of file diff --git a/flex/engines/http_server/hqps_service.cc b/flex/engines/http_server/hqps_service.cc new file mode 100644 index 000000000000..f09b3dc56220 --- /dev/null +++ b/flex/engines/http_server/hqps_service.cc @@ -0,0 +1,48 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "flex/engines/http_server/hqps_service.h" +#include "flex/engines/http_server/options.h" +namespace server { + +void HQPSService::init(uint32_t num_shards, uint16_t http_port, + bool dpdk_mode) { + actor_sys_ = std::make_unique(num_shards, dpdk_mode); + http_hdl_ = std::make_unique(http_port); +} + +HQPSService::~HQPSService() { + if (actor_sys_) { + actor_sys_->terminate(); + } +} + +void HQPSService::run_and_wait_for_exit() { + if (!actor_sys_ || !http_hdl_) { + std::cerr << "High QPS service has not been inited!" << std::endl; + return; + } + actor_sys_->launch(); + http_hdl_->start(); + running_.store(true); + while (running_.load(std::memory_order_relaxed)) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + http_hdl_->stop(); + actor_sys_->terminate(); +} + +void HQPSService::set_exit_state() { running_.store(false); } + +} // namespace server diff --git a/flex/engines/http_server/hqps_service.h b/flex/engines/http_server/hqps_service.h new file mode 100644 index 000000000000..4a7987e1e32c --- /dev/null +++ b/flex/engines/http_server/hqps_service.h @@ -0,0 +1,50 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef ENGINES_HTTP_SERVER_HQPS_SERVICE_H_ +#define ENGINES_HTTP_SERVER_HQPS_SERVICE_H_ + +#include + +#include "flex/engines/http_server/actor_system.h" +#include "flex/engines/http_server/hqps_http_handler.h" + +namespace server { + +class HQPSService { + public: + static HQPSService& get() { + static HQPSService instance; + return instance; + } + ~HQPSService(); + + // the store procedure contains + void init(uint32_t num_shards, uint16_t http_port, bool dpdk_mode); + + void run_and_wait_for_exit(); + void set_exit_state(); + + private: + HQPSService() = default; + + private: + std::unique_ptr actor_sys_; + std::unique_ptr http_hdl_; + std::atomic running_{false}; +}; + +} // namespace server + +#endif // ENGINES_HTTP_SERVER_HQPS_SERVICE_H_ diff --git a/flex/engines/graph_db/server/options.cc b/flex/engines/http_server/options.cc similarity index 89% rename from flex/engines/graph_db/server/options.cc rename to flex/engines/http_server/options.cc index a1495f177aa3..f61df97f1508 100644 --- a/flex/engines/graph_db/server/options.cc +++ b/flex/engines/http_server/options.cc @@ -13,11 +13,12 @@ * limitations under the License. */ -#include "flex/engines/graph_db/server/options.h" +#include "flex/engines/http_server/options.h" namespace server { uint32_t shard_query_concurrency = 16; uint32_t shard_update_concurrency = 4; +uint32_t shard_adhoc_concurrency = 4; } // namespace server diff --git a/flex/engines/graph_db/server/options.h b/flex/engines/http_server/options.h similarity index 81% rename from flex/engines/graph_db/server/options.h rename to flex/engines/http_server/options.h index b84e8ed9e8d4..642018eedbc9 100644 --- a/flex/engines/graph_db/server/options.h +++ b/flex/engines/http_server/options.h @@ -13,8 +13,8 @@ * limitations under the License. 
*/ -#ifndef SERVER_OPTIONS_H_ -#define SERVER_OPTIONS_H_ +#ifndef ENGINES_HTTP_SERVER_OPTIONS_H_ +#define ENGINES_HTTP_SERVER_OPTIONS_H_ #include @@ -23,10 +23,12 @@ namespace server { /// make update executors with higher priority. const uint32_t ic_query_group_id = 1; const uint32_t ic_update_group_id = 2; +const uint32_t ic_adhoc_group_id = 3; extern uint32_t shard_query_concurrency; extern uint32_t shard_update_concurrency; +extern uint32_t shard_adhoc_concurrency; } // namespace server -#endif // SERVER_OPTIONS_H_ +#endif // ENGINES_HTTP_SERVER_OPTIONS_H_ diff --git a/flex/engines/http_server/stored_procedure.cc b/flex/engines/http_server/stored_procedure.cc new file mode 100644 index 000000000000..e35c6f19a3f9 --- /dev/null +++ b/flex/engines/http_server/stored_procedure.cc @@ -0,0 +1,177 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "flex/engines/http_server/stored_procedure.h" +#include "flex/engines/graph_db/database/graph_db.h" + +namespace server { + +void put_argment(gs::Encoder& encoder, const query::Argument& argment) { + auto& value = argment.value(); + auto item_case = value.item_case(); + switch (item_case) { + case common::Value::kI32: + encoder.put_int(value.i32()); + break; + case common::Value::kI64: + encoder.put_long(value.i64()); + break; + case common::Value::kF64: + encoder.put_double(value.f64()); + break; + case common::Value::kStr: + encoder.put_string(value.str()); + break; + default: + LOG(ERROR) << "Not recognizable param type" << static_cast(item_case); + } +} + +// get the handle of the dynamic library, throw error if needed +void* open_lib(const char* lib_path) { + LOG(INFO) << "try to open library: " << lib_path; + void* handle = dlopen(lib_path, RTLD_LAZY); + auto* p_error_msg = dlerror(); + if (p_error_msg) { + LOG(FATAL) << "Fail to open library: " << lib_path + << ", error: " << p_error_msg; + } + LOG(INFO) << "Successfully open library: " << lib_path; + return handle; +} + +void* get_func_ptr(const char* lib_path, void* handle, const char* symbol) { + auto* p_func = dlsym(handle, symbol); + auto* p_error_msg = dlerror(); + if (p_error_msg) { + LOG(FATAL) << "Failed to get symbol " << symbol << " from " << lib_path + << ". 
Reason: " << std::string(p_error_msg); + } + return p_func; +} + +// close the handle of the dynamic library, throw error if needed +void close_lib(void* handle, const char* lib_path) { + if (handle) { + auto ret = dlclose(handle); + if (ret == 0) { + LOG(INFO) << "Sucessfuly closed library" << lib_path; + } else { + auto* p_error_msg = dlerror(); + if (p_error_msg) { + LOG(FATAL) << "Fail to close library, error: " << p_error_msg; + } + } + } else { + LOG(WARNING) << "Try to close a null handle," << lib_path; + } +} + +std::vector get_yaml_files(const std::string& plugin_dir) { + std::filesystem::path dir_path = plugin_dir; + std::string suffix = ".yaml"; + std::vector res_yaml_files; + + for (auto& entry : std::filesystem::directory_iterator(dir_path)) { + if (entry.is_regular_file() && entry.path().extension() == suffix) { + res_yaml_files.emplace_back(entry.path()); + } + } + return res_yaml_files; +} + +std::vector parse_from_multiple_yamls( + const std::vector& stored_procedure_yamls) { + std::vector stored_procedures; + for (auto cur_yaml : stored_procedure_yamls) { + LOG(INFO) << "Loading for: " << cur_yaml; + YAML::Node root = YAML::LoadFile(cur_yaml); + if (!root["name"]) { + LOG(ERROR) << "Expect name in pre_installed procedure"; + } else if (!root["library"]) { + LOG(ERROR) << "Expect path in pre_installed procedure"; + } else { + std::string name = root["name"].as(); + std::string path = root["library"].as(); + if (!std::filesystem::exists(path)) { + LOG(ERROR) << "plugin - " << path << " file not found..."; + } else { + stored_procedures.push_back({name, path}); + } + } + } + return stored_procedures; +} + +std::vector parse_stored_procedures( + const std::string& stored_procedure_yaml) { + std::vector stored_procedures; + YAML::Node root = YAML::LoadFile(stored_procedure_yaml); + if (root["pre_installed"]) { + std::vector installed_got; + if (!get_sequence(root, "pre_installed", installed_got)) { + LOG(ERROR) << "installed_got is not set properly"; + 
} + for (auto& procedure : installed_got) { + if (!procedure["name"]) { + LOG(ERROR) << "Expect name in pre_installed procedure"; + } else if (!procedure["path"]) { + LOG(ERROR) << "Expect path in pre_installed procedure"; + } else { + std::string name = procedure["name"].as(); + std::string path = procedure["path"].as(); + if (!std::filesystem::exists(path)) { + LOG(ERROR) << "plugin - " << path << " file not found..."; + } else { + stored_procedures.push_back({name, path}); + } + } + } + } else { + LOG(WARNING) << "Expect ntry : " << stored_procedure_yaml; + } + return stored_procedures; +} + +std::shared_ptr create_stored_procedure_impl( + int32_t procedure_id, const std::string& procedure_path, int32_t shard_id) { + auto time_stamp = std::numeric_limits::max() - 1; + gs::MutableCSRInterface graph_store(gs::GraphDB::get().GetSession(shard_id)); + + return std::make_shared< + server::CypherStoredProcedure>( + procedure_id, procedure_path, graph_store, gs::GraphStoreType::Grape); +} + +std::string load_and_run(int32_t job_id, const std::string& lib_path, + int32_t shard_id) { + auto temp_stored_procedure = + server::create_stored_procedure_impl(job_id, lib_path, shard_id); + LOG(INFO) << "Create stored procedure: " << temp_stored_procedure->ToString(); + std::vector empty; + gs::Decoder input_decoder(empty.data(), empty.size()); + auto res = temp_stored_procedure->Query(input_decoder); + LOG(INFO) << "Finish running"; + LOG(INFO) << res.DebugString(); + std::string res_str; + res.SerializeToString(&res_str); + return res_str; +} + +StoredProcedureManager& StoredProcedureManager::get() { + static StoredProcedureManager instance; + return instance; +} + +} // namespace server diff --git a/flex/engines/http_server/stored_procedure.h b/flex/engines/http_server/stored_procedure.h new file mode 100644 index 000000000000..8aa9a3ae4121 --- /dev/null +++ b/flex/engines/http_server/stored_procedure.h @@ -0,0 +1,285 @@ +/** Copyright 2020 Alibaba Group Holding Limited. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef ENGINES_HTTP_SERVER_STORED_PROCEDURE_H_ +#define ENGINES_HTTP_SERVER_STORED_PROCEDURE_H_ + +#include +#include + +#include +#include +#include +#include +#include +#include "glog/logging.h" + +#include "proto_generated_gie/results.pb.h" +#include "proto_generated_gie/stored_procedure.pb.h" + +#include +#include +#include "flex/engines/hqps_db/app/hqps_app_base.h" +#include "flex/engines/hqps_db/database/mutable_csr_interface.h" +#include "flex/utils/app_utils.h" + +#include + +namespace server { + +std::string load_and_run(int32_t job_id, const std::string& lib_path, + int32_t shard_id); + +// get the handle of the dynamic library, throw error if needed +void* open_lib(const char* lib_path); + +void* get_func_ptr(const char* lib_path, void* handle, const char* symbol); + +// close the handle of the dynamic library, throw error if needed +void close_lib(void* handle, const char* lib_path); + +void put_argment(gs::Encoder& encoder, const query::Argument& argment); + +template +bool get_scalar(YAML::Node node, const std::string& key, T& value) { + YAML::Node cur = node[key]; + if (cur && cur.IsScalar()) { + value = cur.as(); + return true; + } + return false; +} + +template +bool get_sequence(YAML::Node node, const std::string& key, + std::vector& seq) { + YAML::Node cur = node[key]; + if (cur && cur.IsSequence()) { + int num = cur.size(); + seq.clear(); + for (int i = 0; i < num; ++i) { + 
seq.push_back(cur[i].as()); + } + return true; + } + return false; +} +struct StoredProcedureMeta { + std::string name; + std::string path; +}; + +std::vector parse_stored_procedures( + const std::string& stored_procedure_yaml); +std::vector parse_from_multiple_yamls( + const std::vector& stored_procedure_yamls); + +enum class StoredProcedureType { + kCypher = 0, + kSut = 1, +}; + +// takes the graph store type and returns the created app as a void* ptr +typedef void* CreateAppT(gs::GraphStoreType); + +// takes the app as a void* ptr plus the graph store type, returns void +typedef void DeleteAppT(void*, gs::GraphStoreType); + +// the root interface of stored procedures +class BaseStoredProcedure { + public: + BaseStoredProcedure(int32_t procedure_id, std::string procedure_path) + : procedure_id_(procedure_id), + procedure_path_(procedure_path), + dl_handle_(nullptr) { + dl_handle_ = open_lib(procedure_path.c_str()); + CHECK(dl_handle_); + } + virtual ~BaseStoredProcedure() { + LOG(INFO) << "Destructing stored procedure" << ToString(); + } + virtual StoredProcedureType GetType() const = 0; + + virtual results::CollectiveResults Query(gs::Decoder& decoder) const = 0; + + virtual void delete_app() = 0; + + virtual std::string ToString() const { + std::stringstream ss; + ss << "StoredProcedure{" + << "procedure_id: " << procedure_id_ + << "}, {procedure_path: " << procedure_path_ << "}"; + return ss.str(); + } + + int32_t GetProcedureId() const { return procedure_id_; } + std::string GetProcedureName() const { return procedure_path_; } + + protected: + int32_t procedure_id_; + std::string procedure_path_; + void* dl_handle_; +}; + +template +class CypherStoredProcedure; + +// Create StoredProcedure +// Why do we extract the function here rather than put it in the class? +// To support ad-hoc query, and reuse code.
+ +std::shared_ptr create_stored_procedure_impl( + int32_t procedure_id, const std::string& procedure_path, int32_t shard_id); + +std::vector get_yaml_files(const std::string& plugin_dir); + +class StoredProcedureManager { + public: + static StoredProcedureManager& get(); + StoredProcedureManager() {} + + // expect multiple query.yaml under this directory. + void LoadFromPluginDir(const std::string& plugin_dir, int32_t shard_id) { + auto yaml_files = get_yaml_files(plugin_dir); + auto stored_procedures = parse_from_multiple_yamls(yaml_files); + CreateStoredProcedures(stored_procedures, shard_id); + } + + void LoadFromYaml(const std::string& stored_procedure_yaml, + int32_t shard_id) { + auto stored_procedures = parse_stored_procedures(stored_procedure_yaml); + CreateStoredProcedures(stored_procedures, shard_id); + } + + void CreateStoredProcedures( + const std::vector& stored_procedures, + int32_t shard_id) { + for (auto i = 0; i < stored_procedures.size(); ++i) { + stored_procedures_.emplace(stored_procedures[i].name, + server::create_stored_procedure_impl( + i, stored_procedures[i].path, shard_id)); + } + + LOG(INFO) << "Load [" << stored_procedures_.size() << "] stored procedures"; + } + + seastar::future Query( + const query::Query& query_pb) const { + auto query_name = query_pb.query_name().name(); + if (query_name.empty()) { + LOG(ERROR) << "Query name is empty"; + return seastar::make_exception_future( + std::runtime_error("Query name is empty")); + } + auto it = stored_procedures_.find(query_name); + if (it != stored_procedures_.end()) { + // create a decoder to decode the query + std::vector input_buffer; + gs::Encoder input_encoder(input_buffer); + auto& args = query_pb.arguments(); + for (auto i = 0; i < args.size(); ++i) { + auto& arg = args[i]; + LOG(INFO) << "Putting " << i << "th arg" << arg.DebugString(); + put_argment(input_encoder, arg); + } + LOG(INFO) << "Before running " << query_name; + gs::Decoder input_decoder(input_buffer.data(), 
input_buffer.size()); + auto result = it->second->Query(input_decoder); + return seastar::make_ready_future( + std::move(result)); + } else { + LOG(ERROR) << "No stored procedure with id: " << query_name; + return seastar::make_exception_future( + std::runtime_error("No stored procedure with id: " + query_name)); + } + } + + private: + std::unordered_map> + stored_procedures_; +}; + +// one stored procedure contains one dynamic lib, two function pointer +// one for create app, other for delete app; +template +class CypherStoredProcedure : public BaseStoredProcedure { + public: + static constexpr const char* CREATOR_APP_FUNC_NAME = "CreateApp"; + static constexpr const char* DELETER_APP_FUNC_NAME = "DeleteApp"; + + CypherStoredProcedure(int32_t procedure_id, std::string procedure_path, + const GRAPH_TYPE& graph, + gs::GraphStoreType graph_store_type) + : BaseStoredProcedure(procedure_id, procedure_path), + app_ptr_(nullptr), + create_app_ptr_(nullptr), + delete_app_ptr_(nullptr), + graph_(graph), + graph_store_type_(graph_store_type) { + // get the func_ptr we need for cypher query. 
+ create_app_ptr_ = reinterpret_cast(get_func_ptr( + procedure_path_.c_str(), dl_handle_, CREATOR_APP_FUNC_NAME)); + CHECK(create_app_ptr_); + delete_app_ptr_ = reinterpret_cast(get_func_ptr( + procedure_path_.c_str(), dl_handle_, DELETER_APP_FUNC_NAME)); + CHECK(delete_app_ptr_); + LOG(INFO) << "Successfully get cypher query function pointer"; + app_ptr_ = reinterpret_cast*>( + create_app_ptr_(graph_store_type_)); + CHECK(app_ptr_); + LOG(INFO) << "Successfully create app"; + } + + virtual ~CypherStoredProcedure() { + if (app_ptr_) { + delete_app(); + } + } + + StoredProcedureType GetType() const override { + return StoredProcedureType::kCypher; + } + + results::CollectiveResults Query(gs::Decoder& decoder) const override { + CHECK(app_ptr_); + LOG(INFO) << "Start to query with cypher stored procedure"; + return app_ptr_->Query(graph_, decoder); + } + + void delete_app() override { + LOG(INFO) << "Start to delete app"; + delete_app_ptr_(static_cast(app_ptr_), graph_store_type_); + LOG(INFO) << "Successfully delete app"; + } + + std::string ToString() const override { + std::stringstream ss; + ss << "CypherStoredProcedure{" + << "procedure_id: " << procedure_id_ + << "}, {procedure_path: " << procedure_path_ << "}"; + return ss.str(); + } + + private: + const GRAPH_TYPE& graph_; + gs::GraphStoreType graph_store_type_; + gs::HqpsAppBase* app_ptr_; + + // func ptr; + CreateAppT* create_app_ptr_; + DeleteAppT* delete_app_ptr_; +}; +} // namespace server + +#endif // ENGINES_HTTP_SERVER_STORED_PROCEDURE_H_ diff --git a/flex/engines/graph_db/server/types.h b/flex/engines/http_server/types.h similarity index 93% rename from flex/engines/graph_db/server/types.h rename to flex/engines/http_server/types.h index 3b4640b2260d..8c53a5f39337 100644 --- a/flex/engines/graph_db/server/types.h +++ b/flex/engines/http_server/types.h @@ -13,8 +13,8 @@ * limitations under the License. 
*/ -#ifndef SERVER_TYPES_ACT_H_ -#define SERVER_TYPES_ACT_H_ +#ifndef ENGINES_HTTP_SERVER_TYPES_H_ +#define ENGINES_HTTP_SERVER_TYPES_H_ #include #include @@ -55,4 +55,4 @@ using query_result = payload; } // namespace server -#endif // SERVER_TYPES_ACT_H_ +#endif // ENGINES_HTTP_SERVER_TYPES_H_ diff --git a/flex/grin b/flex/grin new file mode 160000 index 000000000000..bcafca761f7a --- /dev/null +++ b/flex/grin @@ -0,0 +1 @@ +Subproject commit bcafca761f7ac44f2fb20f599d8ce7c13daae3d9 diff --git a/flex/resources/hqps/CMakeLists.txt.template b/flex/resources/hqps/CMakeLists.txt.template new file mode 100644 index 000000000000..949f0c9f0036 --- /dev/null +++ b/flex/resources/hqps/CMakeLists.txt.template @@ -0,0 +1,44 @@ +############################################################################### +########### Generated by GraphScope Flex ######################## +############################################################################### +cmake_minimum_required(VERSION 3.5) + +include(FindPackageHandleStandardArgs) + +project(HighQpsCodeGen + VERSION 1.0 + LANGUAGES CXX) + +# the query name, +if (QUERY_NAME) + message("Query name ${QUERY_NAME}") +else() + message(FATAL_ERROR "QueryName not set") +endif() + +# the path where we can find graphscope headers +if (FLEX_INCLUDE_PREFIX) + message("flex header install dir ${FLEX_INCLUDE_PREFIX}") +else() + message(FATAL_ERROR "FLEX_INCLUDE_PREFIX not set") +endif() + +# the path where we can find graphscope libs. 
+if (FLEX_LIB_DIR) + message("flex lib install dir ${FLEX_LIB_DIR}") +else() + message(FATAL_ERROR "FLEX_LIB_DIR not set") +endif() +link_directories(${FLEX_LIB_DIR}) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -Wl,-rpath,$ORIGIN -O3 -flto -Werror=unused-result -fPIC -no-pie") + + +find_package(MPI REQUIRED) +include_directories(SYSTEM ${MPI_CXX_INCLUDE_PATH}) + +add_library(${QUERY_NAME} SHARED ${PROJECT_SOURCE_DIR}/${QUERY_NAME}.cc) +target_include_directories(${QUERY_NAME} PUBLIC ${FLEX_INCLUDE_PREFIX} ${FLEX_INCLUDE_PREFIX}/flex/build/engines/hqps_db/) +target_link_libraries(${QUERY_NAME} PUBLIC hqps_plan_proto flex_utils) \ No newline at end of file diff --git a/flex/resources/pegasus/benchmark/Cargo.toml b/flex/resources/pegasus/benchmark/Cargo.toml new file mode 100644 index 000000000000..03b8eec03c0b --- /dev/null +++ b/flex/resources/pegasus/benchmark/Cargo.toml @@ -0,0 +1,5 @@ +[workspace] +members = [ + "query", + "runner", +] \ No newline at end of file diff --git a/flex/resources/pegasus/benchmark/README.md b/flex/resources/pegasus/benchmark/README.md new file mode 100644 index 000000000000..4e805dc4c498 --- /dev/null +++ b/flex/resources/pegasus/benchmark/README.md @@ -0,0 +1,18 @@ +## Build Dynamic Lib +``` +# Create directory as input +cd gie-codegen/benchmark +mkdir code +cp ${rs_path} code + +# build dynamic lib +# path of dynamic lib query/target/release/libcodegen_queries.dylib +./build_codegen.sh -c=code +``` + +## Run Codegen Query +``` +# Build codegen runner +cd runner && cargo build --release && cd .. 
+RUST_LOG=debug CSR_PATH=${GRAPH_PATH} PARTITION_ID=0 runner/target/release/run_ldbc -w ${worker_num} -q ${query_file} -p -l query/target/release/libcodegen_queries.dylib +``` diff --git a/flex/resources/pegasus/benchmark/query/Cargo.toml b/flex/resources/pegasus/benchmark/query/Cargo.toml new file mode 100644 index 000000000000..55462b145976 --- /dev/null +++ b/flex/resources/pegasus/benchmark/query/Cargo.toml @@ -0,0 +1,35 @@ +[package] +name = "codegen_queries" +version = "0.1.0" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +pegasus = { path = "../../../../../interactive_engine/executor/engine/pegasus/pegasus" } +pegasus_common = { path = "../../../../../interactive_engine/executor/engine/pegasus/common" } +pegasus_network = { path = "../../../../../interactive_engine/executor/engine/pegasus/network" } +chrono = "0.4" +tonic = { version = "0.5", features = ["default", "compression"] } +prost = "0.8" +tokio = { version = "1.0", features = ["macros", "sync", "rt-multi-thread"] } +structopt = "0.3" +futures = "0.3.19" +lazy_static = "1.4.0" +log = "0.4" +rand = "0.8.5" +serde = { version = "1.0", features = ["derive"] } +itertools = "0.10.2" +mcsr = { path = "../../../../../interactive_engine/executor/store/mcsr" } + +[build-dependencies] +tonic-build = { version = "0.5", features = ["default", "compression"] } + +[features] +default = [] +gcip = [] + +[lib] +crate-type = ["dylib"] +name = "codegen_queries" +path = "src/lib.rs" \ No newline at end of file diff --git a/flex/resources/pegasus/benchmark/query/rustfmt.toml b/flex/resources/pegasus/benchmark/query/rustfmt.toml new file mode 100644 index 000000000000..7b5e8f756693 --- /dev/null +++ b/flex/resources/pegasus/benchmark/query/rustfmt.toml @@ -0,0 +1,9 @@ + +# Stable +edition = "2018" +unstable_features = true +chain_width = 48 +max_width = 108 +use_small_heuristics = "Max" +fn_args_layout = "Compressed" +group_imports =
"StdExternalCrate" diff --git a/flex/resources/pegasus/benchmark/query/src/lib.rs b/flex/resources/pegasus/benchmark/query/src/lib.rs new file mode 100644 index 000000000000..437960bf1939 --- /dev/null +++ b/flex/resources/pegasus/benchmark/query/src/lib.rs @@ -0,0 +1,23 @@ +// +//! Copyright 2020 Alibaba Group Holding Limited. +//! +//! Licensed under the Apache License, Version 2.0 (the "License"); +//! you may not use this file except in compliance with the License. +//! You may obtain a copy of the License at +//! +//! http://www.apache.org/licenses/LICENSE-2.0 +//! +//! Unless required by applicable law or agreed to in writing, software +//! distributed under the License is distributed on an "AS IS" BASIS, +//! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//! See the License for the specific language governing permissions and +//! limitations under the License. +#[macro_use] +extern crate log; +#[macro_use] +extern crate lazy_static; + +extern crate rand; + +pub mod queries; +pub mod utils; diff --git a/flex/resources/pegasus/benchmark/query/src/queries/mod.rs b/flex/resources/pegasus/benchmark/query/src/queries/mod.rs new file mode 100644 index 000000000000..a63e89c3890b --- /dev/null +++ b/flex/resources/pegasus/benchmark/query/src/queries/mod.rs @@ -0,0 +1,14 @@ +// +//! Copyright 2020 Alibaba Group Holding Limited. +//! +//! Licensed under the Apache License, Version 2.0 (the "License"); +//! you may not use this file except in compliance with the License. +//! You may obtain a copy of the License at +//! +//! http://www.apache.org/licenses/LICENSE-2.0 +//! +//! Unless required by applicable law or agreed to in writing, software +//! distributed under the License is distributed on an "AS IS" BASIS, +//! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//! See the License for the specific language governing permissions and +//! limitations under the License. 
diff --git a/flex/resources/pegasus/benchmark/query/src/utils/mod.rs b/flex/resources/pegasus/benchmark/query/src/utils/mod.rs new file mode 100644 index 000000000000..e47725876221 --- /dev/null +++ b/flex/resources/pegasus/benchmark/query/src/utils/mod.rs @@ -0,0 +1,25 @@ +// +//! Copyright 2020 Alibaba Group Holding Limited. +//! +//! Licensed under the Apache License, Version 2.0 (the "License"); +//! you may not use this file except in compliance with the License. +//! You may obtain a copy of the License at +//! +//! http://www.apache.org/licenses/LICENSE-2.0 +//! +//! Unless required by applicable law or agreed to in writing, software +//! distributed under the License is distributed on an "AS IS" BASIS, +//! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//! See the License for the specific language governing permissions and +//! limitations under the License. +pub fn get_partition(id: &u64, workers: usize, num_servers: usize) -> u64 { + let id_usize = *id as usize; + let magic_num = id_usize / num_servers; + // The partitioning logics is as follows: + // 1. `R = id - magic_num * num_servers = id % num_servers` routes a given id + // to the machine R that holds its data. + // 2. `R * workers` shifts the worker's id in the machine R. + // 3. `magic_num % workers` then picks up one of the workers in the machine R + // to do the computation. 
+ ((id_usize - magic_num * num_servers) * workers + magic_num % workers) as u64 +} \ No newline at end of file diff --git a/flex/resources/pegasus/benchmark/runner/Cargo.toml b/flex/resources/pegasus/benchmark/runner/Cargo.toml new file mode 100644 index 000000000000..c127cf685122 --- /dev/null +++ b/flex/resources/pegasus/benchmark/runner/Cargo.toml @@ -0,0 +1,40 @@ +[package] +name = "codegen-benchmark" +version = "0.1.0" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +pegasus = { path = "../../../../../interactive_engine/executor/engine/pegasus/pegasus" } +pegasus_common = { path = "../../../../../interactive_engine/executor/engine/pegasus/common" } +pegasus_network = { path = "../../../../../interactive_engine/executor/engine/pegasus/network" } +chrono = "0.4" +tonic = { version = "0.5", features = ["default", "compression"] } +prost = "0.8" +tokio = { version = "1.0", features = ["macros", "sync", "rt-multi-thread"] } +structopt = "0.3" +futures = "0.3.19" +lazy_static = "1.4.0" +libloading = "0.8" +dlopen = "0.1.8" +dlopen_derive = "0.1.4" +log = "0.4" +rand = "0.8.5" +serde = { version = "1.0", features = ["derive"] } +itertools = "0.10.2" +mcsr = { path = "../../../../../interactive_engine/executor/store/mcsr" } + +[build-dependencies] +tonic-build = { version = "0.5", features = ["default", "compression"] } + +[features] +default = [] +gcip = [] + +[lib] +name = "codegen_benchmark" +path = "src/lib.rs" + +[[bin]] +name = "run_ldbc" \ No newline at end of file diff --git a/flex/resources/pegasus/benchmark/runner/rustfmt.toml b/flex/resources/pegasus/benchmark/runner/rustfmt.toml new file mode 100644 index 000000000000..7b5e8f756693 --- /dev/null +++ b/flex/resources/pegasus/benchmark/runner/rustfmt.toml @@ -0,0 +1,9 @@ + +# Stable +edition = "2018" +unstable_features = true +chain_width = 48 +max_width = 108 +use_small_heuristics = "Max" +fn_args_layout = "Compressed" 
+group_imports = "StdExternalCrate" diff --git a/flex/resources/pegasus/benchmark/runner/src/bin/run_ldbc.rs b/flex/resources/pegasus/benchmark/runner/src/bin/run_ldbc.rs new file mode 100644 index 000000000000..51997940eaa0 --- /dev/null +++ b/flex/resources/pegasus/benchmark/runner/src/bin/run_ldbc.rs @@ -0,0 +1,159 @@ +// +//! Copyright 2020 Alibaba Group Holding Limited. +//! +//! Licensed under the Apache License, Version 2.0 (the "License"); +//! you may not use this file except in compliance with the License. +//! You may obtain a copy of the License at +//! +//! http://www.apache.org/licenses/LICENSE-2.0 +//! +//! Unless required by applicable law or agreed to in writing, software +//! distributed under the License is distributed on an "AS IS" BASIS, +//! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//! See the License for the specific language governing permissions and +//! limitations under the License. +extern crate dlopen; +#[macro_use] +extern crate dlopen_derive; + +use std::any::TypeId; +use std::collections::HashMap; +use std::fs::File; +use std::io::{self, BufRead}; +use std::ops::Add; +use std::path::PathBuf; +use std::time::Instant; + +use codegen_benchmark::queries; +use dlopen::wrapper::{Container, WrapperApi}; +use itertools::Itertools; +use mcsr::graph_db::GlobalCsrTrait; +use mcsr::graph_db_impl::{CsrDB, SingleSubGraph, SubGraph}; +use pegasus::api::*; +use pegasus::errors::BuildJobError; +use pegasus::result::ResultSink; +use pegasus::result::ResultStream; +use pegasus::{Configuration, JobConf, ServerConf}; +use pegasus_network::config::ServerAddr; +use serde::{Deserialize, Serialize}; +use structopt::StructOpt; + +#[derive(WrapperApi)] +struct QueryApi { + Query: fn( + conf: JobConf, + graph: &'static CsrDB, + input_params: Vec, + ) + -> Box, ResultSink) -> Result<(), BuildJobError>>, +} + +#[derive(Debug, Clone, StructOpt, Default)] +pub struct Config { + #[structopt(short = "m", long = "mode", default_value = 
"codegen")] + mode: String, + #[structopt(short = "w", long = "workers", default_value = "2")] + workers: u32, + #[structopt(short = "q", long = "query")] + query_path: String, + #[structopt(short = "p", long = "print")] + print_result: bool, + #[structopt(short = "s", long = "servers")] + servers: Option, + #[structopt(short = "l", long = "dylib")] + lib_path: String, +} + +fn main() { + pegasus_common::logs::init_log(); + + let config: Config = Config::from_args(); + + queries::graph::CSR.get_current_partition(); + + let mut server_conf = if let Some(ref servers) = config.servers { + let servers = std::fs::read_to_string(servers).unwrap(); + Configuration::parse(&servers).unwrap() + } else { + Configuration::singleton() + }; + + let mut servers = vec![]; + if let Some(network) = &server_conf.network { + for i in 0..network.servers_size { + servers.push(i as u64); + } + } + pegasus::startup(server_conf).ok(); + pegasus::wait_servers_ready(&ServerConf::All); + + let mut query_map = HashMap::new(); + let libs_path = config.lib_path; + let file = File::open(libs_path).unwrap(); + let lines = io::BufReader::new(file).lines(); + for line in lines { + let line = line.unwrap(); + let split = line.trim().split("|").collect::>(); + let query_name = split[0].clone().to_string(); + let lib_path = split[1].clone().to_string(); + let libc: Container = unsafe { Container::load(lib_path.clone()) } + .expect("Could not open library or load symbols"); + query_map.insert(query_name, libc); + } + + let query_start = Instant::now(); + if config.mode == "handwriting" { + let query_path = config.query_path; + let mut queries = vec![]; + let file = File::open(query_path).unwrap(); + let lines = io::BufReader::new(file).lines(); + for line in lines { + queries.push(line.unwrap()); + } + let mut index = 0i32; + for query in queries { + let split = query.trim().split("|").collect::>(); + let query_name = split[0].clone(); + let mut conf = JobConf::new(query_name.clone().to_owned() + "-" + 
&index.to_string()); + conf.set_workers(config.workers); + conf.reset_servers(ServerConf::Partial(servers.clone())); + match split[0] { + _ => println!("Unknown query"), + } + index += 1; + } + } else if config.mode == "codegen" { + let query_path = config.query_path; + let mut queries = vec![]; + let file = File::open(query_path).unwrap(); + let lines = io::BufReader::new(file).lines(); + for line in lines { + queries.push(line.unwrap()); + } + let mut index = 0i32; + for query in queries { + let split = query.trim().split("|").collect::>(); + let query_name = split[0].to_string(); + let mut input_params = vec![]; + for i in 1..split.len() { + input_params.push(split[i].to_string()); + } + println!("Start run query {}", query_name); + let mut conf = JobConf::new(query_name.clone().to_owned() + "-" + &index.to_string()); + conf.set_workers(config.workers); + conf.reset_servers(ServerConf::Partial(servers.clone())); + if let Some(libc) = query_map.get(&query_name) { + let result = pegasus::run(conf.clone(), || { + libc.Query(conf.clone(), &queries::graph::CSR, input_params.clone()) + }) + .expect("submit Query0 failure"); + for x in result { + println!("{:?}", x.unwrap()); + } + } + index += 1; + } + } + pegasus::shutdown_all(); + println!("Finished query, elapsed time: {:?}", query_start.elapsed()) +} diff --git a/flex/resources/pegasus/benchmark/runner/src/lib.rs b/flex/resources/pegasus/benchmark/runner/src/lib.rs new file mode 100644 index 000000000000..3058d155c7bd --- /dev/null +++ b/flex/resources/pegasus/benchmark/runner/src/lib.rs @@ -0,0 +1,24 @@ +// +//! Copyright 2020 Alibaba Group Holding Limited. +//! +//! Licensed under the Apache License, Version 2.0 (the "License"); +//! you may not use this file except in compliance with the License. +//! You may obtain a copy of the License at +//! +//! http://www.apache.org/licenses/LICENSE-2.0 +//! +//! Unless required by applicable law or agreed to in writing, software +//! 
distributed under the License is distributed on an "AS IS" BASIS, +//! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//! See the License for the specific language governing permissions and +//! limitations under the License. +#[macro_use] +extern crate log; +#[macro_use] +extern crate lazy_static; + +extern crate rand; + +pub mod queries; +pub use queries::graph::CSR; + diff --git a/flex/resources/pegasus/benchmark/runner/src/queries/graph.rs b/flex/resources/pegasus/benchmark/runner/src/queries/graph.rs new file mode 100644 index 000000000000..8005481ca21a --- /dev/null +++ b/flex/resources/pegasus/benchmark/runner/src/queries/graph.rs @@ -0,0 +1,56 @@ +// +//! Copyright 2020 Alibaba Group Holding Limited. +//! +//! Licensed under the Apache License, Version 2.0 (the "License"); +//! you may not use this file except in compliance with the License. +//! You may obtain a copy of the License at +//! +//! http://www.apache.org/licenses/LICENSE-2.0 +//! +//! Unless required by applicable law or agreed to in writing, software +//! distributed under the License is distributed on an "AS IS" BASIS, +//! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//! See the License for the specific language governing permissions and +//! limitations under the License. +extern crate chrono; + +use std::path::Path; +use std::sync::Arc; + +use chrono::offset::{TimeZone, Utc}; +use chrono::DateTime; +use mcsr::graph_db_impl::{CsrDB, SingleSubGraph, SubGraph}; +use pegasus::configure_with_default; + +lazy_static! 
{ + pub static ref CSR: CsrDB = _init_csr(); + pub static ref CSR_PATH: String = configure_with_default!(String, "CSR_PATH", "".to_string()); + pub static ref PARTITION_ID: usize = configure_with_default!(usize, "PARTITION_ID", 0); + pub static ref subgraph_2_3_2_in: SubGraph<'static, usize, usize> = + CSR.get_sub_graph(2, 3, 2, mcsr::graph::Direction::Incoming); + pub static ref subgraph_2_3_3_in: SubGraph<'static, usize, usize> = + CSR.get_sub_graph(3, 3, 2, mcsr::graph::Direction::Incoming); + pub static ref subgraph_7_14_6_in: SubGraph<'static, usize, usize> = + CSR.get_sub_graph(6, 14, 7, mcsr::graph::Direction::Incoming); + pub static ref subgraph_2_1_7_in: SubGraph<'static, usize, usize> = + CSR.get_sub_graph(7, 1, 2, mcsr::graph::Direction::Incoming); + pub static ref subgraph_3_1_7_in: SubGraph<'static, usize, usize> = + CSR.get_sub_graph(7, 1, 3, mcsr::graph::Direction::Incoming); +} + +fn _init_csr() -> CsrDB { + println!("Start load graph"); + CsrDB::deserialize(&*(CSR_PATH), *PARTITION_ID).unwrap() +} + +pub fn get_partition(id: &u64, workers: usize, num_servers: usize) -> u64 { + let id_usize = *id as usize; + let magic_num = id_usize / num_servers; + // The partitioning logics is as follows: + // 1. `R = id - magic_num * num_servers = id % num_servers` routes a given id + // to the machine R that holds its data. + // 2. `R * workers` shifts the worker's id in the machine R. + // 3. `magic_num % workers` then picks up one of the workers in the machine R + // to do the computation. + ((id_usize - magic_num * num_servers) * workers + magic_num % workers) as u64 +} diff --git a/flex/resources/pegasus/benchmark/runner/src/queries/mod.rs b/flex/resources/pegasus/benchmark/runner/src/queries/mod.rs new file mode 100644 index 000000000000..fa8ab0d9bc62 --- /dev/null +++ b/flex/resources/pegasus/benchmark/runner/src/queries/mod.rs @@ -0,0 +1,16 @@ +// +//! Copyright 2020 Alibaba Group Holding Limited. +//! +//! 
Licensed under the Apache License, Version 2.0 (the "License"); +//! you may not use this file except in compliance with the License. +//! You may obtain a copy of the License at +//! +//! http://www.apache.org/licenses/LICENSE-2.0 +//! +//! Unless required by applicable law or agreed to in writing, software +//! distributed under the License is distributed on an "AS IS" BASIS, +//! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//! See the License for the specific language governing permissions and +//! limitations under the License. +pub mod graph; +use graph::CSR; diff --git a/flex/resources/queries/ic/adhoc/ic11_adhoc.cypher b/flex/resources/queries/ic/adhoc/ic11_adhoc.cypher new file mode 100644 index 000000000000..90676b528c24 --- /dev/null +++ b/flex/resources/queries/ic/adhoc/ic11_adhoc.cypher @@ -0,0 +1,5 @@ +MATCH (p:PERSON {id: 6597069812321})-[:KNOWS*1..3]-(friend:PERSON)-[wa:WORKAT]->(com:ORGANISATION)-[:ISLOCATEDIN]->(:PLACE {name: "Papua_New_Guinea"}) +WHERE p <> friend and wa.workFrom < 2011 +with distinct friend as friend, com AS com, wa.workFrom as organizationWorkFromYear +ORDER BY organizationWorkFromYear ASC, friend.id ASC, com.name DESC LIMIT 10 +return friend.id AS personId, friend.firstName AS personFirstName, friend.lastName AS personLastName, com.name as organizationName, organizationWorkFromYear as organizationWorkFromYear \ No newline at end of file diff --git a/flex/resources/queries/ic/adhoc/ic12_adhoc.cypher b/flex/resources/queries/ic/adhoc/ic12_adhoc.cypher new file mode 100644 index 000000000000..ce60f0bd97fe --- /dev/null +++ b/flex/resources/queries/ic/adhoc/ic12_adhoc.cypher @@ -0,0 +1,4 @@ +MATCH (unused:PERSON {id: 8796093037034})-[:KNOWS]-(friend:PERSON)<-[:HASCREATOR]-(comments:COMMENT)- +[:REPLYOF]->(:POST)-[:HASTAG]->(tags:TAG)-[:HASTYPE]->(:TAGCLASS)-[:ISSUBCLASSOF*0..10]->(:TAGCLASS {name: "MartialArtist"}) + with friend AS friend, collect(DISTINCT tags.name) AS tagNames, count(DISTINCT 
comments) AS replyCount + ORDER BY replyCount DESC, friend.id ASC LIMIT 20 return friend.id AS personId, friend.firstName AS personFirstName, friend.lastName AS personLastName, tagNames, replyCount \ No newline at end of file diff --git a/flex/resources/queries/ic/adhoc/ic2_adhoc.cypher b/flex/resources/queries/ic/adhoc/ic2_adhoc.cypher new file mode 100644 index 000000000000..db846b9c3d92 --- /dev/null +++ b/flex/resources/queries/ic/adhoc/ic2_adhoc.cypher @@ -0,0 +1,4 @@ +MATCH (p :PERSON {id: 19791209300143})-[:KNOWS]-(friend:PERSON)<-[:HASCREATOR]-(message : POST | COMMENT) +WHERE message.creationDate < 1354060800000 WITH friend, message ORDER BY message.creationDate DESC, message.id ASC LIMIT 20 +return friend.id AS personId, friend.firstName AS personFirstName, friend.lastName AS personLastName, message.id AS postOrCommentId, +message.content AS content,message.imageFile AS imageFile,message.creationDate AS postOrCommentCreationDate; \ No newline at end of file diff --git a/flex/resources/queries/ic/adhoc/ic3_adhoc.cypher b/flex/resources/queries/ic/adhoc/ic3_adhoc.cypher new file mode 100644 index 000000000000..8d9314be51fe --- /dev/null +++ b/flex/resources/queries/ic/adhoc/ic3_adhoc.cypher @@ -0,0 +1,9 @@ +MATCH (countryX:PLACE {name: "Papua_New_Guinea"})<-[:ISLOCATEDIN]-(messageX : POST | COMMENT)-[:HASCREATOR]->(otherP:PERSON), +(countryY:PLACE {name: "Switzerland"})<-[:ISLOCATEDIN]-(messageY: POST | COMMENT)-[:HASCREATOR]->(otherP:PERSON), +(otherP:PERSON)-[:ISLOCATEDIN]->(city:PLACE)-[:ISPARTOF]->(countryCity:PLACE), +(p:PERSON {id:27493})-[:KNOWS*1..3]-(otherP:PERSON) +WHERE messageX.creationDate >= 1298937600000 and messageX.creationDate < 1301702400000 AND messageY.creationDate >= 1298937600000 +and messageY.creationDate < 1301702400000 AND countryCity.name <> "Papua_New_Guinea" and countryCity.name <> "Switzerland" +WITH otherP, count(messageX) as xCount, count(messageY) as yCount RETURN otherP.id as id,otherP.firstName as firstName, 
otherP.lastName as lastName, +xCount, yCount, xCount + yCount as total +ORDER BY total DESC, id ASC LIMIT 20; \ No newline at end of file diff --git a/flex/resources/queries/ic/adhoc/ic5_adhoc.cypher b/flex/resources/queries/ic/adhoc/ic5_adhoc.cypher new file mode 100644 index 000000000000..0b4572c98051 --- /dev/null +++ b/flex/resources/queries/ic/adhoc/ic5_adhoc.cypher @@ -0,0 +1,3 @@ +MATCH (p:PERSON {id: 40741})-[k:KNOWS*1..3]-(other:PERSON)<-[hasMem:HASMEMBER]-(f:FORUM), +(f:FORUM)-[:CONTAINEROF]->(po:POST)-[:HASCREATOR]->(other:PERSON) WHERE hasMem.joinDate > 1346889600000 +WITH f as f, count(distinct po) AS postCount ORDER BY postCount DESC, f.id ASC LIMIT 20 RETURN f.title as title, postCount \ No newline at end of file diff --git a/flex/resources/queries/ic/adhoc/ic6_adhoc.cypher b/flex/resources/queries/ic/adhoc/ic6_adhoc.cypher new file mode 100644 index 000000000000..a5598d9e7ea9 --- /dev/null +++ b/flex/resources/queries/ic/adhoc/ic6_adhoc.cypher @@ -0,0 +1,3 @@ +MATCH (p_:PERSON {id: 6597069812321})-[:KNOWS*1..3]-(other:PERSON)<-[:HASCREATOR]-(p:POST)-[:HASTAG]->(t:TAG {name: "William_Wordsworth"}), +(p:POST)-[:HASTAG]->(otherTag:TAG) WHERE otherTag <> t RETURN otherTag.name as name, count(distinct p) as postCnt +ORDER BY postCnt desc, name asc LIMIT 10 \ No newline at end of file diff --git a/flex/resources/queries/ic/adhoc/ic8_adhoc.cypher b/flex/resources/queries/ic/adhoc/ic8_adhoc.cypher new file mode 100644 index 000000000000..006e772fa1c7 --- /dev/null +++ b/flex/resources/queries/ic/adhoc/ic8_adhoc.cypher @@ -0,0 +1 @@ +MATCH(p:PERSON {id: 15393162801011}) <-[:HASCREATOR] -(msg : POST | COMMENT) <- [:REPLYOF] - (cmt: COMMENT) - [:HASCREATOR] -> (author : PERSON) with p, msg, cmt, author ORDER BY cmt.creationDate DESC, cmt.id ASC limit 20 return author.id, author.firstName, author.lastName, cmt.creationDate, cmt.id, cmt.content \ No newline at end of file diff --git a/flex/resources/queries/ic/adhoc/ic9_adhoc.cypher 
b/flex/resources/queries/ic/adhoc/ic9_adhoc.cypher new file mode 100644 index 000000000000..7bd39a894355 --- /dev/null +++ b/flex/resources/queries/ic/adhoc/ic9_adhoc.cypher @@ -0,0 +1,4 @@ +MATCH (p:PERSON {id: 2199023323088})-[:KNOWS*1..3]-(friend:PERSON)<-[:HASCREATOR]-(message : POST | COMMENT) +WHERE friend <> p and message.creationDate < 1333670400000 with friend,message ORDER BY message.creationDate DESC, message.id ASC LIMIT 20 +RETURN friend.id AS personId, friend.firstName AS personFirstName, friend.lastName AS personLastName, message.id AS commentOrPostId, +message.content AS messageContent, message.imageFile AS messageImageFile, message.creationDate AS commentOrPostCreationDate \ No newline at end of file diff --git a/flex/resources/queries/ic/stored_procedure/ic11_cypher.cypher b/flex/resources/queries/ic/stored_procedure/ic11_cypher.cypher new file mode 100644 index 000000000000..f83dbb6e0cab --- /dev/null +++ b/flex/resources/queries/ic/stored_procedure/ic11_cypher.cypher @@ -0,0 +1,4 @@ +MATCH (p:PERSON {id: $personId})-[:KNOWS*1..3]-(friend:PERSON)-[wa:workAt]->(com:organisation)-[:ISLOCATEDIN]->(:PLACE {name: $countryName}) +WHERE p <> friend and wa.workFrom < $workFromYear with distinct friend as friend, com AS com, wa.workFrom as organizationWorkFromYear +ORDER BY organizationWorkFromYear ASC, friend.id ASC, com.name DESC LIMIT 10 return friend.id AS personId, friend.firstName AS personFirstName, +friend.lastName AS personLastName, com.name as organizationName, organizationWorkFromYear as organizationWorkFromYear \ No newline at end of file diff --git a/flex/resources/queries/ic/stored_procedure/ic12_cypher.cypher b/flex/resources/queries/ic/stored_procedure/ic12_cypher.cypher new file mode 100644 index 000000000000..b2bf0425b073 --- /dev/null +++ b/flex/resources/queries/ic/stored_procedure/ic12_cypher.cypher @@ -0,0 +1,3 @@ +MATCH (unused:PERSON {id: 
$personId})-[:KNOWS]-(friend:PERSON)<-[:HASCREATOR]-(comments:COMMENT)-[:REPLYOF]->(:POST)-[:HASTAG]->(tags:TAG)-[:hasType]->(:TAGClass)-[:isSubclassOf*0..10]->(:TAGClass {name: $tagClassName}) + with friend AS friend, collect(DISTINCT tags.name) AS tagNames, count(DISTINCT comments) AS replyCount + ORDER BY replyCount DESC, friend.id ASC LIMIT 20 return friend.id AS personId, friend.firstName AS personFirstName, friend.lastName AS personLastName, tagNames, replyCount \ No newline at end of file diff --git a/flex/resources/queries/ic/stored_procedure/ic2_cypher.cypher b/flex/resources/queries/ic/stored_procedure/ic2_cypher.cypher new file mode 100644 index 000000000000..0d732175fe5a --- /dev/null +++ b/flex/resources/queries/ic/stored_procedure/ic2_cypher.cypher @@ -0,0 +1,4 @@ +MATCH (p :PERSON {id: $personId })-[:KNOWS]-(friend:PERSON)<-[:HASCREATOR]-(message : POST | COMMENT) +WHERE message.creationDate < $maxDate WITH friend, message ORDER BY message.creationDate DESC, message.id ASC +LIMIT 20 return friend.id AS personId, friend.firstName AS personFirstName, friend.lastName AS personLastName, message.id AS postOrCommentId, +message.content AS content,message.imageFile AS imageFile,message.creationDate AS postOrCommentCreationDate \ No newline at end of file diff --git a/flex/resources/queries/ic/stored_procedure/ic3_cypher.cypher b/flex/resources/queries/ic/stored_procedure/ic3_cypher.cypher new file mode 100644 index 000000000000..8dccc143a2b5 --- /dev/null +++ b/flex/resources/queries/ic/stored_procedure/ic3_cypher.cypher @@ -0,0 +1 @@ +MATCH (countryX:PLACE {name: $countryXName})<-[:ISLOCATEDIN]-(messageX : POST | COMMENT)-[:HASCREATOR]->(otherP:PERSON),(countryY:PLACE {name: $countryYName})<-[:ISLOCATEDIN]-(messageY: POST | COMMENT)-[:HASCREATOR]->(otherP:PERSON),(otherP:PERSON)-[:ISLOCATEDIN]->(city:PLACE)-[::ISPARTOF]->(countryCity:PLACE),(p:PERSON {id:$personId})-[:KNOWS*1..3]-(otherP:PERSON) WHERE messageX.creationDate >= $startDate and 
messageX.creationDate < $endDate AND messageY.creationDate >= $startDate and messageY.creationDate < $endDate AND countryCity.name <> $countryXName and countryCity.name <> $countryYName WITH otherP, count(messageX) as xCount, count(messageY) as yCount RETURN otherP.id as id,otherP.firstName as firstName, otherP.lastName as lastName, xCount, yCount, xCount + yCount as total ORDER BY total DESC, id ASC LIMIT 20 diff --git a/flex/resources/queries/ic/stored_procedure/ic5_cypher.cypher b/flex/resources/queries/ic/stored_procedure/ic5_cypher.cypher new file mode 100644 index 000000000000..f30503ba9f4f --- /dev/null +++ b/flex/resources/queries/ic/stored_procedure/ic5_cypher.cypher @@ -0,0 +1 @@ +MATCH (p:PERSON {id: $personId})-[k:KNOWS*1..3]-(other:PERSON)<-[hasMem:HASMEMBER]-(f:FORUM), (f:FORUM)-[:CONTAINEROF]->(po:POST)-[:HASCREATOR]->(other:PERSON) WHERE hasMem.joinDate > $minDate WITH f as f, count(distinct po) AS postCount ORDER BY postCount DESC, f.id ASC LIMIT 20 RETURN f.title as title, postCount \ No newline at end of file diff --git a/flex/resources/queries/ic/stored_procedure/ic6_cypher.cypher b/flex/resources/queries/ic/stored_procedure/ic6_cypher.cypher new file mode 100644 index 000000000000..2d91e813afa9 --- /dev/null +++ b/flex/resources/queries/ic/stored_procedure/ic6_cypher.cypher @@ -0,0 +1,2 @@ +MATCH (p_:PERSON {id:$personId})-[:KNOWS*1..3]-(other:PERSON)<-[:HASCREATOR]-(p:POST)-[:HASTAG]->(t:TAG {name:$tagName}),(p:POST)-[:HASTAG]->(otherTag:TAG) +WHERE otherTag <> t RETURN otherTag.name as name, count(distinct p) as postCnt ORDER BY postCnt desc, name asc LIMIT 10 \ No newline at end of file diff --git a/flex/resources/queries/ic/stored_procedure/ic8_cypher.cypher b/flex/resources/queries/ic/stored_procedure/ic8_cypher.cypher new file mode 100644 index 000000000000..1be53b901ec9 --- /dev/null +++ b/flex/resources/queries/ic/stored_procedure/ic8_cypher.cypher @@ -0,0 +1,2 @@ +MATCH(p:PERSON {id: $personId}) <-[:HASCREATOR] -(msg : POST | COMMENT) 
<- [:REPLYOF] - (cmt: COMMENT) - [:HASCREATOR] -> (author : PERSON) +with p, msg, cmt, author ORDER BY cmt.creationDate DESC, cmt.id ASC limit 20 return author.id, author.firstName, author.lastName, cmt.creationDate, cmt.id, cmt.content \ No newline at end of file diff --git a/flex/resources/queries/ic/stored_procedure/ic9_cypher.cypher b/flex/resources/queries/ic/stored_procedure/ic9_cypher.cypher new file mode 100644 index 000000000000..099127ad30c0 --- /dev/null +++ b/flex/resources/queries/ic/stored_procedure/ic9_cypher.cypher @@ -0,0 +1,2 @@ +MATCH (p:PERSON {id: $personId})-[:KNOWS*1..3]-(friend:PERSON)<-[:HASCREATOR]-(message : POST | COMMENT) +WHERE friend <> p and message.creationDate < $maxDate with friend,message ORDER BY message.creationDate DESC, message.id ASC LIMIT 20 RETURN friend.id AS personId, friend.firstName AS personFirstName, friend.lastName AS personLastName, message.id AS commentOrPostId, message.content AS messageContent, message.imageFile AS messageImageFile, message.creationDate AS commentOrPostCreationDate \ No newline at end of file diff --git a/flex/resources/queries/is/is1.cypher b/flex/resources/queries/is/is1.cypher new file mode 100644 index 000000000000..4d028567ac27 --- /dev/null +++ b/flex/resources/queries/is/is1.cypher @@ -0,0 +1 @@ +MATCH (p: PERSON { id: $personId}) - [:ISLOCATEDIN] ->(c : place) return p.firstName AS friendFirstName, p.lastName as friendLastName, p.birthday as personBirthday, p.locationIP as personLocationIP, p.browserUsed as personBrowserUsed, c.id AS cityID, p.gender as friendGender, p.creationDate AS personCreationDate \ No newline at end of file diff --git a/flex/resources/queries/is/is2.cypher b/flex/resources/queries/is/is2.cypher new file mode 100644 index 000000000000..22f2bc2a41ce --- /dev/null +++ b/flex/resources/queries/is/is2.cypher @@ -0,0 +1 @@ +MATCH (p: PERSON { id : $personId}) <- [:HASCREATOR] - (msg : POST | COMMENT) - [:REPLYOF*0..3] -> (po : POST) -[:HASCREATOR] -> (friend : PERSON) 
with msg AS msg, po AS po, friend AS friend ORDER By msg.creationDate DESC, msg.id DESC return msg.id as messageId, msg.content as messageContent, msg.imageFile as messageImageFile, msg.creationDate as messageCreationDate, po.id AS postId, friend.id AS personId, friend.firstName AS personFirstName, friend.lastName AS personLastName \ No newline at end of file diff --git a/flex/resources/queries/is/is3.cypher b/flex/resources/queries/is/is3.cypher new file mode 100644 index 000000000000..06e4dcaf9f30 --- /dev/null +++ b/flex/resources/queries/is/is3.cypher @@ -0,0 +1 @@ +MATCH (n:PERSON {id: $personId })-[r:KNOWS]-(friend:PERSON) RETURN friend.id AS personId, friend.firstName AS firstName, friend.lastName AS lastName, r.creationDate AS friendshipCreationDate ORDER BY friendshipCreationDate DESC, personId ASC \ No newline at end of file diff --git a/flex/resources/queries/is/is4.cypher b/flex/resources/queries/is/is4.cypher new file mode 100644 index 000000000000..43a30afd7cd6 --- /dev/null +++ b/flex/resources/queries/is/is4.cypher @@ -0,0 +1 @@ +MATCH (m: POST | COMMENT {id: $messageId }) RETURN m.creationDate as messageCreationDate, m.content AS messageContent, m.imageFile as messageImageFile \ No newline at end of file diff --git a/flex/resources/queries/is/is5.cypher b/flex/resources/queries/is/is5.cypher new file mode 100644 index 000000000000..fda3c79fe173 --- /dev/null +++ b/flex/resources/queries/is/is5.cypher @@ -0,0 +1 @@ +MATCH(p : POST | COMMENT {id : $messageId}) - [:HASCREATOR] -> (friend : PERSON) return friend.id as peronId, friend.firstName as personFirstName, friend.lastName As personLastName \ No newline at end of file diff --git a/flex/resources/queries/is/is6.cypher b/flex/resources/queries/is/is6.cypher new file mode 100644 index 000000000000..f4e49c0e3290 --- /dev/null +++ b/flex/resources/queries/is/is6.cypher @@ -0,0 +1 @@ +MATCH( msg : POST | COMMENT {id: $messageId })- [:REPLYOF*0..3] -> (po : POST) <- [:CONTAINEROF] - (f : forum) - 
[:hasModerator] -> (mod : PERSON) return f.id as forumId, f.title as forumTitle, mod.id as moderatorId, mod.firstName as moderatorFirstName, mod.lastName as moderatorLastName \ No newline at end of file diff --git a/flex/resources/queries/is/is7.cypher b/flex/resources/queries/is/is7.cypher new file mode 100644 index 000000000000..bfc2798b1ca6 --- /dev/null +++ b/flex/resources/queries/is/is7.cypher @@ -0,0 +1 @@ +MATCH( msg : POST | COMMENT { id: $messageId}) <- [:REPLYOF] - (com : COMMENT) - [:HASCREATOR] -> (replyAuthor : PERSON), (msg) - [:HASCREATOR] -> (otherP: PERSON) - [:KNOWS] - (replyAuthor) return replyAuthor \ No newline at end of file diff --git a/flex/scripts/install_dependencies.sh b/flex/scripts/install_dependencies.sh index 43268ccc1110..1d72c6f9e860 100644 --- a/flex/scripts/install_dependencies.sh +++ b/flex/scripts/install_dependencies.sh @@ -10,7 +10,7 @@ apt install -y xfslibs-dev libgnutls28-dev liblz4-dev maven openssl pkg-config \ git clone https://github.com/alibaba/libgrape-lite.git cd libgrape-lite -git checkout 976544ef7a9777ed93088459638ff87154e2109d +git checkout v0.3.2 mkdir build && cd build && cmake .. 
make -j && make install cp /usr/local/lib/libgrape-lite.so /usr/lib/libgrape-lite.so diff --git a/flex/storages/rt_mutable_graph/modern_graph/person_created_software.csv b/flex/storages/rt_mutable_graph/modern_graph/person_created_software.csv index 2b1d262322b5..248331e15956 100644 --- a/flex/storages/rt_mutable_graph/modern_graph/person_created_software.csv +++ b/flex/storages/rt_mutable_graph/modern_graph/person_created_software.csv @@ -2,4 +2,4 @@ person.id|software.id|weight 1|3|0.4 4|3|0.4 6|3|0.2 -4|5|1.0 \ No newline at end of file +2|5|1.0 \ No newline at end of file diff --git a/flex/storages/rt_mutable_graph/mutable_csr.cc b/flex/storages/rt_mutable_graph/mutable_csr.cc index c499453eb5db..8b84f9fd4461 100644 --- a/flex/storages/rt_mutable_graph/mutable_csr.cc +++ b/flex/storages/rt_mutable_graph/mutable_csr.cc @@ -170,4 +170,7 @@ template class MutableCsr; template class SingleMutableCsr; template class MutableCsr; +template class SingleMutableCsr; +template class MutableCsr; + } // namespace gs diff --git a/flex/storages/rt_mutable_graph/mutable_csr.h b/flex/storages/rt_mutable_graph/mutable_csr.h index 3b172824742b..5fa6b245103a 100644 --- a/flex/storages/rt_mutable_graph/mutable_csr.h +++ b/flex/storages/rt_mutable_graph/mutable_csr.h @@ -34,7 +34,9 @@ template struct MutableNbr { MutableNbr() = default; MutableNbr(const MutableNbr& rhs) - : neighbor(rhs.neighbor), timestamp(rhs.timestamp.load()), data(data) {} + : neighbor(rhs.neighbor), + timestamp(rhs.timestamp.load()), + data(rhs.data) {} ~MutableNbr() = default; vid_t neighbor; @@ -317,6 +319,8 @@ class MutableCsrBase { virtual std::shared_ptr edge_iter( vid_t v) const = 0; + virtual MutableCsrConstEdgeIterBase* edge_iter_raw(vid_t v) const = 0; + virtual std::shared_ptr edge_iter_mut(vid_t v) = 0; }; @@ -472,6 +476,9 @@ class MutableCsr : public TypedMutableCsrBase { get_edges(v)); } + MutableCsrConstEdgeIterBase* edge_iter_raw(vid_t v) const override { + return new 
TypedMutableCsrConstEdgeIter(get_edges(v)); + } std::shared_ptr edge_iter_mut(vid_t v) override { return std::make_shared>(get_edges_mut(v)); } @@ -576,6 +583,10 @@ class MutableCsr : public TypedMutableCsrBase { get_edges(v)); } + MutableCsrConstEdgeIterBase* edge_iter_raw(vid_t v) const override { + return new TypedMutableCsrConstEdgeIter(get_edges(v)); + } + std::shared_ptr edge_iter_mut(vid_t v) override { return std::make_shared>( get_edges_mut(v)); @@ -680,6 +691,10 @@ class SingleMutableCsr : public TypedMutableCsrBase { get_edges(v)); } + MutableCsrConstEdgeIterBase* edge_iter_raw(vid_t v) const override { + return new TypedMutableCsrConstEdgeIter(get_edges(v)); + } + std::shared_ptr edge_iter_mut(vid_t v) override { return std::make_shared>(get_edges_mut(v)); } diff --git a/flex/storages/rt_mutable_graph/mutable_property_fragment.cc b/flex/storages/rt_mutable_graph/mutable_property_fragment.cc index 5b4a1350365e..ca716c2802d5 100644 --- a/flex/storages/rt_mutable_graph/mutable_property_fragment.cc +++ b/flex/storages/rt_mutable_graph/mutable_property_fragment.cc @@ -46,6 +46,7 @@ void MutablePropertyFragment::initVertices( auto& table = vertex_data_[v_label_i]; auto& property_types = schema_.get_vertex_properties(v_label_name); size_t col_num = property_types.size(); + std::vector col_names; for (size_t col_i = 0; col_i < col_num; ++col_i) { col_names.push_back("col_" + std::to_string(col_i)); @@ -96,7 +97,10 @@ class EmptyCsr : public TypedMutableCsrBase { return std::make_shared>( MutableNbrSlice::empty()); } - + MutableCsrConstEdgeIterBase* edge_iter_raw(vid_t v) const override { + return new TypedMutableCsrConstEdgeIter( + MutableNbrSlice::empty()); + } std::shared_ptr edge_iter_mut(vid_t v) override { return std::make_shared>( MutableNbrSliceMut::empty()); @@ -272,6 +276,17 @@ void MutablePropertyFragment::initEdges( } else { LOG(FATAL) << "Unsupported edge property type."; } + } else if (property_types[0] == PropertyType::kDouble) { + if 
(filenames.empty()) { + std::tie(ie_[index], oe_[index]) = + construct_empty_csr(ie_strtagy, oe_strtagy); + } else { + std::tie(ie_[index], oe_[index]) = construct_csr( + filenames, property_types, ie_strtagy, oe_strtagy, + lf_indexers_[src_label_i], lf_indexers_[dst_label_i]); + + // LOG(FATAL) << "Unsupported edge property type."; + } } else { LOG(FATAL) << "Unsupported edge property type."; } @@ -575,6 +590,20 @@ MutablePropertyFragment::get_incoming_edges(label_t label, vid_t u, return ie_[index]->edge_iter(u); } +MutableCsrConstEdgeIterBase* MutablePropertyFragment::get_outgoing_edges_raw( + label_t label, vid_t u, label_t neighbor_label, label_t edge_label) const { + size_t index = label * vertex_label_num_ * edge_label_num_ + + neighbor_label * edge_label_num_ + edge_label; + return oe_[index]->edge_iter_raw(u); +} + +MutableCsrConstEdgeIterBase* MutablePropertyFragment::get_incoming_edges_raw( + label_t label, vid_t u, label_t neighbor_label, label_t edge_label) const { + size_t index = neighbor_label * vertex_label_num_ * edge_label_num_ + + label * edge_label_num_ + edge_label; + return ie_[index]->edge_iter_raw(u); +} + std::shared_ptr MutablePropertyFragment::get_outgoing_edges_mut(label_t label, vid_t u, label_t neighbor_label, diff --git a/flex/storages/rt_mutable_graph/mutable_property_fragment.h b/flex/storages/rt_mutable_graph/mutable_property_fragment.h index b275c301d6fb..3b3454b97c97 100644 --- a/flex/storages/rt_mutable_graph/mutable_property_fragment.h +++ b/flex/storages/rt_mutable_graph/mutable_property_fragment.h @@ -65,7 +65,6 @@ class MutablePropertyFragment { oid_t get_oid(label_t label, vid_t lid) const; vid_t add_vertex(label_t label, oid_t id); - std::shared_ptr get_outgoing_edges( label_t label, vid_t u, label_t neighbor_label, label_t edge_label) const; @@ -78,6 +77,14 @@ class MutablePropertyFragment { std::shared_ptr get_incoming_edges_mut( label_t label, vid_t u, label_t neighbor_label, label_t edge_label); + 
MutableCsrConstEdgeIterBase* get_outgoing_edges_raw(label_t label, vid_t u, + label_t neighbor_label, + label_t edge_label) const; + + MutableCsrConstEdgeIterBase* get_incoming_edges_raw(label_t label, vid_t u, + label_t neighbor_label, + label_t edge_label) const; + MutableCsrBase* get_oe_csr(label_t label, label_t neighbor_label, label_t edge_label); diff --git a/flex/storages/rt_mutable_graph/schema.cc b/flex/storages/rt_mutable_graph/schema.cc index be8b4f0473dd..c17c78f5d50d 100644 --- a/flex/storages/rt_mutable_graph/schema.cc +++ b/flex/storages/rt_mutable_graph/schema.cc @@ -359,6 +359,8 @@ static PropertyType StringToPropertyType(const std::string& str) { return PropertyType::kEmpty; } else if (str == "int64") { return PropertyType::kInt64; + } else if (str == "double") { + return PropertyType::kDouble; } else { return PropertyType::kEmpty; } @@ -543,6 +545,29 @@ static bool parse_edges_schema(YAML::Node node, Schema& schema) { return true; } +static bool access_file(std::string& file_path) { + if (file_path.size() == 0) { + return false; + } + if (file_path[0] == '/') { + std::filesystem::path path(file_path); + return std::filesystem::exists(path); + } + char* flex_data_dir = std::getenv("FLEX_DATA_DIR"); + if (flex_data_dir != NULL) { + auto temp = std::string(flex_data_dir) + "/" + file_path; + std::filesystem::path path(temp); + if (std::filesystem::exists(path)) { + file_path = temp; + return true; + } + } + file_path = + std::filesystem::current_path().generic_string() + "/" + file_path; + std::filesystem::path path(file_path); + return std::filesystem::exists(path); +} + static bool parse_vertex_files( YAML::Node node, std::vector>& files) { std::string label_name; @@ -569,10 +594,10 @@ static bool parse_vertex_files( if (!get_scalar(files_node[i], "path", file_path)) { return false; } - std::filesystem::path path(file_path); - if (!std::filesystem::exists(path)) { + if (!access_file(file_path)) { LOG(ERROR) << "vertex file - " << file_path << " 
file not found..."; } + std::filesystem::path path(file_path); files.emplace_back(label_name, std::filesystem::canonical(path)); } return true; @@ -630,10 +655,10 @@ static bool parse_edge_files( if (!get_scalar(files_node[i], "path", file_path)) { return false; } - std::filesystem::path path(file_path); - if (!std::filesystem::exists(path)) { + if (!access_file(file_path)) { LOG(ERROR) << "edge file - " << file_path << " file not found..."; } + std::filesystem::path path(file_path); files.emplace_back(src_label, dst_label, edge_label, std::filesystem::canonical(path)); } diff --git a/flex/tests/CMakeLists.txt b/flex/tests/CMakeLists.txt new file mode 100644 index 000000000000..93eea8dcdd09 --- /dev/null +++ b/flex/tests/CMakeLists.txt @@ -0,0 +1,2 @@ + +add_subdirectory(hqps) \ No newline at end of file diff --git a/flex/tests/hqps/CMakeLists.txt b/flex/tests/hqps/CMakeLists.txt new file mode 100644 index 000000000000..31e072f2bb65 --- /dev/null +++ b/flex/tests/hqps/CMakeLists.txt @@ -0,0 +1,12 @@ + +# file(GLOB_RECURSE GS_TEST_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.cc") +file(GLOB GS_TEST_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/*.cc") + + +foreach(f ${GS_TEST_FILES}) + string(REGEX MATCH "^(.*)\\.[^.]*$" dummy ${f}) + set(T_NAME ${CMAKE_MATCH_1}) + message(STATUS "Found graphscope test - " ${T_NAME}) + add_executable(${T_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/${T_NAME}.cc ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/app_utils.cc) + target_link_libraries(${T_NAME} hqps_plan_proto flex_rt_mutable_graph flex_graph_db ${GLOG_LIBRARIES} ${LIBGRAPELITE_LIBRARIES}) +endforeach() diff --git a/flex/tests/hqps/context_test.cc b/flex/tests/hqps/context_test.cc new file mode 100644 index 000000000000..2383e050e6bc --- /dev/null +++ b/flex/tests/hqps/context_test.cc @@ -0,0 +1,452 @@ +#include +#include +#include + +#include "flex/engines/hqps_db/core/context.h" +#include "flex/engines/hqps_db/core/params.h" +#include 
"flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/two_label_vertex_set.h" +#include "flex/storages/rt_mutable_graph/types.h" + +#include "flex/engines/hqps_db/core/base_engine.h" + +#include "flex/engines/hqps_db/core/operator/project.h" +#include "flex/engines/hqps_db/database/mutable_csr_interface.h" +#include "flex/storages/rt_mutable_graph/types.h" +#include "flex/utils/property/column.h" +#include "glog/logging.h" +#include "grape/types.h" + +struct VertexSetTest {}; +struct EdgeSetTest {}; + +struct TmpComparator { + inline bool operator()( + const std::tuple& left, + const std::tuple& right) const { + if (std::get<0>(left) < std::get<0>(right)) { + return true; + } + if (std::get<0>(left) > std::get<0>(right)) { + return false; + } + if (std::get<1>(left) > std::get<1>(right)) { + return true; + } + if (std::get<1>(left) < std::get<1>(right)) { + return false; + } + return true; + } +}; + +using offset_t = gs::offset_t; + +auto make_vertex_set_a() { + std::vector vids{0, 1}; + std::vector> datas{std::make_tuple(1, 1.0), + std::make_tuple(2, 2.0)}; + auto res = gs::make_row_vertex_set(std::move(vids), "0", std::move(datas), + {"a", "b"}); + return res; +} + +auto make_vertex_set_b() { + std::vector vids{2, 3, 4, 5}; + std::vector> datas{ + std::make_tuple(2, 2.0), std::make_tuple(3, 3.0), std::make_tuple(4, 4.0), + std::make_tuple(5, 5.0)}; + auto res = gs::make_row_vertex_set(std::move(vids), "0", std::move(datas), + {"a", "b"}); + return res; +} + +auto make_vertex_set_c() { + std::vector vids{0, 1, 4}; + auto res = gs::MakeDefaultRowVertexSet(std::move(vids), "0"); + return res; +} + +auto make_vertex_set_d() { + std::vector vids{2, 3, 1, 0}; + auto res = gs::MakeDefaultRowVertexSet(std::move(vids), "0"); + return res; +} + +// 0,2,6; +// 0,3,7 +// 1,4,8 +// 1,5,9 +// 1,5,10 +auto make_sample_context() {} + +int main() { + // test join + + LOG(INFO) << "start join test"; + auto 
vertex_set_a = make_vertex_set_c(); + auto ctx_a = gs::DefaultContext( + std::move(vertex_set_a)); + + auto vertex_set_b = make_vertex_set_d(); + auto ctx_b = gs::DefaultContext( + std::move(vertex_set_b)); + + auto ctx_c = gs::BaseEngine::template Join<-1, -1, gs::JoinKind::InnerJoin>( + std::move(ctx_a), std::move(ctx_b)); + for (auto iter : ctx_c) { + VLOG(10) << gs::to_string(iter.GetAllElement()); + } + LOG(INFO) << "Finish join test"; + + { + // test two label set + std::vector vec{1, 2, 3, 4, 5, 6}; + std::array labels{"a", "b"}; + grape::Bitset set; + set.init(6); + { + set.set_bit(0); + set.set_bit(2); + set.set_bit(3); + set.set_bit(5); + } + auto two_label_set = gs::make_two_label_set( + std::move(vec), std::move(labels), std::move(set)); + { + size_t cnt = 1; + for (auto iter : two_label_set) { + CHECK(iter.GetElement().second == cnt); + cnt += 1; + } + } + LOG(INFO) << "Finish two label test"; + } + + { + std::srand((unsigned) time(NULL)); + size_t limit = 300000; + std::vector indices; + indices.reserve(limit); + unsigned int tmp; + for (auto i = 0; i < limit; ++i) { + rand_r(&tmp); + indices.emplace_back((int32_t) tmp % limit); + } + auto col1 = + std::make_shared>(gs::StorageStrategy::kMem); + + col1->init(limit); + for (int i = 0; i < limit; ++i) { + col1->set_value(i, (int32_t)(i)); + } + { + int32_t res = 0; + double t0 = -grape::GetCurrentTime(); + for (auto j = 0; j < 10; ++j) { + for (auto ind : indices) { + res += col1->get_view(ind); + } + } + t0 += grape::GetCurrentTime(); + + double t1 = -grape::GetCurrentTime(); + res = 0; + for (auto ind : indices) { + res += col1->get_view(ind); + } + t1 += grape::GetCurrentTime(); + LOG(INFO) << "ptr visit cost: " << t1 << " ,warm up took: " << t0 + << ", res: " << res; + } + + { + auto ref_col = std::make_shared>(*col1); + gs::mutable_csr_graph_impl::SinglePropGetter getter(ref_col); + double t1 = -grape::GetCurrentTime(); + int32_t res = 0; + for (auto ind : indices) { + res += getter.get_view(ind); 
+ } + t1 += grape::GetCurrentTime(); + LOG(INFO) << "no get tuple visit cost: " << t1 << ", res: " << res; + } + } + + { + std::srand((unsigned) time(NULL)); + size_t limit = 300000; + std::vector indices; + indices.reserve(limit); + unsigned int tmp; + for (auto i = 0; i < limit; ++i) { + rand_r(&tmp); + indices.emplace_back(tmp % limit); + } + auto col1 = + std::make_shared>(gs::StorageStrategy::kMem); + auto col2 = + std::make_shared>(gs::StorageStrategy::kMem); + + col1->init(limit); + col2->init(limit); + for (int i = 0; i < limit; ++i) { + col1->set_value(i, (int32_t)(i)); + col2->set_value(i, (int64_t) i); + } + { + int32_t res = 0; + int64_t res0 = 0; + double t0 = -grape::GetCurrentTime(); + for (auto j = 0; j < 10; ++j) { + for (auto ind : indices) { + res += col1->get_view(ind); + res0 += col2->get_view(ind); + } + } + t0 += grape::GetCurrentTime(); + + double t1 = -grape::GetCurrentTime(); + res = 0; + res0 = 0; + for (auto ind : indices) { + res += col1->get_view(ind); + res0 += col2->get_view(ind); + } + t1 += grape::GetCurrentTime(); + LOG(INFO) << "ptr visit cost: " << t1 << " ,warm up took: " << t0 + << ", res: " << res; + } + + { + auto ref_col1 = std::make_shared>(*col1); + auto ref_col2 = std::make_shared>(*col2); + gs::mutable_csr_graph_impl::SinglePropGetter getter1(ref_col1); + gs::mutable_csr_graph_impl::SinglePropGetter getter2(ref_col2); + + double t1 = -grape::GetCurrentTime(); + int32_t res = 0; + int64_t res0 = 0; + for (auto ind : indices) { + res += getter1.get_view(ind); + res0 += getter2.get_view(ind); + auto tuple = std::tuple{res, res0}; + } + t1 += grape::GetCurrentTime(); + LOG(INFO) << "bench1 cost: " << t1 << ", res: " << res; + } + + { + std::vector> vids; + std::vector> nbrs; + vids.resize(300); + nbrs.resize(300); + { + double t0 = -grape::GetCurrentTime(); + for (auto i = 0; i < 300; ++i) { + for (auto j = 0; j < 1000; ++j) { + vids[i].emplace_back(j); + } + } + t0 += grape::GetCurrentTime(); + LOG(INFO) << "emplacing vec: 
" << t0; + double t1 = -grape::GetCurrentTime(); + int64_t tmp = 0; + for (auto vid_vec : vids) { + for (auto vid : vid_vec) { + tmp += vid; + } + } + t1 += grape::GetCurrentTime(); + LOG(INFO) << "visiting 2d vec cost: " << t1 << ", res" << tmp; + } + { + double t0 = -grape::GetCurrentTime(); + for (auto i = 0; i < 300; ++i) { + for (auto j = 0; j < 1000; ++j) { + nbrs[i].emplace_back(gs::mutable_csr_graph_impl::Nbr(j)); + } + } + t0 += grape::GetCurrentTime(); + LOG(INFO) << "emplacing nbr: " << t0; + double t1 = -grape::GetCurrentTime(); + int64_t tmp = 0; + for (int i = 0; i < nbrs.size(); ++i) { + for (auto nbr : nbrs[i]) { + tmp += nbr.neighbor(); + } + } + t1 += grape::GetCurrentTime(); + LOG(INFO) << "visiting nbrs cost: " << t1 << ", res" << tmp; + } + } + + { + size_t limit = 300000; + auto col1 = + std::make_shared>(gs::StorageStrategy::kMem); + auto col2 = + std::make_shared>(gs::StorageStrategy::kMem); + + col1->init(limit); + col2->init(limit); + for (int i = 0; i < limit; i += 2) { + col1->set_value(i, (int32_t)(i)); + } + for (int i = 1; i < limit; i += 2) { + col2->set_value(i, (int32_t)(i)); + } + // test two label vertex set prop getter. 
+ auto ref_col1 = std::make_shared>(*col1); + auto ref_col2 = std::make_shared>(*col2); + gs::mutable_csr_graph_impl::SinglePropGetter getter1(ref_col1); + gs::mutable_csr_graph_impl::SinglePropGetter getter2(ref_col2); + std::array, 2> + array{getter1, getter2}; + + // generate index ele + std::vector> index_eles; + index_eles.reserve(limit); + for (auto i = 0; i < limit; ++i) { + if (i % 2 == 0) { + index_eles.emplace_back(std::make_tuple(0, i)); + } else { + index_eles.emplace_back(std::make_tuple(1, i)); + } + } + + { + double t1 = -grape::GetCurrentTime(); + int32_t res = 0; + for (auto ind : index_eles) { + res += array[std::get<0>(ind)].get_view(std::get<1>(ind)); + } + t1 += grape::GetCurrentTime(); + LOG(INFO) << "get from two label set cost: " << t1 << ", res: " << res; + } + + { + // more locality + grape::Bitset bitset; + bitset.init(limit); + for (auto i = 0; i < limit; ++i) { + if (i % 2 == 0) { + bitset.set_bit(i); + } + } + double t1 = -grape::GetCurrentTime(); + int32_t res = 0; + + auto& first = array[0]; + for (auto i = 0; i < limit; ++i) { + if (bitset.get_bit(i)) { + res += first.get_view(std::get<1>(index_eles[i])); + } + } + auto& second = array[1]; + for (auto i = 0; i < limit; ++i) { + if (!bitset.get_bit(i)) { + res += second.get_view(std::get<1>(index_eles[i])); + } + } + t1 += grape::GetCurrentTime(); + LOG(INFO) << "get with locality cost: " << t1 << ", res: " << res; + } + } + + { + using sort_tuple_t = std::tuple; + TmpComparator sorter; + + size_t limit = 100000; + std::vector vec0; + std::vector vec1; + std::vector vec2; + { + vec0.reserve(limit); + vec1.reserve(limit); + vec2.reserve(limit); + { + unsigned int tmp; + for (auto i = 0; i < limit; ++i) { + rand_r(&tmp); + vec0.emplace_back(tmp % limit); + } + } + { + unsigned int tmp; + for (auto i = 0; i < limit; ++i) { + rand_r(&tmp); + vec1.emplace_back((int64_t) tmp % limit); + } + } + { + unsigned int tmp; + for (auto i = 0; i < limit; ++i) { + rand_r(&tmp); + 
vec2.emplace_back((double) (tmp % limit)); + } + } + } + + { + std::priority_queue, + TmpComparator> + pq(sorter); + double t1 = -grape::GetCurrentTime(); + for (auto i = 0; i < limit; ++i) { + if (pq.size() < 20) { + pq.emplace(vec0[i], vec1[i], vec2[i]); + } else { + auto tuple = std::make_tuple(vec0[i], vec1[i], vec2[i]); + if (sorter(tuple, pq.top())) { + pq.pop(); + pq.emplace(tuple); + } + } + } + t1 += grape::GetCurrentTime(); + LOG(INFO) << " emplace tuple: " << t1; + } + + { + std::priority_queue, + TmpComparator> + pq(sorter); + double t1 = -grape::GetCurrentTime(); + // int32_t first; + // int64_t second; + // double third; + sort_tuple_t empty_tuple; + sort_tuple_t& top_tuple = empty_tuple; + for (auto i = 0; i < limit; ++i) { + if (pq.size() < 20) { + auto tuple = std::make_tuple(vec0[i], vec1[i], vec2[i]); + pq.emplace(std::move(tuple)); + top_tuple = pq.top(); + } else { + if (vec0[0] < std::get<0>(top_tuple) && + vec1[1] > std::get<1>(top_tuple)) { + pq.pop(); + pq.emplace(std::make_tuple(vec0[i], vec1[i], vec2[i])); + top_tuple = pq.top(); + } + } + } + t1 += grape::GetCurrentTime(); + LOG(INFO) << " emplace tuple: " << t1; + } + } + } + { + using result_t = typename gs::ResultContextTImpl< + 1, gs::RowVertexSetImpl, 0, + gs::RowVertexSetImpl, 0, + std::tuple, + gs::Dummy<1, gs::RowVertexSetImpl, + 0, gs::RowVertexSetImpl, + 0, grape::EmptyType>>::result_t; + } + + LOG(INFO) << "Finish context test."; +} diff --git a/flex/tests/hqps/dedup_test.cc b/flex/tests/hqps/dedup_test.cc new file mode 100644 index 000000000000..4a8b63621c8f --- /dev/null +++ b/flex/tests/hqps/dedup_test.cc @@ -0,0 +1,80 @@ +#include +#include +#include "flex/engines/hqps_db/core/context.h" +#include "flex/engines/hqps_db/core/params.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h" +#include "flex/storages/rt_mutable_graph/types.h" + +#include "flex/engines/hqps_db/core/operator/project.h" +#include 
"flex/engines/hqps_db/structures/multi_vertex_set/multi_label_vertex_set.h" +#include "flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h" + +#include "flex/engines/hqps_db/database/mutable_csr_interface.h" + +#include "grape/types.h" + +#include "glog/logging.h" + +struct VertexSetTest {}; +struct EdgeSetTest {}; + +using offset_t = gs::offset_t; + +auto make_sample_context() {} + +namespace gs { +void work() { + using GI = gs::MutableCSRInterface; + using vertex_id_t = typename GI::vertex_id_t; + std::vector vids{0}; + auto set_a = gs::MakeDefaultRowVertexSet(std::move(vids), "a"); + gs::Context ctx_a(std::move(set_a)); + VLOG(10) << "Finish construct set a"; + + // add multi label vertex_set + + std::vector vids_b0{1, 2, 1}; + std::vector off_b0{0, 3}; + + auto set_b = gs::MakeDefaultRowVertexSet(std::move(vids_b0), "b"); + + auto ctx_2 = + ctx_a.AddNode(std::move(set_b), std::move(off_b0)); + for (auto iter : ctx_2) { + VLOG(10) << gs::to_string(iter.GetAllElement()); + } + + std::vector vids_c0{3, 4, 5, 6, 7}; + std::vector off_c0{0, 3, 3, 5}; + + auto set_c = gs::MakeDefaultRowVertexSet(std::move(vids_c0), "b"); + auto ctx_3 = + ctx_2.AddNode(std::move(set_c), std::move(off_c0)); + for (auto iter : ctx_3) { + VLOG(10) << gs::to_string(iter.GetAllElement()); + } + + ctx_3.Dedup<1>(); + VLOG(10) << "after dedup on 1"; + for (auto iter : ctx_3) { + VLOG(10) << gs::to_string(iter.GetAllElement()); + } + + auto& select_node = ctx_3.template GetMutableNode<1>(); + // dedup inplace, and return the offset_array to old node. + auto offset_to_old_node = select_node.Dedup(); + // The offset need to be changed. 
+ ctx_3.template UpdateChildNode<1>(std::move(offset_to_old_node)); + + VLOG(10) << "after dedup on itself"; + for (auto iter : ctx_3) { + VLOG(10) << gs::to_string(iter.GetAllElement()); + } +} + +} // namespace gs + +int main() { + gs::work(); + LOG(INFO) << "Finish context test."; +} diff --git a/flex/utils/app_utils.cc b/flex/utils/app_utils.cc index 696268f16e2a..e5e34a959075 100644 --- a/flex/utils/app_utils.cc +++ b/flex/utils/app_utils.cc @@ -79,6 +79,13 @@ void Encoder::put_string_view(const std::string_view& v) { memcpy(&buf_[size + 4], v.data(), len); } + +void Encoder::put_double(double v){ + size_t size = buf_.size(); + buf_.resize(size + sizeof(double)); + memcpy(&buf_[size], &v, sizeof(double)); +} + void Encoder::clear() { buf_.clear(); } static int64_t char_ptr_to_long(const char* data) { @@ -91,6 +98,11 @@ static int char_ptr_to_int(const char* data) { return *ptr; } +static double char_ptr_to_double(const char* data){ + const double* ptr = reinterpret_cast(data); + return *ptr; +} + int Decoder::get_int() { int ret = char_ptr_to_int(data_); data_ += 4; @@ -103,6 +115,12 @@ int64_t Decoder::get_long() { return ret; } +double Decoder::get_double(){ + double ret = char_ptr_to_double(data_); + data_ += 8; + return ret; +} + std::string_view Decoder::get_string() { int len = get_int(); std::string_view ret(data_, len); diff --git a/flex/utils/app_utils.h b/flex/utils/app_utils.h index 55aab1bcb266..2173cd13069e 100644 --- a/flex/utils/app_utils.h +++ b/flex/utils/app_utils.h @@ -48,6 +48,8 @@ class Encoder { void put_string_view(const std::string_view& v); + void put_double(double v); + void clear(); private: @@ -67,6 +69,8 @@ class Decoder { uint8_t get_byte(); + double get_double(); + const char* data() const; bool empty() const; diff --git a/flex/utils/property/column.h b/flex/utils/property/column.h index ec2661360d9b..b79356e5f576 100644 --- a/flex/utils/property/column.h +++ b/flex/utils/property/column.h @@ -96,6 +96,7 @@ using IntColumn = 
TypedColumn; using LongColumn = TypedColumn; using DateColumn = TypedColumn; using StringColumn = TypedColumn; +using DoubleColumn = TypedColumn; std::shared_ptr CreateColumn( PropertyType type, StorageStrategy strategy = StorageStrategy::kMem); diff --git a/flex/utils/property/table.cc b/flex/utils/property/table.cc index 2833717f77e2..2c53de48db1c 100644 --- a/flex/utils/property/table.cc +++ b/flex/utils/property/table.cc @@ -60,6 +60,22 @@ std::vector Table::column_names() const { return names; } +std::string Table::column_name(size_t index) const { + size_t col_num = col_id_indexer_.size(); + CHECK(index < col_num); + std::string name{}; + CHECK(col_id_indexer_.get_key(index, name)); + return name; +} + +int Table::get_column_id_by_name(const std::string& name) const { + int col_id; + if (col_id_indexer_.get_index(name, col_id)) { + return col_id; + } + return -1; +} + std::vector Table::column_types() const { size_t col_num = col_id_indexer_.size(); std::vector types(col_num); diff --git a/flex/utils/property/table.h b/flex/utils/property/table.h index 0b4e5fcbdc42..4fa2110ffab4 100644 --- a/flex/utils/property/table.h +++ b/flex/utils/property/table.h @@ -41,6 +41,10 @@ class Table { std::vector column_names() const; + std::string column_name(size_t index) const; + + int get_column_id_by_name(const std::string& name) const; + std::vector column_types() const; std::shared_ptr get_column(const std::string& name); diff --git a/flex/utils/property/types.cc b/flex/utils/property/types.cc index cfcdaaa56a63..f1e93c687802 100644 --- a/flex/utils/property/types.cc +++ b/flex/utils/property/types.cc @@ -40,6 +40,10 @@ inline void ParseString(const std::string_view& str, std::string_view& val) { val = str; } +inline void ParseDouble(const std::string_view& str, double& val) { + sscanf(str.data(), "%lf", &val); +} + void ParseRecord(const char* line, std::vector& rec) { const char* cur = line; for (auto& item : rec) { @@ -56,6 +60,8 @@ void ParseRecord(const char* 
line, std::vector& rec) { ParseDate(sv, item.value.d); } else if (item.type == PropertyType::kString) { ParseString(sv, item.value.s); + } else if (item.type == PropertyType::kDouble) { + ParseDouble(sv, item.value.db); } cur = ptr + 1; } @@ -102,9 +108,17 @@ void ParseRecordX(const char* line, int64_t& src, int64_t& dst, #endif } -// parseRecordX for edge with int64 property +void ParseRecordX(const char* line, int64_t& src, int64_t& dst, double& prop) { +#ifdef __APPLE__ + sscanf(line, "%lld|%lld|%lf", &src, &dst, &prop); +#else + sscanf(line, "%" SCNd64 "|%" SCNd64 "|%lf", &src, &dst, &prop); +#endif +} + void ParseRecordX(const char* line, int64_t& src, int64_t& dst, int64_t& prop) { #ifdef __APPLE__ + // parseRecordX for edge with int64 property sscanf(line, "%lld|%lld|%lld", &src, &dst, &prop); #else sscanf(line, "%" SCNd64 "|%" SCNd64 "|%" SCNd64 "", &src, &dst, &prop); @@ -125,6 +139,9 @@ grape::InArchive& operator<<(grape::InArchive& in_archive, const Any& value) { case PropertyType::kString: in_archive << value.type << value.value.s; break; + case PropertyType::kDouble: + in_archive << value.type << value.value.db; + break; default: in_archive << PropertyType::kEmpty; break; @@ -148,6 +165,9 @@ grape::OutArchive& operator>>(grape::OutArchive& out_archive, Any& value) { case PropertyType::kString: out_archive >> value.value.s; break; + case PropertyType::kDouble: + out_archive >> value.value.db; + break; default: break; } diff --git a/flex/utils/property/types.h b/flex/utils/property/types.h index 054d6a845202..8dc83aa468a1 100644 --- a/flex/utils/property/types.h +++ b/flex/utils/property/types.h @@ -38,15 +38,16 @@ enum class PropertyType { kString, kEmpty, kInt64, + kDouble, }; struct Date { Date() = default; ~Date() = default; Date(int64_t x); - Date(const char *str); + Date(const char* str); - void reset(const char *str); + void reset(const char* str); std::string to_string() const; int64_t milli_second; @@ -60,9 +61,11 @@ union AnyValue { int64_t l; 
Date d; std::string_view s; + double db; }; -template struct AnyConverter; +template +struct AnyConverter; struct Any { Any() : type(PropertyType::kEmpty) {} @@ -97,6 +100,11 @@ struct Any { value.s = v; } + void set_double(double db) { + type = PropertyType::kDouble; + value.db = db; + } + std::string to_string() const { if (type == PropertyType::kInt32) { return std::to_string(value.i); @@ -109,6 +117,8 @@ struct Any { return value.d.to_string(); } else if (type == PropertyType::kEmpty) { return "NULL"; + } else if (type == PropertyType::kDouble) { + return std::to_string(value.db); } else { LOG(FATAL) << "Unexpected property type: " << static_cast(type); return ""; @@ -125,17 +135,23 @@ struct Any { return value.l; } - const std::string_view &AsStringView() const { + double AsDouble() const { + assert(type == PropertyType::kDouble); + return value.db; + } + + const std::string_view& AsStringView() const { assert(type == PropertyType::kString); return value.s; } - const Date &AsDate() const { + const Date& AsDate() const { assert(type == PropertyType::kDate); return value.d; } - template static Any From(const T &value) { + template + static Any From(const T& value) { return AnyConverter::to_any(value); } @@ -143,222 +159,271 @@ struct Any { AnyValue value; }; -template struct ConvertAny { - static void to(const Any &value, T &out) { +template +struct ConvertAny { + static void to(const Any& value, T& out) { LOG(FATAL) << "Unexpected convert type..."; } }; -template <> struct ConvertAny { - static void to(const Any &value, int &out) { +template <> +struct ConvertAny { + static void to(const Any& value, int& out) { CHECK(value.type == PropertyType::kInt32); out = value.value.i; } }; -template <> struct ConvertAny { - static void to(const Any &value, int64_t &out) { +template <> +struct ConvertAny { + static void to(const Any& value, int64_t& out) { CHECK(value.type == PropertyType::kInt64); out = value.value.l; } }; -template <> struct ConvertAny { - static void 
to(const Any &value, Date &out) { +template <> +struct ConvertAny { + static void to(const Any& value, Date& out) { CHECK(value.type == PropertyType::kDate); out = value.value.d; } }; -template <> struct ConvertAny { - static void to(const Any &value, grape::EmptyType &out) { +template <> +struct ConvertAny { + static void to(const Any& value, grape::EmptyType& out) { CHECK(value.type == PropertyType::kEmpty); } }; -template <> struct ConvertAny { - static void to(const Any &value, std::string &out) { +template <> +struct ConvertAny { + static void to(const Any& value, std::string& out) { CHECK(value.type == PropertyType::kString); out = std::string(value.value.s); } }; -template struct AnyConverter {}; +template <> +struct ConvertAny { + static void to(const Any& value, double& out) { + CHECK(value.type == PropertyType::kDouble); + out = value.value.db; + } +}; -template <> struct AnyConverter { +template +struct AnyConverter {}; + +template <> +struct AnyConverter { static constexpr PropertyType type = PropertyType::kInt32; - static Any to_any(const int &value) { + static Any to_any(const int& value) { Any ret; ret.set_integer(value); return ret; } - static AnyValue to_any_value(const int &value) { + static AnyValue to_any_value(const int& value) { AnyValue ret; ret.i = value; return ret; } - static const int &from_any(const Any &value) { + static const int& from_any(const Any& value) { CHECK(value.type == PropertyType::kInt32); return value.value.i; } - static const int &from_any_value(const AnyValue &value) { return value.i; } + static const int& from_any_value(const AnyValue& value) { return value.i; } }; -template <> struct AnyConverter { +template <> +struct AnyConverter { static constexpr PropertyType type = PropertyType::kInt64; - static Any to_any(const int64_t &value) { + static Any to_any(const int64_t& value) { Any ret; ret.set_long(value); return ret; } - static AnyValue to_any_value(const int64_t &value) { + static AnyValue to_any_value(const 
int64_t& value) { AnyValue ret; ret.l = value; return ret; } - static const int64_t &from_any(const Any &value) { + static const int64_t& from_any(const Any& value) { CHECK(value.type == PropertyType::kInt64); return value.value.l; } - static const int64_t &from_any_value(const AnyValue &value) { + static const int64_t& from_any_value(const AnyValue& value) { return value.l; } }; -template <> struct AnyConverter { +template <> +struct AnyConverter { static constexpr PropertyType type = PropertyType::kDate; - static Any to_any(const Date &value) { + static Any to_any(const Date& value) { Any ret; ret.set_date(value); return ret; } - static AnyValue to_any_value(const Date &value) { + static AnyValue to_any_value(const Date& value) { AnyValue ret; ret.d = value; return ret; } - static const Date &from_any(const Any &value) { + static const Date& from_any(const Any& value) { CHECK(value.type == PropertyType::kDate); return value.value.d; } - static const Date &from_any_value(const AnyValue &value) { return value.d; } + static const Date& from_any_value(const AnyValue& value) { return value.d; } }; -template <> struct AnyConverter { +template <> +struct AnyConverter { static constexpr PropertyType type = PropertyType::kString; - static Any to_any(const std::string_view &value) { + static Any to_any(const std::string_view& value) { Any ret; ret.set_string(value); return ret; } - static AnyValue to_any_value(const std::string_view &value) { + static AnyValue to_any_value(const std::string_view& value) { AnyValue ret; ret.s = value; return ret; } - static const std::string_view &from_any(const Any &value) { + static const std::string_view& from_any(const Any& value) { CHECK(value.type == PropertyType::kString); return value.value.s; } - static const std::string_view &from_any_value(const AnyValue &value) { + static const std::string_view& from_any_value(const AnyValue& value) { return value.s; } }; -template <> struct AnyConverter { +template <> +struct AnyConverter { 
static constexpr PropertyType type = PropertyType::kString; - static Any to_any(const std::string &value) { + static Any to_any(const std::string& value) { Any ret; ret.set_string(value); return ret; } - static AnyValue to_any_value(const std::string &value) { + static AnyValue to_any_value(const std::string& value) { AnyValue ret; ret.s = value; return ret; } - static std::string from_any(const Any &value) { + static std::string from_any(const Any& value) { CHECK(value.type == PropertyType::kString); return std::string(value.value.s); } - static std::string from_any_value(const AnyValue &value) { + static std::string from_any_value(const AnyValue& value) { return std::string(value.s); } }; -template <> struct AnyConverter { +template <> +struct AnyConverter { static constexpr PropertyType type = PropertyType::kEmpty; - static Any to_any(const grape::EmptyType &value) { + static Any to_any(const grape::EmptyType& value) { Any ret; return ret; } - static AnyValue to_any_value(const grape::EmptyType &value) { + static AnyValue to_any_value(const grape::EmptyType& value) { AnyValue ret; return ret; } - static grape::EmptyType from_any(const Any &value) { + static grape::EmptyType from_any(const Any& value) { CHECK(value.type == PropertyType::kEmpty); return grape::EmptyType(); } - static grape::EmptyType from_any_value(const AnyValue &value) { + static grape::EmptyType from_any_value(const AnyValue& value) { return grape::EmptyType(); } }; -void ParseRecord(const char *line, std::vector &rec); +template <> +struct AnyConverter { + static constexpr PropertyType type = PropertyType::kDouble; + + static Any to_any(const double& value) { + Any ret; + ret.set_double(value); + return ret; + } + + static AnyValue to_any_value(const double& value) { + AnyValue ret; + ret.db = value; + return ret; + } + + static const double& from_any(const Any& value) { + CHECK(value.type == PropertyType::kDouble); + return value.value.db; + } + + static const double& from_any_value(const 
AnyValue& value) { + return value.db; + } +}; + +void ParseRecord(const char* line, std::vector& rec); -void ParseRecord(const char *line, int64_t &id, std::vector &rec); +void ParseRecord(const char* line, int64_t& id, std::vector& rec); -void ParseRecordX(const char *line, int64_t &src, int64_t &dst, int &prop); +void ParseRecordX(const char* line, int64_t& src, int64_t& dst, int& prop); -void ParseRecordX(const char *line, int64_t &src, int64_t &dst, Date &prop); +void ParseRecordX(const char* line, int64_t& src, int64_t& dst, Date& prop); -void ParseRecordX(const char *line, int64_t &src, int64_t &dst, - grape::EmptyType &prop); +void ParseRecordX(const char* line, int64_t& src, int64_t& dst, + grape::EmptyType& prop); +void ParseRecordX(const char* line, int64_t& src, int64_t& dst, double& prop); -void ParseRecordX(const char *line, int64_t &src, int64_t &dst, - int64_t &prop); +void ParseRecordX(const char* line, int64_t& src, int64_t& dst, int64_t& prop); -grape::InArchive &operator<<(grape::InArchive &in_archive, const Any &value); -grape::OutArchive &operator>>(grape::OutArchive &out_archive, Any &value); +grape::InArchive& operator<<(grape::InArchive& in_archive, const Any& value); +grape::OutArchive& operator>>(grape::OutArchive& out_archive, Any& value); -grape::InArchive &operator<<(grape::InArchive &in_archive, const std::string_view &value); -grape::OutArchive &operator>>(grape::OutArchive &out_archive, std::string_view &value); +grape::InArchive& operator<<(grape::InArchive& in_archive, + const std::string_view& value); +grape::OutArchive& operator>>(grape::OutArchive& out_archive, + std::string_view& value); -} // namespace gs +} // namespace gs namespace std { -inline ostream &operator<<(ostream &os, const gs::Date &dt) { +inline ostream& operator<<(ostream& os, const gs::Date& dt) { os << dt.to_string(); return os; } -inline ostream &operator<<(ostream &os, gs::PropertyType pt) { +inline ostream& operator<<(ostream& os, gs::PropertyType pt) { 
switch (pt) { case gs::PropertyType::kInt32: os << "int32"; @@ -375,6 +440,9 @@ inline ostream &operator<<(ostream &os, gs::PropertyType pt) { case gs::PropertyType::kEmpty: os << "Empty"; break; + case gs::PropertyType::kDouble: + os << "double"; + break; default: os << "Unknown"; break; @@ -382,6 +450,6 @@ inline ostream &operator<<(ostream &os, gs::PropertyType pt) { return os; } -} // namespace std +} // namespace std -#endif // GRAPHSCOPE_TYPES_H_ +#endif // GRAPHSCOPE_TYPES_H_ diff --git a/interactive_engine/common/pom.xml b/interactive_engine/common/pom.xml index bbd719967862..58ed3c8b109a 100644 --- a/interactive_engine/common/pom.xml +++ b/interactive_engine/common/pom.xml @@ -84,7 +84,7 @@ interactive - ../proto + ../../proto/groot/ sdk/* diff --git a/interactive_engine/compiler/Makefile b/interactive_engine/compiler/Makefile index 38086e7f3a0b..4f12cfba1142 100644 --- a/interactive_engine/compiler/Makefile +++ b/interactive_engine/compiler/Makefile @@ -43,7 +43,7 @@ test: # start rpc server # make run gremlin_test: - mvn test -Dtest=com.alibaba.graphscope.integration.standard.IrGremlinTest + mvn test -Dtest=com.alibaba.graphscope.gremlin.integration.standard.IrGremlinTest run: cd $(CUR_DIR) && $(java) \ @@ -65,11 +65,13 @@ physical_plan: # start rpc server # make run graph.schema:=../executor/ir/core/resource/ldbc_schema.json ldbc_test: - mvn test -Dtest=com.alibaba.graphscope.integration.ldbc.IrLdbcTest + # todo: need to rewrite gremlin ldbc queries + # mvn test -Dtest=com.alibaba.graphscope.gremlin.integration.ldbc.IrLdbcTest + mvn test -Dtest=com.alibaba.graphscope.cypher.integration.ldbc.IrLdbcTest # start rpc server # make run graph.schema:=../executor/ir/core/resource/ldbc_schema.json pattern_test: - mvn test -Dtest=com.alibaba.graphscope.integration.pattern.IrPatternTest + mvn test -Dtest=com.alibaba.graphscope.gremlin.integration.pattern.IrPatternTest .PHONY: build run diff --git a/interactive_engine/compiler/ir_exprimental_pattern_ci.sh 
b/interactive_engine/compiler/ir_exprimental_advanced_ci.sh similarity index 88% rename from interactive_engine/compiler/ir_exprimental_pattern_ci.sh rename to interactive_engine/compiler/ir_exprimental_advanced_ci.sh index 88caf5006702..0cfbb89cf663 100755 --- a/interactive_engine/compiler/ir_exprimental_pattern_ci.sh +++ b/interactive_engine/compiler/ir_exprimental_advanced_ci.sh @@ -4,7 +4,7 @@ base_dir=$(cd $(dirname $0); pwd) ps -ef | grep "com.alibaba.graphscope.GraphServer" | awk '{print $2}' | xargs kill -9 || true ps -ef | grep "start_rpc_server" | awk '{print $2}' | xargs kill -9 sleep 3 -# start engine service and load ldbc graph +# start engine service and load ldbc graph with sf=0.1 cd ${base_dir}/../executor/ir/target/release && RUST_LOG=info DATA_PATH=/tmp/gstest/ldbc_graph_exp_bin PARTITION_ID=0 ./start_rpc_server --config ${base_dir}/../executor/ir/integrated/config/distributed/server_0 & cd ${base_dir}/../executor/ir/target/release && @@ -13,8 +13,8 @@ sleep 10 # start compiler service cd ${base_dir} && make run graph.schema:=../executor/ir/core/resource/ldbc_schema.json pegasus.hosts:=127.0.0.1:1234,127.0.0.1:1235 & sleep 5 -# run gremlin standard tests -cd ${base_dir} && make pattern_test +# run pattern tests and ldbc tests +cd ${base_dir} && make pattern_test && make ldbc_test exit_code=$? 
# clean service ps -ef | grep "com.alibaba.graphscope.GraphServer" | awk '{print $2}' | xargs kill -9 || true diff --git a/interactive_engine/compiler/pom.xml b/interactive_engine/compiler/pom.xml index 8c4f6df1ed27..1a2c503ec937 100644 --- a/interactive_engine/compiler/pom.xml +++ b/interactive_engine/compiler/pom.xml @@ -128,6 +128,10 @@ org.neo4j neo4j + + org.neo4j.driver + neo4j-java-driver + org.immutables value diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/tools/GraphPlanner.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/tools/GraphPlanner.java index dfccfbd037df..696ed21c02fa 100644 --- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/tools/GraphPlanner.java +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/tools/GraphPlanner.java @@ -189,6 +189,10 @@ private RelOptPlanner createRelOptPlanner(PlannerConfig plannerConfig) { } } + public AtomicLong getIdGenerator() { + return idGenerator; + } + public static void main(String[] args) throws Exception { if (args.length < 3 || args[0].isEmpty() || args[1].isEmpty() || args[2].isEmpty()) { throw new IllegalArgumentException( diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/ldbc/LdbcQueries.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/ldbc/LdbcQueries.java new file mode 100644 index 000000000000..2833ddbf68df --- /dev/null +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/ldbc/LdbcQueries.java @@ -0,0 +1,244 @@ +/* + * Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.graphscope.cypher.integration.suite.ldbc; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class LdbcQueries { + public static QueryContext get_ldbc_2_test() { + String query = + "MATCH (p:PERSON{id:" + + " 19791209300143})-[:KNOWS]-(friend:PERSON)<-[:HASCREATOR]-(message : POST |" + + " COMMENT) \n" + + "WHERE message.creationDate < 20121128080000000 \n" + + "WITH \n" + + "\tfriend.id AS personId, \n" + + "\tfriend.firstName AS personFirstName, \n" + + " friend.lastName AS personLastName, \n" + + " message.id AS postOrCommentId, \n" + + " message.creationDate AS postOrCommentCreationDate \n" + + "ORDER BY \n" + + " postOrCommentCreationDate DESC, \n" + + " postOrCommentId ASC \n" + + "LIMIT 20\n" + + "RETURN personId, personFirstName, personLastName, postOrCommentId"; + List expected = + Arrays.asList( + "Record<{personId: 24189255811566, personFirstName: \"The\"," + + " personLastName: \"Kunda\", postOrCommentId: 1099511875186}>", + "Record<{personId: 30786325578747, personFirstName: \"Zhang\"," + + " personLastName: \"Huang\", postOrCommentId: 1099511787223}>", + "Record<{personId: 8796093023000, personFirstName: \"Peng\"," + + " personLastName: \"Zhang\", postOrCommentId: 1099511959866}>", + "Record<{personId: 13194139533535, personFirstName: \"Shweta\"," + + " personLastName: \"Singh\", postOrCommentId: 1099511997952}>", + "Record<{personId: 13194139533535, personFirstName: \"Shweta\"," + + " personLastName: \"Singh\", postOrCommentId: 1099511997953}>", + 
"Record<{personId: 13194139533535, personFirstName: \"Shweta\"," + + " personLastName: \"Singh\", postOrCommentId: 1099511722622}>", + "Record<{personId: 13194139533535, personFirstName: \"Shweta\"," + + " personLastName: \"Singh\", postOrCommentId: 1099511997861}>", + "Record<{personId: 4398046511596, personFirstName: \"Ge\", personLastName:" + + " \"Wei\", postOrCommentId: 1099511949726}>", + "Record<{personId: 13194139533535, personFirstName: \"Shweta\"," + + " personLastName: \"Singh\", postOrCommentId: 1099511722621}>", + "Record<{personId: 8796093023000, personFirstName: \"Peng\"," + + " personLastName: \"Zhang\", postOrCommentId: 1099511959863}>", + "Record<{personId: 4398046511596, personFirstName: \"Ge\", personLastName:" + + " \"Wei\", postOrCommentId: 1099511681330}>", + "Record<{personId: 4398046511596, personFirstName: \"Ge\", personLastName:" + + " \"Wei\", postOrCommentId: 1099511949721}>", + "Record<{personId: 8796093023000, personFirstName: \"Peng\"," + + " personLastName: \"Zhang\", postOrCommentId: 1099511993242}>", + "Record<{personId: 8796093023000, personFirstName: \"Peng\"," + + " personLastName: \"Zhang\", postOrCommentId: 1099511980256}>", + "Record<{personId: 13194139533535, personFirstName: \"Shweta\"," + + " personLastName: \"Singh\", postOrCommentId: 1099511997871}>", + "Record<{personId: 13194139533535, personFirstName: \"Shweta\"," + + " personLastName: \"Singh\", postOrCommentId: 1099511645534}>", + "Record<{personId: 13194139533535, personFirstName: \"Shweta\"," + + " personLastName: \"Singh\", postOrCommentId: 1099511722160}>", + "Record<{personId: 30786325578747, personFirstName: \"Zhang\"," + + " personLastName: \"Huang\", postOrCommentId: 1099511977072}>", + "Record<{personId: 24189255811566, personFirstName: \"The\"," + + " personLastName: \"Kunda\", postOrCommentId: 1099511738988}>", + "Record<{personId: 13194139533535, personFirstName: \"Shweta\"," + + " personLastName: \"Singh\", postOrCommentId: 1099511860894}>"); + return 
new QueryContext(query, expected); + } + + public static QueryContext get_ldbc_3_test() { + String query = + "MATCH (countryX:COUNTRY {name:" + + " 'Puerto_Rico'})<-[:ISLOCATEDIN]-(messageX)-[:HASCREATOR]->(otherP:PERSON),\n" + + " \t(countryY:COUNTRY {name:" + + " 'Republic_of_Macedonia'})<-[:ISLOCATEDIN]-(messageY)-[:HASCREATOR]->(otherP:PERSON),\n" + + " \t(otherP)-[:ISLOCATEDIN]->(city)-[:ISPARTOF]->(countryCity),\n" + + " \t(person:PERSON {id:15393162790207})-[:KNOWS*1..2]-(otherP)\n" + + "WHERE messageX.creationDate >= 20101201080000000 and messageX.creationDate <" + + " 20101231080000000\n" + + " AND messageY.creationDate >= 20101201080000000 and messageY.creationDate <" + + " 20101231080000000\n" + + "\tAND countryCity.name <> 'Puerto_Rico' AND countryCity.name <>" + + " 'Republic_of_Macedonia'\n" + + "WITH otherP, count(messageX) as xCount, count(messageY) as yCount\n" + + "RETURN otherP.id as id,\n" + + "\t\t\t otherP.firstName as firstName,\n" + + "\t\t\t otherP.lastName as lastName,\n" + + "\t\t\t xCount,\n" + + "\t\t\t yCount,\n" + + "\t\t\t xCount + yCount as total\n" + + "ORDER BY total DESC, id ASC\n" + + "Limit 20"; + return new QueryContext(query, Collections.emptyList()); + } + + public static QueryContext get_ldbc_6_test() { + String query = + "MATCH (person:PERSON" + + " {id:30786325579101})-[:KNOWS*1..3]-(other:PERSON)<-[:HASCREATOR]-(post:POST)-[:HASTAG]->(tag:TAG" + + " {name:'Shakira'}),\n" + + " (post)-[:HASTAG]->(otherTag:TAG)\n" + + "WHERE otherTag <> tag\n" + + "RETURN otherTag.name as name, count(distinct post) as postCnt\n" + + "ORDER BY postCnt desc, name asc\n" + + "LIMIT 10"; + List expected = + Arrays.asList( + "Record<{name: \"David_Foster\", postCnt: 4}>", + "Record<{name: \"Muammar_Gaddafi\", postCnt: 2}>", + "Record<{name: \"Robert_John_Mutt_Lange\", postCnt: 2}>", + "Record<{name: \"Alfred_the_Great\", postCnt: 1}>", + "Record<{name: \"Andre_Agassi\", postCnt: 1}>", + "Record<{name: \"Andy_Roddick\", postCnt: 1}>", + 
"Record<{name: \"Bangladesh\", postCnt: 1}>", + "Record<{name: \"Benito_Mussolini\", postCnt: 1}>", + "Record<{name: \"Clark_Gable\", postCnt: 1}>", + "Record<{name: \"Condoleezza_Rice\", postCnt: 1}>"); + return new QueryContext(query, expected); + } + + public static QueryContext get_ldbc_8_test() { + String query = + "MATCH (person:PERSON {id:" + + " 2199023256816})<-[:HASCREATOR]-(message)<-[:REPLYOF]-(comment:COMMENT)-[:HASCREATOR]->(author:PERSON)\n" + + "RETURN \n" + + "\tauthor.id,\n" + + "\tauthor.firstName,\n" + + "\tauthor.lastName,\n" + + "\tcomment.creationDate as commentDate,\n" + + "\tcomment.id as commentId,\n" + + "\tcomment.content\n" + + "ORDER BY\n" + + "\tcommentDate desc,\n" + + "\tcommentId asc\n" + + "LIMIT 20"; + List expected = + Arrays.asList( + "Record<{id: 13194139533482, firstName: \"Ana Paula\", lastName: \"Silva\"," + + " commentDate: 20120913024615078, commentId: 1099511667820, content:" + + " \"About Heinz Guderian, aised and organized under his direction" + + " About Malacca Sul\"}>", + "Record<{id: 8796093022928, firstName: \"Hao\", lastName: \"Zhu\"," + + " commentDate: 20120909134103021, commentId: 1099511964827, content:" + + " \"About Nothing but the Beat, icki Minaj, Usher, Jennifer Hudson," + + " Jessie J and Sia Furler\"}>", + "Record<{id: 10995116278796, firstName: \"Kenji\", lastName: \"Sakai\"," + + " commentDate: 20120909115826789, commentId: 1099511964825, content:" + + " \"About Humayun, to expand the Empire further, leaving a suAbout" + + " Philip K. 
Dick, r o\"}>", + "Record<{id: 30786325577752, firstName: \"Jie\", lastName: \"Yang\"," + + " commentDate: 20120909065507083, commentId: 1099511964826, content:" + + " \"no\"}>", + "Record<{id: 24189255812755, firstName: \"Paulo\", lastName: \"Santos\"," + + " commentDate: 20120909051506094, commentId: 1099511964828, content:" + + " \"good\"}>", + "Record<{id: 687, firstName: \"Deepak\", lastName: \"Singh\", commentDate:" + + " 20120908105918087, commentId: 1030792351589, content: \"no" + + " way!\"}>", + "Record<{id: 2199023256586, firstName: \"Alfonso\", lastName: \"Elizalde\"," + + " commentDate: 20120907145833508, commentId: 1030792488768, content:" + + " \"About Humayun, ial legacy for his son, Akbar. His peaceful About" + + " Busta Rhymes, sta Rhy\"}>", + "Record<{id: 30786325578896, firstName: \"Yang\", lastName: \"Li\"," + + " commentDate: 20120907141705148, commentId: 1030792488774, content:" + + " \"roflol\"}>", + "Record<{id: 21990232555834, firstName: \"John\", lastName: \"Garcia\"," + + " commentDate: 20120907134041067, commentId: 1030792488763, content:" + + " \"no way!\"}>", + "Record<{id: 13194139534578, firstName: \"Kunal\", lastName: \"Sharma\"," + + " commentDate: 20120907122417245, commentId: 1030792488765, content:" + + " \"maybe\"}>", + "Record<{id: 15393162789932, firstName: \"Fali Sam\", lastName: \"Price\"," + + " commentDate: 20120907101759051, commentId: 1030792488767, content:" + + " \"roflol\"}>", + "Record<{id: 30786325579189, firstName: \"Cheh\", lastName: \"Yang\"," + + " commentDate: 20120907052608122, commentId: 1030792488759, content:" + + " \"yes\"}>", + "Record<{id: 555, firstName: \"Chen\", lastName: \"Yang\", commentDate:" + + " 20120907024704535, commentId: 1030792488769, content: \"About Skin" + + " and Bones, Another Round, reprising the contribution he made to the" + + " original a\"}>", + "Record<{id: 13194139534382, firstName: \"A.\", lastName: \"Budjana\"," + + " commentDate: 20120907024514312, commentId: 1030792488758, 
content:" + + " \"duh\"}>", + "Record<{id: 8796093022290, firstName: \"Alexei\", lastName: \"Codreanu\"," + + " commentDate: 20120906212321712, commentId: 1030792488760, content:" + + " \"ok\"}>", + "Record<{id: 21990232555958, firstName: \"Ernest B\", lastName:" + + " \"Law-Yone\", commentDate: 20120906201808132, commentId:" + + " 1030792488766, content: \"great\"}>", + "Record<{id: 26388279067760, firstName: \"Max\", lastName: \"Bauer\"," + + " commentDate: 20120906175431955, commentId: 1030792488761, content:" + + " \"thx\"}>", + "Record<{id: 10995116278300, firstName: \"Jie\", lastName: \"Li\"," + + " commentDate: 20120906174021751, commentId: 1030792488762, content:" + + " \"maybe\"}>", + "Record<{id: 10995116279093, firstName: \"Diem\", lastName: \"Nguyen\"," + + " commentDate: 20120906173946333, commentId: 1030792488764, content:" + + " \"thanks\"}>", + "Record<{id: 26388279066662, firstName: \"Alfonso\", lastName:" + + " \"Rodriguez\", commentDate: 20120906124058972, commentId:" + + " 1030792487632, content: \"good\"}>"); + return new QueryContext(query, expected); + } + + public static QueryContext get_ldbc_12() { + String query = + "MATCH (person:PERSON {id:" + + " 19791209300143})-[:KNOWS]-(friend:PERSON)<-[:HASCREATOR]-(comment:COMMENT)-[:REPLYOF]->(:POST)-[:HASTAG]->(tag:TAG)-[:HASTYPE]->(:TAGCLASS)-[:ISSUBCLASSOF*0..5]->(baseTagClass:TAGCLASS" + + " {name: 'BasketballPlayer'})\n" + + "RETURN\n" + + " friend.id AS personId,\n" + + " friend.firstName AS personFirstName,\n" + + " friend.lastName AS personLastName,\n" + + " collect(DISTINCT tag.name) AS tagNames,\n" + + " count(DISTINCT comment) AS replyCount\n" + + "ORDER BY\n" + + " replyCount DESC,\n" + + " personId ASC\n" + + "LIMIT 20"; + List expected = + Arrays.asList( + "Record<{personId: 8796093023000, personFirstName: \"Peng\"," + + " personLastName: \"Zhang\", tagNames: [\"Michael_Jordan\"]," + + " replyCount: 4}>"); + return new QueryContext(query, expected); + } +} diff --git 
a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/ldbc/QueryContext.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/ldbc/QueryContext.java new file mode 100644 index 000000000000..2af48d535e48 --- /dev/null +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/ldbc/QueryContext.java @@ -0,0 +1,38 @@ +/* + * Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.alibaba.graphscope.cypher.integration.suite.ldbc; + +import java.util.Collections; +import java.util.List; + +public class QueryContext { + private final String query; + private final List expectedResult; + + public QueryContext(String query, List expectedResult) { + this.query = query; + this.expectedResult = expectedResult; + } + + public String getQuery() { + return query; + } + + public List getExpectedResult() { + return Collections.unmodifiableList(expectedResult); + } +} diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/gremlin/integration/suite/pattern/PatternQueryTest.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/gremlin/integration/suite/pattern/PatternQueryTest.java index 8bfaccb33eea..26fd2857cbfd 100644 --- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/gremlin/integration/suite/pattern/PatternQueryTest.java +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/gremlin/integration/suite/pattern/PatternQueryTest.java @@ -63,256 +63,112 @@ public abstract class PatternQueryTest extends AbstractGremlinProcessTest { public void run_pattern_1_test() { Traversal traversal = this.get_pattern_1_test(); this.printTraversalForm(traversal); - int counter = 0; - - String expected = "343476"; - while (traversal.hasNext()) { - Long bindings = traversal.next(); - Assert.assertTrue(bindings.toString().equals(expected)); - ++counter; - } - - Assert.assertEquals(1, counter); + Assert.assertEquals(155633L, traversal.next().longValue()); } @Test public void run_pattern_2_test() { Traversal traversal = this.get_pattern_2_test(); this.printTraversalForm(traversal); - int counter = 0; - - String expected = "87328"; - while (traversal.hasNext()) { - Long bindings = traversal.next(); - Assert.assertTrue(bindings.toString().equals(expected)); - ++counter; - } - - Assert.assertEquals(1, counter); + Assert.assertEquals(55488L, traversal.next().longValue()); } @Test 
public void run_pattern_3_test() { Traversal traversal = this.get_pattern_3_test(); this.printTraversalForm(traversal); - int counter = 0; - - String expected = "1547850"; - while (traversal.hasNext()) { - Long bindings = traversal.next(); - Assert.assertTrue(bindings.toString().equals(expected)); - ++counter; - } - - Assert.assertEquals(1, counter); + Assert.assertEquals(568408L, traversal.next().longValue()); } @Test public void run_pattern_4_test() { Traversal traversal = this.get_pattern_4_test(); this.printTraversalForm(traversal); - int counter = 0; - - String expected = "33380"; - while (traversal.hasNext()) { - Long bindings = traversal.next(); - Assert.assertTrue(bindings.toString().equals(expected)); - ++counter; - } - - Assert.assertEquals(1, counter); + Assert.assertEquals(23286L, traversal.next().longValue()); } @Test public void run_pattern_5_test() { Traversal traversal = this.get_pattern_5_test(); this.printTraversalForm(traversal); - int counter = 0; - - String expected = "31580"; - while (traversal.hasNext()) { - Long bindings = traversal.next(); - Assert.assertTrue(bindings.toString().equals(expected)); - ++counter; - } - - Assert.assertEquals(1, counter); + Assert.assertEquals(16291L, traversal.next().longValue()); } @Test public void run_pattern_6_test() { Traversal traversal = this.get_pattern_6_test(); this.printTraversalForm(traversal); - int counter = 0; - - String expected = "71733"; - while (traversal.hasNext()) { - Long bindings = traversal.next(); - Assert.assertTrue(bindings.toString().equals(expected)); - ++counter; - } - - Assert.assertEquals(1, counter); + Assert.assertEquals(43169L, traversal.next().longValue()); } @Test public void run_pattern_7_test() { Traversal traversal = this.get_pattern_7_test(); this.printTraversalForm(traversal); - int counter = 0; - - String expected = "6568"; - while (traversal.hasNext()) { - Long bindings = traversal.next(); - Assert.assertTrue(bindings.toString().equals(expected)); - ++counter; - } - - 
Assert.assertEquals(1, counter); + Assert.assertEquals(2944L, traversal.next().longValue()); } @Test public void run_pattern_8_test() { Traversal traversal = this.get_pattern_8_test(); this.printTraversalForm(traversal); - int counter = 0; - - String expected = "1594426"; - while (traversal.hasNext()) { - Long bindings = traversal.next(); - Assert.assertTrue(bindings.toString().equals(expected)); - ++counter; - } - - Assert.assertEquals(1, counter); + Assert.assertEquals(782347L, traversal.next().longValue()); } @Test public void run_pattern_9_test() { Traversal traversal = this.get_pattern_9_test(); this.printTraversalForm(traversal); - int counter = 0; - - String expected = "33380"; - while (traversal.hasNext()) { - Long bindings = traversal.next(); - Assert.assertTrue(bindings.toString().equals(expected)); - ++counter; - } - - Assert.assertEquals(1, counter); + Assert.assertEquals(23286L, traversal.next().longValue()); } @Test public void run_pattern_10_test() { Traversal traversal = this.get_pattern_10_test(); this.printTraversalForm(traversal); - int counter = 0; - - String expected = "7327"; - while (traversal.hasNext()) { - Long bindings = traversal.next(); - Assert.assertTrue(bindings.toString().equals(expected)); - ++counter; - } - - Assert.assertEquals(1, counter); + Assert.assertEquals(3019L, traversal.next().longValue()); } @Test public void run_pattern_11_test() { Traversal traversal = this.get_pattern_11_test(); this.printTraversalForm(traversal); - int counter = 0; - - String expected = "908981"; - while (traversal.hasNext()) { - Long bindings = traversal.next(); - Assert.assertTrue(bindings.toString().equals(expected)); - ++counter; - } - - Assert.assertEquals(1, counter); + Assert.assertEquals(506513L, traversal.next().longValue()); } @Test public void run_pattern_12_test() { Traversal traversal = this.get_pattern_12_test(); this.printTraversalForm(traversal); - int counter = 0; - - String expected = "89621"; - while (traversal.hasNext()) { - Long 
bindings = traversal.next(); - Assert.assertTrue(bindings.toString().equals(expected)); - ++counter; - } - - Assert.assertEquals(1, counter); + Assert.assertEquals(232854L, traversal.next().longValue()); } @Test public void run_pattern_13_test() { Traversal traversal = this.get_pattern_13_test(); this.printTraversalForm(traversal); - int counter = 0; - - String expected = "2430116"; - while (traversal.hasNext()) { - Long bindings = traversal.next(); - Assert.assertTrue(bindings.toString().equals(expected)); - ++counter; - } - - Assert.assertEquals(1, counter); + Assert.assertEquals(1602774L, traversal.next().longValue()); } @Test public void run_pattern_14_test() { Traversal traversal = this.get_pattern_14_test(); this.printTraversalForm(traversal); - int counter = 0; - - String expected = "87328"; - while (traversal.hasNext()) { - Long bindings = traversal.next(); - Assert.assertTrue(bindings.toString().equals(expected)); - ++counter; - } - - Assert.assertEquals(1, counter); + Assert.assertEquals(55488L, traversal.next().longValue()); } @Test public void run_pattern_15_test() { Traversal traversal = this.get_pattern_15_test(); this.printTraversalForm(traversal); - int counter = 0; - - String expected = "33380"; - while (traversal.hasNext()) { - Long bindings = traversal.next(); - Assert.assertTrue(bindings.toString().equals(expected)); - ++counter; - } - - Assert.assertEquals(1, counter); + Assert.assertEquals(23286L, traversal.next().longValue()); } @Test public void run_pattern_16_test() { Traversal traversal = this.get_pattern_16_test(); this.printTraversalForm(traversal); - int counter = 0; - - String expected = "33380"; - while (traversal.hasNext()) { - Long bindings = traversal.next(); - Assert.assertTrue(bindings.toString().equals(expected)); - ++counter; - } - - Assert.assertEquals(1, counter); + Assert.assertEquals(23286L, traversal.next().longValue()); } public static class Traversals extends PatternQueryTest { diff --git 
a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/integration/ldbc/IrLdbcTest.java b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/integration/ldbc/IrLdbcTest.java new file mode 100644 index 000000000000..d5a627977899 --- /dev/null +++ b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/integration/ldbc/IrLdbcTest.java @@ -0,0 +1,81 @@ +/* + * Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.alibaba.graphscope.cypher.integration.ldbc; + +import com.alibaba.graphscope.cypher.integration.suite.ldbc.LdbcQueries; +import com.alibaba.graphscope.cypher.integration.suite.ldbc.QueryContext; + +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; +import org.neo4j.driver.GraphDatabase; +import org.neo4j.driver.Result; +import org.neo4j.driver.Session; + +public class IrLdbcTest { + private static Session session; + + @BeforeClass + public static void beforeClass() { + String neo4jServerUrl = + System.getProperty("neo4j.bolt.server.url", "neo4j://localhost:7687"); + session = GraphDatabase.driver(neo4jServerUrl).session(); + } + + @Test + public void run_ldbc_2_test() { + QueryContext testQuery = LdbcQueries.get_ldbc_2_test(); + Result result = session.run(testQuery.getQuery()); + Assert.assertEquals(testQuery.getExpectedResult().toString(), result.list().toString()); + } + + @Test + public void run_ldbc_3_test() { + QueryContext testQuery = LdbcQueries.get_ldbc_3_test(); + Result result = session.run(testQuery.getQuery()); + Assert.assertEquals(testQuery.getExpectedResult().toString(), result.list().toString()); + } + + @Test + public void run_ldbc_6_test() { + QueryContext testQuery = LdbcQueries.get_ldbc_6_test(); + Result result = session.run(testQuery.getQuery()); + Assert.assertEquals(testQuery.getExpectedResult().toString(), result.list().toString()); + } + + @Test + public void run_ldbc_8_test() { + QueryContext testQuery = LdbcQueries.get_ldbc_8_test(); + Result result = session.run(testQuery.getQuery()); + Assert.assertEquals(testQuery.getExpectedResult().toString(), result.list().toString()); + } + + @Test + public void run_ldbc_12_test() { + QueryContext testQuery = LdbcQueries.get_ldbc_12(); + Result result = session.run(testQuery.getQuery()); + Assert.assertEquals(testQuery.getExpectedResult().toString(), result.list().toString()); + } + + @AfterClass + public static void 
afterClass() { + if (session != null) { + session.close(); + } + } +} diff --git a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/integration/ldbc/IrLdbcTest.java b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/gremlin/integration/ldbc/IrLdbcTest.java similarity index 95% rename from interactive_engine/compiler/src/test/java/com/alibaba/graphscope/integration/ldbc/IrLdbcTest.java rename to interactive_engine/compiler/src/test/java/com/alibaba/graphscope/gremlin/integration/ldbc/IrLdbcTest.java index ed11a77a9687..a15dfb4c2d55 100644 --- a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/integration/ldbc/IrLdbcTest.java +++ b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/gremlin/integration/ldbc/IrLdbcTest.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.alibaba.graphscope.integration.ldbc; +package com.alibaba.graphscope.gremlin.integration.ldbc; import com.alibaba.graphscope.gremlin.integration.graph.RemoteTestGraph; import com.alibaba.graphscope.gremlin.integration.graph.RemoteTestGraphProvider; diff --git a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/integration/pattern/IrPatternTest.java b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/gremlin/integration/pattern/IrPatternTest.java similarity index 94% rename from interactive_engine/compiler/src/test/java/com/alibaba/graphscope/integration/pattern/IrPatternTest.java rename to interactive_engine/compiler/src/test/java/com/alibaba/graphscope/gremlin/integration/pattern/IrPatternTest.java index ef74e43b43a7..49d7d931efe4 100644 --- a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/integration/pattern/IrPatternTest.java +++ b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/gremlin/integration/pattern/IrPatternTest.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package com.alibaba.graphscope.integration.pattern; +package com.alibaba.graphscope.gremlin.integration.pattern; import com.alibaba.graphscope.gremlin.integration.graph.RemoteTestGraph; import com.alibaba.graphscope.gremlin.integration.graph.RemoteTestGraphProvider; diff --git a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/integration/standard/IrGremlinTest.java b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/gremlin/integration/standard/IrGremlinTest.java similarity index 94% rename from interactive_engine/compiler/src/test/java/com/alibaba/graphscope/integration/standard/IrGremlinTest.java rename to interactive_engine/compiler/src/test/java/com/alibaba/graphscope/gremlin/integration/standard/IrGremlinTest.java index 124422382d2c..3d0cdcdd9956 100644 --- a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/integration/standard/IrGremlinTest.java +++ b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/gremlin/integration/standard/IrGremlinTest.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package com.alibaba.graphscope.integration.standard; +package com.alibaba.graphscope.gremlin.integration.standard; import com.alibaba.graphscope.gremlin.integration.graph.RemoteTestGraph; import com.alibaba.graphscope.gremlin.integration.graph.RemoteTestGraphProvider; diff --git a/interactive_engine/data-load-tool/src/main/java/com/alibaba/graphscope/groot/dataload/databuild/OfflineBuild.java b/interactive_engine/data-load-tool/src/main/java/com/alibaba/graphscope/groot/dataload/databuild/OfflineBuild.java index 417973b967fb..21a3d0e20faa 100644 --- a/interactive_engine/data-load-tool/src/main/java/com/alibaba/graphscope/groot/dataload/databuild/OfflineBuild.java +++ b/interactive_engine/data-load-tool/src/main/java/com/alibaba/graphscope/groot/dataload/databuild/OfflineBuild.java @@ -73,7 +73,9 @@ public static void main(String[] args) .build(); ObjectMapper objectMapper = new ObjectMapper(); Map columnMappingConfig = - objectMapper.readValue(columnMappingConfigStr, new TypeReference<>() {}); + objectMapper.readValue( + columnMappingConfigStr, + new TypeReference>() {}); List targets = new ArrayList<>(); for (FileColumnMapping fileColumnMapping : columnMappingConfig.values()) { diff --git a/interactive_engine/data-load-tool/src/main/java/com/alibaba/graphscope/groot/dataload/databuild/OfflineBuildOdps.java b/interactive_engine/data-load-tool/src/main/java/com/alibaba/graphscope/groot/dataload/databuild/OfflineBuildOdps.java index 1354847ab692..93af98038944 100644 --- a/interactive_engine/data-load-tool/src/main/java/com/alibaba/graphscope/groot/dataload/databuild/OfflineBuildOdps.java +++ b/interactive_engine/data-load-tool/src/main/java/com/alibaba/graphscope/groot/dataload/databuild/OfflineBuildOdps.java @@ -69,7 +69,9 @@ public static void main(String[] args) throws IOException { ObjectMapper objectMapper = new ObjectMapper(); Map columnMappingConfig = - objectMapper.readValue(columnMappingConfigStr, new TypeReference<>() {}); + objectMapper.readValue( + 
columnMappingConfigStr, + new TypeReference>() {}); List targets = new ArrayList<>(); for (FileColumnMapping fileColumnMapping : columnMappingConfig.values()) { diff --git a/interactive_engine/executor/ir/core/resource/ldbc_schema.json b/interactive_engine/executor/ir/core/resource/ldbc_schema.json index 5aa97c437f35..ca7025621bb7 100644 --- a/interactive_engine/executor/ir/core/resource/ldbc_schema.json +++ b/interactive_engine/executor/ir/core/resource/ldbc_schema.json @@ -110,6 +110,22 @@ "name": "COMMENT" }, "columns": [ + { + "key": { + "id": 0, + "name": "id" + }, + "data_type": 2, + "is_primary_key": false + }, + { + "key": { + "id": 2, + "name": "creationDate" + }, + "data_type": 2, + "is_primary_key": false + }, { "key": { "id": 4, diff --git a/interactive_engine/executor/store/groot/build.rs b/interactive_engine/executor/store/groot/build.rs index c7217b6d4bc6..37b6fee8aff0 100644 --- a/interactive_engine/executor/store/groot/build.rs +++ b/interactive_engine/executor/store/groot/build.rs @@ -1,7 +1,7 @@ extern crate protoc_grpcio; fn main() { - let proto_root = "../../../proto"; + let proto_root = "../../../../proto/groot"; protoc_grpcio::compile_grpc_protos( &[ proto_root.to_owned() + "/sdk/model.proto", diff --git a/interactive_engine/groot-client/pom.xml b/interactive_engine/groot-client/pom.xml index 7b1baf1d36fb..7bb85f11de01 100644 --- a/interactive_engine/groot-client/pom.xml +++ b/interactive_engine/groot-client/pom.xml @@ -90,7 +90,7 @@ sdk - ../proto + ../../proto/groot sdk/* @@ -103,11 +103,11 @@ graphscope - ../../python + ../../proto - graphscope/proto/graph_def.proto - graphscope/proto/ddl_service.proto - graphscope/proto/write_service.proto + graph_def.proto + ddl_service.proto + write_service.proto diff --git a/interactive_engine/lgraph/CMakeLists.txt b/interactive_engine/lgraph/CMakeLists.txt index 241dd3e6f2a9..3560f9bb72e7 100644 --- a/interactive_engine/lgraph/CMakeLists.txt +++ b/interactive_engine/lgraph/CMakeLists.txt @@ -39,7 
+39,7 @@ find_package(gRPC 1.26.0 REQUIRED) find_package(CppKafka 0.3.1 REQUIRED) # Proto file generating -set(PROTO_FILE_DIR ${Lgraph_SOURCE_DIR}/../proto) +set(PROTO_FILE_DIR ${Lgraph_SOURCE_DIR}/../../proto/groot) set(GRPC_FILES ${PROTO_FILE_DIR}/sdk/client.proto ${PROTO_FILE_DIR}/sdk/client_backup_service.proto) @@ -47,7 +47,7 @@ set(PROTO_FILES ${GRPC_FILES} ${PROTO_FILE_DIR}/sdk/model.proto ${PROTO_FILE_DIR}/sdk/common.proto) -set(PROTOBUF_IMPORT_DIRS ${Lgraph_SOURCE_DIR}/../proto) +set(PROTOBUF_IMPORT_DIRS ${Lgraph_SOURCE_DIR}/../../proto/groot) set(PROTO_BUILD_DIR ${Lgraph_BINARY_DIR}/generated) set(PROTO_SRCS_DEST_DIR ${Lgraph_SOURCE_DIR}/src/proto) set(PROTO_HDRS_DEST_DIR ${Lgraph_SOURCE_DIR}/include/lgraph/proto) diff --git a/interactive_engine/pom.xml b/interactive_engine/pom.xml index 192d18131a58..c91ec81edc57 100644 --- a/interactive_engine/pom.xml +++ b/interactive_engine/pom.xml @@ -98,6 +98,17 @@ groot-client sdk-common + + + + maven-compiler-plugin + + 1.8 + 1.8 + + + + release @@ -627,6 +638,12 @@ + + org.neo4j.driver + neo4j-java-driver + ${noe4j.version} + test + org.immutables value diff --git a/interactive_engine/proto/ingestor_snapshot_service.proto b/interactive_engine/proto/ingestor_snapshot_service.proto deleted file mode 100644 index c17196f6db66..000000000000 --- a/interactive_engine/proto/ingestor_snapshot_service.proto +++ /dev/null @@ -1,31 +0,0 @@ -/** - * Copyright 2020 Alibaba Group Holding Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -syntax = "proto3"; - -option java_package = "com.alibaba.graphscope.proto.groot"; -option java_multiple_files = true; - -service IngestorSnapshot { - rpc advanceIngestSnapshotId(AdvanceIngestSnapshotIdRequest) returns (AdvanceIngestSnapshotIdResponse); -} - -message AdvanceIngestSnapshotIdRequest { - int64 snapshotId = 1; -} - -message AdvanceIngestSnapshotIdResponse { - int64 previousSnapshotId = 1; -} diff --git a/interactive_engine/sdk-common/pom.xml b/interactive_engine/sdk-common/pom.xml index 52a04ac79e07..82d38cbc6edb 100644 --- a/interactive_engine/sdk-common/pom.xml +++ b/interactive_engine/sdk-common/pom.xml @@ -107,7 +107,7 @@ sdk - ../proto + ../../proto/groot sdk/* @@ -120,11 +120,11 @@ graphscope - ../../python + ../../proto - graphscope/proto/graph_def.proto - graphscope/proto/ddl_service.proto - graphscope/proto/write_service.proto + graph_def.proto + ddl_service.proto + write_service.proto diff --git a/python/graphscope/proto/attr_value.proto b/proto/attr_value.proto similarity index 100% rename from python/graphscope/proto/attr_value.proto rename to proto/attr_value.proto diff --git a/python/graphscope/proto/coordinator_service.proto b/proto/coordinator_service.proto similarity index 97% rename from python/graphscope/proto/coordinator_service.proto rename to proto/coordinator_service.proto index cfe917bed62c..038709bd66ff 100644 --- a/python/graphscope/proto/coordinator_service.proto +++ b/proto/coordinator_service.proto @@ -16,7 +16,7 @@ syntax = "proto3"; package gs.rpc; -import "graphscope/proto/message.proto"; +import "message.proto"; service CoordinatorService { // Connect a session. 
diff --git a/python/graphscope/proto/data_types.proto b/proto/data_types.proto similarity index 100% rename from python/graphscope/proto/data_types.proto rename to proto/data_types.proto diff --git a/python/graphscope/proto/ddl_service.proto b/proto/ddl_service.proto similarity index 98% rename from python/graphscope/proto/ddl_service.proto rename to proto/ddl_service.proto index af0d49f96a94..e553924bd858 100644 --- a/python/graphscope/proto/ddl_service.proto +++ b/proto/ddl_service.proto @@ -15,7 +15,7 @@ syntax = "proto3"; package gs.rpc.ddl_service.v1; -import "graphscope/proto/graph_def.proto"; +import "graph_def.proto"; option java_package = "com.alibaba.graphscope.proto.ddl"; option java_multiple_files = true; diff --git a/python/graphscope/proto/engine_service.proto b/proto/engine_service.proto similarity index 95% rename from python/graphscope/proto/engine_service.proto rename to proto/engine_service.proto index 76596a826595..da15dbcf86d2 100644 --- a/python/graphscope/proto/engine_service.proto +++ b/proto/engine_service.proto @@ -17,7 +17,7 @@ syntax = "proto3"; package gs.rpc; -import "graphscope/proto/message.proto"; +import "message.proto"; service EngineService { // Drives the graph computation. 
diff --git a/python/graphscope/proto/error_codes.proto b/proto/error_codes.proto similarity index 100% rename from python/graphscope/proto/error_codes.proto rename to proto/error_codes.proto diff --git a/python/graphscope/proto/graph_def.proto b/proto/graph_def.proto similarity index 100% rename from python/graphscope/proto/graph_def.proto rename to proto/graph_def.proto diff --git a/interactive_engine/proto/cluster.proto b/proto/groot/cluster.proto similarity index 100% rename from interactive_engine/proto/cluster.proto rename to proto/groot/cluster.proto diff --git a/interactive_engine/proto/frontend_snapshot_service.proto b/proto/groot/frontend_snapshot_service.proto similarity index 100% rename from interactive_engine/proto/frontend_snapshot_service.proto rename to proto/groot/frontend_snapshot_service.proto diff --git a/interactive_engine/proto/id_allocate_service.proto b/proto/groot/id_allocate_service.proto similarity index 100% rename from interactive_engine/proto/id_allocate_service.proto rename to proto/groot/id_allocate_service.proto diff --git a/interactive_engine/proto/ingestor_write_service.proto b/proto/groot/ingestor_snapshot_service.proto similarity index 66% rename from interactive_engine/proto/ingestor_write_service.proto rename to proto/groot/ingestor_snapshot_service.proto index f10bfff8e103..6d9258dd7fb7 100644 --- a/interactive_engine/proto/ingestor_write_service.proto +++ b/proto/groot/ingestor_snapshot_service.proto @@ -20,6 +20,31 @@ import "sdk/model.proto"; option java_package = "com.alibaba.graphscope.proto.groot"; option java_multiple_files = true; +service IngestorSnapshot { + rpc advanceIngestSnapshotId(AdvanceIngestSnapshotIdRequest) returns (AdvanceIngestSnapshotIdResponse); +} + +message AdvanceIngestSnapshotIdRequest { + int64 snapshotId = 1; +} + +message AdvanceIngestSnapshotIdResponse { + int64 previousSnapshotId = 1; +} + + +service IngestProgress { + rpc getTailOffsets(GetTailOffsetsRequest) returns(GetTailOffsetsResponse); 
+} + +message GetTailOffsetsRequest { + repeated int32 queueId = 1; +} + +message GetTailOffsetsResponse { + repeated int64 offsets = 1; +} + service IngestorWrite { rpc writeIngestor(WriteIngestorRequest) returns (WriteIngestorResponse); } @@ -32,4 +57,4 @@ message WriteIngestorRequest { message WriteIngestorResponse { int64 snapshotId = 1; -} +} \ No newline at end of file diff --git a/interactive_engine/proto/meta_service.proto b/proto/groot/meta_service.proto similarity index 100% rename from interactive_engine/proto/meta_service.proto rename to proto/groot/meta_service.proto diff --git a/interactive_engine/proto/metrics_collect_service.proto b/proto/groot/metrics_collect_service.proto similarity index 100% rename from interactive_engine/proto/metrics_collect_service.proto rename to proto/groot/metrics_collect_service.proto diff --git a/interactive_engine/proto/schema_service.proto b/proto/groot/schema_service.proto similarity index 100% rename from interactive_engine/proto/schema_service.proto rename to proto/groot/schema_service.proto diff --git a/interactive_engine/proto/sdk/backup_service.proto b/proto/groot/sdk/backup_service.proto similarity index 100% rename from interactive_engine/proto/sdk/backup_service.proto rename to proto/groot/sdk/backup_service.proto diff --git a/interactive_engine/proto/sdk/client.proto b/proto/groot/sdk/client.proto similarity index 100% rename from interactive_engine/proto/sdk/client.proto rename to proto/groot/sdk/client.proto diff --git a/interactive_engine/proto/sdk/client_backup_service.proto b/proto/groot/sdk/client_backup_service.proto similarity index 100% rename from interactive_engine/proto/sdk/client_backup_service.proto rename to proto/groot/sdk/client_backup_service.proto diff --git a/interactive_engine/proto/sdk/client_ddl_service.proto b/proto/groot/sdk/client_ddl_service.proto similarity index 100% rename from interactive_engine/proto/sdk/client_ddl_service.proto rename to 
proto/groot/sdk/client_ddl_service.proto diff --git a/interactive_engine/proto/sdk/common.proto b/proto/groot/sdk/common.proto similarity index 100% rename from interactive_engine/proto/sdk/common.proto rename to proto/groot/sdk/common.proto diff --git a/interactive_engine/proto/sdk/model.proto b/proto/groot/sdk/model.proto similarity index 100% rename from interactive_engine/proto/sdk/model.proto rename to proto/groot/sdk/model.proto diff --git a/interactive_engine/proto/sdk/schema.proto b/proto/groot/sdk/schema.proto similarity index 100% rename from interactive_engine/proto/sdk/schema.proto rename to proto/groot/sdk/schema.proto diff --git a/interactive_engine/proto/snapshot_commit_service.proto b/proto/groot/snapshot_commit_service.proto similarity index 100% rename from interactive_engine/proto/snapshot_commit_service.proto rename to proto/groot/snapshot_commit_service.proto diff --git a/interactive_engine/proto/store_backup_service.proto b/proto/groot/store_backup_service.proto similarity index 100% rename from interactive_engine/proto/store_backup_service.proto rename to proto/groot/store_backup_service.proto diff --git a/interactive_engine/proto/store_ingest_service.proto b/proto/groot/store_ingest_service.proto similarity index 100% rename from interactive_engine/proto/store_ingest_service.proto rename to proto/groot/store_ingest_service.proto diff --git a/interactive_engine/proto/store_schema_service.proto b/proto/groot/store_schema_service.proto similarity index 100% rename from interactive_engine/proto/store_schema_service.proto rename to proto/groot/store_schema_service.proto diff --git a/interactive_engine/proto/store_write_service.proto b/proto/groot/store_write_service.proto similarity index 94% rename from interactive_engine/proto/store_write_service.proto rename to proto/groot/store_write_service.proto index f2b4fd7e5b95..35403de8125c 100644 --- a/interactive_engine/proto/store_write_service.proto +++ b/proto/groot/store_write_service.proto @@ 
-21,7 +21,7 @@ option java_multiple_files = true; import "sdk/model.proto"; service StoreWrite { - rpc writeStore(WriteStoreRequest) returns(WriteStoreResponse); + rpc writeStore(WriteStoreRequest) returns (WriteStoreResponse); } message PartitionToBatchPb { diff --git a/python/graphscope/proto/message.proto b/proto/message.proto similarity index 97% rename from python/graphscope/proto/message.proto rename to proto/message.proto index 3509d7aead60..a6cbff9abf02 100644 --- a/python/graphscope/proto/message.proto +++ b/proto/message.proto @@ -17,9 +17,9 @@ syntax = "proto3"; package gs.rpc; -import "graphscope/proto/error_codes.proto"; -import "graphscope/proto/op_def.proto"; -import "graphscope/proto/types.proto"; +import "error_codes.proto"; +import "op_def.proto"; +import "types.proto"; //////////////////////////////////////////////////////////////////////////////// diff --git a/python/graphscope/proto/op_def.proto b/proto/op_def.proto similarity index 92% rename from python/graphscope/proto/op_def.proto rename to proto/op_def.proto index 036329a07b79..1cd9c6b98d8b 100644 --- a/python/graphscope/proto/op_def.proto +++ b/proto/op_def.proto @@ -17,10 +17,10 @@ syntax = "proto3"; package gs.rpc; -import "graphscope/proto/attr_value.proto"; -import "graphscope/proto/error_codes.proto"; -import "graphscope/proto/graph_def.proto"; -import "graphscope/proto/types.proto"; +import "attr_value.proto"; +import "error_codes.proto"; +import "graph_def.proto"; +import "types.proto"; message OpDef { // Unique key for every OpDef. Usually generated by analytical engine. 
diff --git a/python/graphscope/proto/proto_generator.py b/proto/proto_generator.py old mode 100755 new mode 100644 similarity index 63% rename from python/graphscope/proto/proto_generator.py rename to proto/proto_generator.py index 503dba2fab5a..e6d3e06d5274 --- a/python/graphscope/proto/proto_generator.py +++ b/proto/proto_generator.py @@ -24,28 +24,27 @@ def gather_all_proto(proto_dir, suffix="*.proto"): - directory = os.path.join(proto_dir, suffix) - files = glob.glob(directory) - return files + pattern = os.path.join(proto_dir, suffix) + return glob.glob(pattern) def create_path(path): """Utility function to create a path.""" - if os.path.isdir(path): - return - os.makedirs(path, exist_ok=True) + if not os.path.isdir(path): + os.makedirs(path, exist_ok=True) def cpp_out(relative_dir, output_dir): files = gather_all_proto(relative_dir) for proto_file in files: + cmd = [ + shutil.which("protoc"), + "-I.", + f"--cpp_out={output_dir}", + proto_file, + ] subprocess.check_call( - [ - shutil.which("protoc"), - "-I%s" % ".", - "--cpp_out=%s" % output_dir, - proto_file, - ], + cmd, stderr=subprocess.STDOUT, ) @@ -53,16 +52,17 @@ def cpp_out(relative_dir, output_dir): def python_out(relative_dir, output_dir): files = gather_all_proto(relative_dir) for proto_file in files: + cmd = [ + sys.executable, + "-m", + "grpc_tools.protoc", + "-I.", + f"--python_out={output_dir}", + f"--mypy_out={output_dir}", + proto_file, + ] subprocess.check_call( - [ - sys.executable, - "-m", - "grpc_tools.protoc", - "-I%s" % ".", - "--python_out=%s" % os.path.join(output_dir), - "--mypy_out=%s" % os.path.join(output_dir), - proto_file, - ], + cmd, stderr=subprocess.STDOUT, ) @@ -74,14 +74,15 @@ def cpp_service_out(relative_dir, output_dir): suffix = "*_service.proto" files = gather_all_proto(relative_dir, suffix) for proto_file in files: + cmd = [ + shutil.which("protoc"), + "-I.", + f"--grpc_out={output_dir}", + f"--plugin=protoc-gen-grpc={plugin_path}", + proto_file, + ] 
subprocess.check_call( - [ - shutil.which("protoc"), - "-I%s" % ".", - "--grpc_out=%s" % output_dir, - "--plugin=protoc-gen-grpc=%s" % plugin_path, - proto_file, - ], + cmd, stderr=subprocess.STDOUT, ) @@ -90,18 +91,19 @@ def python_service_out(relative_dir, output_dir): suffix = "*_service.proto" files = gather_all_proto(relative_dir, suffix) for proto_file in files: + cmd = [ + sys.executable, + "-m", + "grpc_tools.protoc", + "-I.", + f"--python_out={output_dir}", + f"--mypy_out={output_dir}", + f"--grpc_python_out={output_dir}", + f"--mypy_grpc_out={output_dir}", + proto_file, + ] subprocess.check_call( - [ - sys.executable, - "-m", - "grpc_tools.protoc", - "-I%s" % ".", - "--python_out=%s" % output_dir, - "--mypy_out=%s" % os.path.join(output_dir), - "--grpc_python_out=%s" % output_dir, - "--mypy_grpc_out=%s" % os.path.join(output_dir), - proto_file, - ], + cmd, stderr=subprocess.STDOUT, ) @@ -111,19 +113,16 @@ def python_service_out(relative_dir, output_dir): print("Usage: python proto_generator.py [--cpp] [--python]") sys.exit(1) - # path to 'GraphScope/python/graphscope/proto' - current_dir = os.path.dirname(os.path.abspath(__file__)) - - # path to 'GraphScope/python' - base_dir = os.path.join(current_dir, "../", "../") - os.chdir(base_dir) - output_dir = sys.argv[1] output_dir = os.path.realpath(os.path.realpath(output_dir)) create_path(output_dir) + # path to 'GraphScope/proto' + current_dir = os.path.dirname(os.path.abspath(__file__)) + os.chdir(current_dir) + # must use relative path - relative_dir = os.path.join(".", "graphscope", "proto") + relative_dir = "." 
if len(sys.argv) <= 2 or len(sys.argv) > 2 and sys.argv[2] == "--cpp": print("Generating cpp proto to: " + output_dir) cpp_out(relative_dir, output_dir) diff --git a/python/graphscope/proto/types.proto b/proto/types.proto similarity index 100% rename from python/graphscope/proto/types.proto rename to proto/types.proto diff --git a/python/graphscope/proto/write_service.proto b/proto/write_service.proto similarity index 100% rename from python/graphscope/proto/write_service.proto rename to proto/write_service.proto diff --git a/python/graphscope/proto/__init__.py b/python/graphscope/proto/__init__.py index 618dda69a7be..e61cf97316fe 100644 --- a/python/graphscope/proto/__init__.py +++ b/python/graphscope/proto/__init__.py @@ -15,3 +15,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # + +import os +import sys + +sys.path.insert(0, os.path.dirname(__file__)) diff --git a/python/setup.py b/python/setup.py index 50d3c9cdc82b..8eab42409929 100644 --- a/python/setup.py +++ b/python/setup.py @@ -36,7 +36,7 @@ # https://github.com/pypa/pip/issues/7953 site.ENABLE_USER_SITE = "--user" in sys.argv[1:] -repo_root = os.path.dirname(os.path.abspath(__file__)) +pkg_root = os.path.dirname(os.path.abspath(__file__)) if platform.system() == "Darwin": # see also: https://github.com/python/cpython/issues/100420 @@ -57,20 +57,20 @@ def finalize_options(self): pass def run(self): + cmd = [ + sys.executable, + os.path.join( + pkg_root, + "..", + "proto", + "proto_generator.py", + ), + os.path.join(pkg_root, "graphscope", "proto"), + "--python", + ] + print(" ".join(cmd)) subprocess.check_call( - [ - sys.executable, - os.path.join( - repo_root, - "..", - "python", - "graphscope", - "proto", - "proto_generator.py", - ), - repo_root, - "--python", - ], + cmd, env=os.environ.copy(), ) @@ -92,17 +92,17 @@ def finalize_options(self): def run(self): if self.inplace: - subprocess.check_call([sys.executable, "-m", "isort", "."], 
cwd=repo_root) - subprocess.check_call([sys.executable, "-m", "black", "."], cwd=repo_root) - subprocess.check_call([sys.executable, "-m", "flake8", "."], cwd=repo_root) + subprocess.check_call([sys.executable, "-m", "isort", "."], cwd=pkg_root) + subprocess.check_call([sys.executable, "-m", "black", "."], cwd=pkg_root) + subprocess.check_call([sys.executable, "-m", "flake8", "."], cwd=pkg_root) else: subprocess.check_call( - [sys.executable, "-m", "isort", "--check", "--diff", "."], cwd=repo_root + [sys.executable, "-m", "isort", "--check", "--diff", "."], cwd=pkg_root ) subprocess.check_call( - [sys.executable, "-m", "black", "--check", "--diff", "."], cwd=repo_root + [sys.executable, "-m", "black", "--check", "--diff", "."], cwd=pkg_root ) - subprocess.check_call([sys.executable, "-m", "flake8", "."], cwd=repo_root) + subprocess.check_call([sys.executable, "-m", "flake8", "."], cwd=pkg_root) class CustomBuildPy(build_py): @@ -141,7 +141,7 @@ def run(self): graphlearn_shared_lib = "libgraphlearn_shared.so" if not os.path.isfile( os.path.join( - repo_root, + pkg_root, "..", "learning_engine", "graph-learn", @@ -156,18 +156,18 @@ def run(self): bdist_wheel.run(self) -with open(os.path.join(repo_root, "..", "README.md"), "r", encoding="utf-8") as fp: +with open(os.path.join(pkg_root, "..", "README.md"), "r", encoding="utf-8") as fp: long_description = fp.read() def parsed_reqs(): - with open(os.path.join(repo_root, "requirements.txt"), "r", encoding="utf-8") as fp: + with open(os.path.join(pkg_root, "requirements.txt"), "r", encoding="utf-8") as fp: return fp.read().splitlines() def parsed_dev_reqs(): with open( - os.path.join(repo_root, "requirements-dev.txt"), "r", encoding="utf-8" + os.path.join(pkg_root, "requirements-dev.txt"), "r", encoding="utf-8" ) as fp: return fp.read().splitlines() @@ -202,7 +202,7 @@ def build_learning_engine(): import numpy ROOT_PATH = os.path.abspath( - os.path.join(repo_root, "..", "learning_engine", "graph-learn") + 
os.path.join(pkg_root, "..", "learning_engine", "graph-learn") ) include_dirs = [] @@ -270,7 +270,7 @@ def parse_version(root, **kwargs): from setuptools_scm.git import parse from setuptools_scm.version import meta - version_file = os.path.join(repo_root, "..", "VERSION") + version_file = os.path.join(pkg_root, "..", "VERSION") if os.path.isfile(version_file): with open(version_file, "r", encoding="utf-8") as fp: return meta(fp.read().strip()) @@ -305,7 +305,7 @@ def parse_version(root, **kwargs): ], keywords="Graph, Large-Scale, Distributed Computing", use_scm_version={ - "root": repo_root, + "root": pkg_root, "parse": parse_version, }, setup_requires=[