Skip to content

Commit

Permalink
impl new schema reading
Browse files Browse the repository at this point in the history
  • Loading branch information
zhanglei1949 committed Aug 9, 2023
1 parent 8608dc0 commit 45a4dd3
Show file tree
Hide file tree
Showing 21 changed files with 774 additions and 339 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/flex.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,12 @@ jobs:
cmake .. && sudo make -j$(nproc)
export FLEX_DATA_DIR=../../../../storages/rt_mutable_graph/modern_graph/
./run_grin_test
- name: Test Graph Loading
env:
FLEX_DATA_DIR: ${GITHUB_WORKSPACE}/flex/build/
run: |
cd ${GITHUB_WORKSPACE}/flex/storages/rt_mutable_graph/modern_graph/
# test_graph_loading takes three arguments: the graph schema yaml, the
# bulk load config yaml and the output data directory. Every line of the
# command except the last must end with a backslash, otherwise the shell
# runs the next line as a separate command.
GLOG_v=10 ./tests/rt_mutable_graph/test_graph_loading \
../storages/rt_mutable_graph/modern_graph/modern_graph_new.yaml \
../storages/rt_mutable_graph/modern_graph/bulk_load_new.yaml /tmp/csr-data-dir/
2 changes: 1 addition & 1 deletion flex/bin/rt_server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ int main(int argc, char** argv) {

auto ret = gs::Schema::LoadFromYaml(graph_schema_path, bulk_load_config_path);
db.Init(std::get<0>(ret), std::get<1>(ret), std::get<2>(ret),
std::get<3>(ret), data_path, shard_num);
std::get<3>(ret), std::get<4>(ret), data_path, shard_num);

t0 += grape::GetCurrentTime();

Expand Down
2 changes: 1 addition & 1 deletion flex/bin/sync_server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ int main(int argc, char** argv) {

auto ret = gs::Schema::LoadFromYaml(graph_schema_path, bulk_load_config_path);
db.Init(std::get<0>(ret), std::get<1>(ret), std::get<2>(ret),
std::get<3>(ret), data_path, shard_num);
std::get<3>(ret), std::get<4>(ret), data_path, shard_num);

t0 += grape::GetCurrentTime();

Expand Down
8 changes: 4 additions & 4 deletions flex/engines/graph_db/database/graph_db.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@ GraphDB& GraphDB::get() {
void GraphDB::Init(
const Schema& schema,
const std::vector<std::pair<std::string, std::string>>& vertex_files,
const std::vector<std::tuple<std::string, std::string, std::string,
std::string>>& edge_files,
const std::vector<std::string>& plugins, const std::string& data_dir,
int thread_num) {
const std::vector<std::tuple<std::string, std::string, std::string, int32_t,
int32_t, std::string>>& edge_files,
const std::vector<std::string>& plugins, const LoadConfig& load_config,
const std::string& data_dir, int thread_num) {
std::filesystem::path data_dir_path(data_dir);
if (!std::filesystem::exists(data_dir_path)) {
std::filesystem::create_directory(data_dir_path);
Expand Down
6 changes: 3 additions & 3 deletions flex/engines/graph_db/database/graph_db.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ class GraphDB {
const Schema& schema,
const std::vector<std::pair<std::string, std::string>>& vertex_files,
const std::vector<std::tuple<std::string, std::string, std::string,
std::string>>& edge_files,
const std::vector<std::string>& plugins, const std::string& data_dir,
int thread_num = 1);
int32_t, int32_t, std::string>>& edge_files,
const std::vector<std::string>& plugins, const LoadConfig& config,
const std::string& data_dir, int thread_num = 1);

/** @brief Create a transaction to read vertices and edges.
*
Expand Down
120 changes: 72 additions & 48 deletions flex/storages/rt_mutable_graph/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,64 +25,88 @@ The configuration file ([modern graph example](./modern_graph/modern_graph.yaml)
Here is an example of a configuration file:

```yaml
graph:
graph_store: mutable_csr
vertex:
- label_name: person
name: modern
store_type: mutable_csr
stored_procedures:
directory: plugins
enable_lists:
- libxxx.so
schema:
vertex_types:
- type_name: person
x_csr_params:
max_vertex_num: 100
properties:
- name: _ID
type: int64
- name: name
type: String
- name: age
type: int32
max_vertex_num: 100
- label_name: software
- property_id: 0
property_name: id
property_type:
primitive_type: DT_SIGNED_INT64
- property_id: 1
property_name: name
property_type:
primitive_type: DT_STRING
- property_id: 2
property_name: age
property_type:
primitive_type: DT_SIGNED_INT32
primary_keys:
- id
- type_name: software
x_csr_params:
max_vertex_num: 100
properties:
- name: _ID
type: int64
- name: name
type: String
- name: lang
type: String
max_vertex_num: 100
edge:
- src_label_name: person
dst_label_name: software
edge_label_name: created
- property_id: 0
property_name: id
property_type:
primitive_type: DT_SIGNED_INT64
x_csr_params:
- property_id: 1
property_name: name
property_type:
primitive_type: DT_STRING
- property_id: 2
property_name: lang
property_type:
primitive_type: DT_STRING
primary_keys:
- id
edge_types:
- type_name: knows
x_csr_params:
incoming_edge_strategy: None
outgoing_edge_strategy: Multiple
vertex_type_pair_relations:
source_vertex: person
destination_vertex: person
relation: MANY_TO_MANY
properties:
- name: _SRC
type: int64
- name: _DST
type: int64
- name: weight
type: double
incoming_edge_strategy: None
outgoing_edge_strategy: Single
- src_label_name: person
dst_label_name: person
edge_label_name: knows
- property_id: 0
property_name: weight
property_type:
primitive_type: DT_DOUBLE
- type_name: created
x_csr_params:
incoming_edge_strategy: None
outgoing_edge_strategy: Single
vertex_type_pair_relations:
source_vertex: person
destination_vertex: software
relation: ONE_TO_MANY
properties:
- name: _SRC
type: int64
- name: _DST
type: int64
- name: weight
type: double
incoming_edge_strategy: None
outgoing_edge_strategy: Multiple

stored_procedures:
- libxxx.so
- property_id: 0
property_name: weight
property_type:
primitive_type: DT_DOUBLE
```
Notes:
- `_ID`, `_SRC`, `_DST` are reserved words, they are the external id of vertices, only int64 type is supported.
- `max_vertex_num` limits the number of vertices of this type:
- Currently we only support one primary key, and the type has to be `DT_SIGNED_INT64`.
- All implementation-related configurations are put under `x_csr_params`.
- `max_vertex_num` limits the number of vertices of this type:
- The limit number is used to `mmap` memory, so it only takes virtual memory before vertices are actually inserted.
- If `max_vertex_num` is not set, a default large number (e.g.: 2^48) will be used.
- `incoming/outgoing_edge_strategy` specifies the storing strategy of the incoming or outgoing edges of this type. There are 3 kinds of strategies:
- `incoming/outgoing_edge_strategy` specifies the storing strategy of the incoming or outgoing edges of this type. There are 3 kinds of strategies:
- None: no edge will be stored
- Single: only one edge will be stored
- Multiple(default): multiple edges will be stored
Expand Down
30 changes: 30 additions & 0 deletions flex/storages/rt_mutable_graph/load_config.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/** Copyright 2020 Alibaba Group Holding Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef STORAGE_RT_MUTABLE_GRAPH_LOAD_CONFIG_H_
#define STORAGE_RT_MUTABLE_GRAPH_LOAD_CONFIG_H_

#include <string>

namespace gs {
// Meta information describing a bulk loading job: where the data comes
// from, how the input is delimited, and which loading method is used.
// All fields are plain strings; this struct performs no validation itself.
struct LoadConfig {
// Kind of data source, e.g. "file", "hdfs", "oss", "s3".
std::string data_source_;
// Delimiter string used in the input, e.g. "\t", ",", " ", "|".
std::string delimiter_;
// Loading method: init, append, or overwrite.
std::string method_;
};
} // namespace gs

#endif // STORAGE_RT_MUTABLE_GRAPH_LOAD_CONFIG_H_
Loading

0 comments on commit 45a4dd3

Please sign in to comment.