-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adding vector serialization and deserialization functions for PyVelox (…
…#4400) Summary: This PR adds support for `VectorSaver.h` functionality for serializing and deserializing vectors. `VectorSaver.h` includes `saveVectorToFile` and `restoreVectorFromFile` methods. This PR includes bindings for those methods as `save_vector` and `load_vector`. Pull Request resolved: #4400 Reviewed By: laithsakka Differential Revision: D45975301 Pulled By: kgpai fbshipit-source-id: 3d52b08f8c0642ddb8b5b507ea8ea11e34df2f55
- Loading branch information
1 parent
fd2972d
commit 527d9ca
Showing
7 changed files
with
269 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
/* | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include <pybind11/stl.h> | ||
#include "velox/common/memory/Memory.h" | ||
#include "velox/core/QueryCtx.h" | ||
|
||
namespace facebook::velox::py { | ||
|
||
/// PyVeloxContext is used only during function binding time. Its a utility | ||
/// that manages pool, query and exec context for Velox expressions and vectors. | ||
struct PyVeloxContext { | ||
static inline PyVeloxContext& getSingletonInstance() { | ||
if (!instance_) { | ||
instance_ = std::unique_ptr<PyVeloxContext>(new PyVeloxContext()); | ||
} | ||
return *instance_.get(); | ||
} | ||
|
||
facebook::velox::memory::MemoryPool* pool() { | ||
return pool_.get(); | ||
} | ||
|
||
facebook::velox::core::QueryCtx* queryCtx() { | ||
return queryCtx_.get(); | ||
} | ||
|
||
facebook::velox::core::ExecCtx* execCtx() { | ||
return execCtx_.get(); | ||
} | ||
|
||
static inline void cleanup() { | ||
if (instance_) { | ||
instance_.reset(); | ||
} | ||
} | ||
|
||
private: | ||
PyVeloxContext() = default; | ||
PyVeloxContext(const PyVeloxContext&) = delete; | ||
PyVeloxContext(const PyVeloxContext&&) = delete; | ||
PyVeloxContext& operator=(const PyVeloxContext&) = delete; | ||
PyVeloxContext& operator=(const PyVeloxContext&&) = delete; | ||
|
||
std::shared_ptr<facebook::velox::memory::MemoryPool> pool_ = | ||
facebook::velox::memory::addDefaultLeafMemoryPool(); | ||
std::shared_ptr<facebook::velox::core::QueryCtx> queryCtx_ = | ||
std::make_shared<facebook::velox::core::QueryCtx>(); | ||
std::unique_ptr<facebook::velox::core::ExecCtx> execCtx_ = | ||
std::make_unique<facebook::velox::core::ExecCtx>( | ||
pool_.get(), | ||
queryCtx_.get()); | ||
|
||
static inline std::unique_ptr<PyVeloxContext> instance_; | ||
}; | ||
|
||
} // namespace facebook::velox::py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
/* | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include "serde.h" | ||
#include "context.h" | ||
|
||
#include <velox/vector/VectorSaver.h> | ||
|
||
namespace facebook::velox::py { | ||
|
||
namespace py = pybind11; | ||
|
||
namespace { | ||
/// Restores a vector from the file at `filePath`, passing the memory pool of
/// the PyVeloxContext singleton to restoreVectorFromFile.
VectorPtr pyRestoreVectorFromFileHelper(const char* FOLLY_NONNULL filePath) {
  auto& context = PyVeloxContext::getSingletonInstance();
  return facebook::velox::restoreVectorFromFile(filePath, context.pool());
}
} // namespace | ||
|
||
void addSerdeBindings(py::module& m, bool asModuleLocalDefinitions) { | ||
using namespace facebook::velox; | ||
|
||
m.def( | ||
"save_vector", | ||
&saveVectorToFile, | ||
R"delimiter( | ||
Serializes the vector into binary format and writes it to a new file. | ||
Parameters | ||
---------- | ||
vector : Union[FlatVector, ConstantVector, DictionaryVector] | ||
The vector to be saved. | ||
file_path: str | ||
The path to which the vector will be saved. | ||
Returns | ||
------- | ||
None | ||
Examples | ||
-------- | ||
>>> import pyvelox.pyvelox as pv | ||
>>> vec = pv.from_list([1, 2, 3]) | ||
>>> pv.save_vector(vec, '/tmp/flatvector.bin') | ||
)delimiter", | ||
py::arg("vector"), | ||
py::arg("file_path")); | ||
m.def( | ||
"load_vector", | ||
&pyRestoreVectorFromFileHelper, | ||
R"delimiter( | ||
Reads and deserializes a vector from a file stored by save_vector. | ||
Parameters | ||
---------- | ||
file_path: str | ||
The path from which the vector will be loaded. | ||
Returns | ||
------- | ||
Union[FlatVector, ConstantVector, DictionaryVector] | ||
Examples | ||
-------- | ||
>>> import pyvelox.pyvelox as pv | ||
>>> pv.load_vector('/tmp/flatvector.bin') | ||
<pyvelox.pyvelox.FlatVector_BIGINT object at 0x7f8f6f818bb0> | ||
)delimiter", | ||
py::arg("file_path")); | ||
} | ||
|
||
} // namespace facebook::velox::py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
/* | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <pybind11/pybind11.h> | ||
#include <pybind11/stl.h> | ||
|
||
namespace facebook::velox::py { | ||
|
||
namespace py = pybind11; | ||
|
||
/// Adds serialization and deserialization bindings to module m. | ||
/// This adds bindings to save and load Vectors. | ||
/// | ||
/// @param m Module to add bindings to. | ||
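/// @param asModuleLocalDefinitions Intended to request module-local bindings,
/// i.e. bindings that are only visible inside module m. Refer to
/// https://pybind11.readthedocs.io/en/stable/advanced/classes.html#module-local-class-bindings
/// for further details.
/// NOTE(review): the definition of addSerdeBindings shown in this commit never
/// reads this flag - confirm whether it is meant to have an effect.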
void addSerdeBindings(py::module& m, bool asModuleLocalDefinitions = true); | ||
|
||
} // namespace facebook::velox::py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import shutil | ||
import tempfile | ||
import unittest | ||
from os import path | ||
|
||
import pyvelox.pyvelox as pv | ||
|
||
|
||
class TestVeloxVectorSaver(unittest.TestCase): | ||
def setUp(self): | ||
# create a temporary directory | ||
self.test_dir = tempfile.mkdtemp() | ||
|
||
def tearDown(self): | ||
# remove the temporary directory | ||
shutil.rmtree(self.test_dir) | ||
|
||
def make_flat_vector(self): | ||
return pv.from_list([1, 2, 3]) | ||
|
||
def make_const_vector(self): | ||
return pv.constant_vector(1000, 10) | ||
|
||
def make_dict_vector(self): | ||
base_indices = [0, 0, 1, 0, 2] | ||
return pv.dictionary_vector(pv.from_list([1, 2, 3]), base_indices) | ||
|
||
def test_serde_vector(self): | ||
data = { | ||
"flat_vector": self.make_flat_vector(), | ||
"const_vector": self.make_const_vector(), | ||
"dict_vector": self.make_dict_vector(), | ||
} | ||
|
||
paths = { | ||
"flat_vector": path.join(self.test_dir, "flat.pyvelox"), | ||
"const_vector": path.join(self.test_dir, "const.pyvelox"), | ||
"dict_vector": path.join(self.test_dir, "dict.pyvelox"), | ||
} | ||
|
||
for vec_key, fpath_key in zip(data, paths): | ||
vec = data[vec_key] | ||
fpath = paths[fpath_key] | ||
pv.save_vector(vector=vec, file_path=fpath) | ||
loaded_vec = pv.load_vector(file_path=fpath) | ||
self.assertEqual(len(vec), len(loaded_vec)) | ||
self.assertEqual(vec.dtype(), loaded_vec.dtype()) | ||
for i in range(len(vec)): | ||
self.assertEqual(vec[i], loaded_vec[i]) |