Skip to content

Commit

Permalink
Revolution security fix jsons (#1506)
Browse files Browse the repository at this point in the history
* replace pickle with jsons to avoid pickle security issues

* fix test_syapse.py

* use our internal msgpack-numpy that disables pickle

* skip git+ reqs in check_compat

* graceful error for submodules

* remove jsons in favor of json, no benefit as async client (httpx) attempts to internally json deserialize anyway. jsons would be of no benefit

* fix tests

* remove jsons req
  • Loading branch information
ifrit98 authored Sep 1, 2023
1 parent 4cd9ab0 commit 38d172a
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 22 deletions.
3 changes: 3 additions & 0 deletions bittensor/dendrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ def preprocess_synapse_for_request(
Returns:
bt.Synapse: The preprocessed synapse.
"""
bt.logging.trace("Pre-process synapse for request")

# Set the timeout for the synapse
synapse.timeout = str(timeout)
Expand Down Expand Up @@ -331,6 +332,8 @@ def process_server_response(self, server_response, local_synapse: bt.Synapse):
Raises:
None, but errors in attribute setting are silently ignored.
"""
bt.logging.trace("Postprocess server response")

# Check if the server responded with a successful status code
if server_response.status_code == 200:
# If the response is successful, overwrite local synapse state with
Expand Down
27 changes: 21 additions & 6 deletions bittensor/synapse.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@

import ast
import sys
import pickle
import torch
import json
import base64
import typing
import hashlib
Expand Down Expand Up @@ -551,9 +552,17 @@ def to_headers(self) -> dict:
headers[f"bt_header_dict_tensor_{field}"] = str(serialized_dict_tensor)

elif required and field in required:
serialized_value = pickle.dumps(value)
encoded_value = base64.b64encode(serialized_value).decode("utf-8")
headers[f"bt_header_input_obj_{field}"] = encoded_value
bittensor.logging.trace(f"Serializing {field} with json...")
try:
serialized_value = json.dumps(value)
encoded_value = base64.b64encode(serialized_value.encode()).decode(
"utf-8"
)
headers[f"bt_header_input_obj_{field}"] = encoded_value
except TypeError as e:
raise ValueError(
f"Error serializing {field} with value {value}. Objects must be json serializable."
) from e

# Adding the size of the headers and the total size to the headers
headers["header_size"] = str(sys.getsizeof(headers))
Expand Down Expand Up @@ -647,15 +656,21 @@ def parse_headers_to_inputs(cls, headers: dict) -> dict:
continue
# Handle 'input_obj' headers
elif "bt_header_input_obj" in key:
bittensor.logging.trace(f"Deserializing {key} with json...")
try:
new_key = key.split("bt_header_input_obj_")[1]
# Skip if the key already exists in the dictionary
if new_key in inputs_dict:
continue
# Decode and load the serialized object
inputs_dict[new_key] = pickle.loads(
base64.b64decode(value.encode("utf-8"))
inputs_dict[new_key] = json.loads(
base64.b64decode(value.encode()).decode("utf-8")
)
except json.JSONDecodeError as e:
bittensor.logging.error(
f"Error while json decoding 'input_obj' header {key}: {e}"
)
continue
except Exception as e:
bittensor.logging.error(
f"Error while parsing 'input_obj' header {key}: {e}"
Expand Down
2 changes: 1 addition & 1 deletion requirements/prod.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ munch==2.5.0
netaddr
numpy
msgpack
msgpack_numpy
git+https://github.com/opentensor/msgpack-numpy.git#egg=msgpack-numpy
nest_asyncio
pycryptodome>=3.18.0,<4.0.0
pyyaml
Expand Down
5 changes: 5 additions & 0 deletions scripts/check_compatibility.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ check_compatibility() {
all_supported=0

while read -r requirement; do
# Skip lines starting with git+
if [[ "$requirement" == git+* ]]; then
continue
fi

package_name=$(echo "$requirement" | awk -F'[!=<>]' '{print $1}' | awk -F'[' '{print $1}') # Strip off brackets
echo -n "Checking $package_name... "

Expand Down
40 changes: 32 additions & 8 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,46 @@
import subprocess


class SubmoduleSyncError(Exception):
pass


def sync_and_update_submodules():
try:
print("Synchronizing and updating submodules...")
subprocess.check_call(['git', 'submodule', 'sync'])
subprocess.check_call(['git', 'submodule', 'update', '--init'])
subprocess.check_call(["git", "submodule", "sync"])
subprocess.check_call(["git", "submodule", "update", "--init"])
except subprocess.CalledProcessError:
print("Error synchronizing or updating submodules. Please ensure you have git installed and are in the root directory of the repository.")
raise
print(
"Error synchronizing or updating submodules. Please ensure you have git installed and are in the root directory of the repository."
)
raise SubmoduleSyncError(
"An error occurred while synchronizing or updating submodules."
)


try:
sync_and_update_submodules()
except SubmoduleSyncError as e:
print(f"Submodule synchronization error: {e}")

sync_and_update_submodules()

def read_requirements(path):
requirements = []
git_requirements = []

with pathlib.Path(path).open() as requirements_txt:
return [
str(requirement) for requirement in parse_requirements(requirements_txt)
]
for line in requirements_txt:
if line.startswith("git+"):
git_requirements.append(line.strip())
else:
requirements.append(line.strip())

# Install git dependencies
for git_req in git_requirements:
subprocess.check_call(["pip", "install", git_req])

return requirements


requirements = read_requirements("requirements/prod.txt")
Expand Down
14 changes: 7 additions & 7 deletions tests/unit_tests/test_synapse.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
import json
import torch
import pickle
import base64
import typing
import pytest
Expand All @@ -31,9 +31,9 @@ class Test(bittensor.Synapse):
headers = {
"bt_header_axon_nonce": "111",
"bt_header_dendrite_ip": "12.1.1.2",
"bt_header_input_obj_key1": base64.b64encode(pickle.dumps([1, 2, 3, 4])).decode(
"utf-8"
),
"bt_header_input_obj_key1": base64.b64encode(
json.dumps([1, 2, 3, 4]).encode("utf-8")
).decode("utf-8"),
"bt_header_tensor_key2": "[3]-torch.float32",
"timeout": "12",
"name": "Test",
Expand Down Expand Up @@ -66,9 +66,9 @@ class Test(bittensor.Synapse):
headers = {
"bt_header_axon_nonce": "111",
"bt_header_dendrite_ip": "12.1.1.2",
"bt_header_input_obj_key1": base64.b64encode(pickle.dumps([1, 2, 3, 4])).decode(
"utf-8"
),
"bt_header_input_obj_key1": base64.b64encode(
json.dumps([1, 2, 3, 4]).encode("utf-8")
).decode("utf-8"),
"bt_header_tensor_key2": "[3]-torch.float32",
"timeout": "12",
"name": "Test",
Expand Down

0 comments on commit 38d172a

Please sign in to comment.