From 857dff153d2c9f238d3a68821262b7764aeb44ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tina=20M=C3=BCller?= Date: Sat, 23 Feb 2019 21:12:43 +0100 Subject: [PATCH] Apply FullLoader/UnsafeLoader changes to lib3 --- lib3/yaml/__init__.py | 100 ++++++++++++++++++++++-- lib3/yaml/constructor.py | 130 ++++++++++++++++++++------------ lib3/yaml/cyaml.py | 20 ++++- lib3/yaml/loader.py | 25 +++++- tests/lib3/test_errors.py | 6 +- tests/lib3/test_input_output.py | 12 +-- tests/lib3/test_recursive.py | 2 +- 7 files changed, 229 insertions(+), 66 deletions(-) diff --git a/lib3/yaml/__init__.py b/lib3/yaml/__init__.py index 25927851..02f51046 100644 --- a/lib3/yaml/__init__.py +++ b/lib3/yaml/__init__.py @@ -8,7 +8,7 @@ from .loader import * from .dumper import * -__version__ = '4.1' +__version__ = '3.13' try: from .cyaml import * __with_libyaml__ = True @@ -17,6 +17,44 @@ import io +#------------------------------------------------------------------------------ +# Warnings control +#------------------------------------------------------------------------------ + +# 'Global' warnings state: +_warnings_enabled = { + 'YAMLLoadWarning': True, +} + +# Get or set global warnings' state +def warnings(settings=None): + if settings is None: + return _warnings_enabled + + if type(settings) is dict: + for key in settings: + if key in _warnings_enabled: + _warnings_enabled[key] = settings[key] + +# Warn when load() is called without Loader=... +class YAMLLoadWarning(RuntimeWarning): + pass + +def load_warning(method): + if _warnings_enabled['YAMLLoadWarning'] is False: + return + + import warnings + + message = ( + "calling yaml.%s() without Loader=... is deprecated, as the " + "default Loader is unsafe. Please read " + "https://msg.pyyaml.org/load for full details." 
+ ) % method + + warnings.warn(message, YAMLLoadWarning, stacklevel=3) + +#------------------------------------------------------------------------------ def scan(stream, Loader=Loader): """ Scan a YAML stream and produce scanning tokens. @@ -62,22 +100,30 @@ def compose_all(stream, Loader=Loader): finally: loader.dispose() -def load(stream, Loader=Loader): +def load(stream, Loader=None): """ Parse the first YAML document in a stream and produce the corresponding Python object. """ + if Loader is None: + load_warning('load') + Loader = FullLoader + loader = Loader(stream) try: return loader.get_single_data() finally: loader.dispose() -def load_all(stream, Loader=Loader): +def load_all(stream, Loader=None): """ Parse all YAML documents in a stream and produce corresponding Python objects. """ + if Loader is None: + load_warning('load_all') + Loader = FullLoader + loader = Loader(stream) try: while loader.check_data(): @@ -85,11 +131,33 @@ def load_all(stream, Loader=Loader): finally: loader.dispose() +def full_load(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + + Resolve all tags except those known to be + unsafe on untrusted input. + """ + return load(stream, FullLoader) + +def full_load_all(stream): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + + Resolve all tags except those known to be + unsafe on untrusted input. + """ + return load_all(stream, FullLoader) + def safe_load(stream): """ Parse the first YAML document in a stream and produce the corresponding Python object. - Resolve only basic YAML tags. + + Resolve only basic YAML tags. This is known + to be safe for untrusted input. """ return load(stream, SafeLoader) @@ -97,10 +165,32 @@ def safe_load_all(stream): """ Parse all YAML documents in a stream and produce corresponding Python objects. - Resolve only basic YAML tags. + + Resolve only basic YAML tags. This is known + to be safe for untrusted input. 
""" return load_all(stream, SafeLoader) +def unsafe_load(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + + Resolve all tags, even those known to be + unsafe on untrusted input. + """ + return load(stream, UnsafeLoader) + +def unsafe_load_all(stream): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + + Resolve all tags, even those known to be + unsafe on untrusted input. + """ + return load_all(stream, UnsafeLoader) + def emit(events, stream=None, Dumper=Dumper, canonical=None, indent=None, width=None, allow_unicode=None, line_break=None): diff --git a/lib3/yaml/constructor.py b/lib3/yaml/constructor.py index 193d91d9..34fc1ae9 100644 --- a/lib3/yaml/constructor.py +++ b/lib3/yaml/constructor.py @@ -1,6 +1,12 @@ -__all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor', - 'ConstructorError'] +__all__ = [ + 'BaseConstructor', + 'SafeConstructor', + 'FullConstructor', + 'UnsafeConstructor', + 'Constructor', + 'ConstructorError' +] from .error import * from .nodes import * @@ -464,7 +470,7 @@ def construct_undefined(self, node): SafeConstructor.add_constructor(None, SafeConstructor.construct_undefined) -class Constructor(SafeConstructor): +class FullConstructor(SafeConstructor): def construct_python_str(self, node): return self.construct_scalar(node) @@ -497,18 +503,22 @@ def construct_python_complex(self, node): def construct_python_tuple(self, node): return tuple(self.construct_sequence(node)) - def find_python_module(self, name, mark): + def find_python_module(self, name, mark, unsafe=False): if not name: raise ConstructorError("while constructing a Python module", mark, "expected non-empty name appended to the tag", mark) - try: - __import__(name) - except ImportError as exc: + if unsafe: + try: + __import__(name) + except ImportError as exc: + raise ConstructorError("while constructing a Python module", mark, + "cannot find module %r (%s)" % (name, exc), 
mark) + if not name in sys.modules: raise ConstructorError("while constructing a Python module", mark, - "cannot find module %r (%s)" % (name, exc), mark) + "module %r is not imported" % name, mark) return sys.modules[name] - def find_python_name(self, name, mark): + def find_python_name(self, name, mark, unsafe=False): if not name: raise ConstructorError("while constructing a Python object", mark, "expected non-empty name appended to the tag", mark) @@ -517,11 +527,15 @@ def find_python_name(self, name, mark): else: module_name = 'builtins' object_name = name - try: - __import__(module_name) - except ImportError as exc: + if unsafe: + try: + __import__(module_name) + except ImportError as exc: + raise ConstructorError("while constructing a Python object", mark, + "cannot find module %r (%s)" % (module_name, exc), mark) + if not module_name in sys.modules: raise ConstructorError("while constructing a Python object", mark, - "cannot find module %r (%s)" % (module_name, exc), mark) + "module %r is not imported" % module_name, mark) module = sys.modules[module_name] if not hasattr(module, object_name): raise ConstructorError("while constructing a Python object", mark, @@ -544,12 +558,16 @@ def construct_python_module(self, suffix, node): return self.find_python_module(suffix, node.start_mark) def make_python_instance(self, suffix, node, - args=None, kwds=None, newobj=False): + args=None, kwds=None, newobj=False, unsafe=False): if not args: args = [] if not kwds: kwds = {} cls = self.find_python_name(suffix, node.start_mark) + if not (unsafe or isinstance(cls, type)): + raise ConstructorError("while constructing a Python instance", node.start_mark, + "expected a class, but found %r" % type(cls), + node.start_mark) if newobj and isinstance(cls, type): return cls.__new__(cls, *args, **kwds) else: @@ -616,71 +634,87 @@ def construct_python_object_apply(self, suffix, node, newobj=False): def construct_python_object_new(self, suffix, node): return 
self.construct_python_object_apply(suffix, node, newobj=True) -Constructor.add_constructor( +FullConstructor.add_constructor( 'tag:yaml.org,2002:python/none', - Constructor.construct_yaml_null) + FullConstructor.construct_yaml_null) -Constructor.add_constructor( +FullConstructor.add_constructor( 'tag:yaml.org,2002:python/bool', - Constructor.construct_yaml_bool) + FullConstructor.construct_yaml_bool) -Constructor.add_constructor( +FullConstructor.add_constructor( 'tag:yaml.org,2002:python/str', - Constructor.construct_python_str) + FullConstructor.construct_python_str) -Constructor.add_constructor( +FullConstructor.add_constructor( 'tag:yaml.org,2002:python/unicode', - Constructor.construct_python_unicode) + FullConstructor.construct_python_unicode) -Constructor.add_constructor( +FullConstructor.add_constructor( 'tag:yaml.org,2002:python/bytes', - Constructor.construct_python_bytes) + FullConstructor.construct_python_bytes) -Constructor.add_constructor( +FullConstructor.add_constructor( 'tag:yaml.org,2002:python/int', - Constructor.construct_yaml_int) + FullConstructor.construct_yaml_int) -Constructor.add_constructor( +FullConstructor.add_constructor( 'tag:yaml.org,2002:python/long', - Constructor.construct_python_long) + FullConstructor.construct_python_long) -Constructor.add_constructor( +FullConstructor.add_constructor( 'tag:yaml.org,2002:python/float', - Constructor.construct_yaml_float) + FullConstructor.construct_yaml_float) -Constructor.add_constructor( +FullConstructor.add_constructor( 'tag:yaml.org,2002:python/complex', - Constructor.construct_python_complex) + FullConstructor.construct_python_complex) -Constructor.add_constructor( +FullConstructor.add_constructor( 'tag:yaml.org,2002:python/list', - Constructor.construct_yaml_seq) + FullConstructor.construct_yaml_seq) -Constructor.add_constructor( +FullConstructor.add_constructor( 'tag:yaml.org,2002:python/tuple', - Constructor.construct_python_tuple) + FullConstructor.construct_python_tuple) 
-Constructor.add_constructor( +FullConstructor.add_constructor( 'tag:yaml.org,2002:python/dict', - Constructor.construct_yaml_map) + FullConstructor.construct_yaml_map) -Constructor.add_multi_constructor( +FullConstructor.add_multi_constructor( 'tag:yaml.org,2002:python/name:', - Constructor.construct_python_name) + FullConstructor.construct_python_name) -Constructor.add_multi_constructor( +FullConstructor.add_multi_constructor( 'tag:yaml.org,2002:python/module:', - Constructor.construct_python_module) + FullConstructor.construct_python_module) -Constructor.add_multi_constructor( +FullConstructor.add_multi_constructor( 'tag:yaml.org,2002:python/object:', - Constructor.construct_python_object) + FullConstructor.construct_python_object) -Constructor.add_multi_constructor( +FullConstructor.add_multi_constructor( 'tag:yaml.org,2002:python/object/apply:', - Constructor.construct_python_object_apply) + FullConstructor.construct_python_object_apply) -Constructor.add_multi_constructor( +FullConstructor.add_multi_constructor( 'tag:yaml.org,2002:python/object/new:', - Constructor.construct_python_object_new) + FullConstructor.construct_python_object_new) + +class UnsafeConstructor(FullConstructor): + def find_python_module(self, name, mark): + return super(UnsafeConstructor, self).find_python_module(name, mark, unsafe=True) + + def find_python_name(self, name, mark): + return super(UnsafeConstructor, self).find_python_name(name, mark, unsafe=True) + + def make_python_instance(self, suffix, node, args=None, kwds=None, newobj=False): + return super(UnsafeConstructor, self).make_python_instance( + suffix, node, args, kwds, newobj, unsafe=True) + +# Constructor is same as UnsafeConstructor. Need to leave this in place in case +# people have extended it directly. 
+class Constructor(UnsafeConstructor): + pass diff --git a/lib3/yaml/cyaml.py b/lib3/yaml/cyaml.py index d5cb87e9..86f1d3d7 100644 --- a/lib3/yaml/cyaml.py +++ b/lib3/yaml/cyaml.py @@ -1,6 +1,8 @@ -__all__ = ['CBaseLoader', 'CSafeLoader', 'CLoader', - 'CBaseDumper', 'CSafeDumper', 'CDumper'] +__all__ = [ + 'CBaseLoader', 'CSafeLoader', 'CFullLoader', 'CUnsafeLoader', 'CLoader', + 'CBaseDumper', 'CSafeDumper', 'CDumper' +] from _yaml import CParser, CEmitter @@ -25,6 +27,20 @@ def __init__(self, stream): SafeConstructor.__init__(self) Resolver.__init__(self) +class CFullLoader(CParser, FullConstructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + FullConstructor.__init__(self) + Resolver.__init__(self) + +class CUnsafeLoader(CParser, UnsafeConstructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + UnsafeConstructor.__init__(self) + Resolver.__init__(self) + class CLoader(CParser, Constructor, Resolver): def __init__(self, stream): diff --git a/lib3/yaml/loader.py b/lib3/yaml/loader.py index 08c8f01b..414cb2c1 100644 --- a/lib3/yaml/loader.py +++ b/lib3/yaml/loader.py @@ -1,5 +1,5 @@ -__all__ = ['BaseLoader', 'SafeLoader', 'Loader'] +__all__ = ['BaseLoader', 'FullLoader', 'SafeLoader', 'Loader', 'UnsafeLoader'] from .reader import * from .scanner import * @@ -18,6 +18,16 @@ def __init__(self, stream): BaseConstructor.__init__(self) BaseResolver.__init__(self) +class FullLoader(Reader, Scanner, Parser, Composer, FullConstructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + FullConstructor.__init__(self) + Resolver.__init__(self) + class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver): def __init__(self, stream): @@ -38,3 +48,16 @@ def __init__(self, stream): Constructor.__init__(self) Resolver.__init__(self) +# UnsafeLoader is the same as Loader (which is and was always 
unsafe on +# untrusted input). Use of either Loader or UnsafeLoader should be rare, since +# FullLoader should be able to load almost all YAML safely. Loader is left intact +# to ensure backwards compatibility. +class UnsafeLoader(Reader, Scanner, Parser, Composer, Constructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + Constructor.__init__(self) + Resolver.__init__(self) diff --git a/tests/lib3/test_errors.py b/tests/lib3/test_errors.py index a3f86afd..9ef9bbe9 100644 --- a/tests/lib3/test_errors.py +++ b/tests/lib3/test_errors.py @@ -3,7 +3,7 @@ def test_loader_error(error_filename, verbose=False): try: - list(yaml.load_all(open(error_filename, 'rb'))) + list(yaml.load_all(open(error_filename, 'rb'), yaml.FullLoader)) except yaml.YAMLError as exc: if verbose: print("%s:" % exc.__class__.__name__, exc) @@ -14,7 +14,7 @@ def test_loader_error(error_filename, verbose=False): def test_loader_error_string(error_filename, verbose=False): try: - list(yaml.load_all(open(error_filename, 'rb').read())) + list(yaml.load_all(open(error_filename, 'rb').read(), yaml.FullLoader)) except yaml.YAMLError as exc: if verbose: print("%s:" % exc.__class__.__name__, exc) @@ -25,7 +25,7 @@ def test_loader_error_string(error_filename, verbose=False): def test_loader_error_single(error_filename, verbose=False): try: - yaml.load(open(error_filename, 'rb').read()) + yaml.load(open(error_filename, 'rb').read(), yaml.FullLoader) except yaml.YAMLError as exc: if verbose: print("%s:" % exc.__class__.__name__, exc) diff --git a/tests/lib3/test_input_output.py b/tests/lib3/test_input_output.py index 50f0122a..52ac3428 100644 --- a/tests/lib3/test_input_output.py +++ b/tests/lib3/test_input_output.py @@ -5,9 +5,9 @@ def test_unicode_input(unicode_filename, verbose=False): data = open(unicode_filename, 'rb').read().decode('utf-8') value = ' '.join(data.split()) - output = yaml.load(data) +
output = yaml.full_load(data) assert output == value, (output, value) - output = yaml.load(io.StringIO(data)) + output = yaml.full_load(io.StringIO(data)) assert output == value, (output, value) for input in [data.encode('utf-8'), codecs.BOM_UTF8+data.encode('utf-8'), @@ -15,9 +15,9 @@ def test_unicode_input(unicode_filename, verbose=False): codecs.BOM_UTF16_LE+data.encode('utf-16-le')]: if verbose: print("INPUT:", repr(input[:10]), "...") - output = yaml.load(input) + output = yaml.full_load(input) assert output == value, (output, value) - output = yaml.load(io.BytesIO(input)) + output = yaml.full_load(io.BytesIO(input)) assert output == value, (output, value) test_unicode_input.unittest = ['.unicode'] @@ -30,14 +30,14 @@ def test_unicode_input_errors(unicode_filename, verbose=False): codecs.BOM_UTF8+data.encode('utf-16-le')]: try: - yaml.load(input) + yaml.full_load(input) except yaml.YAMLError as exc: if verbose: print(exc) else: raise AssertionError("expected an exception") try: - yaml.load(io.BytesIO(input)) + yaml.full_load(io.BytesIO(input)) except yaml.YAMLError as exc: if verbose: print(exc) diff --git a/tests/lib3/test_recursive.py b/tests/lib3/test_recursive.py index 321a75fa..74c2ee65 100644 --- a/tests/lib3/test_recursive.py +++ b/tests/lib3/test_recursive.py @@ -31,7 +31,7 @@ def test_recursive(recursive_filename, verbose=False): output2 = None try: output1 = yaml.dump(value1) - value2 = yaml.load(output1) + value2 = yaml.full_load(output1) output2 = yaml.dump(value2) assert output1 == output2, (output1, output2) finally: