Skip to content

Commit

Permalink
Rework the safety related API code
Browse files Browse the repository at this point in the history
The main change is that 'Danger' has been renamed to 'Python' and that
the default `dump()` and `dump_all()` functions use the 'Python' schema
to be able to dump any Python data structure.

NOTE: In YAML, 'Schema' is used to mean all the semantics and rules of
what a YAML document means and how it is processed.

The `load()` and `load_all()` functions continue to use the Safe schema.

The dump() and load() sugar functions should be similar in that they
both do the most useful and safe operations.

There are top level functions for each schema (Safe and Python) and
those functions should be used when feeding data from one system to the
other and expecting the same semantics (schema):

* safe_dump safe_dump_all
* safe_load safe_load_all
* python_dump python_dump_all
* python_load python_load_all

When we have a schema language for YAML, the generic methods will be:

* yaml.dump(node, Schema='foo.schema')
* yaml.load(yaml, Schema='foo.schema')

A loader class like SafeLoader is a loader with a hardcoded schema.
Right now pyyaml has 2 schemas:

* Python - serialize any python data
* Safe - only serialize in a way that won't trigger code

'Danger' was used in response to a situation where people were caught
unaware that something bad could happen in a seemingly normal, default
situation. Now we've fixed the default to be safe, and Safe is an OK
name for a schema, but Danger really is not. It's not the purpose of the
schema to be dangerous. The purpose is to serialize Python data
structures.

The danger_ API functions can be removed because they have only been
released for a couple days and they aren't documented anywhere.

----

This also fixes a bug in that safe_load() and load() were aliases. They
shouldn't be, because load() accepts a Loader kwarg, and safe_load()
should not; i.e., safe_load(yaml, Loader=PythonLoader) shouldn't be
allowed.
  • Loading branch information
ingydotnet committed Jun 28, 2018
1 parent d3eb7da commit 3dc3f5f
Show file tree
Hide file tree
Showing 12 changed files with 178 additions and 100 deletions.
89 changes: 60 additions & 29 deletions lib/yaml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,52 +61,64 @@ def compose_all(stream, Loader=Loader):
finally:
loader.dispose()

def load(stream, Loader=Loader):
# Generic load() and load_all() default to SafeLoader.
def load(stream, Loader=SafeLoader):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
By default resolve only basic YAML tags, if an alternate Loader is
provided, may be dangerous.
"""
loader = Loader(stream)
try:
return loader.get_single_data()
finally:
loader.dispose()
safe_load = load

def load_all(stream, Loader=Loader):
def load_all(stream, Loader=SafeLoader):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
By default resolve only basic YAML tags, if an alternate Loader is
provided, may be dangerous.
"""
loader = Loader(stream)
try:
while loader.check_data():
yield loader.get_data()
finally:
loader.dispose()
safe_load_all = load_all

def danger_load(stream):
def safe_load(stream):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
When used on untrusted input, can result in arbitrary code execution.
"""
return load(stream, DangerLoader)
return load(stream, Loader=SafeLoader)

def danger_load_all(stream):
def safe_load_all(stream):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
When used on untrusted input, can result in arbitrary code execution.
"""
return load_all(stream, DangerLoader)
return load_all(stream, Loader=SafeLoader)

# Note: Using an alternate Loader (like PythonLoader) may be dangerous.
def python_load(stream):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
Warning: When used on untrusted input, can result in arbitrary code
execution.
"""
return load(stream, Loader=PythonLoader)

def python_load_all(stream):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
Warning: When used on untrusted input, can result in arbitrary code
execution.
"""
return load_all(stream, Loader=PythonLoader)

def emit(events, stream=None, Dumper=Dumper,
canonical=None, indent=None, width=None,
Expand Down Expand Up @@ -168,7 +180,8 @@ def serialize(node, stream=None, Dumper=Dumper, **kwds):
"""
return serialize_all([node], stream, Dumper=Dumper, **kwds)

def dump_all(documents, stream=None, Dumper=Dumper,
# All the top level dump functions call this:
def _dump_all(documents, stream=None, Dumper=None,
default_style=None, default_flow_style=None,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
Expand Down Expand Up @@ -201,31 +214,49 @@ def dump_all(documents, stream=None, Dumper=Dumper,
dumper.dispose()
if getvalue:
return getvalue()
safe_dump_all = dump_all

def danger_dump_all(documents, stream=None, **kwds):
def dump(data, stream=None, Dumper=PythonDumper, **kwds):
"""
Serialize a Python object into a YAML stream.
Produce only basic YAML tags.
"""
return _dump_all([data], stream, Dumper=Dumper, **kwds)

def dump_all(documents, stream=None, Dumper=PythonDumper, **kwds):
"""
Serialize a sequence of Python objects into a YAML stream.
Produce only basic YAML tags.
If stream is None, return the produced string instead.
"""
return dump_all(documents, stream, Dumper=DangerDumper, **kwds)
return _dump_all(documents, stream, Dumper=Dumper, **kwds)

def dump(data, stream=None, Dumper=Dumper, **kwds):
# The safe_dump* functions can be used to create YAML that should always be
# loadable by the safe_load* functions. There should be nothing "unsafe" about
# dumping any object.
def safe_dump(data, stream=None, **kwds):
"""
Serialize a Python object into a YAML stream.
If stream is None, return the produced string instead.
Produce only basic YAML tags.
"""
return dump_all([data], stream, Dumper=Dumper, **kwds)
safe_dump = dump
return _dump_all([data], stream, Dumper=SafeDumper, **kwds)

def danger_dump(data, stream=None, **kwds):
def safe_dump_all(documents, stream=None, **kwds):
"""
Serialize a Python object into a YAML stream.
Serialize a sequence of Python objects into a YAML stream.
Produce only basic YAML tags.
If stream is None, return the produced string instead.
"""
return dump_all([data], stream, Dumper=DangerDumper, **kwds)
return _dump_all(documents, stream, Dumper=SafeDumper, **kwds)

def python_dump(data, stream=None, **kwds):
"""
Serialize a Python object into a YAML stream.
"""
return _dump_all([data], stream, Dumper=PythonDumper, **kwds)

def python_dump_all(documents, stream=None, **kwds):
"""
Serialize a sequence of Python objects into a YAML stream.
"""
return _dump_all(documents, stream, Dumper=PythonDumper, **kwds)

def add_implicit_resolver(tag, regexp, first=None,
Loader=Loader, Dumper=Dumper):
Expand Down
20 changes: 12 additions & 8 deletions lib/yaml/cyaml.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

__all__ = ['CBaseLoader', 'CSafeLoader', 'CLoader', 'CDangerLoader',
'CBaseDumper', 'CSafeDumper', 'CDumper', 'CDangerDumper']
__all__ = ['CBaseLoader', 'CSafeLoader', 'CPythonLoader', 'CLoader',
'CBaseDumper', 'CSafeDumper', 'CPythonDumper', 'CDumper']

from _yaml import CParser, CEmitter

Expand All @@ -18,21 +18,24 @@ def __init__(self, stream):
BaseConstructor.__init__(self)
BaseResolver.__init__(self)

class CLoader(CParser, SafeConstructor, Resolver):
class CSafeLoader(CParser, SafeConstructor, Resolver):

def __init__(self, stream):
CParser.__init__(self, stream)
SafeConstructor.__init__(self)
Resolver.__init__(self)
CSafeLoader = CLoader

class CDangerLoader(CParser, Constructor, Resolver):
class CPythonLoader(CParser, Constructor, Resolver):

def __init__(self, stream):
CParser.__init__(self, stream)
Constructor.__init__(self)
Resolver.__init__(self)

CLoader = CSafeLoader



class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver):

def __init__(self, stream,
Expand All @@ -50,7 +53,7 @@ def __init__(self, stream,
default_flow_style=default_flow_style)
Resolver.__init__(self)

class CDumper(CEmitter, SafeRepresenter, Resolver):
class CSafeDumper(CEmitter, SafeRepresenter, Resolver):

def __init__(self, stream,
default_style=None, default_flow_style=None,
Expand All @@ -66,9 +69,8 @@ def __init__(self, stream,
SafeRepresenter.__init__(self, default_style=default_style,
default_flow_style=default_flow_style)
Resolver.__init__(self)
CSafeDumper = CDumper

class CDangerDumper(CEmitter, Serializer, Representer, Resolver):
class CPythonDumper(CEmitter, Serializer, Representer, Resolver):

def __init__(self, stream,
default_style=None, default_flow_style=None,
Expand All @@ -84,3 +86,5 @@ def __init__(self, stream,
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style)
Resolver.__init__(self)

CDumper = CPythonDumper
10 changes: 6 additions & 4 deletions lib/yaml/dumper.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

__all__ = ['BaseDumper', 'SafeDumper', 'Dumper', 'DangerDumper']
__all__ = ['BaseDumper', 'SafeDumper', 'PythonDumper', 'Dumper']

from emitter import *
from serializer import *
Expand All @@ -24,7 +24,7 @@ def __init__(self, stream,
default_flow_style=default_flow_style)
Resolver.__init__(self)

class Dumper(Emitter, Serializer, SafeRepresenter, Resolver):
class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver):

def __init__(self, stream,
default_style=None, default_flow_style=None,
Expand All @@ -41,9 +41,8 @@ def __init__(self, stream,
SafeRepresenter.__init__(self, default_style=default_style,
default_flow_style=default_flow_style)
Resolver.__init__(self)
SafeDumper = Dumper

class DangerDumper(Emitter, Serializer, Representer, Resolver):
class PythonDumper(Emitter, Serializer, Representer, Resolver):

def __init__(self, stream,
default_style=None, default_flow_style=None,
Expand All @@ -60,3 +59,6 @@ def __init__(self, stream,
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style)
Resolver.__init__(self)

# The default Dumper is PythonDumper
Dumper = PythonDumper
10 changes: 6 additions & 4 deletions lib/yaml/loader.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

__all__ = ['BaseLoader', 'SafeLoader', 'Loader', 'DangerLoader']
__all__ = ['BaseLoader', 'SafeLoader', 'PythonLoader', 'Loader']

from reader import *
from scanner import *
Expand All @@ -18,7 +18,7 @@ def __init__(self, stream):
BaseConstructor.__init__(self)
BaseResolver.__init__(self)

class Loader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver):
class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver):

def __init__(self, stream):
Reader.__init__(self, stream)
Expand All @@ -27,9 +27,8 @@ def __init__(self, stream):
Composer.__init__(self)
SafeConstructor.__init__(self)
Resolver.__init__(self)
SafeLoader = Loader

class DangerLoader(Reader, Scanner, Parser, Composer, Constructor, Resolver):
class PythonLoader(Reader, Scanner, Parser, Composer, Constructor, Resolver):

def __init__(self, stream):
Reader.__init__(self, stream)
Expand All @@ -38,3 +37,6 @@ def __init__(self, stream):
Composer.__init__(self)
Constructor.__init__(self)
Resolver.__init__(self)

# The default Loader is SafeLoader
Loader = SafeLoader
Loading

0 comments on commit 3dc3f5f

Please sign in to comment.