diff --git a/jq.pyx b/jq.pyx
index 09e1ee7..81bc51c 100644
--- a/jq.pyx
+++ b/jq.pyx
@@ -19,6 +19,11 @@ cdef extern from "jv.h":
       JV_KIND_ARRAY,
       JV_KIND_OBJECT
 
+    ctypedef enum:
+      JV_PARSE_SEQ,
+      JV_PARSE_STREAMING,
+      JV_PARSE_STREAM_ERRORS
+
     ctypedef struct jv:
         pass
 
@@ -49,6 +54,7 @@ cdef extern from "jv.h":
     jv_parser* jv_parser_new(int)
     void jv_parser_free(jv_parser*)
     void jv_parser_set_buf(jv_parser*, const char*, int, int)
+    int jv_parser_remaining(jv_parser*)
     jv jv_parser_next(jv_parser*)
 
     jv jv_parse(const char*)
@@ -247,27 +253,36 @@ cdef class _Program(object):
         self._program_bytes = program_bytes
         self._jq_state_pool = _JqStatePool(program_bytes, args=args)
 
-    def input(self, value=_NO_VALUE, text=_NO_VALUE, *, slurp=False):
+    def input(self, value=_NO_VALUE, text=_NO_VALUE, *,
+              slurp=False, seq=False):
         if (value is _NO_VALUE) == (text is _NO_VALUE):
             raise ValueError("Either the value or text argument should be set")
 
         if text is not _NO_VALUE:
-            return self.input_text(text, slurp=slurp)
+            return self.input_text(text, slurp=slurp, seq=seq)
         else:
-            return self.input_value(value, slurp=slurp)
+            return self.input_value(value, slurp=slurp, seq=seq)
 
-    def input_value(self, value, *, slurp=False):
-        return self.input_text(json.dumps(value), slurp=slurp)
+    def input_value(self, value, *, slurp=False, seq=False):
+        text = json.dumps(value)
+        if seq:
+            # RFC 7464 frames each text as RS + text + LF; without the LF
+            # jq rejects a top-level number as potentially truncated.
+            text = "\x1e" + text + "\n"
+        return self.input_text(text, slurp=slurp, seq=seq)
 
-    def input_values(self, values, *, slurp=False):
+    def input_values(self, values, *, slurp=False, seq=False):
         fileobj = io.StringIO()
         for value in values:
+            if seq:
+                fileobj.write("\x1e")
             json.dump(value, fileobj)
             fileobj.write("\n")
-        return self.input_text(fileobj.getvalue(), slurp=slurp)
+        return self.input_text(fileobj.getvalue(), slurp=slurp, seq=seq)
 
-    def input_text(self, text, *, slurp=False):
-        return _ProgramWithInput(self._jq_state_pool, text.encode("utf8"), slurp=slurp)
+    def input_text(self, text, *, slurp=False, seq=False):
+        return _ProgramWithInput(self._jq_state_pool, text.encode("utf8"),
+                                 slurp=slurp, seq=seq)
 
     @property
     def program_string(self):
@@ -291,24 +306,32 @@ cdef class _ProgramWithInput(object):
     cdef _JqStatePool _jq_state_pool
     cdef object _bytes_input
     cdef bint _slurp
+    cdef bint _seq
 
-    def __cinit__(self, jq_state_pool, bytes_input, *, bint slurp):
+    def __cinit__(self, jq_state_pool, bytes_input, *, bint slurp, bint seq):
         self._jq_state_pool = jq_state_pool
         self._bytes_input = bytes_input
         self._slurp = slurp
+        self._seq = seq
 
     def __iter__(self):
         return self._make_iterator()
 
     cdef _ResultIterator _make_iterator(self):
-        return _ResultIterator(self._jq_state_pool, self._bytes_input, slurp=self._slurp)
+        return _ResultIterator(self._jq_state_pool, self._bytes_input,
+                               slurp=self._slurp, seq=self._seq)
 
     def text(self):
         # Performance testing suggests that using _jv_to_python (within the
         # result iterator) followed by json.dumps is faster than using
         # jv_dump_string to generate the string directly from the jv values.
         # See: https://github.com/mwilliamson/jq.py/pull/50
-        return "\n".join(json.dumps(v) for v in self)
+        if self._seq:
+            # Emit RS + text + LF per result (RFC 7464), so the output can
+            # be fed back in with seq=True and is empty when there are none.
+            return "".join("\x1e" + json.dumps(v) + "\n" for v in self)
+        else:
+            return "\n".join(json.dumps(v) for v in self)
 
     def all(self):
         return list(self)
@@ -329,13 +352,14 @@ cdef class _ResultIterator(object):
         self._jq_state_pool.release(self._jq)
         jv_parser_free(self._parser)
 
-    def __cinit__(self, _JqStatePool jq_state_pool, bytes bytes_input, *, bint slurp):
+    def __cinit__(self, _JqStatePool jq_state_pool, bytes bytes_input, *,
+                  bint slurp, bint seq):
         self._jq_state_pool = jq_state_pool
         self._jq = jq_state_pool.acquire()
         self._bytes_input = bytes_input
         self._slurp = slurp
         self._ready = False
-        cdef jv_parser* parser = jv_parser_new(0)
+        cdef jv_parser* parser = jv_parser_new(JV_PARSE_SEQ if seq else 0)
         cdef char* cbytes_input
         cdef ssize_t clen_input
         PyBytes_AsStringAndSize(bytes_input, &cbytes_input, &clen_input)
@@ -384,17 +408,23 @@ cdef class _ResultIterator(object):
         return 0
 
     cdef inline jv _parse_next_input(self) except *:
-        cdef jv value = jv_parser_next(self._parser)
-        if jv_is_valid(value):
-            return value
-        elif jv_invalid_has_msg(jv_copy(value)):
-            error_message = jv_invalid_get_msg(value)
-            message = jv_string_to_py_string(error_message)
-            jv_free(error_message)
-            raise ValueError(u"parse error: " + message)
-        else:
-            jv_free(value)
-            raise StopIteration()
+        cdef jv value
+        while True:
+            value = jv_parser_next(self._parser)
+            if jv_is_valid(value):
+                return value
+            elif jv_invalid_has_msg(jv_copy(value)):
+                error_message = jv_invalid_get_msg(value)
+                message = jv_string_to_py_string(error_message)
+                jv_free(error_message)
+                raise ValueError(u"parse error: " + message)
+            else:
+                # Message-less invalid value: stop only once the parser
+                # has consumed all input; otherwise ask it again
+                # (presumably an empty text between separators).
+                if not jv_parser_remaining(self._parser):
+                    jv_free(value)
+                    raise StopIteration()
 
 
 def all(program, value=_NO_VALUE, text=_NO_VALUE):
diff --git a/tests/jq_tests.py b/tests/jq_tests.py
index 62b3ae0..3f4cb04 100644
--- a/tests/jq_tests.py
+++ b/tests/jq_tests.py
@@ -241,6 +241,68 @@ def test_unicode_strings_can_be_used_as_input():
     )
 
 
+def test_record_separator_character_accepted_in_input():
+    assert_equal(
+        [],
+        list(jq.compile(".").input(text='\x1e', seq=True))
+    )
+    assert_equal(
+        [],
+        list(jq.compile(".").input(text='\x1e\x1e', seq=True))
+    )
+    assert_equal(
+        [{}],
+        list(jq.compile(".").input(text='\x1e{}', seq=True))
+    )
+    assert_equal(
+        [{}],
+        list(jq.compile(".").input(text='\x1e\x1e{}', seq=True))
+    )
+    assert_equal(
+        [],
+        list(jq.compile(".").input(text='{}\x1e', seq=True))
+    )
+    assert_equal(
+        [],
+        list(jq.compile(".").input(text='{}\x1e\x1e', seq=True))
+    )
+    assert_equal(
+        [{}],
+        list(jq.compile(".").input(text='\x1e{}\x1e', seq=True))
+    )
+    assert_equal(
+        [[]],
+        list(jq.compile(".").input(text='{}\x1e[]', seq=True))
+    )
+    assert_equal(
+        [[]],
+        list(jq.compile(".").input(text='{}\x1e\x1e[]', seq=True))
+    )
+    assert_equal(
+        [{},[]],
+        list(jq.compile(".").input(text='\x1e{}\x1e[]', seq=True))
+    )
+    assert_equal(
+        [[]],
+        list(jq.compile(".").input(text='{}\x1e[]\x1e', seq=True))
+    )
+    assert_equal(
+        [{},[]],
+        list(jq.compile(".").input(text='\x1e{}\x1e[]\x1e', seq=True))
+    )
+
+
+def test_seq_framing_terminates_every_text_with_newline():
+    assert_equal(
+        [1],
+        list(jq.compile(".").input_value(1, seq=True))
+    )
+    assert_equal(
+        "\x1e[]\n\x1e{}\n",
+        jq.compile(".").input_values([[], {}], seq=True).text()
+    )
+
+
 def test_unicode_strings_can_be_used_as_programs():
     assert_equal(
         "Dragon‽",