From dd778ff1e9dd95829caf206a2b930d6ed5e6fec5 Mon Sep 17 00:00:00 2001 From: Aliaksei Sandryhaila Date: Tue, 26 Jan 2016 16:53:17 -0800 Subject: [PATCH] PARQUET-418: Refactored parquet_reader utility for printing file contents. This pull request contains the following changes: * Modified parquet_reader utility: refactored, fixed memory leaks, merged compute_stats utility to get rid of code duplication. * Added a flag --only-stats to parquet_reader to print only file statistics. * Modified InMemoryInputStream to own its buffer. All the code repetition still remaining in parquet_reader clearly highlights the need for specialized ColumnReader classes. I will create a new JIRA for this improvement. Author: Aliaksei Sandryhaila Closes #18 from asandryh/PARQUET-418 and squashes the following commits: a378a1e [Aliaksei Sandryhaila] Changed the buffer in ScopedInMemoryInputStream to std::vector. 7f6f533 [Aliaksei Sandryhaila] [PARQUET-418]: Added/modified a utility for printing a file contents. --- cpp/src/parquet/parquet.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/cpp/src/parquet/parquet.h b/cpp/src/parquet/parquet.h index a1af6b7cce26b..4469a82dd2ea9 100644 --- a/cpp/src/parquet/parquet.h +++ b/cpp/src/parquet/parquet.h @@ -88,6 +88,20 @@ class InMemoryInputStream : public InputStream { int64_t offset_; }; +// A wrapper for InMemoryInputStream to manage the memory. +class ScopedInMemoryInputStream : public InputStream { + public: + ScopedInMemoryInputStream(int64_t len); + uint8_t* data(); + int64_t size(); + virtual const uint8_t* Peek(int num_to_peek, int* num_bytes); + virtual const uint8_t* Read(int num_to_read, int* num_bytes); + + private: + std::vector<uint8_t> buffer_; + std::unique_ptr<InMemoryInputStream> stream_; +}; + // API to read values from a single column. This is the main client facing API. class ColumnReader { public: