Skip to content

Commit

Permalink
Use mmap in json_parser_t to simplify the logic
Browse files Browse the repository at this point in the history
  • Loading branch information
filipnavara committed Jan 25, 2025
1 parent 09c6433 commit ad38b53
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 73 deletions.
96 changes: 33 additions & 63 deletions src/native/corehost/json_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,31 +17,6 @@

namespace {

// Try to match 0xEF 0xBB 0xBF byte sequence (no endianness here.)
int get_utf8_bom_length(FILE *stream)
{
if (feof(stream))
{
return 0;
}

auto peeked = fgetc(stream);
if (peeked == EOF || ((peeked & 0xFF) != 0xEF))
{
ungetc(peeked, stream);
return 0;
}

unsigned char bytes[3];
size_t ret = fread(reinterpret_cast<char*>(bytes), 1, 3, stream);
if ((ret < 3) || (bytes[1] != 0xBB) || (bytes[2] != 0xBF))
{
return 0;
}

return 3;
}

void get_line_column_from_offset(const char* data, uint64_t size, size_t offset, int *line, int *column)
{
assert(offset < size);
Expand Down Expand Up @@ -69,12 +44,6 @@ void get_line_column_from_offset(const char* data, uint64_t size, size_t offset,

} // empty namespace

void json_parser_t::realloc_buffer(size_t size)
{
m_json.resize(size + 1);
m_json[size] = '\0';
}

bool json_parser_t::parse_raw_data(char* data, int64_t size, const pal::string_t& context)
{
assert(data != nullptr);
Expand Down Expand Up @@ -116,59 +85,60 @@ bool json_parser_t::parse_file(const pal::string_t& path)
{
// This code assumes that the caller has checked that the file `path` exists
// either within the bundle, or as a real file on disk.
assert(m_bundle_data == nullptr);
assert(m_data == nullptr);
assert(m_bundle_location == nullptr);

if (bundle::info_t::is_single_file_bundle())
{
// Due to in-situ parsing on Linux,
// * The json file is mapped as copy-on-write.
// * The mapping cannot be immediately released, and will be unmapped by the json_parser destructor.
m_bundle_data = bundle::info_t::config_t::map(path, m_bundle_location);

if (m_bundle_data != nullptr)
{
bool result = parse_raw_data(m_bundle_data, m_bundle_location->size, path);
return result;
}
m_data = bundle::info_t::config_t::map(path, m_bundle_location);
m_size = (size_t)m_bundle_location->size;
}

FILE *file = pal::file_open(path, _X("r"));
if (file == nullptr)
if (m_data == nullptr)
{
trace::error(_X("Cannot use file stream for [%s]: %s"), path.c_str(), pal::strerror(errno).c_str());
return false;
}
#ifdef _WIN32
// Since we can't use in-situ parsing on Windows, as JSON data is encoded in
// UTF-8 and the host expects wide strings.
// We do not need COW and read-only mapping will be enough.
m_data = (char*)pal::mmap_read(path, &m_size);
#else // _WIN32
m_data = (char*)pal::mmap_copy_on_write(path, &m_size);
#endif // _WIN32

auto current_pos = ::get_utf8_bom_length(file);
fseek(file, 0, SEEK_END);
auto stream_size = ftell(file);
if (stream_size == -1)
{
fclose(file);
trace::error(_X("Failed to get size of file [%s]"), path.c_str());
return false;
if (m_data == nullptr)
{
trace::error(_X("Cannot use file stream for [%s]: %s"), path.c_str(), pal::strerror(errno).c_str());
return false;
}
}

fseek(file, current_pos, SEEK_SET);
char *data = m_data;
size_t size = m_size;

realloc_buffer(static_cast<size_t>(stream_size - current_pos));
auto ret = fread(m_json.data(), 1, stream_size - current_pos, file);
fclose(file);

if (ret != (size_t)(stream_size - current_pos))
// Skip over UTF-8 BOM, if present
if (size >= 3 && m_data[0] == 0xEF && m_data[1] == 0xBB && m_data[1] == 0xBF)
{
trace::error(_X("Failed to read contents of file [%s]"), path.c_str());
return false;
size -= 3;
data += 3;
}

return parse_raw_data(m_json.data(), m_json.size(), path);
return parse_raw_data(data, size, path);
}

json_parser_t::~json_parser_t()
{
if (m_bundle_data != nullptr)
if (m_data != nullptr)
{
bundle::info_t::config_t::unmap(m_bundle_data, m_bundle_location);
if (m_bundle_location != nullptr)
{
bundle::info_t::config_t::unmap(m_data, m_bundle_location);
}
else
{
pal::munmap((void*)m_data, m_size);
}
}
}
18 changes: 8 additions & 10 deletions src/native/corehost/json_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,24 +40,22 @@ class json_parser_t {
bool parse_file(const pal::string_t& path);

json_parser_t()
: m_bundle_data(nullptr)
: m_data(nullptr)
, m_bundle_location(nullptr) {}

~json_parser_t();

private:
// This is a vector of char and not pal::char_t because JSON data
// parsed by this class is always encoded in UTF-8. On Windows,
// where wide strings are used, m_json is kept in UTF-8, but converted
char* m_data; // The memory mapped bytes of the file
size_t m_size; // Size of the mapped memory

// On Windows, where wide strings are used, m_data is kept in UTF-8, but converted
// to UTF-16 by m_document during load.
std::vector<char> m_json;
document_t m_document;

// If a json file is parsed from a single-file bundle, the following two fields represent:
char* m_bundle_data; // The memory mapped bytes of the application bundle.
const bundle::location_t* m_bundle_location; // Location of this json file within the bundle.

void realloc_buffer(size_t size);
// If a json file is parsed from a single-file bundle, the following two fields represents
// the location of this json file within the bundle.
const bundle::location_t* m_bundle_location;
};

#endif // __JSON_PARSER_H__

0 comments on commit ad38b53

Please sign in to comment.