Skip to content

Commit

Permalink
Rewrite parse_type a bit
Browse files Browse the repository at this point in the history
Summary: Do not use a char buffer, to avoid overflows (unchecked so far). Parse the input directly instead of using incremental substrs, which simplifies the code and improves readability. Use `TypeUtil` and recursion to parse array types.

Reviewed By: wsanville

Differential Revision: D66333793

fbshipit-source-id: ca13751488655938e5f14a48f97d063df3671684
  • Loading branch information
agampe authored and facebook-github-bot committed Nov 22, 2024
1 parent 46c05d0 commit 15b1b3a
Showing 1 changed file with 46 additions and 34 deletions.
80 changes: 46 additions & 34 deletions libredex/JarLoader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "JarLoader.h"
#include "Show.h"
#include "Trace.h"
#include "TypeUtil.h"
#include "Util.h"

/******************
Expand Down Expand Up @@ -291,10 +292,10 @@ void init_basic_types() {
namespace {

DexType* parse_type(std::string_view& buf) {
char typebuffer[MAX_CLASS_NAMELEN];
always_assert(!buf.empty());
char desc = buf.at(0);
buf = buf.substr(1);
const std::string_view buf_start = buf;
buf = buf.substr(1); // Simplifies primitive types.
switch (desc) {
case 'B':
return sSimpleTypeB;
Expand All @@ -314,44 +315,55 @@ DexType* parse_type(std::string_view& buf) {
return sSimpleTypeZ;
case 'V':
return sSimpleTypeV;
}

buf = buf_start;
const size_t start_size = buf.size();
switch (desc) {
case 'L': {
char* tpout = typebuffer;
*tpout++ = desc;
always_assert(!buf.empty());
while (buf.at(0) != ';') {
*tpout++ = buf[0];
buf = buf.substr(1);
// Find semicolon.
for (size_t i = 1; i < buf.size(); i++) {
if (buf[i] == ';') {
if (i == 1) {
std::cerr << "Empty class name, bailing\n";
return nullptr;
}
auto ret = buf.substr(0, i + 1);
buf = buf.substr(i + 1);
redex_assert(buf.size() < start_size);
return DexType::make_type(ret);
}
// TODO: Check valid class name chars.
}
always_assert(!buf.empty());
*tpout++ = buf.at(0);
buf = buf.substr(1);
*tpout = '\0';
return DexType::make_type(typebuffer);
break;
std::cerr << "Could not parse reference type, no suffix semicolon\n";
return nullptr;
}
case '[': {
char* tpout = typebuffer;
*tpout++ = desc;
always_assert(!buf.empty());
while (buf.at(0) == '[') {
*tpout++ = buf[0];
buf = buf.substr(1);
always_assert(!buf.empty());
}
if (buf.at(0) == 'L') {
while (buf.at(0) != ';') {
*tpout++ = buf[0];
buf = buf.substr(1);
always_assert(!buf.empty());
// Figure out array depth.
auto depth = [&]() {
for (size_t i = 1; i < buf.size(); ++i) {
if (buf[i] != '[') {
return i;
}
}
*tpout++ = buf.at(0);
buf = buf.substr(1);
} else {
*tpout++ = buf[0];
buf = buf.substr(1);
return buf.size();
}();
if (depth == buf.size()) {
std::cerr << "Could not parse array type, no element type\n";
return nullptr;
}
*tpout++ = '\0';
return DexType::make_type(typebuffer);

// Easiest to go recursive here.
buf = buf.substr(depth);
auto* elem_type = parse_type(buf);
if (elem_type == nullptr) {
return nullptr;
}
redex_assert(!type::is_array(elem_type));
redex_assert(buf.size() < start_size);
auto ret = type::make_array_type(elem_type, depth);

return ret;
}
}
std::cerr << "Invalid parse-type '" << desc << "', bailing\n";
Expand Down

0 comments on commit 15b1b3a

Please sign in to comment.