Skip to content

Commit

Permalink
src: fix fs.write() externalized string handling
Browse files Browse the repository at this point in the history
* Respect `encoding` argument when the string is externalized.

* Copy the string when the write request can outlive the externalized
  string.

This commit removes `StringBytes::GetExternalParts()` because it is
fundamentally broken.

Fixes: nodejs#18146
PR-URL: nodejs#18216
Reviewed-By: Colin Ihrig <[email protected]>
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Joyee Cheung <[email protected]>
  • Loading branch information
bnoordhuis authored and MayaLekova committed May 8, 2018
1 parent 2a92d0f commit 34198a2
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 108 deletions.
50 changes: 32 additions & 18 deletions src/node_file.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
# include <io.h>
#endif

#include <memory>
#include <vector>

namespace node {
Expand Down Expand Up @@ -1019,40 +1020,53 @@ static void WriteString(const FunctionCallbackInfo<Value>& args) {

CHECK(args[0]->IsInt32());

std::unique_ptr<char[]> delete_on_return;
Local<Value> req;
Local<Value> string = args[1];
Local<Value> value = args[1];
int fd = args[0]->Int32Value();
char* buf = nullptr;
int64_t pos;
size_t len;
const int64_t pos = GET_OFFSET(args[2]);
const auto enc = ParseEncoding(env->isolate(), args[3], UTF8);
const auto is_async = args[4]->IsObject();

// Avoid copying the string when it is externalized but only when:
// 1. The target encoding is compatible with the string's encoding, and
// 2. The write is synchronous, otherwise the string might get neutered
// while the request is in flight, and
// 3. For UCS2, when the host system is little-endian. Big-endian systems
// need to call StringBytes::Write() to ensure proper byte swapping.
// The const_casts are conceptually sound: memory is read but not written.
if (!is_async && value->IsString()) {
auto string = value.As<String>();
if ((enc == ASCII || enc == LATIN1) && string->IsExternalOneByte()) {
auto ext = string->GetExternalOneByteStringResource();
buf = const_cast<char*>(ext->data());
len = ext->length();
} else if (enc == UCS2 && IsLittleEndian() && string->IsExternal()) {
auto ext = string->GetExternalStringResource();
buf = reinterpret_cast<char*>(const_cast<uint16_t*>(ext->data()));
len = ext->length() * sizeof(*ext->data());
}
}

// will assign buf and len if string was external
if (!StringBytes::GetExternalParts(string,
const_cast<const char**>(&buf),
&len)) {
enum encoding enc = ParseEncoding(env->isolate(), args[3], UTF8);
len = StringBytes::StorageSize(env->isolate(), string, enc);
if (buf == nullptr) {
len = StringBytes::StorageSize(env->isolate(), value, enc);
buf = new char[len];
// SYNC_CALL returns on error. Make sure to always free the memory.
if (!is_async) delete_on_return.reset(buf);
// StorageSize may return too large a char, so correct the actual length
// by the write size
len = StringBytes::Write(env->isolate(), buf, len, args[1], enc);
}
pos = GET_OFFSET(args[2]);

uv_buf_t uvbuf = uv_buf_init(const_cast<char*>(buf), len);
uv_buf_t uvbuf = uv_buf_init(buf, len);

if (args[4]->IsObject()) {
if (is_async) {
CHECK_EQ(args.Length(), 5);
AsyncCall(env, args, "write", UTF8, AfterInteger,
uv_fs_write, fd, &uvbuf, 1, pos);
} else {
// SYNC_CALL returns on error. Make sure to always free the memory.
struct Delete {
inline explicit Delete(char* pointer) : pointer_(pointer) {}
inline ~Delete() { delete[] pointer_; }
char* const pointer_;
};
Delete delete_on_return(buf);
SYNC_CALL(write, nullptr, fd, &uvbuf, 1, pos)
return args.GetReturnValue().Set(SYNC_RESULT);
}
Expand Down
113 changes: 29 additions & 84 deletions src/string_bytes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@

#include <limits.h>
#include <string.h> // memcpy

#include <algorithm>
#include <vector>

// When creating strings >= this length v8's gc spins up and consumes
Expand Down Expand Up @@ -269,39 +271,6 @@ static size_t hex_decode(char* buf,
}


bool StringBytes::GetExternalParts(Local<Value> val,
const char** data,
size_t* len) {
if (Buffer::HasInstance(val)) {
*data = Buffer::Data(val);
*len = Buffer::Length(val);
return true;
}

if (!val->IsString())
return false;

Local<String> str = val.As<String>();

if (str->IsExternalOneByte()) {
const String::ExternalOneByteStringResource* ext;
ext = str->GetExternalOneByteStringResource();
*data = ext->data();
*len = ext->length();
return true;

} else if (str->IsExternal()) {
const String::ExternalStringResource* ext;
ext = str->GetExternalStringResource();
*data = reinterpret_cast<const char*>(ext->data());
*len = ext->length() * sizeof(*ext->data());
return true;
}

return false;
}


size_t StringBytes::WriteUCS2(char* buf,
size_t buflen,
Local<String> str,
Expand Down Expand Up @@ -347,32 +316,31 @@ size_t StringBytes::Write(Isolate* isolate,
enum encoding encoding,
int* chars_written) {
HandleScope scope(isolate);
const char* data = nullptr;
size_t nbytes = 0;
const bool is_extern = GetExternalParts(val, &data, &nbytes);
const size_t external_nbytes = nbytes;
size_t nbytes;
int nchars;

if (chars_written == nullptr)
chars_written = &nchars;

CHECK(val->IsString() == true);
Local<String> str = val.As<String>();

if (nbytes > buflen)
nbytes = buflen;

int flags = String::HINT_MANY_WRITES_EXPECTED |
String::NO_NULL_TERMINATION |
String::REPLACE_INVALID_UTF8;

switch (encoding) {
case ASCII:
case LATIN1:
if (is_extern && str->IsOneByte()) {
memcpy(buf, data, nbytes);
if (str->IsExternalOneByte()) {
auto ext = str->GetExternalOneByteStringResource();
nbytes = std::min(buflen, ext->length());
memcpy(buf, ext->data(), nbytes);
} else {
uint8_t* const dst = reinterpret_cast<uint8_t*>(buf);
nbytes = str->WriteOneByte(dst, 0, buflen, flags);
}
if (chars_written != nullptr)
*chars_written = nbytes;
*chars_written = nbytes;
break;

case BUFFER:
Expand All @@ -383,14 +351,8 @@ size_t StringBytes::Write(Isolate* isolate,
case UCS2: {
size_t nchars;

if (is_extern && !str->IsOneByte()) {
memcpy(buf, data, nbytes);
nchars = nbytes / sizeof(uint16_t);
} else {
nbytes = WriteUCS2(buf, buflen, str, flags, &nchars);
}
if (chars_written != nullptr)
*chars_written = nchars;
nbytes = WriteUCS2(buf, buflen, str, flags, &nchars);
*chars_written = static_cast<int>(nchars);

// Node's "ucs2" encoding wants LE character data stored in
// the Buffer, so we need to reorder on BE platforms. See
Expand All @@ -403,27 +365,25 @@ size_t StringBytes::Write(Isolate* isolate,
}

case BASE64:
if (is_extern) {
nbytes = base64_decode(buf, buflen, data, external_nbytes);
if (str->IsExternalOneByte()) {
auto ext = str->GetExternalOneByteStringResource();
nbytes = base64_decode(buf, buflen, ext->data(), ext->length());
} else {
String::Value value(str);
nbytes = base64_decode(buf, buflen, *value, value.length());
}
if (chars_written != nullptr) {
*chars_written = nbytes;
}
*chars_written = nbytes;
break;

case HEX:
if (is_extern) {
nbytes = hex_decode(buf, buflen, data, external_nbytes);
if (str->IsExternalOneByte()) {
auto ext = str->GetExternalOneByteStringResource();
nbytes = hex_decode(buf, buflen, ext->data(), ext->length());
} else {
String::Value value(str);
nbytes = hex_decode(buf, buflen, *value, value.length());
}
if (chars_written != nullptr) {
*chars_written = nbytes;
}
*chars_written = nbytes;
break;

default:
Expand Down Expand Up @@ -500,49 +460,34 @@ size_t StringBytes::Size(Isolate* isolate,
Local<Value> val,
enum encoding encoding) {
HandleScope scope(isolate);
size_t data_size = 0;
bool is_buffer = Buffer::HasInstance(val);

if (is_buffer && (encoding == BUFFER || encoding == LATIN1))
if (Buffer::HasInstance(val) && (encoding == BUFFER || encoding == LATIN1))
return Buffer::Length(val);

const char* data;
if (GetExternalParts(val, &data, &data_size))
return data_size;

Local<String> str = val->ToString(isolate);

switch (encoding) {
case ASCII:
case LATIN1:
data_size = str->Length();
break;
return str->Length();

case BUFFER:
case UTF8:
data_size = str->Utf8Length();
break;
return str->Utf8Length();

case UCS2:
data_size = str->Length() * sizeof(uint16_t);
break;
return str->Length() * sizeof(uint16_t);

case BASE64: {
String::Value value(str);
data_size = base64_decoded_size(*value, value.length());
break;
return base64_decoded_size(*value, value.length());
}

case HEX:
data_size = str->Length() / 2;
break;

default:
CHECK(0 && "unknown encoding");
break;
return str->Length() / 2;
}

return data_size;
UNREACHABLE();
}


Expand Down
6 changes: 0 additions & 6 deletions src/string_bytes.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,6 @@ class StringBytes {
v8::Local<v8::Value> val,
enum encoding enc);

// If the string is external then assign external properties to data and len,
// then return true. If not return false.
static bool GetExternalParts(v8::Local<v8::Value> val,
const char** data,
size_t* len);

// Write the bytes from the string or buffer into the char*
// returns the number of bytes written, which will always be
// <= buflen. Use StorageSize/Size first to know how much
Expand Down
44 changes: 44 additions & 0 deletions test/parallel/test-fs-write.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
// USE OR OTHER DEALINGS IN THE SOFTWARE.

// Flags: --expose_externalize_string
'use strict';
const common = require('../common');
const assert = require('assert');
Expand All @@ -30,6 +31,49 @@ const fn3 = path.join(common.tmpDir, 'write3.txt');
const expected = 'ümlaut.';
const constants = fs.constants;

/* eslint-disable no-undef */
common.allowGlobals(externalizeString, isOneByteString, x);

{
const expected = 'ümlaut eins'; // Must be a unique string.
externalizeString(expected);
assert.strictEqual(true, isOneByteString(expected));
const fd = fs.openSync(fn, 'w');
fs.writeSync(fd, expected, 0, 'latin1');
fs.closeSync(fd);
assert.strictEqual(expected, fs.readFileSync(fn, 'latin1'));
}

{
const expected = 'ümlaut zwei'; // Must be a unique string.
externalizeString(expected);
assert.strictEqual(true, isOneByteString(expected));
const fd = fs.openSync(fn, 'w');
fs.writeSync(fd, expected, 0, 'utf8');
fs.closeSync(fd);
assert.strictEqual(expected, fs.readFileSync(fn, 'utf8'));
}

{
const expected = '中文 1'; // Must be a unique string.
externalizeString(expected);
assert.strictEqual(false, isOneByteString(expected));
const fd = fs.openSync(fn, 'w');
fs.writeSync(fd, expected, 0, 'ucs2');
fs.closeSync(fd);
assert.strictEqual(expected, fs.readFileSync(fn, 'ucs2'));
}

{
const expected = '中文 2'; // Must be a unique string.
externalizeString(expected);
assert.strictEqual(false, isOneByteString(expected));
const fd = fs.openSync(fn, 'w');
fs.writeSync(fd, expected, 0, 'utf8');
fs.closeSync(fd);
assert.strictEqual(expected, fs.readFileSync(fn, 'utf8'));
}

common.refreshTmpDir();

fs.open(fn, 'w', 0o644, common.mustCall(function(err, fd) {
Expand Down

0 comments on commit 34198a2

Please sign in to comment.