src: share the valid-UTF-8 -> UTF-16 fast path in StringBytes

Moves the "known-valid, non-ASCII, >= 32 bytes" conversion that was duplicated
in StringBytes::Encode(..., UTF8) and StringBytes::EncodeValidUtf8() into one
file-local helper, EncodeValidNonAsciiUtf8(), and relocates EncodeValidUtf8()
to the end of the file.

Review notes:
- EncodeValidUtf8() keeps its CHECK_BUFLEN_IN_RANGE(buflen) guard and its
  empty-input early return. The helper documents that callers must
  range-check buflen, so the guard must stay in front of the call.
- The multi-line `if` around NewFromUtf8() keeps its braces, as before.
- NOTE(review): context-line whitespace, template arguments and the blob ids
  on the "index" lines were carried over / reconstructed by hand; regenerate
  the patch from the tree before applying it.

diff --git a/src/string_bytes.cc b/src/string_bytes.cc
index 1d4ee3a81803b2..37ea21e5966128 100644
--- a/src/string_bytes.cc
+++ b/src/string_bytes.cc
@@ -537,6 +537,24 @@ Maybe<size_t> StringBytes::Size(Isolate* isolate,
     }                                                                          \
   } while (0)
 
+// Converts known-valid UTF-8 (buflen >= 32) to a V8 string via the fast
+// UTF-16 path. Callers must ensure buflen is range-checked.
+static MaybeLocal<Value> EncodeValidNonAsciiUtf8(Isolate* isolate,
+                                                 const char* buf,
+                                                 size_t buflen) {
+  size_t u16size = simdutf::utf16_length_from_utf8(buf, buflen);
+  if (u16size > static_cast<size_t>(v8::String::kMaxLength)) {
+    isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
+    return MaybeLocal<Value>();
+  }
+  return EncodeTwoByteString(
+      isolate, u16size, [buf, buflen, u16size](uint16_t* dst) {
+        size_t written = simdutf::convert_valid_utf8_to_utf16(
+            buf, buflen, reinterpret_cast<char16_t*>(dst));
+        CHECK_EQ(written, u16size);
+      });
+}
+
 MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
                                       const char* buf,
                                       size_t buflen,
@@ -586,17 +604,7 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
         // We know that we are non-ASCII (and are unlikely Latin1), use 2-byte
         // In the most likely case of valid UTF-8, we can use this fast impl
         // For very short input, it is slower, so we limit min size
-        size_t u16size = simdutf::utf16_length_from_utf8(buf, buflen);
-        if (u16size > static_cast<size_t>(v8::String::kMaxLength)) {
-          isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
-          return MaybeLocal<Value>();
-        }
-        return EncodeTwoByteString(
-            isolate, u16size, [buf, buflen, u16size](uint16_t* dst) {
-              size_t written = simdutf::convert_valid_utf8_to_utf16(
-                  buf, buflen, reinterpret_cast<char16_t*>(dst));
-              CHECK_EQ(written, u16size);
-            });
+        return EncodeValidNonAsciiUtf8(isolate, buf, buflen);
       }
 
       val =
@@ -671,40 +679,6 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
   }
 }
 
-MaybeLocal<Value> StringBytes::EncodeValidUtf8(Isolate* isolate,
-                                               const char* buf,
-                                               size_t buflen) {
-  CHECK_BUFLEN_IN_RANGE(buflen);
-  if (!buflen) return String::Empty(isolate);
-  buflen = keep_buflen_in_range(buflen);
-
-  // ASCII fast path
-  if (!simdutf::validate_ascii_with_errors(buf, buflen).error) {
-    return ExternOneByteString::NewFromCopy(isolate, buf, buflen);
-  }
-
-  if (buflen >= 32) {
-    size_t u16size = simdutf::utf16_length_from_utf8(buf, buflen);
-    if (u16size > static_cast<size_t>(v8::String::kMaxLength)) {
-      isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
-      return MaybeLocal<Value>();
-    }
-    return EncodeTwoByteString(
-        isolate, u16size, [buf, buflen, u16size](uint16_t* dst) {
-          size_t written = simdutf::convert_valid_utf8_to_utf16(
-              buf, buflen, reinterpret_cast<char16_t*>(dst));
-          CHECK_EQ(written, u16size);
-        });
-  }
-
-  Local<String> str;
-  if (!String::NewFromUtf8(isolate, buf, v8::NewStringType::kNormal, buflen)
-           .ToLocal(&str)) {
-    isolate->ThrowException(node::ERR_STRING_TOO_LONG(isolate));
-  }
-  return str;
-}
-
 MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
                                       const uint16_t* buf,
                                       size_t buflen) {
@@ -733,4 +707,28 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
   return Encode(isolate, buf, len, encoding);
 }
 
+MaybeLocal<Value> StringBytes::EncodeValidUtf8(Isolate* isolate,
+                                               const char* buf,
+                                               size_t buflen) {
+  // Range-check first: EncodeValidNonAsciiUtf8() requires it of its callers.
+  CHECK_BUFLEN_IN_RANGE(buflen);
+  if (!buflen) return String::Empty(isolate);
+  buflen = keep_buflen_in_range(buflen);
+
+  // ASCII fast path
+  if (!simdutf::validate_ascii_with_errors(buf, buflen).error) {
+    return ExternOneByteString::NewFromCopy(isolate, buf, buflen);
+  }
+
+  // Non-ASCII: the UTF-16 fast path is slower for very short input.
+  if (buflen >= 32) return EncodeValidNonAsciiUtf8(isolate, buf, buflen);
+
+  Local<String> str;
+  if (!String::NewFromUtf8(isolate, buf, v8::NewStringType::kNormal, buflen)
+           .ToLocal(&str)) {
+    isolate->ThrowException(node::ERR_STRING_TOO_LONG(isolate));
+  }
+  return str;
+}
+
 }  // namespace node
diff --git a/src/string_bytes.h b/src/string_bytes.h
index 71aa9ff1f90a7c..a9935b68db9647 100644
--- a/src/string_bytes.h
+++ b/src/string_bytes.h
@@ -83,7 +83,8 @@ class StringBytes {
                                           size_t buflen,
                                           enum encoding encoding);
 
-  // Like Encode(..., UTF8) but does not re-validate. Input must be valid UTF-8.
+  // Like Encode(..., UTF8) but skips UTF-8 validation. Caller must guarantee
+  // that buf contains valid UTF-8.
   static v8::MaybeLocal<v8::Value> EncodeValidUtf8(v8::Isolate* isolate,
                                                    const char* buf,
                                                    size_t buflen);