From 47d964b3c6c647bb8d1eeabce33e82f0b1c8b8ea Mon Sep 17 00:00:00 2001 From: Jamie Magee Date: Wed, 27 May 2026 11:16:37 -0700 Subject: [PATCH 1/3] meta: skip scheduled workflows on forks Guards scheduled jobs in daily.yml, codeql.yml, and scorecard.yml so they only run on nodejs/node, matching the pattern already used in tools.yml, stale.yml, and others. This prevents wasted Actions minutes and failed-run email notifications on forks. Signed-off-by: Jamie Magee PR-URL: https://github.com/nodejs/node/pull/63565 Reviewed-By: Luigi Pinca Reviewed-By: Antoine du Hamel Reviewed-By: Trivikram Kamat Reviewed-By: Moshe Atlow Reviewed-By: Jake Yuesong Li --- .github/workflows/codeql.yml | 1 + .github/workflows/daily.yml | 1 + .github/workflows/scorecard.yml | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 5ff9daaa630d2a..01d7f37e38149e 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -9,6 +9,7 @@ permissions: jobs: analyze: + if: github.repository == 'nodejs/node' name: Analyze runs-on: ubuntu-slim permissions: diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index 80e7f8294d693f..0b82dd2aac04c7 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -13,6 +13,7 @@ permissions: jobs: build-lto: + if: github.repository == 'nodejs/node' || github.event_name == 'workflow_dispatch' runs-on: ubuntu-24.04-arm steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index 07f05ecbbca57f..39be55d20d5fd8 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -20,6 +20,7 @@ permissions: read-all jobs: analysis: + if: github.repository == 'nodejs/node' || github.event_name == 'workflow_dispatch' name: Scorecard analysis # cannot use ubuntu-slim here because ossf/scorecard-action is dockerized # cannot use 
ubuntu-24.04-arm here because the docker image is x86 only From 770385a3b8a48f852871495a2261f47a7fb52d0f Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Mon, 18 May 2026 21:56:33 +0300 Subject: [PATCH 2/3] src: dispatch ToV8Value(string_view) via StringBytes::Encode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mert Can Altin PR-URL: https://github.com/nodejs/node/pull/63370 Reviewed-By: Anna Henningsen Reviewed-By: Daniel Lemire Reviewed-By: Gürgün Dayıoğlu --- benchmark/fs/readfile-utf8-fastpath.js | 62 +++++++++++ src/util-inl.h | 16 --- src/util.cc | 11 ++ src/util.h | 6 +- .../test-fs-readfile-utf8-fast-path.js | 103 ++++++++++++++++++ 5 files changed, 179 insertions(+), 19 deletions(-) create mode 100644 benchmark/fs/readfile-utf8-fastpath.js create mode 100644 test/parallel/test-fs-readfile-utf8-fast-path.js diff --git a/benchmark/fs/readfile-utf8-fastpath.js b/benchmark/fs/readfile-utf8-fastpath.js new file mode 100644 index 00000000000000..9bf00717c5f0b2 --- /dev/null +++ b/benchmark/fs/readfile-utf8-fastpath.js @@ -0,0 +1,62 @@ +'use strict'; + +const common = require('../common.js'); +const fs = require('fs'); +const path = require('path'); +const tmpdir = require('../../test/common/tmpdir'); + +const bench = common.createBenchmark(main, { + size: [64, 1024, 16384, 262144, 4194304], + content: ['ascii', 'latin1', 'utf8_mixed'], + source: ['path', 'fd'], + n: [3e3], +}); + +function buildContent(kind, size) { + if (kind === 'ascii') { + return Buffer.alloc(size, 0x61); // 'a' + } + if (kind === 'latin1') { + // 'é' in UTF-8 is 0xC3 0xA9 (2 bytes per char) + const pair = Buffer.from([0xC3, 0xA9]); + const buf = Buffer.alloc(size); + for (let i = 0; i + 2 <= size; i += 2) pair.copy(buf, i); + return buf; + } + if (kind === 'utf8_mixed') { + // mixed ASCII + 3-byte CJK (U+4E2D 中 = E4 B8 AD) + const cjk = Buffer.from([0xE4, 0xB8, 0xAD]); + const buf = Buffer.alloc(size); + let i = 0; + while (i + 
4 <= size) { + buf[i++] = 0x61; + cjk.copy(buf, i); + i += 3; + } + return buf; + } + throw new Error('unknown content: ' + kind); +} + +function main({ n, size, content, source }) { + tmpdir.refresh(); + const file = path.join(tmpdir.path, `bench-${content}-${size}.bin`); + fs.writeFileSync(file, buildContent(content, size)); + + let arg; + let shouldClose = false; + if (source === 'fd') { + arg = fs.openSync(file, 'r'); + shouldClose = true; + } else { + arg = file; + } + + bench.start(); + for (let i = 0; i < n; i++) { + fs.readFileSync(arg, 'utf8'); + } + bench.end(n); + + if (shouldClose) fs.closeSync(arg); +} diff --git a/src/util-inl.h b/src/util-inl.h index d59e30a635b08b..e357d15a14496d 100644 --- a/src/util-inl.h +++ b/src/util-inl.h @@ -341,22 +341,6 @@ v8::Maybe<void> FromV8Array(v8::Local<v8::Context> context, return js_array->Iterate(context, PushItemToVector, &data); } -v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context, - std::string_view str, - v8::Isolate* isolate) { - if (isolate == nullptr) isolate = v8::Isolate::GetCurrent(); - if (str.size() >= static_cast<size_t>(v8::String::kMaxLength)) [[unlikely]] { - // V8 only has a TODO comment about adding an exception when the maximum - // string size is exceeded.
- ThrowErrStringTooLong(isolate); - return v8::MaybeLocal<v8::Value>(); - } - - return v8::String::NewFromUtf8( - isolate, str.data(), v8::NewStringType::kNormal, str.size()) - .FromMaybe(v8::Local<v8::String>()); -} - v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context, std::u16string_view str, v8::Isolate* isolate) { diff --git a/src/util.cc b/src/util.cc index 1ea51cf7012963..317b8db0daac69 100644 --- a/src/util.cc +++ b/src/util.cc @@ -812,4 +812,15 @@ v8::Maybe<int> GetValidFileMode(Environment* env, return v8::Just(mode); } +v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context, + std::string_view str, + v8::Isolate* isolate) { + if (isolate == nullptr) isolate = v8::Isolate::GetCurrent(); + if (str.size() >= static_cast<size_t>(v8::String::kMaxLength)) [[unlikely]] { + ThrowErrStringTooLong(isolate); + return v8::MaybeLocal<v8::Value>(); + } + return StringBytes::Encode(isolate, str.data(), str.size(), UTF8); +} + } // namespace node diff --git a/src/util.h b/src/util.h index 3dedeca4d227e9..48305bfdc13143 100644 --- a/src/util.h +++ b/src/util.h @@ -701,9 +701,9 @@ inline v8::Maybe<void> FromV8Array(v8::Local<v8::Context> context, v8::Local<v8::Array> js_array, std::vector<v8::Global<v8::Value>>* out); -inline v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context, - std::string_view str, - v8::Isolate* isolate = nullptr); +v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context, + std::string_view str, + v8::Isolate* isolate = nullptr); inline v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context, std::u16string_view str, v8::Isolate* isolate = nullptr); diff --git a/test/parallel/test-fs-readfile-utf8-fast-path.js b/test/parallel/test-fs-readfile-utf8-fast-path.js new file mode 100644 index 00000000000000..18d0d884dfa455 --- /dev/null +++ b/test/parallel/test-fs-readfile-utf8-fast-path.js @@ -0,0 +1,103 @@ +'use strict'; + +require('../common'); +const fs = require('node:fs'); +const path = require('node:path'); +const assert = require('node:assert'); +const { describe, it } = require('node:test'); +const tmpdir = require('../common/tmpdir'); + +tmpdir.refresh(); + +function writeFile(name, buf) { + const p
= path.join(tmpdir.path, name); + fs.writeFileSync(p, buf); + return p; +} + +function expectMatches(filePath, rawBuf) { + assert.strictEqual( + fs.readFileSync(filePath, 'utf8'), + rawBuf.toString('utf8'), + ); +} + +describe('fs.readFileSync utf8 simdutf dispatch', () => { + it('empty file', () => { + const p = writeFile('empty.txt', Buffer.alloc(0)); + assert.strictEqual(fs.readFileSync(p, 'utf8'), ''); + }); + + it('ascii small', () => { + const buf = Buffer.from('hello'); + expectMatches(writeFile('tiny-ascii.txt', buf), buf); + }); + + it('ascii 20KB', () => { + const buf = Buffer.alloc(20 * 1024, 0x41); + expectMatches(writeFile('medium-ascii.txt', buf), buf); + }); + + it('ascii 1MB', () => { + const buf = Buffer.alloc(1024 * 1024, 0x61); + expectMatches(writeFile('large-ascii.txt', buf), buf); + }); + + it('fd input', () => { + const buf = Buffer.alloc(50 * 1024, 0x62); + const p = writeFile('fd-ascii.txt', buf); + const fd = fs.openSync(p, 'r'); + try { + assert.strictEqual(fs.readFileSync(fd, 'utf8'), buf.toString('utf8')); + } finally { + fs.closeSync(fd); + } + }); + + it('multibyte UTF-8', () => { + const buf = Buffer.from('中文测试 — café — 🚀'.repeat(500), 'utf8'); + expectMatches(writeFile('multibyte.txt', buf), buf); + }); + + it('latin1-fits utf8', () => { + const buf = Buffer.from('naïve café résumé — niño Köln '.repeat(500), 'utf8'); + expectMatches(writeFile('latin1-fits.txt', buf), buf); + }); + + it('invalid: lone continuation byte', () => { + const buf = Buffer.from([0x68, 0x69, 0x80, 0x21]); + expectMatches(writeFile('invalid-cont.txt', buf), buf); + }); + + it('invalid: overlong', () => { + const buf = Buffer.from([0x41, 0xC0, 0xAF, 0x42]); + expectMatches(writeFile('invalid-overlong.txt', buf), buf); + }); + + it('invalid: surrogate', () => { + const buf = Buffer.from([0x41, 0xED, 0xA0, 0x80, 0x42]); + expectMatches(writeFile('invalid-surrogate.txt', buf), buf); + }); + + it('latin1 boundary U+00FF', () => { + const buf = 
Buffer.from('ÿ'.repeat(2048), 'utf8'); + expectMatches(writeFile('latin1-boundary.txt', buf), buf); + }); + + it('above latin1 U+0100', () => { + const buf = Buffer.from('ĀāĂ'.repeat(1024), 'utf8'); + expectMatches(writeFile('above-latin1.txt', buf), buf); + }); + + it('single codepoint each UTF-8 length', () => { + for (const cp of [0x41, 0x00E9, 0x4E2D, 0x1F600]) { + const buf = Buffer.from(String.fromCodePoint(cp), 'utf8'); + expectMatches(writeFile(`single-cp-${cp.toString(16)}.txt`, buf), buf); + } + }); + + it('truncated multibyte at EOF', () => { + const buf = Buffer.from([0x41, 0xE4, 0xB8]); + expectMatches(writeFile('truncated-multibyte.txt', buf), buf); + }); +}); From 4639dcb4197299e0117f6b338b5bda26af819852 Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Sun, 10 May 2026 23:39:23 +0300 Subject: [PATCH 3/3] src: skip duplicate UTF-8 validation in TextDecoder fatal path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mert Can Altin PR-URL: https://github.com/nodejs/node/pull/63231 Reviewed-By: Yagiz Nizipli Reviewed-By: Anna Henningsen Reviewed-By: Gürgün Dayıoğlu Reviewed-By: Matteo Collina --- benchmark/util/text-decoder.js | 26 +++++++++++++++++++++----- src/encoding_binding.cc | 7 ++++--- src/string_bytes.cc | 34 ++++++++++++++++++++++++++++++++++ src/string_bytes.h | 5 +++++ 4 files changed, 64 insertions(+), 8 deletions(-) diff --git a/benchmark/util/text-decoder.js b/benchmark/util/text-decoder.js index 1aa60f2dd0bcd6..ecfba045c52fab 100644 --- a/benchmark/util/text-decoder.js +++ b/benchmark/util/text-decoder.js @@ -6,26 +6,42 @@ const bench = common.createBenchmark(main, { encoding: ['utf-8', 'windows-1252', 'iso-8859-3'], ignoreBOM: [0, 1], fatal: [0, 1], + type: ['SharedArrayBuffer', 'ArrayBuffer', 'Buffer'], + content: ['ascii', 'one-byte-string', 'two-byte-string'], len: [256, 1024 * 16, 1024 * 128], n: [1e3], - type: ['SharedArrayBuffer', 'ArrayBuffer', 'Buffer'], }); -function 
main({ encoding, len, n, ignoreBOM, type, fatal }) { +function buildContent(content, len) { + let base; + switch (content) { + case 'ascii': base = 'a'; break; + case 'one-byte-string': base = '\xff'; break; + case 'two-byte-string': base = 'ğ'; break; + } + const unitBytes = Buffer.byteLength(base, 'utf8'); + const copies = Math.max(1, Math.floor(len / unitBytes)); + return Buffer.from(base.repeat(copies)); +} + +function main({ encoding, len, n, ignoreBOM, type, fatal, content }) { const decoder = new TextDecoder(encoding, { ignoreBOM, fatal }); + const seed = buildContent(content, len); let buf; switch (type) { case 'SharedArrayBuffer': { - buf = new SharedArrayBuffer(len); + buf = new SharedArrayBuffer(seed.length); + new Uint8Array(buf).set(seed); break; } case 'ArrayBuffer': { - buf = new ArrayBuffer(len); + buf = new ArrayBuffer(seed.length); + new Uint8Array(buf).set(seed); break; } case 'Buffer': { - buf = Buffer.allocUnsafe(len); + buf = seed; break; } } diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index c569375383e8d9..9c84d24c84576d 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -459,14 +459,15 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo<Value>& args) { return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA( env->isolate(), "The encoded data was not valid for encoding utf-8"); } - - // TODO(chalker): save on utf8 validity recheck in StringBytes::Encode() } if (length == 0) return args.GetReturnValue().SetEmptyString(); Local<Value> ret; - if (StringBytes::Encode(env->isolate(), data, length, UTF8).ToLocal(&ret)) { + v8::MaybeLocal<v8::Value> encoded = + has_fatal ?
StringBytes::EncodeValidUtf8(env->isolate(), data, length) + : StringBytes::Encode(env->isolate(), data, length, UTF8); + if (encoded.ToLocal(&ret)) { args.GetReturnValue().Set(ret); } } diff --git a/src/string_bytes.cc b/src/string_bytes.cc index 865302bfd1b4de..1d4ee3a81803b2 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -671,6 +671,40 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate, } } +MaybeLocal<Value> StringBytes::EncodeValidUtf8(Isolate* isolate, + const char* buf, + size_t buflen) { + CHECK_BUFLEN_IN_RANGE(buflen); + if (!buflen) return String::Empty(isolate); + buflen = keep_buflen_in_range(buflen); + + // ASCII fast path + if (!simdutf::validate_ascii_with_errors(buf, buflen).error) { + return ExternOneByteString::NewFromCopy(isolate, buf, buflen); + } + + if (buflen >= 32) { + size_t u16size = simdutf::utf16_length_from_utf8(buf, buflen); + if (u16size > static_cast<size_t>(v8::String::kMaxLength)) { + isolate->ThrowException(ERR_STRING_TOO_LONG(isolate)); + return MaybeLocal<Value>(); + } + return EncodeTwoByteString( + isolate, u16size, [buf, buflen, u16size](uint16_t* dst) { + size_t written = simdutf::convert_valid_utf8_to_utf16( + buf, buflen, reinterpret_cast<char16_t*>(dst)); + CHECK_EQ(written, u16size); + }); + } + + Local<String> str; + if (!String::NewFromUtf8(isolate, buf, v8::NewStringType::kNormal, buflen) + .ToLocal(&str)) { + isolate->ThrowException(node::ERR_STRING_TOO_LONG(isolate)); + } + return str; +} + MaybeLocal<Value> StringBytes::Encode(Isolate* isolate, const uint16_t* buf, size_t buflen) { diff --git a/src/string_bytes.h b/src/string_bytes.h index 9949f508f83ffe..71aa9ff1f90a7c 100644 --- a/src/string_bytes.h +++ b/src/string_bytes.h @@ -83,6 +83,11 @@ class StringBytes { size_t buflen, enum encoding encoding); + // Like Encode(..., UTF8) but does not re-validate. Input must be valid UTF-8.
+ static v8::MaybeLocal<v8::Value> EncodeValidUtf8(v8::Isolate* isolate, + const char* buf, + size_t buflen); + // Warning: This reverses endianness on BE platforms, even though the // signature using uint16_t implies that it should not. // However, the brokenness is already public API and can't therefore