Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions lib/internal/streams/fast-utf8-stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -878,11 +878,15 @@ class Utf8Stream extends EventEmitter {
function releaseWritingBuf(writingBuf, len, n) {
if (typeof writingBuf === 'string') {
const byteLength = Buffer.byteLength(writingBuf);
if (byteLength !== n) {
// Since fs.write returns the number of bytes written, we need to find
// how many complete characters fit within those n bytes.
// If a partial write splits a multi-byte UTF-8 character, we must back up
// to the start of that character to avoid data corruption.
// `fs.write` returns the number of bytes written, but `len` is tracked in
// characters and `writingBuf` is sliced by character index below, so `n`
// must be converted from bytes to characters in both cases.
if (byteLength === n) {
// The whole string was written: advance past every character.
n = writingBuf.length;
} else {
// A partial write may split a multi-byte UTF-8 character, so we must back
// up to the start of that character to avoid data corruption.
const buf = Buffer.from(writingBuf);
// Back up from position n to find a valid UTF-8 character boundary.
// UTF-8 continuation bytes have the pattern 10xxxxxx (0x80-0xBF).
Expand Down
59 changes: 59 additions & 0 deletions test/parallel/test-fastutf8stream-full-write-utf8.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
'use strict';

// Regression test: after a multi-byte UTF-8 chunk is *fully* written, the
// stream must keep flushing the remaining buffered chunks instead of stalling.
//
// `releaseWritingBuf()` tracks the buffered length in characters, but on a full
// write it used to subtract the number of *bytes* reported by fs.write instead
// of the number of *characters*. For multi-byte data this drove the internal
// length to zero, so the stream emitted 'drain' and went idle while queued
// chunks were left unwritten.

const common = require('../common');
const assert = require('node:assert');
const { Utf8Stream } = require('node:fs');

// "€" is a single JS character that encodes to three UTF-8 bytes, so the byte
// count and character count differ - which is exactly what triggered the bug.
const EURO = '€';
const WRITES = 3;
// Arbitrary descriptor number; every fs call is stubbed, so it is never opened.
const FAKE_FD = 42;

// Payloads received by the stubbed fs.write(), in call order.
const written = [];

// Stand-in for fs.write(): records the payload and, on the next tick, reports
// that every byte of it was written (i.e. never a partial write).
function recordFullWrite(fd, data, enc, cb) {
  written.push(data);
  process.nextTick(cb, null, Buffer.byteLength(data));
}

// Minimal fs replacement handed to the stream. Only write() does real work;
// the remaining callbacks succeed immediately and the sync variants are
// no-ops, except writeSync() which must never run in async mode.
const fsOverride = {
  write: common.mustCallAtLeast(recordFullWrite, WRITES),
  writeSync: () => {
    throw new Error('writeSync should not be used in async mode');
  },
  fsync: (fd, cb) => cb(),
  fsyncSync: () => {},
  close: (fd, cb) => cb(),
  open: (path, flags, mode, cb) => cb(null, FAKE_FD),
  mkdir: (path, opts, cb) => cb(),
  mkdirSync: () => {},
};

const streamOptions = {
  fd: FAKE_FD,
  sync: false,
  minLength: 0,
  // A one-character limit is meant to keep every character in a separate
  // buffered chunk, so the later ones are still queued while the first write
  // is in flight.
  maxWrite: 1,
  fs: fsOverride,
};
const stream = new Utf8Stream(streamOptions);

// Final check: every queued character must have reached fs.write() by now.
function verifyEverythingFlushed() {
  const seen = written.length;
  assert.strictEqual(seen, WRITES, `expected ${WRITES} writes, got ${seen}`);
  assert.strictEqual(written.join(''), EURO.repeat(WRITES));
  stream.destroy();
}

stream.on('ready', common.mustCall(() => {
  let pending = WRITES;
  while (pending > 0) {
    stream.write(EURO);
    pending -= 1;
  }

  // end() is deliberately not called: the stream has to drain the queue by
  // itself. Give it a short, platform-scaled grace period before checking.
  const graceMs = common.platformTimeout(100);
  setTimeout(common.mustCall(verifyEverythingFlushed), graceMs);
}));
Loading