Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -1059,6 +1059,12 @@
default=None,
help='build without FFI (Foreign Function Interface) support')

# Opt-out switch for the FFI V8 fast-call path. The parsed value is stored
# as options.without_ffi_fastcall: False unless the flag is given on the
# command line.
parser.add_argument('--without-ffi-fastcall',
action='store_true',
dest='without_ffi_fastcall',
default=False,
help='disable the FFI V8-fast-call path; libffi-only')

parser.add_argument('--experimental-quic',
action='store_true',
dest='experimental_quic',
Expand Down Expand Up @@ -2324,6 +2330,19 @@ def bundled_ffi_supported(os_name, target_arch):

return target_arch in supported.get(os_name, set())

def fastcall_supported(os_name, target_arch):
  """Report whether the FFI fast-call path is available for a target.

  os_name is the OS flavor key ('freebsd', 'linux', 'mac' or 'win') and
  target_arch is the target architecture name. Returns True only when the
  (os_name, target_arch) pair appears in the table below; an unknown OS
  yields False.
  """
  # Accept 'x86' as an alternative spelling of 'ia32' before the lookup.
  # No entry lists 'ia32' at present, so 32-bit x86 is reported unsupported.
  arch = 'ia32' if target_arch == 'x86' else target_arch

  arches_by_os = {
    'freebsd': {'arm', 'arm64', 'x64'},
    'linux': {'arm', 'arm64', 'x64'},
    'mac': {'arm64', 'x64'},
    'win': {'arm64', 'x64'},
  }

  allowed_arches = arches_by_os.get(os_name, set())
  return arch in allowed_arches

def configure_ffi(o):
use_ffi = not options.without_ffi

Expand All @@ -2337,6 +2356,7 @@ def configure_ffi(o):
use_ffi = False

o['variables']['node_use_ffi'] = b(use_ffi)
o['variables']['node_use_ffi_fastcall'] = b(False)

if options.without_ffi:
if options.shared_ffi:
Expand All @@ -2348,6 +2368,11 @@ def configure_ffi(o):

configure_library('ffi', o, pkgname='libffi')

use_fastcall = use_ffi and not options.without_ffi_fastcall
if use_fastcall and not fastcall_supported(flavor, o['variables']['target_arch']):
use_fastcall = False
o['variables']['node_use_ffi_fastcall'] = b(use_fastcall)

def configure_quic(o):
o['variables']['node_use_quic'] = b(options.experimental_quic and
not options.without_ssl)
Expand Down
19 changes: 18 additions & 1 deletion deps/v8/include/v8-fast-api-calls.h
Original file line number Diff line number Diff line change
Expand Up @@ -308,14 +308,28 @@ class V8_EXPORT CFunctionInfo {
kBigInt = 1, // Use BigInts to represent 64 bit integers.
};

// Whether the C function takes a JS receiver as its first argument.
// Most fast-call C functions do (matching how V8 wires up FunctionTemplate
// callbacks). Embedders that want to register a plain C function pointer
// — e.g. an FFI dispatcher that has no use for the receiver — can set this
// to kNo. In that mode V8 omits the receiver from the C call: arg_info[0]
// is the first user argument, ArgumentCount() returns the user-arg count,
// and the JS receiver value is discarded by the lowering instead of being
// passed in the first parameter register.
//
// The CFunctionInfo constructor defaults this to kYes, so callers that do
// not pass the argument keep the receiver-first behavior.
enum class HasReceiver : uint8_t {
// The receiver is passed to the C function and is counted by
// ArgumentCount().
kYes = 0,
// The receiver is not passed to the C function and is not counted by
// ArgumentCount().
kNo = 1,
};

// Construct a struct to hold a CFunction's type information.
// |return_info| describes the function's return type.
// |arg_info| is an array of |arg_count| CTypeInfos describing the
// arguments. Only the last argument may be of the special type
// CTypeInfo::kCallbackOptionsType.
CFunctionInfo(const CTypeInfo& return_info, unsigned int arg_count,
const CTypeInfo* arg_info,
Int64Representation repr = Int64Representation::kNumber);
Int64Representation repr = Int64Representation::kNumber,
HasReceiver has_receiver = HasReceiver::kYes);

const CTypeInfo& ReturnInfo() const { return return_info_; }

Expand All @@ -327,6 +341,8 @@ class V8_EXPORT CFunctionInfo {

Int64Representation GetInt64Representation() const { return repr_; }

bool HasReceiverArg() const { return has_receiver_ == HasReceiver::kYes; }

// |index| must be less than ArgumentCount().
// Note: if the last argument passed on construction of CFunctionInfo
// has type CTypeInfo::kCallbackOptionsType, it is not included in
Expand All @@ -342,6 +358,7 @@ class V8_EXPORT CFunctionInfo {
private:
const CTypeInfo return_info_;
const Int64Representation repr_;
const HasReceiver has_receiver_;
const unsigned int arg_count_;
const CTypeInfo* arg_info_;
};
Expand Down
4 changes: 3 additions & 1 deletion deps/v8/src/api/api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11992,9 +11992,11 @@ CFunction::CFunction(const void* address, const CFunctionInfo* type_info)

CFunctionInfo::CFunctionInfo(const CTypeInfo& return_info,
unsigned int arg_count, const CTypeInfo* arg_info,
Int64Representation repr)
Int64Representation repr,
HasReceiver has_receiver)
: return_info_(return_info),
repr_(repr),
has_receiver_(has_receiver),
arg_count_(arg_count),
arg_info_(arg_info) {
DCHECK(repr == Int64Representation::kNumber ||
Expand Down
8 changes: 6 additions & 2 deletions deps/v8/src/compiler/fast-api-calls.cc
Original file line number Diff line number Diff line change
Expand Up @@ -385,10 +385,14 @@ FastApiCallFunction GetFastApiCallTarget(
function_template_info.c_signatures(broker);
const size_t overloads_count = signatures.size();

// Only considers entries whose type list length matches arg_count.
// Only considers entries whose type list length matches arg_count. For
// signatures registered with HasReceiver=kNo, the C-side ArgumentCount
// already excludes the receiver, so we don't subtract it here.
for (size_t i = 0; i < overloads_count; i++) {
const CFunctionInfo* c_signature = signatures[i];
const size_t len = c_signature->ArgumentCount() - kReceiver;
const size_t len =
c_signature->ArgumentCount() -
(c_signature->HasReceiverArg() ? kReceiver : 0);
bool optimize_to_fast_call =
(len == arg_count) &&
fast_api_call::CanOptimizeFastSignature(c_signature);
Expand Down
13 changes: 10 additions & 3 deletions deps/v8/src/compiler/js-call-reducer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -662,7 +662,9 @@ class FastApiCallReducerAssembler : public JSCallReducerAssembler {
// arguments, so extract c_argument_count from the first function.
const int c_argument_count =
static_cast<int>(c_function_.signature->ArgumentCount());
CHECK_GE(c_argument_count, kReceiver);
if (c_function_.signature->HasReceiverArg()) {
CHECK_GE(c_argument_count, kReceiver);
}

const int slow_arg_count =
// Arguments for CallApiCallbackOptimizedXXX builtin including
Expand All @@ -677,11 +679,16 @@ class FastApiCallReducerAssembler : public JSCallReducerAssembler {
base::SmallVector<Node*, kInlineSize> inputs(value_input_count +
kEffectAndControl);
int cursor = 0;
inputs[cursor++] = n.receiver();
const bool has_receiver_arg =
c_function_.signature->HasReceiverArg();
if (has_receiver_arg) {
inputs[cursor++] = n.receiver();
}

// TODO(turbofan): Consider refactoring CFunctionInfo to distinguish
// between receiver and arguments, simplifying this (and related) spots.
int js_args_count = c_argument_count - kReceiver;
int js_args_count =
c_argument_count - (has_receiver_arg ? kReceiver : 0);
for (int i = 0; i < js_args_count; ++i) {
if (i < n.ArgumentCount()) {
inputs[cursor++] = n.Argument(i);
Expand Down
186 changes: 186 additions & 0 deletions doc/contributing/ffi-fastcall-internals.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
# FFI Fast-Call Internals

This document is for contributors who maintain or extend the FFI
fast-call path (the V8 Fast API Calls implementation in `node:ffi`).
For end-user behavior, see [doc/api/ffi.md](../api/ffi.md).

## Overview

For each registered FFI function whose signature is fast-call eligible
(`src/ffi/types.cc:IsFastCallEligible`), Node generates a tiny native
trampoline that strips the `Local<Object>` receiver V8 fast calls
require and tail-calls the user's target function. The trampoline
address is handed to `v8::CFunction`. A JS wrapper
(`lib/internal/ffi-fastcall.js`) validates args, routes object-typed
pointer args to a libffi slow path, and checks a per-library "alive"
sentinel before each call.

The libffi path remains for callbacks (`ffi_prep_closure_loc`),
ineligible signatures (for example, signatures containing the FFI
`function` type or exceeding the per-ABI argument caps), and
unsupported platforms.

## Eligibility (`src/ffi/types.cc:IsFastCallEligible`)

A signature is fast-call eligible iff all of:

1. The platform is supported (see [Platform support](#platform-support) below).
2. Return type is one of: void, i8/u8/i16/u16, i32/u32, i64/u64,
f32/f64, pointer.
3. Every arg type is in that set.
4. No arg or return is the FFI `function` type.
5. Per-ABI argument caps:
- AArch64: ≤ 7 GP, ≤ 8 FP
- x86_64 SysV: ≤ 6 GP, ≤ 8 FP
- x86_64 Win64: GP + FP combined ≤ 3 (positional register slots — 4 minus the receiver)
- AArch32 hardfp: ≤ 3 GP, ≤ 8 FP; signatures with an i64/u64 argument or return type are rejected

`IsFastCallEligible(fn, &reason)` returns false, with a static reason
string, when the signature is not eligible.

## Platform support

| ABI | Emitter file | Status |
|---|---|---|
| AArch64 (Linux/macOS/FreeBSD/Windows) | `stub_emitter_aarch64.cc` | Implemented, runtime-verified |
| x86_64 SysV (Linux/macOS/FreeBSD) | `stub_emitter_x64_sysv.cc` | Implemented, CI-verified |
| x86_64 Win64 | `stub_emitter_x64_win.cc` | Implemented, CI-verified |
| AArch32 hardfp (Linux/FreeBSD) | `stub_emitter_arm.cc` | Implemented, CI-verified |

On platforms without an emitter, all registrations fall back to libffi.

Adding a new ABI: implement `EmitForwarder` for the new platform in a
new `stub_emitter_<abi>.cc`, gate it via `node.gyp` conditions on
`target_arch` and `OS`, and add the `(os, arch)` pair to
`fastcall_supported` in `configure.py`.

## Stub generation (`src/ffi/fastcall/stub_emitter_*.cc`)

Each stub does, at most, three things:

1. Shift GP regs down by one slot (drop the receiver).
2. (Win64 only) shift FP regs down by one slot — Win64's FP/GP register
slots are positional, so stripping a GP arg also reindexes FP slots.
3. Tail-call the target via an indirect jump.

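For SysV signatures with 6 GP args (the maximum the eligibility cap
allows), the stub uses a call+ret pattern with stack rewrite (because
the 7th GP slot lives on the stack). Other ABIs cap their argument
counts below the point where arguments spill to the stack in v1, to
keep the emitters simple.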

## JIT memory (`src/ffi/fastcall/jit_memory.cc`)

A process-global singleton on top of platform `mmap`/`VirtualAlloc`.
Allocates 64-byte slot-aligned chunks within page-aligned allocations.
After writing the stub, the page is transitioned to RX via `mprotect` /
`VirtualProtect`; once a page goes RX, no further allocation happens
in it (the bump cursor is locked).

The original spec called for `v8::PageAllocator`, but neither
`Isolate::GetArrayBufferAllocator()->GetPageAllocator()` nor
`Platform::GetPageAllocator()` returns a usable allocator in Node's
embedded configuration — both default to `nullptr`. The implementation
uses direct system calls (with `MAP_JIT` on Apple Silicon) instead.

`Free` decrements the live-byte counter but does not return memory.
Pages stay alive for the process lifetime.

Concurrent emit from multiple isolates is safe via
`JitMemory::EmitStub(code, size)`, which holds the singleton mutex across
allocate + memcpy + RX-transition. The lower-level `Allocate` /
`MakeExecutable` / `Free` methods remain public for the self-test only
(which writes platform-specific instruction bytes after Allocate but
before MakeExecutable, and needs that explicit step ordering).

## Self-test

`JitMemory::SelfTest` allocates a tiny stub, writes a `ret`-style
native sequence, transitions to RX, and calls it. The result is cached
in a process-wide atomic via `std::call_once`, so the self-test runs
once per process, at the first FFI registration. On failure, every
subsequent registration falls back to libffi-only and a process warning
is emitted via `ProcessEmitWarning`.

This catches:
- macOS `MAP_JIT` entitlement missing (e.g., signed binary without
`com.apple.security.cs.allow-jit`).
- Hardened-runtime restrictions.
- SELinux execmem denial.

## JS wrapper (`lib/internal/ffi-fastcall.js`)

For each fast-call-eligible inner v8::Function returned from C++,
`buildWrapper` creates a JS wrapper that:

1. Reads the per-library "alive" `Uint8Array` and throws
`ERR_FFI_LIBRARY_CLOSED` if `alive[0] !== 0`. Despite the name, `0`
means the library is still open; `lib.close()` sets the element to `1`.
2. Validates each argument, mirroring `ToFFIArgument` in
`src/ffi/types.cc`: same `ERR_INVALID_ARG_VALUE` codes, same messages,
same range bounds.
3. Pointer args:
- BigInt or null/undefined: pass through as primitive.
- String / Buffer / ArrayBuffer / ArrayBufferView: `ReflectApply`
the `kFastcallInvokeSlow` libffi-backed v8::Function with the
original args.
4. Calls the inner v8::Function with positional primitives. V8's fast
call engages when TurboFan inlines the wrapper.

The wrapper body is **arity-specialized**: arities 0..6 are unrolled into
distinct closures with named parameters (`function(a0, a1, ...)`), so V8
inlines them and the per-arg type info / pointer flag are read from
closure locals instead of arrays. Arities 7+ use a rest-args fallback. This
matters: an earlier draft used a single generic `function(...args)` plus
`ReflectApply`, which dropped FFI throughput by 30–50% vs. the libffi+SB
path. The arity specialization gets the throughput back to 5–13× the
libffi+SB baseline (see commit `81d908e48da` for the fix and benchmarks).

The wrapper is patched onto `DynamicLibrary.prototype.getFunction`,
`getFunctions`, and the `functions` accessor.

## Internal symbols

The JS wrapper looks for these per-isolate Symbols on the inner
`v8::Function`. They are defined in `src/env_properties.h` and
attached by `DynamicLibrary::CreateFunction` for fast-call-eligible
signatures only:

| Symbol | Value | Purpose |
|---|---|---|
| `kFastcallAlive` | `Uint8Array(1)` shared with `DynamicLibrary` | close sentinel |
| `kFastcallInvokeSlow` | `v8::Function` over `InvokeFunction` | object-arg fallback |
| `kFastcallParams` | `string[]` of parameter type names | wrapper introspection |
| `kFastcallResult` | result type name string | wrapper introspection |

## Lifecycle

**Registration:** `CreateFunction` in `src/node_ffi.cc` builds a
`fastcall::CFunctionInfoBundle` (which owns the heap-allocated
`v8::CFunctionInfo` + `v8::CTypeInfo[]`), allocates and emits the stub via
`JitMemory::EmitStub`, then constructs the inner `v8::Function` via a
`FunctionTemplate` with the `CFunction` attached. Per-function fast-call
state is stored on `FFIFunctionInfo::fast` (a `unique_ptr<FastCallState>`,
null when fast-call is unavailable for that signature).

**Per-call:** wrapper validates → calls inner. V8 picks fast or slow
callback. Slow = `InvokeFunction` (libffi); fast = our generated stub →
target.

**`lib.close()`:** flips the alive sentinel (`alive[0] = 1`). The wrapper
throws `ERR_FFI_LIBRARY_CLOSED` on subsequent calls. Slow-path
`InvokeFunction` independently checks `fn->closed` for the same effect on
ineligible signatures. Stubs are NOT freed at close.

**Weak callback (function GC'd):** `CleanupFunctionInfo` resets
`info->fast`, whose `~FastCallState` destructor calls `JitMemory::Free`
on the stub.

## Testing

- `test/cctest/test_ffi_fastcall_*.cc`: unit tests for emitters, JIT
memory, eligibility, CFunctionInfo builder.
- `test/ffi/test-ffi-*.js`: JS-level integration tests covering
types, arity, callbacks, permissions, etc. (existing FFI suite —
reused as the integration baseline).

When debugging unexpected fast-call behavior, log the eligibility miss
reason via the second arg to `IsFastCallEligible`. Set the
`--without-ffi-fastcall` configure flag to A/B test against the
libffi-only path.
2 changes: 1 addition & 1 deletion lib/ffi.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ const {
toArrayBuffer,
} = internalBinding('ffi');

require('internal/ffi-shared-buffer');
require('internal/ffi-fastcall');

DynamicLibrary.prototype[SymbolDispose] = function() {
this.close();
Expand Down
1 change: 1 addition & 0 deletions lib/internal/errors.js
Original file line number Diff line number Diff line change
Expand Up @@ -1231,6 +1231,7 @@ E('ERR_FEATURE_UNAVAILABLE_ON_PLATFORM',
'The feature %s is unavailable on the current platform' +
', which is being used to run Node.js',
TypeError);
E('ERR_FFI_LIBRARY_CLOSED', 'Library is closed', Error);
E('ERR_FS_CP_DIR_TO_NON_DIR',
'Cannot overwrite non-directory with directory', SystemError);
E('ERR_FS_CP_EEXIST', 'Target already exists', SystemError);
Expand Down
Loading