diff --git a/NEWS.md b/NEWS.md index 1c8682503af..615a271d3d5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,20 @@ # NEWS +## Unreleased + +### What's New + +- **Rust subgraph ABI support (draft, [#6462](https://github.com/graphprotocol/graph-node/pull/6462)).** A new `rust_abi/` module (~1,450 LOC) enables subgraphs compiled from Rust to `wasm32-unknown-unknown`. Selected via `mapping.kind: wasm/rust` in `subgraph.yaml`. Key properties: + - Clean `ptr+len` calling convention — no managed heap, no `AscPtr` juggling. + - TLV entity serialization with a closed tag table; tag bytes are named constants (`tags::*` in `rust_abi/types.rs`). + - Fixed-layout trigger serialization for Ethereum log, call, and block handlers. NEAR has a documented stub with a `0xFF` sentinel. + - Wasmtime fuel metering (10 billion units per handler; `Trap::OutOfFuel` is a deterministic error). + - Language detection via manifest `mapping.kind`; `is_rust_module()` cross-checks by scanning the `graphite` import namespace. + - Bypasses the `parity_wasm` gas-injection pipeline (incompatible with modern WASM opcodes); gas is provided entirely by wasmtime fuel. + - Formal ABI specification: `docs/rust-abi-spec.md`. + - 12 unit tests; ABI test vectors cross-validated against the Graphite SDK. + - Live-tested: USDC Transfer events from Ethereum mainnet indexed end-to-end via GraphQL. + ## v0.42.1 ### Bug Fixes diff --git a/chain/ethereum/src/ethereum_adapter.rs b/chain/ethereum/src/ethereum_adapter.rs index e58055a97a7..c0f4a5400cb 100644 --- a/chain/ethereum/src/ethereum_adapter.rs +++ b/chain/ethereum/src/ethereum_adapter.rs @@ -705,6 +705,18 @@ impl EthereumAdapter { .map_err(|e| e.into_inner().unwrap_or(ContractCallError::Timeout)) } + /// Make a raw eth_call without ABI encoding. + /// Used by Rust ABI subgraphs where the SDK handles encoding/decoding. + pub async fn raw_call( + &self, + req: call::Request, + block_ptr: BlockPtr, + gas: Option, + ) -> Result { + let logger = self.provider_logger(&self.logger); + self.call(logger, req, block_ptr, gas).await + } + async fn call_and_cache( &self, logger: &ProviderLogger, diff --git a/chain/ethereum/src/runtime/runtime_adapter.rs b/chain/ethereum/src/runtime/runtime_adapter.rs index a5597efcd4d..39044a70f65 100644 --- a/chain/ethereum/src/runtime/runtime_adapter.rs +++ b/chain/ethereum/src/runtime/runtime_adapter.rs @@ -1,5 +1,7 @@ use std::{sync::Arc, time::Instant}; +use async_trait::async_trait; + use crate::adapter::EthereumRpcError; use crate::{ capabilities::NodeCapabilities, network::EthereumNetworkAdapters, Chain, ContractCallError, @@ -9,7 +11,7 @@ use anyhow::{anyhow, Context, Error}; use blockchain::HostFn; use graph::abi; use graph::abi::DynSolValueExt; -use graph::blockchain::ChainIdentifier; +use graph::blockchain::{ChainIdentifier, RawEthCall}; use graph::components::subgraph::HostMetrics; use graph::data::store::ethereum::call; use graph::data::store::scalar::BigInt; @@ -18,7 +20,7 @@ use graph::data_source; use graph::data_source::common::{ContractCall, MappingABI}; use graph::runtime::gas::Gas; use graph::runtime::{AscIndexId, IndexForAscTypeId}; -use graph::slog::debug; +use graph::slog::{debug, o, Discard}; use graph::{ blockchain::{self, BlockPtr, HostFnCtx}, cheap_clone::CheapClone, @@ -185,6 +187,98 @@ impl blockchain::RuntimeAdapter for RuntimeAdapter { Ok(host_fns) } + + fn raw_eth_call(&self) -> Option> { + Some(Arc::new(EthereumRawEthCall { + eth_adapters: self.eth_adapters.cheap_clone(), + call_cache: self.call_cache.cheap_clone(), + eth_call_gas: eth_call_gas(&self.chain_identifier), + })) + } +} + +/// Implementation of RawEthCall for Ethereum chains. +/// Used by Rust ABI subgraphs for making raw eth_call without ABI encoding. +pub struct EthereumRawEthCall { + eth_adapters: Arc, + call_cache: Arc, + eth_call_gas: Option, +} + +#[async_trait] +impl RawEthCall for EthereumRawEthCall { + async fn call( + &self, + address: [u8; 20], + calldata: &[u8], + block_ptr: &BlockPtr, + gas: Option, + ) -> Result>, HostExportError> { + // Get an adapter suitable for calls (non-archive is fine) + let eth_adapter = self + .eth_adapters + .call_or_cheapest(Some(&NodeCapabilities { + archive: false, + traces: false, + })) + .map_err(HostExportError::Unknown)?; + + // Create a raw call request + let req = call::Request::new(Address::from(address), calldata.to_vec(), 0); + + // Check cache first + let (cached, _missing) = self + .call_cache + .get_calls(&[req.cheap_clone()], block_ptr.cheap_clone()) + .await + .unwrap_or_else(|_| (Vec::new(), vec![req.cheap_clone()])); + + if let Some(resp) = cached.into_iter().next() { + return match resp.retval { + call::Retval::Value(bytes) => Ok(Some(bytes.to_vec())), + call::Retval::Null => Ok(None), + }; + } + + // Make the actual call + let result = eth_adapter + .raw_call( + req.cheap_clone(), + block_ptr.cheap_clone(), + gas.or(self.eth_call_gas), + ) + .await; + + match result { + Ok(retval) => { + // Cache the result + let cache = self.call_cache.cheap_clone(); + let _ = cache + .set_call( + &Logger::root(Discard, o!()), + req, + block_ptr.cheap_clone(), + retval.clone(), + ) + .await; + + match retval { + call::Retval::Value(bytes) => Ok(Some(bytes.to_vec())), + call::Retval::Null => Ok(None), + } + } + Err(ContractCallError::AlloyError(e)) => Err(HostExportError::PossibleReorg( + anyhow::anyhow!("eth_call RPC error: {}", e), + )), + Err(ContractCallError::Timeout) => Err(HostExportError::PossibleReorg( + anyhow::anyhow!("eth_call timed out"), + )), + Err(e) => Err(HostExportError::Unknown(anyhow::anyhow!( + "eth_call failed: {}", + e + ))), + } + } } /// function ethereum.call(call: SmartContractCall): Array | null diff --git a/chain/ethereum/src/trigger.rs b/chain/ethereum/src/trigger.rs index 6a5e1503073..bc7253fa1be 100644 --- a/chain/ethereum/src/trigger.rs +++ b/chain/ethereum/src/trigger.rs @@ -23,6 +23,9 @@ use graph::runtime::AscPtr; use graph::runtime::HostExportError; use graph::semver::Version; use graph_runtime_wasm::module::ToAscPtr; +use graph_runtime_wasm::rust_abi::{ + RustBlockTrigger, RustCallTrigger, RustLogTrigger, ToRustBytes, +}; use std::{cmp::Ordering, sync::Arc}; use crate::runtime::abi::AscEthereumBlock; @@ -649,3 +652,69 @@ impl<'a> EthereumCallData<'a> { &self.call.to } } + +// ============================================================================ +// Rust ABI serialization for Graphite SDK +// ============================================================================ + +impl ToRustBytes for MappingTrigger { + fn to_rust_bytes(&self) -> Vec { + match self { + MappingTrigger::Log { + block, + transaction, + log, + params: _, + receipt: _, + calls: _, + } => { + let rust_trigger = RustLogTrigger { + address: log.inner.address.0 .0, + tx_hash: transaction.tx_hash().0, + log_index: log.log_index.unwrap_or(0), + block_number: block.number_u64(), + block_timestamp: block.inner().header.timestamp, + topics: log.inner.data.topics().iter().map(|t| t.0).collect(), + data: log.inner.data.data.to_vec(), + }; + rust_trigger.to_rust_bytes() + } + MappingTrigger::Call { + block, + transaction, + call, + inputs: _, + outputs: _, + } => { + let rust_trigger = RustCallTrigger { + to: call.to.0 .0, + from: call.from.0 .0, + tx_hash: transaction.tx_hash().0, + block_number: block.number_u64(), + block_timestamp: block.inner().header.timestamp, + block_hash: block.inner().header.hash.0, + input: call.input.to_vec(), + output: call.output.to_vec(), + }; + rust_trigger.to_rust_bytes() + } + MappingTrigger::Block { block } => { + // Convert U256 difficulty to big-endian bytes + let difficulty: [u8; 32] = block.inner().header.difficulty.to_be_bytes(); + + let rust_trigger = RustBlockTrigger { + hash: block.inner().header.hash.0, + parent_hash: block.inner().header.parent_hash.0, + number: block.number_u64(), + timestamp: block.inner().header.timestamp, + author: block.inner().header.beneficiary.0 .0, + gas_used: block.inner().header.gas_used, + gas_limit: block.inner().header.gas_limit, + difficulty, + base_fee_per_gas: block.inner().header.base_fee_per_gas.unwrap_or(0), + }; + rust_trigger.to_rust_bytes() + } + } + } +} diff --git a/chain/near/src/trigger.rs b/chain/near/src/trigger.rs index ab68ab71beb..90012494a04 100644 --- a/chain/near/src/trigger.rs +++ b/chain/near/src/trigger.rs @@ -9,6 +9,7 @@ use graph::prelude::BlockNumber; use graph::runtime::HostExportError; use graph::runtime::{asc_new, gas::GasCounter, AscHeap, AscPtr}; use graph_runtime_wasm::module::ToAscPtr; +use graph_runtime_wasm::rust_abi::ToRustBytes; use std::{cmp::Ordering, sync::Arc}; use crate::codec; @@ -143,6 +144,52 @@ impl MappingTriggerTrait for NearTrigger { } } +/// Sentinel header byte returned by [`NearTrigger::to_rust_bytes`] to +/// signal "unsupported chain" to any SDK that attempts to decode a NEAR +/// trigger payload. Chosen as `0xFF` because it is outside the current +/// TLV value-tag range (`0x00`..=`0x09`, see +/// `runtime/wasm/src/rust_abi/types.rs::tags`) and therefore guarantees a +/// deterministic decode failure rather than silent misinterpretation. +const UNSUPPORTED_CHAIN_SENTINEL: u8 = 0xFF; + +impl ToRustBytes for NearTrigger { + /// Produce a Rust ABI payload for a NEAR trigger. + /// + /// NEAR triggers are **not** yet wired up to the Rust ABI. A real + /// implementation would need to serialise, at minimum: + /// + /// - For [`NearTrigger::Block`]: the block header fields (hash, + /// prev_hash, height, timestamp, author, gas_price, total_supply, + /// etc.) plus any chunk metadata the SDK exposes to block handlers. + /// - For [`NearTrigger::Receipt`]: the receipt id, predecessor / + /// receiver account ids, the enclosing block's hash and height, the + /// full action list (`CreateAccount`, `DeployContract`, + /// `FunctionCall`, `Transfer`, `Stake`, `AddKey`, `DeleteKey`, + /// `DeleteAccount`), and the execution outcome (status, gas burnt, + /// tokens burnt, logs, emitted receipt ids). + /// + /// None of the above is currently serialised. Instead we emit a + /// single-byte sentinel payload (`[UNSUPPORTED_CHAIN_SENTINEL]`, i.e. + /// `[0xFF]`) that is guaranteed to fail decoding on the SDK side + /// because `0xFF` is not a valid TLV value tag (the tag space is + /// `0x00`..=`0x09`; see `runtime/wasm/src/rust_abi/types.rs::tags`). + /// + /// This is intentionally louder than the previous `Vec::new()` stub: + /// an empty payload could be silently round-tripped as "no data", and + /// a Rust subgraph targeting NEAR would index garbage without ever + /// realising something was wrong. The sentinel byte forces a + /// deterministic handler failure at the first byte of the payload. + /// + /// There is deliberately no logging call here: `to_rust_bytes` has + /// no `Logger` in scope and threading one through would pollute the + /// trait for every chain. Operators will see the SDK-side decode + /// error surfaced as a mapping failure, which is the loud signal we + /// want. + fn to_rust_bytes(&self) -> Vec { + vec![UNSUPPORTED_CHAIN_SENTINEL] + } +} + pub struct ReceiptWithOutcome { // REVIEW: Do we want to actually also have those two below behind an `Arc` wrapper? pub outcome: codec::ExecutionOutcomeWithId, diff --git a/core/src/subgraph/instance_manager.rs b/core/src/subgraph/instance_manager.rs index 8273b0045cf..34bf92a3930 100644 --- a/core/src/subgraph/instance_manager.rs +++ b/core/src/subgraph/instance_manager.rs @@ -24,6 +24,7 @@ use graph::env::EnvVars; use graph::prelude::{SubgraphInstanceManager as SubgraphInstanceManagerTrait, *}; use graph::{blockchain::BlockchainMap, components::store::DeploymentLocator}; use graph_runtime_wasm::module::ToAscPtr; +use graph_runtime_wasm::rust_abi::ToRustBytes; use graph_runtime_wasm::RuntimeHostBuilder; use tokio::task; @@ -234,7 +235,7 @@ impl SubgraphInstanceManager { ) -> anyhow::Result>> where C: Blockchain, - ::MappingTrigger: ToAscPtr, + ::MappingTrigger: ToAscPtr + ToRustBytes, { self.build_subgraph_runner_inner( logger, @@ -262,7 +263,7 @@ impl SubgraphInstanceManager { ) -> anyhow::Result>> where C: Blockchain, - ::MappingTrigger: ToAscPtr, + ::MappingTrigger: ToAscPtr + ToRustBytes, { let subgraph_store = self.subgraph_store.cheap_clone(); let registry = self.metrics_registry.cheap_clone(); @@ -568,7 +569,7 @@ impl SubgraphInstanceManager { runner: SubgraphRunner>, ) -> Result<(), Error> where - ::MappingTrigger: ToAscPtr, + ::MappingTrigger: ToAscPtr + ToRustBytes, { let registry = self.metrics_registry.cheap_clone(); let subgraph_metrics = runner.metrics.subgraph.cheap_clone(); diff --git a/docs/rust-abi-spec.md b/docs/rust-abi-spec.md new file mode 100644 index 00000000000..c8ebde8b2d9 --- /dev/null +++ b/docs/rust-abi-spec.md @@ -0,0 +1,878 @@ +# Rust Subgraph ABI Specification + +Status: Draft +Version: 0.0.1 +Audience: graph-node maintainers and contributors +Companion PR: [#6462](https://github.com/graphprotocol/graph-node/pull/6462) + +This document specifies the binary interface (ABI) used between graph-node and +subgraph mappings compiled from Rust to `wasm32-unknown-unknown`. It is the +authoritative reference for the protocol implemented in +`runtime/wasm/src/rust_abi/`. + +It is intentionally written in the style of a protocol specification: it +describes wire formats, function signatures, and host semantics, not user +ergonomics. For an end-user view of how to write Rust subgraphs, see the +Graphite SDK documentation. + +--- + +## 1. Motivation + +graph-node currently supports a single mapping language, AssemblyScript (AS), +served by the `asc_abi/` serialization layer. The AS ABI was designed at a +time when AS was the only realistic option for compiling high-level code to +WASM with a managed heap. Several years of operational experience have +exposed structural problems in that ABI that cannot be fixed without breaking +existing subgraphs. + +The Rust ABI introduces a parallel serialization layer (`rust_abi/`) that +sits next to `asc_abi/` and is selected by manifest. The runtime (host +exports, store, chain ingestion, gas accounting) is unchanged. Only the +boundary between WASM and the host is replaced. + +### 1.1 Problems with the AS ABI + +The following issues motivate a new ABI rather than an incremental fix: + +1. **No closures, no first-class functions.** AS lacks closures and trait + objects, which forces the host-language bindings to be hand-written and + stringly-typed. The `AscPtr` machinery encodes type information in + Rust generics on the host side and in conventions on the AS side, with no + compile-time link between them. + +2. **Broken nullable handling.** AS represents nullability inconsistently + (`AscNullableString`, sentinel pointers, separate "is_null" flags + depending on the type). The host and the mapping disagree about how to + round-trip a nullable value of certain types, and the only safe path is + to special-case each one. + +3. **Opaque errors.** When deserialization fails on the host side, the + diagnostic is typically a generic "failed to read AscPtr" with no + indication of which field or which type the failure originated in. + Mapping authors get a runtime trap with no actionable context. + +4. **Managed heap coupling.** The AS ABI requires the host to call into the + AS runtime to allocate memory on the mapping's behalf (`__alloc`, + `__new`, `__pin`, GC roots). This couples graph-node to AS-runtime + internals: an AS compiler change can break the host without any + corresponding source change in graph-node. + +5. **No versioning story.** `apiVersion` exists in the manifest but + describes the AS schema layout, not the wire protocol. Adding a new host + function or changing a field layout requires ad-hoc compatibility code + scattered across `asc_abi/`. + +6. **BigInt endianness and signedness ambiguity.** AS `BigInt` is serialized + as little-endian unsigned bytes plus a sign flag. Several historical + bugs have been traced to mismatches between SDK and host on the + interpretation of these bytes. + +### 1.2 Why Rust → WASM is safe to adopt + +Rust targeting `wasm32-unknown-unknown` is a battle-tested toolchain. The +Substreams ecosystem already runs Rust modules in production with the same +target triple, the same `wasmtime` runtime version family, and the same +class of host-imported functions used here. There is no novel compiler or +runtime risk in this proposal: the only new surface is the ABI itself. + +--- + +## 2. Design Principles + +The Rust ABI is built around the following principles. Each is a direct +response to a specific problem in section 1.1. + +1. **Pointer + length, always.** Every variable-length value crossing the + boundary is passed as `(ptr: u32, len: u32)`. There are no opaque + handles, no tagged pointers, no implicit headers in front of buffers. + The host never needs to peek at WASM memory layout to find out how long + a buffer is. + +2. **No managed heap.** The mapping owns its memory. The host never calls + into a runtime allocator on the mapping's behalf. The mapping exposes + exactly two memory-management functions: `allocate(size)` and + `reset_arena()`. A bump allocator with arena reset semantics is + sufficient and is what the Graphite SDK currently ships. + +3. **Explicit TLV serialization.** Structured values (entities, dynamic + values) use a single, explicit Type-Length-Value format with a closed + tag table. There is no implicit type information; every value carries + exactly one byte indicating its kind. + +4. **Language detection via manifest.** Selection between `asc_abi` and + `rust_abi` is driven by `mapping.kind` in `subgraph.yaml`. The runtime + additionally inspects WASM imports for the `graphite` namespace as a + defensive cross-check. + +5. **Versioned from day one.** `apiVersion` in the manifest names the + protocol version. Any change to the wire format, host function set, + or value tag table is a version bump. Section 8 defines what counts as + breaking. + +6. **No ABI in the runtime.** The Rust ABI is a serialization layer. It + contains no business logic. Adding a new host function means writing + one impl in `HostExports` (shared with AS) and two thin wrappers (one + AS, one Rust). The runtime semantics are identical for both languages. + +--- + +## 3. Comparison with the AssemblyScript ABI + +The Rust ABI was designed by enumerating mistakes in the AS ABI and +consciously not making them. This section is included so that future +maintainers understand what the design is reacting against. + +| Aspect | AS ABI (`asc_abi`) | Rust ABI (`rust_abi`) | +|---|---|---| +| Pointer type | `AscPtr` parameterized by Rust phantom type | `(ptr: u32, len: u32)` | +| Type erasure | Phantom types on host, no enforcement on guest | Closed tag table, checked on both sides | +| Memory ownership | Host allocates into guest heap via `__alloc`/`__new` | Guest owns its heap; host only reads/writes raw bytes | +| Nullable handling | Per-type, inconsistent | Single `Null` value tag (`0x00`) | +| BigInt encoding | LE unsigned bytes + sign flag | `to_signed_bytes_le()` / `from_signed_bytes_le()` | +| String encoding | UTF-16, length-prefixed AS string header | UTF-8 bytes, `(ptr, len)` | +| Versioning | `apiVersion` describes AS class layout | `apiVersion` describes wire protocol | +| Namespace | `index` (for AS host imports) | `graphite` | +| Entry point | AS `_start` runs at instantiation | None; handlers are pure `extern "C"` | +| GC | AS runtime manages roots, requires `__pin`/`__unpin` | None | +| Error reporting | Trap with no field context | Errors carry deserialization position and type | + +The single most consequential difference is **memory ownership**. The AS ABI +requires the host to know enough about the AS runtime to call its allocator; +the Rust ABI requires only that the guest exposes `allocate(size: u32) -> u32`. +This decouples graph-node from any particular Rust toolchain version and +makes the protocol portable to other languages with the same property +(e.g., a future C/Zig/Go-via-TinyGo target). + +--- + +## 4. Protocol Specification + +This section is normative. + +### 4.1 Target Triple + +Rust mappings MUST be compiled to `wasm32-unknown-unknown`. WASI is not +supported and will not be linked. + +### 4.2 Required Exports + +A Rust subgraph WASM module MUST export the following functions in addition +to its handlers: + +```text +(func (export "memory") ;; standard linear memory export + (memory 1)) + +(func (export "allocate") + (param i32) (result i32)) + +(func (export "reset_arena")) +``` + +Semantics: + +- `memory` is the standard WASM linear memory. The host reads handler input + arguments out of it and writes host-function results into it. +- `allocate(size: u32) -> u32` reserves at least `size` bytes of contiguous + memory and returns a pointer to the first byte. The pointer MUST remain + valid until the next call to `reset_arena`. Implementations are NOT + required to track or free individual allocations; a bump allocator is + the reference implementation. +- `reset_arena()` invalidates all pointers previously returned by + `allocate`. The host calls this once after every handler invocation. + The mapping MUST NOT call `reset_arena` itself during a handler. + +The reference SDK enforces a 4 MiB allocation cap inside `allocate` to +catch runaway recursion and decoder bugs early. + +### 4.3 Handler Signature + +Every handler exported by the mapping MUST have the following signature: + +```rust +#[unsafe(no_mangle)] +pub extern "C" fn handle_xxx(event_ptr: u32, event_len: u32) -> u32; +``` + +Calling convention: + +- `event_ptr` is a pointer into the mapping's linear memory. +- `event_len` is the length, in bytes, of the serialized trigger payload. +- The byte slice `[event_ptr, event_ptr + event_len)` MUST be valid for + the duration of the call. +- The return value is a status code: `0` indicates success; any non-zero + value indicates an error. The host treats non-zero returns as a + deterministic mapping failure for that trigger. + +The host invokes handlers as follows (pseudocode): + +```text +let bytes = trigger.to_rust_bytes(); +let ptr = call("allocate", bytes.len() as u32); +write_memory(ptr, &bytes); +let rc = call(handler_name, ptr, bytes.len() as u32); +call("reset_arena"); +if rc != 0 { fail_deterministic(); } +``` + +The handler is therefore stateless from the runtime's perspective. State +is held only in the host (store, gas, data source list). + +### 4.4 Host Import Namespace + +All host functions are imported from the `graphite` namespace. The host +links only the functions that the module actually imports; missing +functions are not an error at link time. Calling an unlinked function +traps deterministically. + +Detection of a Rust ABI module is performed by scanning the module's +imports for any function imported from `graphite`. This is done in +`is_rust_module()` in `runtime/wasm/src/rust_abi/host.rs` and is used as +a defensive cross-check against the manifest `mapping.kind` field. + +### 4.5 Memory Conventions + +All multi-byte integers in the wire format are **little-endian**. This +applies to length prefixes, fixed-width integer values inside TLV, and +fixed-layout trigger fields. The single exception is `block.difficulty`, +which is serialized as 32 big-endian bytes because that is its natural +on-chain representation. + +All strings are **UTF-8**. The host validates UTF-8 on every string +read; invalid UTF-8 traps with a deterministic error. + +Buffer ownership crossing the boundary: + +- **Mapping → host**: the mapping passes `(ptr, len)`. The host reads the + bytes into a host-side `Vec` (charging gas per byte) and never + retains the pointer past the host function call. +- **Host → mapping**: the host writes into a buffer the mapping has already + allocated. The mapping passes `(out_ptr, out_cap)`; the host writes at + most `out_cap` bytes and returns the actual length used. If the host + needs to write more than `out_cap` bytes, it returns the sentinel + `u32::MAX` and writes nothing. The mapping is then expected to grow its + buffer and retry. + +### 4.6 TLV Format for Entity Values + +Entity field values are serialized in a Type-Length-Value format. The +grammar is: + +```text +EntityData := field_count:u32 Field* +Field := key_len:u32 key:[u8; key_len] Value +Value := tag:u8 ValueBody(tag) +``` + +Note that `key` is a UTF-8 string and is **not** itself a tagged Value; +field keys are always strings. + +The `ValueBody(tag)` shape is determined by the tag: + +| Tag (hex) | Name | Body | +|---|---|---| +| `0x00` | Null | (none) | +| `0x01` | String | `len:u32` `bytes:[u8; len]` (UTF-8) | +| `0x02` | Int | `i32` little-endian (4 bytes) | +| `0x03` | Int8 | `i64` little-endian (8 bytes) | +| `0x04` | BigInt | `len:u32` `bytes:[u8; len]` (signed two's-complement, little-endian) | +| `0x05` | BigDecimal | `len:u32` `bytes:[u8; len]` (UTF-8 decimal string) | +| `0x06` | Bool | `0x00` (false) or `0x01` (true) (1 byte) | +| `0x07` | Bytes | `len:u32` `bytes:[u8; len]` | +| `0x08` | Address | `bytes:[u8; 20]` (no length prefix) | +| `0x09` | Array | `len:u32` `Value*` (nested, tagged) | + +Notes: + +- `Address` is a fixed-width specialization of `Bytes`. On the host side + it is materialized as `Value::Bytes` carrying 20 bytes. The separate + tag exists so that schema-aware decoders on the SDK side can produce + `[u8; 20]` directly without a runtime length check. +- `BigInt` uses Rust's `num_bigint::BigInt::to_signed_bytes_le` / + `from_signed_bytes_le`. A length of zero encodes the number 0. +- `BigDecimal` is serialized as its canonical string representation. This + trades a few bytes of size for unambiguous round-tripping across two + independent implementations. +- `Timestamp` (a graph-core internal type) is normalized at serialization + time to a `BigInt` containing microseconds since the Unix epoch. There + is no separate `Timestamp` tag. +- The decoder on the host side rejects unknown tags with + `io::ErrorKind::InvalidData`. + +The canonical host-side tag byte values are defined as named constants in +the `tags` module in `runtime/wasm/src/rust_abi/types.rs` (`tags::NULL`, +`tags::STRING`, `tags::INT`, `tags::INT8`, `tags::BIG_INT`, +`tags::BIG_DECIMAL`, `tags::BOOL`, `tags::BYTES`, `tags::ADDRESS`, +`tags::ARRAY`). The `ValueTag` enum discriminants are derived from those +constants, so there is exactly one place in the host codebase where the +on-wire bytes are defined. Any edit to those constants is, by definition, +a breaking ABI change (see section 8.1). + +The reference implementation lives in +`runtime/wasm/src/rust_abi/entity.rs`. + +#### 4.6.1 Worked Example + +The entity `{ id: "tx-1", value: 42, active: true }` serializes to: + +```text +03 00 00 00 ; field_count = 3 +02 00 00 00 69 64 ; key_len=2, "id" +01 04 00 00 00 74 78 2d 31 ; tag=String, len=4, "tx-1" +05 00 00 00 76 61 6c 75 65 ; key_len=5, "value" +02 2a 00 00 00 ; tag=Int, 42 LE +06 00 00 00 61 63 74 69 76 65 ; key_len=6, "active" +06 01 ; tag=Bool, true +``` + +Field order is not specified. Decoders MUST NOT rely on iteration order; +the host implementation uses `HashMap`. + +### 4.7 Trigger Serialization + +Trigger payloads use a fixed binary layout, not the TLV format. Triggers +are produced by graph-node and consumed by SDK codegen, so both sides know +the schema statically; tagging would be wasted bytes. + +The reference implementation lives in +`runtime/wasm/src/rust_abi/trigger.rs`. The chain-specific producers live +under `chain//src/trigger.rs` and implement the `ToRustBytes` +trait. + +#### 4.7.1 `RustLogTrigger` + +Used for Ethereum event log handlers. + +```text +address : [u8; 20] +tx_hash : [u8; 32] +log_index : u64 (LE) +block_number : u64 (LE) +block_timestamp : u64 (LE, Unix seconds) +topic_count : u32 (LE) +topics : [u8; 32] * topic_count +data_len : u32 (LE) +data : [u8; data_len] +``` + +Total fixed prefix: 96 bytes. Then `32 * topic_count + 4 + data_len` bytes +of variable payload. + +#### 4.7.2 `RustCallTrigger` + +Used for Ethereum call handlers. + +```text +to : [u8; 20] +from : [u8; 20] +tx_hash : [u8; 32] +block_number : u64 (LE) +block_timestamp : u64 (LE, Unix seconds) +block_hash : [u8; 32] +input_len : u32 (LE) +input : [u8; input_len] +output_len : u32 (LE) +output : [u8; output_len] +``` + +Total fixed prefix: 120 bytes. + +#### 4.7.3 `RustBlockTrigger` + +Used for Ethereum block handlers. + +```text +hash : [u8; 32] +parent_hash : [u8; 32] +number : u64 (LE) +timestamp : u64 (LE, Unix seconds) +author : [u8; 20] +gas_used : u64 (LE) +gas_limit : u64 (LE) +difficulty : [u8; 32] (BIG-endian U256) +base_fee_per_gas : u64 (LE; 0 if pre-EIP-1559) +``` + +Total: 156 bytes (fixed-width). + +#### 4.7.4 Other Chains + +Only Ethereum implements `ToRustBytes` at present. NEAR has a documented +stub in `chain/near/src/trigger.rs` that returns a single-byte +"unsupported chain" sentinel (`0xFF`). The sentinel is deliberately +outside the TLV value-tag range (`0x00`..=`0x09`) so that any SDK that +attempts to decode it will fail deterministically at the first byte +rather than silently treating an empty payload as "no data". Adding a +new chain requires only an impl of +`ToRustBytes for ::MappingTrigger`; no changes to `rust_abi/` are +required. + +### 4.8 Error Codes + +Handler return values: + +| Value | Meaning | +|---|---| +| `0` | Success. The host commits store mutations and proceeds. | +| non-zero | Deterministic error. The host treats this as a failed handler. | + +Sentinel return values from host functions that write into a guest buffer: + +| Value | Meaning | +|---|---| +| `0` | Not found / null result (e.g., `store_get` for a missing entity, `ethereum_call` revert) | +| `u32::MAX` | Output buffer too small. Guest must grow its buffer and retry. | +| any other | Length, in bytes, of the data written into the guest buffer. | + +Panics in the mapping are caught by a panic hook in the SDK and forwarded +to the host via the `abort` import (see section 5.10), which produces a +deterministic trap with the panic message, file, and line number. Decode +errors inside generated handler wrappers are logged via `log_log` before +the handler returns a non-zero status. + +--- + +## 5. Host Function Reference + +All functions are imported from the `graphite` namespace. Where a function +is `async` on the host side, the `func_wrap_async` linker variant is used; +the wasmtime fuel meter pauses for the duration of the awaited future. + +Gas costs: + +- Reading bytes from guest memory costs `GAS_COST_LOAD * len` gas. +- Writing bytes to guest memory costs `GAS_COST_STORE * len` gas. +- `ethereum_call` consumes a flat `5_000_000_000` units of gas in + addition to the per-byte memory costs. +- The wasmtime fuel meter is configured with a budget of 10 billion + units per handler invocation. Exceeding this budget produces + `Trap::OutOfFuel`, which is reported as a deterministic error. + +In the signatures below, all integer parameters are `u32` (i32 in WASM +terms) unless noted. + +### 5.1 `store_set` + +```text +fn store_set( + entity_type_ptr: u32, entity_type_len: u32, + id_ptr: u32, id_len: u32, + data_ptr: u32, data_len: u32, +); +``` + +Stores an entity. `entity_type` and `id` are UTF-8 strings. `data` is a +TLV-encoded `EntityData` map (section 4.6). + +The host deserializes the entity data, looks up the schema for +`entity_type` (this is where the runtime applies type checking and +nullability rules), and forwards to `HostExports::store_set`. Errors +during deserialization or schema validation surface as host traps. + +Async on the host side because `store_set` may need to await an in-memory +write to the entity cache. + +### 5.2 `store_get` + +```text +fn store_get( + entity_type_ptr: u32, entity_type_len: u32, + id_ptr: u32, id_len: u32, + out_ptr: u32, out_cap: u32, +) -> u32; +``` + +Returns the TLV-serialized entity data, or: + +- `0` if the entity does not exist; +- `u32::MAX` if the entity exists but is larger than `out_cap` (the guest + is expected to grow `out_cap` and retry; the SDK retries from 16 KiB + up to 256 KiB before giving up). + +Otherwise the return value is the number of bytes written into +`[out_ptr, out_ptr + out_cap)`. + +### 5.3 `store_remove` + +```text +fn store_remove( + entity_type_ptr: u32, entity_type_len: u32, + id_ptr: u32, id_len: u32, +); +``` + +Removes an entity by id. No return value. Errors trap. + +### 5.4 `crypto_keccak256` + +```text +fn crypto_keccak256( + input_ptr: u32, input_len: u32, + out_ptr: u32, +); +``` + +Computes Keccak-256 over `[input_ptr, input_ptr + input_len)` and writes +the 32-byte digest into `[out_ptr, out_ptr + 32)`. The output buffer is +always exactly 32 bytes; no length is returned. + +This function is synchronous on the host side (`func_wrap`, not +`func_wrap_async`) because it does no I/O. + +### 5.5 `log_log` + +```text +fn log_log( + level: u32, + message_ptr: u32, message_len: u32, +); +``` + +Forwards a UTF-8 log message to the host logger. `level` matches the +existing AS log levels: 0 = critical, 1 = error, 2 = warning, 3 = info, +4 = debug. Critical level traps the handler. + +### 5.6 `data_source_address` + +```text +fn data_source_address(out_ptr: u32); +``` + +Writes the current data source's contract address (20 bytes) into +`[out_ptr, out_ptr + 20)`. The output is always 20 bytes; no length is +returned. + +### 5.7 `data_source_network` + +```text +fn data_source_network(out_ptr: u32, out_cap: u32) -> u32; +``` + +Writes the network name as UTF-8 bytes into the guest buffer. Returns the +number of bytes written, or `u32::MAX` if the buffer is too small. + +### 5.8 `data_source_create` + +```text +fn data_source_create( + name_ptr: u32, name_len: u32, + params_ptr: u32, params_len: u32, +); +``` + +Spawns a new dynamic data source by template name. `params` is a +TLV-style serialized `Vec`: + +```text +count:u32 (str_len:u32 str_bytes:[u8; str_len])* +``` + +This is the same shape as a TLV `Array` of `String` values, minus the +outer `0x09` array tag and minus the per-element `0x01` string tags +(since the type is statically known). + +### 5.9 `ipfs_cat` + +```text +fn ipfs_cat( + hash_ptr: u32, hash_len: u32, + out_ptr: u32, out_cap: u32, +) -> u32; +``` + +Fetches the bytes pinned at the given IPFS hash and writes them into the +guest buffer. Sentinel returns follow the same convention as `store_get`: + +- `u32::MAX` if `out_cap` is too small +- otherwise the number of bytes written + +If the underlying host fetch returns `HostExportError::PossibleReorg`, +the host marks the instance as `possible_reorg = true` so the block can +be retried, then traps. + +### 5.10 `ethereum_call` + +```text +fn ethereum_call( + addr_ptr: u32, addr_len: u32, + data_ptr: u32, data_len: u32, + out_ptr: u32, out_cap: u32, +) -> u32; +``` + +Performs an `eth_call` against the configured Ethereum endpoint at the +current block. `addr_len` MUST be exactly 20; the host traps otherwise. + +Returns: + +- `0` if the call reverted; +- `u32::MAX` if the response is larger than `out_cap`; +- otherwise the number of return bytes written into the guest buffer. + +Charges a flat `5_000_000_000` gas units in addition to per-byte memory +costs. This matches the cost the AS ABI charges for `ethereum.call`. + +`HostExportError::PossibleReorg` is propagated by setting +`possible_reorg = true` on the instance and trapping. This is +indistinguishable from any other host trap from the mapping's perspective. + +### 5.11 `abort` + +```text +fn abort( + message_ptr: u32, message_len: u32, + file_ptr: u32, file_len: u32, + line: u32, +); +``` + +Forwards a panic from the mapping's panic hook to the host. The host +marks the trap as deterministic and surfaces the panic message and line +number in the error chain. The `file` arguments are read by the host but +currently not echoed back into the error (this may change without a +protocol bump; it's a logging detail). + +### 5.12 Summary Table + +| Function | Returns | Async on host | Notes | +|---|---|---|---| +| `store_set` | (none) | yes | TLV entity payload | +| `store_get` | u32 (len / 0 / u32::MAX) | yes | Buffer-grow protocol | +| `store_remove` | (none) | yes | | +| `crypto_keccak256` | (none) | no | Output is fixed 32 bytes | +| `log_log` | (none) | yes | Critical level traps | +| `data_source_address` | (none) | yes | Output is fixed 20 bytes | +| `data_source_network` | u32 | yes | Buffer-grow protocol | +| `data_source_create` | (none) | yes | Template name + string params | +| `ipfs_cat` | u32 | yes | Buffer-grow protocol; reorg-aware | +| `ethereum_call` | u32 | yes | Reorg-aware; flat 5B gas | +| `abort` | (none, traps) | no | Panic forwarding | + +--- + +## 6. Manifest Format + +A Rust subgraph is signalled by the `mapping.kind` field in +`subgraph.yaml`: + +```yaml +specVersion: 0.0.5 +schema: + file: ./schema.graphql +dataSources: + - kind: ethereum/contract + name: ERC20 + network: mainnet + source: + address: "0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48" + abi: ERC20 + startBlock: 24756400 + mapping: + kind: wasm/rust # NEW: was "wasm/assemblyscript" + apiVersion: 0.0.1 # Rust ABI version + language: rust # NEW + file: ./target/wasm32-unknown-unknown/release/erc20.wasm + entities: + - Transfer + abis: + - name: ERC20 + file: ./abis/ERC20.json + eventHandlers: + - event: Transfer(indexed address,indexed address,uint256) + handler: handle_transfer +``` + +Recognized values for `mapping.kind`: + +- `wasm/assemblyscript` → AS ABI (existing behaviour, unchanged) +- `wasm/rust` → Rust ABI (this specification) + +The parser is `MappingLanguage::from_kind` in +`runtime/wasm/src/rust_abi/mod.rs`. Unknown values are rejected at +manifest validation time. + +The `apiVersion` field for `wasm/rust` modules names the Rust ABI +version, not the AS schema version. The two version namespaces are +independent. + +### 6.1 Linker Dispatch + +In `build_linker()`, the host dispatches on `MappingLanguage`: + +- `AssemblyScript`: link the existing `link_as_exports`/AS host functions, + populate `AscHeapCtx`, run `_start` after instantiation. +- `Rust`: link `link_rust_host_functions`, skip `id_of_type`, skip + `_start`, configure wasmtime with fuel metering enabled. + +The Rust module path also bypasses the `parity_wasm` gas-injection +pipeline. `parity_wasm` cannot parse modern WASM features (e.g., +`memory.copy`, opcode 252), and modules emitted by current Rust +toolchains use them. Gas accounting for Rust modules is delivered +entirely by wasmtime fuel metering, not by code injection. + +--- + +## 7. Maintenance Model + +This section is non-normative but is included to set expectations about +the cost of maintaining the Rust ABI alongside the AS ABI. + +### 7.1 Adding a New Host Function + +The runtime business logic for host exports lives in `HostExports`, +which is shared between both ABIs. To add a new host function, +contributors do the following: + +1. **Implement once in `HostExports`.** Add the method that does the real + work (store access, cryptography, network call, etc.). This is shared + between AS and Rust. +2. **Add an AS wrapper** in `runtime/wasm/src/host_exports.rs` / + `asc_abi/`. This handles `AscPtr` deserialization, AS-specific + nullable conventions, and AS gas charging. +3. **Add a Rust wrapper** in `runtime/wasm/src/rust_abi/host.rs`. This + reads `(ptr, len)` arguments, deserializes via `FromRustWasm`, + forwards to `HostExports`, and writes any output back via the + buffer-grow protocol. + +The Rust wrapper is typically 20–40 lines and is mechanical to write. +The async vs sync choice is dictated by whether `HostExports` is async +for that operation. There is no heap manipulation, no `AscPtr` +juggling, and no runtime version negotiation. + +### 7.2 Maintenance Surface + +Explicitly, the maintenance surface for the Rust ABI is **the +serialization layer only**. Specifically: + +- `runtime/wasm/src/rust_abi/types.rs` — primitive `ToRustWasm` / + `FromRustWasm` impls +- `runtime/wasm/src/rust_abi/entity.rs` — TLV format +- `runtime/wasm/src/rust_abi/trigger.rs` — fixed-layout trigger format +- `runtime/wasm/src/rust_abi/host.rs` — linker glue +- `runtime/wasm/src/rust_abi/mod.rs` — `MappingLanguage` enum + +Total: ~1,450 lines, of which `host.rs` (linker glue) is roughly a +third. None of this code touches the store, gas accounting, chain +ingestion, or scheduler. A breaking change in graph-node's runtime +internals never requires a Rust ABI change unless it adds a new host +function. + +### 7.3 Test Strategy + +Three layers of tests cover the Rust ABI: + +1. **Unit tests** in each `rust_abi/` file (round-trip serialization for + each `Value` variant, each trigger type, each primitive). 12 unit + tests today. ABI test vectors in `tests/integration/` cross-validate + the host-side encoding against the SDK-side decoder byte-for-byte. +2. **WASM integration test** at `tests/integration/tests/wasm_handler.rs`, + which loads a real Rust-compiled ERC20 mapping into wasmtime, + serializes a `RustLogTrigger` using the exact production format, + invokes `handle_transfer(ptr, len)`, and asserts that the resulting + `store_set` call carries the expected entity fields. +3. **Live integration test** at `scripts/live-test.sh`, which deploys an + ERC20 mapping to a running graph-node fork and indexes real USDC + `Transfer` events from Ethereum mainnet, verifying that GraphQL + queries return the correct field values. + +The unit tests are cheap and run on every commit. The integration tests +require a Rust toolchain with the `wasm32-unknown-unknown` target and a +synchronized graph-node instance respectively. + +--- + +## 8. Versioning + +`apiVersion` in the manifest mapping section names the Rust ABI version. +The current version is `0.0.1`. + +### 8.1 What Counts as Breaking + +Any of the following constitute a breaking change and require an +`apiVersion` bump: + +1. Adding, removing, or renumbering a `ValueTag` in the TLV table. +2. Changing the wire layout of any existing `ValueBody`. +3. Changing the byte layout of `RustLogTrigger`, `RustCallTrigger`, or + `RustBlockTrigger` (field order, size, endianness). +4. Changing the signature (parameter count, parameter types, return + type) of any host function in the `graphite` namespace. +5. Changing the meaning of a sentinel return value (`0`, `u32::MAX`). +6. Changing the calling convention for handlers (e.g., adding a third + argument). +7. Changing the required exports (`memory`, `allocate`, `reset_arena`). + +### 8.2 What Does Not Count as Breaking + +The following are explicitly non-breaking and may be done within a +single `apiVersion`: + +1. Adding a new host function (modules that don't import it are + unaffected; modules that do can detect availability at link time + because the host only links functions present in the import section). +2. Adding a new chain `ToRustBytes` impl. +3. Changing internal gas costs, as long as the protocol-visible cost + model (per-byte read/write, flat per-call surcharges) is preserved. +4. Improving error messages. +5. Tightening validation in the host (e.g., rejecting previously + silently-accepted malformed payloads), provided the change is + announced and SDK versions are bumped in lockstep. + +### 8.3 Compatibility Negotiation + +There is no runtime negotiation. The host knows the version it +implements; the manifest declares the version the mapping was built +against; if they don't match, the manifest is rejected at validation +time. There is no support for running multiple Rust ABI versions +simultaneously inside a single graph-node process. + +### 8.4 Relationship to AS `apiVersion` + +The Rust ABI `apiVersion` is independent of the AS ABI `apiVersion` +namespace. A Rust subgraph at `apiVersion: 0.0.1` and an AS subgraph at +`apiVersion: 0.0.7` describe completely different things. The +`mapping.kind` field disambiguates. + +--- + +## 9. Open Issues + +Items that are deliberately left out of `0.0.1` and that future versions +may address: + +1. **Shared constant crate.** The `ValueTag` enum and trigger layouts are + currently defined twice — once in `runtime/wasm/src/rust_abi/` and + once in the Graphite SDK. A shared `graphite-abi` crate (no_std, + types-only) would eliminate the drift risk. Tracking this in the + implementation plan; not a blocker for `0.0.1`. + +2. **Test vectors.** This document defines the wire format + prose-and-table; it does not currently ship a set of binary test + vectors that both the host and SDK can validate against. The unit + tests in `rust_abi/` cover round-tripping but not cross-validation. + +3. **Offchain triggers.** Offchain (`subgraph` data source) trigger + serialization is currently stubbed as empty bytes. This is a known + limitation, not a design decision. + +4. **Non-Ethereum chains.** Only Ethereum implements `ToRustBytes`. NEAR + has a documented sentinel stub (section 4.7.4) that fails decode + deterministically. Other chains will require a per-chain + serialization impl when they want to opt into Rust mappings. + +5. **`apiVersion` initial value.** Whether to start at `0.0.1` (own + namespace) or `0.0.8` (after the latest AS version) is a policy + question for upstream review. This document assumes `0.0.1`. + +--- + +## 10. References + +- Implementation: `runtime/wasm/src/rust_abi/` in this repository +- Companion PR: [#6462](https://github.com/graphprotocol/graph-node/pull/6462) +- Reference SDK: [graphite](https://github.com/cargopete/graphite) +- AS ABI for comparison: `runtime/wasm/src/asc_abi/` +- Host exports (shared between ABIs): `runtime/wasm/src/host_exports.rs` +- Wasmtime fuel metering: + [`Config::consume_fuel`](https://docs.rs/wasmtime/latest/wasmtime/struct.Config.html#method.consume_fuel) diff --git a/graph/src/blockchain/mod.rs b/graph/src/blockchain/mod.rs index f65fcea2e5b..2dd0a586840 100644 --- a/graph/src/blockchain/mod.rs +++ b/graph/src/blockchain/mod.rs @@ -546,6 +546,27 @@ pub struct HostFn { #[async_trait] pub trait RuntimeAdapter: Send + Sync { fn host_fns(&self, ds: &data_source::DataSource) -> Result, Error>; + + /// Get a raw eth_call capability for Rust ABI subgraphs. + /// Returns None if the chain doesn't support raw eth_call (e.g., non-EVM chains). + fn raw_eth_call(&self) -> Option> { + None + } +} + +/// Trait for making raw eth_call requests without ABI encoding. +/// Used by Rust ABI subgraphs where the SDK handles encoding/decoding. +#[async_trait] +pub trait RawEthCall: Send + Sync { + /// Make a raw eth_call to the given address with the provided calldata. + /// Returns Ok(Some(bytes)) on success, Ok(None) on revert, Err on RPC error. + async fn call( + &self, + address: [u8; 20], + calldata: &[u8], + block_ptr: &BlockPtr, + gas: Option, + ) -> Result>, HostExportError>; } pub trait NodeCapabilities { diff --git a/runtime/test/src/common.rs b/runtime/test/src/common.rs index b08251c8604..ac5faab4a18 100644 --- a/runtime/test/src/common.rs +++ b/runtime/test/src/common.rs @@ -130,6 +130,7 @@ pub fn mock_context( ), proof_of_indexing: SharedProofOfIndexing::ignored(), host_fns: Arc::new(Vec::new()), + raw_eth_call: None, debug_fork: None, mapping_logger: Logger::root(slog::Discard, o!()), instrument: false, diff --git a/runtime/wasm/src/host.rs b/runtime/wasm/src/host.rs index 77b03594a5e..962e81b96b9 100644 --- a/runtime/wasm/src/host.rs +++ b/runtime/wasm/src/host.rs @@ -5,7 +5,7 @@ use async_trait::async_trait; use graph::futures01::sync::mpsc::Sender; use graph::futures03::channel::oneshot::channel; -use graph::blockchain::{Blockchain, HostFn, RuntimeAdapter}; +use graph::blockchain::{Blockchain, HostFn, RawEthCall, RuntimeAdapter}; use graph::components::store::{EnsLookup, SubgraphFork}; use graph::components::subgraph::{MappingError, SharedProofOfIndexing}; use graph::data_source::{ @@ -56,7 +56,7 @@ impl RuntimeHostBuilder { impl RuntimeHostBuilderTrait for RuntimeHostBuilder where - ::MappingTrigger: ToAscPtr, + ::MappingTrigger: ToAscPtr + crate::rust_abi::ToRustBytes, { type Host = RuntimeHost; type Req = WasmRequest; @@ -106,6 +106,7 @@ where pub struct RuntimeHost { host_fns: Arc>, + raw_eth_call: Option>, data_source: DataSource, mapping_request_sender: Sender>, host_exports: Arc, @@ -143,9 +144,11 @@ where )); let host_fns = runtime_adapter.host_fns(&data_source).unwrap_or_default(); + let raw_eth_call = runtime_adapter.raw_eth_call(); Ok(RuntimeHost { host_fns: Arc::new(host_fns), + raw_eth_call, data_source, mapping_request_sender, host_exports, @@ -189,6 +192,7 @@ where timestamp: trigger.timestamp(), proof_of_indexing, host_fns: self.host_fns.cheap_clone(), + raw_eth_call: self.raw_eth_call.cheap_clone(), debug_fork: debug_fork.cheap_clone(), mapping_logger: Logger::new(logger, o!("component" => "UserMapping")), instrument, diff --git a/runtime/wasm/src/lib.rs b/runtime/wasm/src/lib.rs index 7c543a4c128..467efbf3a89 100644 --- a/runtime/wasm/src/lib.rs +++ b/runtime/wasm/src/lib.rs @@ -1,4 +1,5 @@ pub mod asc_abi; +pub mod rust_abi; mod host; pub mod to_from; diff --git a/runtime/wasm/src/mapping.rs b/runtime/wasm/src/mapping.rs index 68e7ea249f2..53b7cae594d 100644 --- a/runtime/wasm/src/mapping.rs +++ b/runtime/wasm/src/mapping.rs @@ -1,6 +1,6 @@ use crate::gas_rules::GasRules; use crate::module::{ExperimentalFeatures, ToAscPtr, WasmInstance, WasmInstanceData}; -use graph::blockchain::{BlockTime, Blockchain, HostFn}; +use graph::blockchain::{BlockTime, Blockchain, HostFn, RawEthCall}; use graph::components::store::SubgraphFork; use graph::components::subgraph::{MappingError, SharedProofOfIndexing}; use graph::data_source::{MappingTrigger, TriggerWithHandler}; @@ -29,7 +29,7 @@ pub fn spawn_module( experimental_features: ExperimentalFeatures, ) -> Result>, anyhow::Error> where - ::MappingTrigger: ToAscPtr, + ::MappingTrigger: ToAscPtr + crate::rust_abi::ToRustBytes, { static THREAD_COUNT: AtomicUsize = AtomicUsize::new(0); @@ -150,7 +150,7 @@ async fn handle_trigger( host_metrics: Arc, ) -> Result<(BlockState, Gas), MappingError> where - ::MappingTrigger: ToAscPtr, + ::MappingTrigger: ToAscPtr + crate::rust_abi::ToRustBytes, { let logger = logger.cheap_clone(); @@ -193,6 +193,8 @@ pub struct MappingContext { pub state: BlockState, pub proof_of_indexing: SharedProofOfIndexing, pub host_fns: Arc>, + /// Raw eth_call capability for Rust ABI subgraphs. + pub raw_eth_call: Option>, pub debug_fork: Option>, /// Logger for messages coming from mappings pub mapping_logger: Logger, @@ -214,6 +216,7 @@ impl MappingContext { ), proof_of_indexing: self.proof_of_indexing.cheap_clone(), host_fns: self.host_fns.cheap_clone(), + raw_eth_call: self.raw_eth_call.cheap_clone(), debug_fork: self.debug_fork.cheap_clone(), mapping_logger: Logger::new(&self.logger, o!("component" => "UserMapping")), instrument: self.instrument, @@ -258,6 +261,9 @@ pub struct ValidModule { /// Cache for asc_type_id results. Maps IndexForAscTypeId to their WASM runtime /// type IDs. Populated lazily on first use; deterministic per compiled module. asc_type_id_cache: RwLock>, + + /// The mapping language (AssemblyScript or Rust) detected from module imports. + pub language: crate::rust_abi::MappingLanguage, } impl ValidModule { @@ -267,45 +273,64 @@ impl ValidModule { raw_module: &[u8], timeout: Option, ) -> Result { - // Add the gas calls here. Module name "gas" must match. See also - // e3f03e62-40e4-4f8c-b4a1-d0375cca0b76. We do this by round-tripping the module through - // parity - injecting gas then serializing again. - let parity_module = parity_wasm::elements::Module::from_bytes(raw_module)?; - let mut parity_module = match parity_module.parse_names() { - Ok(module) => module, - Err((errs, module)) => { - for (index, err) in errs { - warn!( - logger, - "unable to parse function name for index {}: {}", - index, - err.to_string() - ); + // Pre-parse detection: scan raw bytes for the "graphite" import namespace string. + // This is a fast heuristic used solely to decide whether to skip parity_wasm gas + // injection, which cannot parse modern WASM features (bulk-memory, reference-types) + // that Rust-compiled modules use. A false positive (an AS module containing the + // literal string "graphite") would be caught later: build_linker() does a proper + // import-map check, and invoke_handler_rust() would immediately fail with + // "function 'allocate' not found" rather than silently misbehaving. + // The authoritative language detection happens in build_linker() below. + let is_rust_module = raw_module.windows(8).any(|w| w == b"graphite"); + + let (raw_module, start_function) = if is_rust_module { + info!( + logger, + "Detected Rust WASM module, skipping parity_wasm gas injection" + ); + (raw_module.to_vec(), None) + } else { + // Add the gas calls here. Module name "gas" must match. See also + // e3f03e62-40e4-4f8c-b4a1-d0375cca0b76. We do this by round-tripping the module through + // parity - injecting gas then serializing again. + let parity_module = parity_wasm::elements::Module::from_bytes(raw_module)?; + let mut parity_module = match parity_module.parse_names() { + Ok(module) => module, + Err((errs, module)) => { + for (index, err) in errs { + warn!( + logger, + "unable to parse function name for index {}: {}", + index, + err.to_string() + ); + } + + module } + }; - module - } + let start_function = parity_module.start_section().map(|index| { + let name = GN_START_FUNCTION_NAME.to_string(); + + parity_module.clear_start_section(); + parity_module + .export_section_mut() + .unwrap() + .entries_mut() + .push(ExportEntry::new( + name.clone(), + parity_wasm::elements::Internal::Function(index), + )); + + name + }); + let parity_module = + wasm_instrument::gas_metering::inject(parity_module, &GasRules, "gas") + .map_err(|_| anyhow!("Failed to inject gas counter"))?; + (parity_module.into_bytes()?, start_function) }; - let start_function = parity_module.start_section().map(|index| { - let name = GN_START_FUNCTION_NAME.to_string(); - - parity_module.clear_start_section(); - parity_module - .export_section_mut() - .unwrap() - .entries_mut() - .push(ExportEntry::new( - name.clone(), - parity_wasm::elements::Internal::Function(index), - )); - - name - }); - let parity_module = wasm_instrument::gas_metering::inject(parity_module, &GasRules, "gas") - .map_err(|_| anyhow!("Failed to inject gas counter"))?; - let raw_module = parity_module.into_bytes()?; - // We use Cranelift as a compilation engine. Cranelift is an optimizing compiler, but that // should not cause determinism issues since it adheres to the Wasm spec and NaN // canonicalization is enabled below. The optimization level is configurable via @@ -322,6 +347,12 @@ impl ValidModule { config.max_wasm_stack(ENV_VARS.mappings.max_stack_size); config.async_support(true); + // Enable wasmtime fuel metering for Rust modules. + // AS modules use parity_wasm-injected gas() calls instead. + if is_rust_module { + config.consume_fuel(true); + } + let engine = &wasmtime::Engine::new(&config)?; let module = wasmtime::Module::from_binary(engine, &raw_module)?; @@ -355,7 +386,7 @@ impl ValidModule { epoch_counter_abort_handle = Some(graph::spawn(epoch_counter).abort_handle()); } - let linker = crate::module::build_linker(engine, &import_name_to_modules)?; + let (linker, language) = crate::module::build_linker(engine, &import_name_to_modules)?; let instance_pre = linker.instantiate_pre(&module)?; Ok(ValidModule { @@ -366,6 +397,7 @@ impl ValidModule { timeout, epoch_counter_abort_handle, asc_type_id_cache: RwLock::new(HashMap::new()), + language, }) } diff --git a/runtime/wasm/src/module/context.rs b/runtime/wasm/src/module/context.rs index 68639e463b3..7f99194631a 100644 --- a/runtime/wasm/src/module/context.rs +++ b/runtime/wasm/src/module/context.rs @@ -1262,3 +1262,228 @@ fn truncate_yaml_bytes_for_logging(bytes: &[u8]) -> String { format!("0x{}", hex::encode(bytes)) } + +// ============================================================================ +// Rust ABI methods +// +// These methods are used by the Rust ABI host functions in rust_abi/host.rs. +// They take pre-extracted values rather than AscPtr types. +// ============================================================================ + +impl WasmInstanceContext<'_> { + /// Rust ABI: store.set + pub async fn rust_store_set( + &mut self, + gas: &GasCounter, + entity_type: &str, + id: &str, + data: std::collections::HashMap, + ) -> Result<(), HostExportError> { + let stopwatch = self.as_ref().host_metrics.stopwatch.cheap_clone(); + let logger = self.as_ref().ctx.logger.cheap_clone(); + + if self.as_ref().ctx.instrument { + debug!(self.as_ref().ctx.logger, "rust_store_set"; + "type" => entity_type, + "id" => id); + } + + // Convert HashMap to HashMap + let data: std::collections::HashMap = + data.into_iter().map(|(k, v)| (Word::from(k), v)).collect(); + + let host_exports = self.as_ref().ctx.host_exports.cheap_clone(); + let ctx = &mut self.as_mut().ctx; + + host_exports + .store_set( + &logger, + &mut ctx.state, + &ctx.proof_of_indexing, + ctx.timestamp, + entity_type.to_string(), + id.to_string(), + data, + &stopwatch, + gas, + ) + .await?; + + Ok(()) + } + + /// Rust ABI: store.get - returns serialized entity bytes or None + pub async fn rust_store_get( + &mut self, + gas: &GasCounter, + entity_type: &str, + id: &str, + ) -> Result>, HostExportError> { + use crate::rust_abi::serialize_entity; + + let host_exports = self.as_ref().ctx.host_exports.cheap_clone(); + let _timer = self + .as_ref() + .host_metrics + .cheap_clone() + .time_host_fn_execution_region("rust_store_get"); + + let entity_option = host_exports + .store_get( + &mut self.as_mut().ctx.state, + entity_type.to_string(), + id.to_string(), + gas, + graph::components::store::GetScope::Store, + ) + .await?; + + if self.as_ref().ctx.instrument { + debug!(self.as_ref().ctx.logger, "rust_store_get"; + "type" => entity_type, + "id" => id, + "found" => entity_option.is_some()); + } + + match entity_option { + Some(entity) => { + let bytes = serialize_entity(&entity); + Ok(Some(bytes)) + } + None => Ok(None), + } + } + + /// Rust ABI: store.remove + pub async fn rust_store_remove( + &mut self, + gas: &GasCounter, + entity_type: &str, + id: &str, + ) -> Result<(), HostExportError> { + let logger = self.as_ref().ctx.logger.cheap_clone(); + + if self.as_ref().ctx.instrument { + debug!(self.as_ref().ctx.logger, "rust_store_remove"; + "type" => entity_type, + "id" => id); + } + + let host_exports = self.as_ref().ctx.host_exports.cheap_clone(); + let ctx = &mut self.as_mut().ctx; + + host_exports.store_remove( + &logger, + &mut ctx.state, + &ctx.proof_of_indexing, + entity_type.to_string(), + id.to_string(), + gas, + ) + } + + /// Rust ABI: log.log + pub async fn rust_log( + &mut self, + gas: &GasCounter, + level: u32, + message: &str, + ) -> Result<(), HostExportError> { + // Convert u32 level to slog::Level + let slog_level = match level { + 0 => slog::Level::Debug, + 1 => slog::Level::Info, + 2 => slog::Level::Warning, + 3 => slog::Level::Error, + _ => slog::Level::Critical, + }; + + let host_exports = self.as_ref().ctx.host_exports.cheap_clone(); + let ctx = &mut self.as_mut().ctx; + + host_exports + .log_log( + &ctx.logger, + slog_level, + message.to_string(), + gas, + &mut ctx.state, + ) + .map_err(|e| HostExportError::Deterministic(e.into())) + } + + /// Rust ABI: dataSource.address + pub async fn rust_data_source_address( + &mut self, + gas: &GasCounter, + ) -> Result, HostExportError> { + let host_exports = self.as_ref().ctx.host_exports.cheap_clone(); + let ctx = &mut self.as_mut().ctx; + + let address = host_exports.data_source_address(gas, &mut ctx.state)?; + Ok(address.to_vec()) + } + + /// Rust ABI: dataSource.network + pub async fn rust_data_source_network( + &mut self, + gas: &GasCounter, + ) -> Result { + let host_exports = self.as_ref().ctx.host_exports.cheap_clone(); + let ctx = &mut self.as_mut().ctx; + + host_exports + .data_source_network(gas, &mut ctx.state) + .map_err(|e| HostExportError::Deterministic(e.into())) + } + + /// Rust ABI: dataSource.create + pub async fn rust_data_source_create( + &mut self, + gas: &GasCounter, + name: &str, + params: Vec, + ) -> Result<(), HostExportError> { + let logger = self.as_ref().ctx.logger.cheap_clone(); + + if self.as_ref().ctx.instrument { + debug!(self.as_ref().ctx.logger, "rust_data_source_create"; + "name" => name, + "params" => format!("{:?}", params)); + } + + let host_exports = self.as_ref().ctx.host_exports.cheap_clone(); + let ctx = &mut self.as_mut().ctx; + + host_exports.data_source_create( + &logger, + &mut ctx.state, + name.to_string(), + params, + None, // No context for now + ctx.block_ptr.number, + gas, + ) + } + + /// Rust ABI: ipfs.cat + pub async fn rust_ipfs_cat( + &mut self, + _gas: &GasCounter, + hash: &str, + ) -> Result, HostExportError> { + let logger = self.as_ref().ctx.logger.cheap_clone(); + + if self.as_ref().ctx.instrument { + debug!(self.as_ref().ctx.logger, "rust_ipfs_cat"; + "hash" => hash); + } + + let host_exports = self.as_ref().ctx.host_exports.cheap_clone(); + + host_exports + .ipfs_cat(&logger, hash.to_string()) + .await + .map_err(HostExportError::PossibleReorg) + } +} diff --git a/runtime/wasm/src/module/instance.rs b/runtime/wasm/src/module/instance.rs index a4bf1f34d81..56a0535edda 100644 --- a/runtime/wasm/src/module/instance.rs +++ b/runtime/wasm/src/module/instance.rs @@ -113,19 +113,216 @@ impl WasmInstance { trigger: TriggerWithHandler>, ) -> Result<(BlockState, Gas), MappingError> where - ::MappingTrigger: ToAscPtr, + ::MappingTrigger: ToAscPtr + crate::rust_abi::ToRustBytes, + { + use crate::rust_abi::MappingLanguage; + + let language = self.store.data().valid_module.language; + + match language { + MappingLanguage::AssemblyScript => { + // Existing AS path + let handler_name = trigger.handler_name().to_owned(); + let gas = self.gas.clone(); + let logging_extras = trigger.logging_extras().cheap_clone(); + let error_context = trigger.trigger.error_context(); + let mut ctx = self.instance_ctx(); + let asc_trigger = trigger.to_asc_ptr(&mut ctx, &gas).await?; + + self.invoke_handler(&handler_name, asc_trigger, logging_extras, error_context) + .await + } + MappingLanguage::Rust => { + // Rust ABI path + self.handle_trigger_rust(trigger).await + } + } + } + + /// Handle a trigger for Rust WASM modules. + /// + /// Rust handlers have signature: `fn(event_ptr: u32, event_len: u32) -> u32` + async fn handle_trigger_rust( + self, + trigger: TriggerWithHandler>, + ) -> Result<(BlockState, Gas), MappingError> + where + ::MappingTrigger: crate::rust_abi::ToRustBytes, { let handler_name = trigger.handler_name().to_owned(); - let gas = self.gas.clone(); let logging_extras = trigger.logging_extras().cheap_clone(); let error_context = trigger.trigger.error_context(); - let mut ctx = self.instance_ctx(); - let asc_trigger = trigger.to_asc_ptr(&mut ctx, &gas).await?; - self.invoke_handler(&handler_name, asc_trigger, logging_extras, error_context) + // Serialize the trigger to TLV bytes for Rust handlers + use crate::rust_abi::ToRustBytes; + let trigger_bytes = match &trigger.trigger { + MappingTrigger::Onchain(t) => t.to_rust_bytes(), + MappingTrigger::Offchain(_) => Vec::new(), // TODO: Offchain triggers + MappingTrigger::Subgraph(_) => Vec::new(), // TODO: Subgraph triggers + }; + + self.invoke_handler_rust(&handler_name, &trigger_bytes, logging_extras, error_context) .await } + /// Invoke a Rust handler with (ptr, len) calling convention. + async fn invoke_handler_rust( + mut self, + handler: &str, + data: &[u8], + logging_extras: Arc, + error_context: Option, + ) -> Result<(BlockState, Gas), MappingError> { + // Get the handler function + let func = self + .instance + .get_func(self.store.as_context_mut(), handler) + .with_context(|| format!("function {} not found", handler))?; + + // Rust handlers have signature: fn(ptr: u32, len: u32) -> u32 + let func = func + .typed::<(u32, u32), u32>(self.store.as_context_mut()) + .context("wasm function has incorrect signature for Rust handler")?; + + // Allocate memory in the WASM module and write the trigger data + let allocate = self + .instance + .get_func(self.store.as_context_mut(), "allocate") + .with_context(|| "Rust WASM module must export 'allocate' function")? + .typed::(self.store.as_context_mut()) + .context("'allocate' has wrong signature")?; + + let ptr = allocate + .call_async(self.store.as_context_mut(), data.len() as u32) + .await + .context("allocate call failed")?; + + // Write trigger data to allocated memory + let memory = self + .instance + .get_memory(self.store.as_context_mut(), "memory") + .context("failed to get memory export")?; + + memory + .write(self.store.as_context_mut(), ptr as usize, data) + .context("failed to write trigger data to memory")?; + + // Enter handler context + self.instance_ctx().as_mut().ctx.state.enter_handler(); + + // Call the handler + let deterministic_error: Option = match func + .call_async(self.store.as_context_mut(), (ptr, data.len() as u32)) + .await + { + Ok(result_code) => { + if result_code != 0 { + // Non-zero return code indicates handler error + Some(anyhow::anyhow!( + "handler returned error code {}", + result_code + )) + } else { + assert!(!self.instance_ctx().as_ref().possible_reorg); + assert!(!self.instance_ctx().as_ref().deterministic_host_trap); + None + } + } + Err(trap) if self.instance_ctx().as_ref().possible_reorg => { + self.instance_ctx().as_mut().ctx.state.exit_handler(); + return Err(MappingError::PossibleReorg(trap)); + } + Err(trap) + if trap + .chain() + .any(|e| e.downcast_ref::() == Some(&Trap::Interrupt)) => + { + self.instance_ctx().as_mut().ctx.state.exit_handler(); + return Err(MappingError::Unknown(trap.context(format!( + "Handler '{}' hit the timeout of '{}' seconds", + handler, + self.instance_ctx() + .as_ref() + .valid_module + .timeout + .unwrap() + .as_secs() + )))); + } + Err(trap) + if trap + .chain() + .any(|e| e.downcast_ref::() == Some(&Trap::OutOfFuel)) => + { + // Fuel exhaustion is deterministic — same code always burns the same fuel. + Some(anyhow::anyhow!( + "Handler '{}' exceeded fuel limit (possible infinite loop)", + handler + )) + } + Err(trap) => { + let trap_is_deterministic = is_trap_deterministic(&trap) + || self.instance_ctx().as_ref().deterministic_host_trap; + match trap_is_deterministic { + true => Some(trap), + false => { + self.instance_ctx().as_mut().ctx.state.exit_handler(); + return Err(MappingError::Unknown(trap)); + } + } + } + }; + + // Try to call reset_arena if it exists (optional cleanup) + if let Some(reset_arena) = self + .instance + .get_func(self.store.as_context_mut(), "reset_arena") + { + if let Ok(reset_fn) = reset_arena.typed::<(), ()>(self.store.as_context_mut()) { + let _ = reset_fn.call_async(self.store.as_context_mut(), ()).await; + } + } + + // Handle deterministic errors + if let Some(deterministic_error) = deterministic_error { + let deterministic_error = match error_context { + Some(error_context) => deterministic_error.context(error_context), + None => deterministic_error, + }; + let message = format!("{:#}", deterministic_error).replace('\n', "\t"); + + error!(&self.instance_ctx().as_ref().ctx.logger, + "Handler skipped due to execution failure"; + "handler" => handler, + "error" => &message, + logging_extras + ); + let subgraph_error = SubgraphError { + subgraph_id: self + .instance_ctx() + .as_ref() + .ctx + .host_exports + .subgraph_id + .clone(), + message, + block_ptr: Some(self.instance_ctx().as_ref().ctx.block_ptr.cheap_clone()), + handler: Some(handler.to_string()), + deterministic: true, + }; + self.instance_ctx() + .as_mut() + .ctx + .state + .exit_handler_and_discard_changes_due_to_error(subgraph_error); + } else { + self.instance_ctx().as_mut().ctx.state.exit_handler(); + } + + let gas = self.gas.get(); + Ok((self.take_ctx().take_state(), gas)) + } + pub fn take_ctx(self) -> WasmInstanceData { self.store.into_data() } @@ -332,12 +529,52 @@ fn link_chain_host_fn( /// All host functions (builtins + chain-specific) are registered here. Chain-specific host functions /// are dispatched generically by looking up the `HostFn` by name from `caller.data().ctx.host_fns` /// at call time rather than capturing concrete closures at link time. +/// +/// Returns the linker and the detected mapping language (AssemblyScript or Rust). pub(crate) fn build_linker( engine: &wasmtime::Engine, import_name_to_modules: &BTreeMap>, -) -> Result, anyhow::Error> { +) -> Result<(Linker, crate::rust_abi::MappingLanguage), anyhow::Error> { + use crate::rust_abi::{is_rust_module, link_rust_host_functions, MappingLanguage}; + let mut linker: Linker = wasmtime::Linker::new(engine); + // Detect if this is a Rust module by checking for graphite namespace imports + let language = if is_rust_module(import_name_to_modules) { + MappingLanguage::Rust + } else { + MappingLanguage::AssemblyScript + }; + + // For Rust modules, link the Rust ABI host functions + if language == MappingLanguage::Rust { + link_rust_host_functions(&mut linker, import_name_to_modules)?; + + // Link gas metering (shared between AS and Rust) + linker.func_wrap( + "gas", + "gas", + |mut caller: wasmtime::Caller<'_, WasmInstanceData>, + gas_used: u32| + -> anyhow::Result<()> { + use graph::runtime::gas::SaturatingInto; + if let Err(e) = caller + .data() + .gas + .consume_host_fn_with_metrics(gas_used.saturating_into(), "gas") + { + caller.data_mut().deterministic_host_trap = true; + return Err(e.into()); + } + Ok(()) + }, + )?; + + return Ok((linker, language)); + } + + // AssemblyScript module - use the existing link! macro + // Helper to turn a parameter name into 'u32' for a tuple type // (param1, parma2, ..) : (u32, u32, ..) macro_rules! param_u32 { @@ -595,7 +832,7 @@ pub(crate) fn build_linker( }, )?; - Ok(linker) + Ok((linker, language)) } impl WasmInstance { @@ -630,18 +867,29 @@ impl WasmInstance { // See also: runtime-timeouts store.set_epoch_deadline(2); + // For Rust modules, set initial fuel for wasmtime's built-in fuel metering. + // This replaces the parity_wasm gas injection which can't parse modern WASM features. + // 10 billion fuel units is generous for any reasonable handler but catches infinite loops. + if valid_module.language == crate::rust_abi::MappingLanguage::Rust { + store.set_fuel(10_000_000_000)?; + } + let instance = valid_module .instance_pre .instantiate_async(store.as_context_mut()) .await?; + let language = valid_module.language; let asc_heap = AscHeapCtx::new( &instance, &mut WasmInstanceContext::new(&mut store), api_version.clone(), + language, )?; store.data_mut().set_asc_heap(asc_heap); + let is_rust = language == crate::rust_abi::MappingLanguage::Rust; + // See start_function comment for more information // TL;DR; we need the wasmtime::Instance to create the heap, therefore // we cannot execute anything that requires access to the heap before it's created. @@ -654,15 +902,18 @@ impl WasmInstance { .await?; } - match api_version { - version if version <= Version::new(0, 0, 4) => {} - _ => { - instance - .get_func(store.as_context_mut(), "_start") - .context("`_start` function not found")? - .typed::<(), ()>(store.as_context_mut())? - .call_async(store.as_context_mut(), ()) - .await?; + // _start is an AssemblyScript-specific entry point; Rust modules don't export it. + if !is_rust { + match api_version { + version if version <= Version::new(0, 0, 4) => {} + _ => { + instance + .get_func(store.as_context_mut(), "_start") + .context("`_start` function not found")? + .typed::<(), ()>(store.as_context_mut())? + .call_async(store.as_context_mut(), ()) + .await?; + } } } diff --git a/runtime/wasm/src/module/mod.rs b/runtime/wasm/src/module/mod.rs index fddfe4b8c01..1077043de85 100644 --- a/runtime/wasm/src/module/mod.rs +++ b/runtime/wasm/src/module/mod.rs @@ -186,7 +186,10 @@ impl AscHeapCtx { instance: &wasmtime::Instance, ctx: &mut WasmInstanceContext<'_>, api_version: Version, + language: crate::rust_abi::MappingLanguage, ) -> anyhow::Result> { + let is_rust = language == crate::rust_abi::MappingLanguage::Rust; + // Provide access to the WASM runtime linear memory let memory = instance .get_memory(ctx.as_context_mut(), "memory") @@ -203,15 +206,20 @@ impl AscHeapCtx { .typed(ctx.as_context())? .clone(); - let id_of_type = match &api_version { - version if *version <= Version::new(0, 0, 4) => None, - _ => Some( - instance - .get_func(ctx.as_context_mut(), "id_of_type") - .context("`id_of_type` function not found")? - .typed(ctx)? - .clone(), - ), + // id_of_type is an AssemblyScript-specific export; Rust modules don't have it. + let id_of_type = if is_rust { + None + } else { + match &api_version { + version if *version <= Version::new(0, 0, 4) => None, + _ => Some( + instance + .get_func(ctx.as_context_mut(), "id_of_type") + .context("`id_of_type` function not found")? + .typed(ctx)? + .clone(), + ), + } }; Ok(Arc::new(AscHeapCtx { diff --git a/runtime/wasm/src/rust_abi/entity.rs b/runtime/wasm/src/rust_abi/entity.rs new file mode 100644 index 00000000000..60efc818d59 --- /dev/null +++ b/runtime/wasm/src/rust_abi/entity.rs @@ -0,0 +1,516 @@ +//! Entity serialization for Rust WASM modules. +//! +//! Serializes entity data to/from the TLV format used by Rust subgraphs. +//! Works with `HashMap` for deserialization since graph-node's +//! `Entity` type requires schema context for construction. + +use super::types::{tags, FromRustWasm, ToRustWasm, ValueTag}; +use graph::data::store::scalar::Bytes; +use graph::data::store::{Entity, Value}; +use graph::prelude::*; +use std::collections::HashMap; +use std::io::{self, Read, Write}; + +/// A map of field names to values, representing entity data. +/// This is what we deserialize from WASM before passing to host_exports. +pub type EntityData = HashMap; + +/// Serialize an Entity to Rust WASM format. +/// +/// Format: +/// ```text +/// field_count: u32 +/// for each field: +/// key_len: u32 +/// key: [u8; key_len] +/// value: Value (tagged) +/// ``` +pub fn serialize_entity(entity: &Entity) -> Vec { + let mut buf = Vec::new(); + entity.write_to(&mut buf).expect("write to vec cannot fail"); + buf +} + +/// Deserialize entity data from Rust WASM format. +/// +/// Returns a HashMap that can be passed to host_exports.store_set(). +pub fn deserialize_entity_data(bytes: &[u8]) -> io::Result { + EntityData::read_from(&mut &bytes[..]) +} + +impl ToRustWasm for Entity { + fn write_to(&self, writer: &mut W) -> io::Result<()> { + let fields: Vec<_> = self.into_iter().collect(); + writer.write_all(&(fields.len() as u32).to_le_bytes())?; + + for (key, value) in fields { + // Write key (as String) + let key_bytes = key.as_bytes(); + writer.write_all(&(key_bytes.len() as u32).to_le_bytes())?; + writer.write_all(key_bytes)?; + // Write value + value.write_to(writer)?; + } + + Ok(()) + } +} + +impl ToRustWasm for EntityData { + fn write_to(&self, writer: &mut W) -> io::Result<()> { + writer.write_all(&(self.len() as u32).to_le_bytes())?; + + for (key, value) in self { + // Write key + key.as_str().write_to(writer)?; + // Write value + value.write_to(writer)?; + } + + Ok(()) + } +} + +impl FromRustWasm for EntityData { + fn read_from(reader: &mut R) -> io::Result { + let mut len_buf = [0u8; 4]; + reader.read_exact(&mut len_buf)?; + let count = u32::from_le_bytes(len_buf) as usize; + + let mut fields = HashMap::with_capacity(count); + for _ in 0..count { + let key = String::read_from(reader)?; + let value = Value::read_from(reader)?; + fields.insert(key, value); + } + + Ok(fields) + } +} + +impl ToRustWasm for Value { + fn write_to(&self, writer: &mut W) -> io::Result<()> { + match self { + Value::Null => { + writer.write_all(&[tags::NULL])?; + } + Value::String(s) => { + writer.write_all(&[tags::STRING])?; + s.as_str().write_to(writer)?; + } + Value::Int(n) => { + writer.write_all(&[tags::INT])?; + n.write_to(writer)?; + } + Value::Int8(n) => { + writer.write_all(&[tags::INT8])?; + n.write_to(writer)?; + } + Value::BigInt(n) => { + writer.write_all(&[tags::BIG_INT])?; + n.write_to(writer)?; + } + Value::BigDecimal(n) => { + writer.write_all(&[tags::BIG_DECIMAL])?; + n.write_to(writer)?; + } + Value::Bool(b) => { + writer.write_all(&[tags::BOOL])?; + b.write_to(writer)?; + } + Value::Bytes(b) => { + writer.write_all(&[tags::BYTES])?; + b.as_slice().to_vec().write_to(writer)?; + } + Value::List(arr) => { + writer.write_all(&[tags::ARRAY])?; + writer.write_all(&(arr.len() as u32).to_le_bytes())?; + for v in arr { + v.write_to(writer)?; + } + } + Value::Timestamp(ts) => { + // Serialize timestamp as BigInt (microseconds since epoch) + writer.write_all(&[tags::BIG_INT])?; + BigInt::from(ts.as_microseconds_since_epoch()).write_to(writer)?; + } + } + Ok(()) + } +} + +impl FromRustWasm for Value { + fn read_from(reader: &mut R) -> io::Result { + let mut tag_buf = [0u8; 1]; + reader.read_exact(&mut tag_buf)?; + + let tag = ValueTag::from_u8(tag_buf[0]) + .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "unknown value tag"))?; + + let value = match tag { + ValueTag::Null => Value::Null, + ValueTag::String => Value::String(String::read_from(reader)?), + ValueTag::Int => Value::Int(i32::read_from(reader)?), + ValueTag::Int8 => Value::Int8(i64::read_from(reader)?), + ValueTag::BigInt => Value::BigInt(BigInt::read_from(reader)?), + ValueTag::BigDecimal => Value::BigDecimal(BigDecimal::read_from(reader)?), + ValueTag::Bool => Value::Bool(bool::read_from(reader)?), + ValueTag::Bytes => { + let bytes = Vec::::read_from(reader)?; + Value::Bytes(Bytes::from(bytes)) + } + ValueTag::Address => { + let addr = <[u8; 20]>::read_from(reader)?; + Value::Bytes(Bytes::from(addr.as_slice())) + } + ValueTag::Array => { + let mut len_buf = [0u8; 4]; + reader.read_exact(&mut len_buf)?; + let len = u32::from_le_bytes(len_buf) as usize; + + let mut arr = Vec::with_capacity(len); + for _ in 0..len { + arr.push(Value::read_from(reader)?); + } + Value::List(arr) + } + }; + + Ok(value) + } +} + +/// Add ToRustWasm impl for &str so we can serialize string slices directly. +impl ToRustWasm for &str { + fn write_to(&self, writer: &mut W) -> io::Result<()> { + let bytes = self.as_bytes(); + writer.write_all(&(bytes.len() as u32).to_le_bytes())?; + writer.write_all(bytes) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn value_null_roundtrip() { + let v = Value::Null; + let bytes = v.to_bytes(); + let recovered = Value::from_bytes(&bytes).unwrap(); + assert_eq!(v, recovered); + } + + #[test] + fn value_string_roundtrip() { + let v = Value::String("hello world".to_string()); + let bytes = v.to_bytes(); + let recovered = Value::from_bytes(&bytes).unwrap(); + assert_eq!(v, recovered); + } + + #[test] + fn value_int_roundtrip() { + let v = Value::Int(42); + let bytes = v.to_bytes(); + let recovered = Value::from_bytes(&bytes).unwrap(); + assert_eq!(v, recovered); + } + + #[test] + fn value_bool_roundtrip() { + let v = Value::Bool(true); + let bytes = v.to_bytes(); + let recovered = Value::from_bytes(&bytes).unwrap(); + assert_eq!(v, recovered); + } + + #[test] + fn value_array_roundtrip() { + let v = Value::List(vec![ + Value::Int(1), + Value::Int(2), + Value::String("three".into()), + ]); + let bytes = v.to_bytes(); + let recovered = Value::from_bytes(&bytes).unwrap(); + assert_eq!(v, recovered); + } + + #[test] + fn entity_data_roundtrip() { + let mut data = EntityData::new(); + data.insert("id".to_string(), Value::String("test-123".to_string())); + data.insert("count".to_string(), Value::Int(42)); + data.insert( + "balance".to_string(), + Value::BigInt(BigInt::from(1000000000000_u64)), + ); + data.insert("active".to_string(), Value::Bool(true)); + + let bytes = data.to_bytes(); + let recovered = EntityData::from_bytes(&bytes).unwrap(); + + assert_eq!(data.get("id"), recovered.get("id")); + assert_eq!(data.get("count"), recovered.get("count")); + assert_eq!(data.get("balance"), recovered.get("balance")); + assert_eq!(data.get("active"), recovered.get("active")); + } + + // ------------------------------------------------------------------------- + // ABI cross-validation vectors. + // + // These tests encode known byte sequences by hand and assert that + // graph-node's ToRustWasm/FromRustWasm impls produce identical bytes. + // The same raw bytes are validated against the Graphite SDK in + // graphite/src/abi_vectors_tests.rs. + // ------------------------------------------------------------------------- + + fn le32(n: u32) -> [u8; 4] { + n.to_le_bytes() + } + + // -- Null (tag 0x00) -- + + #[test] + fn abi_vec_null_encodes_to_single_byte() { + let bytes = Value::Null.to_bytes(); + assert_eq!(bytes, [0x00u8]); + } + + #[test] + fn abi_vec_null_decode() { + let v = Value::from_bytes(&[0x00u8]).unwrap(); + assert_eq!(v, Value::Null); + } + + // -- String (tag 0x01, len:u32 LE, utf-8 bytes) -- + + #[test] + fn abi_vec_string_known_bytes() { + // "hi" → [0x01, 0x02 0x00 0x00 0x00, 0x68 0x69] + let v = Value::String("hi".to_string()); + let bytes = v.to_bytes(); + let mut expected = vec![0x01u8]; + expected.extend_from_slice(&le32(2)); + expected.extend_from_slice(b"hi"); + assert_eq!(bytes, expected); + } + + #[test] + fn abi_vec_string_decode_known_bytes() { + let mut raw = vec![0x01u8]; + raw.extend_from_slice(&le32(2)); + raw.extend_from_slice(b"hi"); + let v = Value::from_bytes(&raw).unwrap(); + assert_eq!(v, Value::String("hi".to_string())); + } + + // -- Int (tag 0x02, i32 LE 4 bytes) -- + + #[test] + fn abi_vec_int_known_bytes() { + let v = Value::Int(42); + let bytes = v.to_bytes(); + let mut expected = vec![0x02u8]; + expected.extend_from_slice(&42i32.to_le_bytes()); + assert_eq!(bytes, expected); + } + + #[test] + fn abi_vec_int_negative() { + let v = Value::Int(-1); + let bytes = v.to_bytes(); + let mut expected = vec![0x02u8]; + expected.extend_from_slice(&(-1i32).to_le_bytes()); + assert_eq!(bytes, expected); + } + + // -- Int8 (tag 0x03, i64 LE 8 bytes) -- + + #[test] + fn abi_vec_int8_known_bytes() { + let v = Value::Int8(i64::MAX); + let bytes = v.to_bytes(); + let mut expected = vec![0x03u8]; + expected.extend_from_slice(&i64::MAX.to_le_bytes()); + assert_eq!(bytes, expected); + } + + // -- BigInt (tag 0x04, len:u32 LE, signed-LE bytes) -- + + #[test] + fn abi_vec_bigint_uses_signed_le() { + // 1000 in signed-LE is [0xe8, 0x03] + let n = BigInt::from(1000u64); + let le = n.to_signed_bytes_le(); + assert_eq!(le, vec![0xe8u8, 0x03]); + + let v = Value::BigInt(n); + let bytes = v.to_bytes(); + assert_eq!(bytes[0], 0x04); + let len = u32::from_le_bytes(bytes[1..5].try_into().unwrap()) as usize; + assert_eq!(len, 2); + assert_eq!(&bytes[5..], &[0xe8u8, 0x03]); + } + + #[test] + fn abi_vec_bigint_decode_known_le_bytes() { + // hand-encoded 1000 as signed-LE + let mut raw = vec![0x04u8]; + raw.extend_from_slice(&le32(2)); + raw.push(0xe8); + raw.push(0x03); + let v = Value::from_bytes(&raw).unwrap(); + assert_eq!(v, Value::BigInt(BigInt::from(1000u64))); + } + + #[test] + fn abi_vec_bigint_zero_len_zero() { + let v = Value::BigInt(BigInt::from(0i32)); + let bytes = v.to_bytes(); + // zero may encode as empty or as [0x00]; either is valid as long as decode round-trips + let recovered = Value::from_bytes(&bytes).unwrap(); + assert_eq!(recovered, Value::BigInt(BigInt::from(0i32))); + } + + // -- BigDecimal (tag 0x05, len:u32 LE, UTF-8 string) -- + + #[test] + fn abi_vec_bigdecimal_encodes_as_string() { + use std::str::FromStr; + let d = graph::prelude::BigDecimal::from_str("3.14").unwrap(); + let v = Value::BigDecimal(d); + let bytes = v.to_bytes(); + assert_eq!(bytes[0], 0x05); + let len = u32::from_le_bytes(bytes[1..5].try_into().unwrap()) as usize; + let s = std::str::from_utf8(&bytes[5..5 + len]).unwrap(); + // must be a valid decimal string representation + assert!( + s.contains('.') + || s.chars() + .all(|c| c.is_ascii_digit() || c == '-' || c == 'E' || c == 'e'), + "expected decimal string, got: {}", + s + ); + } + + #[test] + fn abi_vec_bigdecimal_decode_known_bytes() { + use std::str::FromStr; + let s = b"3.14"; + let mut raw = vec![0x05u8]; + raw.extend_from_slice(&le32(s.len() as u32)); + raw.extend_from_slice(s); + let v = Value::from_bytes(&raw).unwrap(); + let expected = Value::BigDecimal(graph::prelude::BigDecimal::from_str("3.14").unwrap()); + assert_eq!(v, expected); + } + + // -- Bool (tag 0x06) -- + + #[test] + fn abi_vec_bool_true_known_bytes() { + assert_eq!(Value::Bool(true).to_bytes(), [0x06u8, 0x01]); + } + + #[test] + fn abi_vec_bool_false_known_bytes() { + assert_eq!(Value::Bool(false).to_bytes(), [0x06u8, 0x00]); + } + + // -- Bytes (tag 0x07, len:u32 LE, raw bytes) -- + + #[test] + fn abi_vec_bytes_known_bytes() { + use graph::data::store::scalar::Bytes as StoreBytes; + let payload = vec![0xde, 0xad, 0xbe, 0xef]; + let v = Value::Bytes(StoreBytes::from(payload.clone())); + let encoded = v.to_bytes(); + let mut expected = vec![0x07u8]; + expected.extend_from_slice(&le32(4)); + expected.extend_from_slice(&payload); + assert_eq!(encoded, expected); + } + + // -- Address (tag 0x08, 20 raw bytes, NO length prefix) -- + + #[test] + fn abi_vec_address_decode_tag_0x08() { + // The SDK writes Address as tag 0x08 + 20 raw bytes (no length prefix). + // graph-node decodes tag 0x08 as Value::Bytes(20 bytes). + // Note: graph-node re-encodes Bytes as tag 0x07 (length-prefixed); the + // asymmetry is intentional — 0x08 is a SDK-write/host-read optimisation. + let addr = [0xabu8; 20]; + let mut raw = vec![0x08u8]; + raw.extend_from_slice(&addr); + assert_eq!(raw.len(), 21); // tag + 20 bytes, no length prefix + + // graph-node must decode SDK-written 0x08 as Bytes carrying the 20-byte address + let v = Value::from_bytes(&raw).unwrap(); + if let Value::Bytes(b) = &v { + assert_eq!(b.as_slice(), &addr); + } else { + panic!("expected Bytes for Address tag, got {:?}", v); + } + } + + // -- Array (tag 0x09, len:u32 LE, tagged Values) -- + + #[test] + fn abi_vec_array_empty() { + let v = Value::List(vec![]); + let bytes = v.to_bytes(); + let mut expected = vec![0x09u8]; + expected.extend_from_slice(&le32(0)); + assert_eq!(bytes, expected); + } + + #[test] + fn abi_vec_array_known_bytes() { + // [Int(1), Bool(true)] + let v = Value::List(vec![Value::Int(1), Value::Bool(true)]); + let bytes = v.to_bytes(); + assert_eq!(bytes[0], 0x09); + let count = u32::from_le_bytes(bytes[1..5].try_into().unwrap()); + assert_eq!(count, 2); + } + + #[test] + fn abi_vec_array_decode_known_bytes() { + let mut raw = vec![0x09u8]; + raw.extend_from_slice(&le32(2)); + raw.push(0x02); + raw.extend_from_slice(&1i32.to_le_bytes()); + raw.push(0x06); + raw.push(0x01); + let v = Value::from_bytes(&raw).unwrap(); + assert_eq!(v, Value::List(vec![Value::Int(1), Value::Bool(true)])); + } + + // -- Cross-wire: spec worked example -- + + #[test] + fn abi_vec_spec_worked_example() { + // { id: "tx-1", value: 42, active: true } (spec section 4.6.1) + // field_count: 3, field order unspecified but all fields must survive + let mut data = EntityData::new(); + data.insert("id".to_string(), Value::String("tx-1".to_string())); + data.insert("value".to_string(), Value::Int(42)); + data.insert("active".to_string(), Value::Bool(true)); + + let bytes = data.to_bytes(); + + // field count must be 3 + let count = u32::from_le_bytes(bytes[0..4].try_into().unwrap()); + assert_eq!(count, 3); + + let recovered = EntityData::from_bytes(&bytes).unwrap(); + assert_eq!( + recovered.get("id"), + Some(&Value::String("tx-1".to_string())) + ); + assert_eq!(recovered.get("value"), Some(&Value::Int(42))); + assert_eq!(recovered.get("active"), Some(&Value::Bool(true))); + } +} diff --git a/runtime/wasm/src/rust_abi/host.rs b/runtime/wasm/src/rust_abi/host.rs new file mode 100644 index 00000000000..10cc2e58824 --- /dev/null +++ b/runtime/wasm/src/rust_abi/host.rs @@ -0,0 +1,545 @@ +//! Rust ABI host function bindings. +//! +//! Links host functions for Rust WASM modules using the `graphite` namespace. +//! These use ptr+len calling convention instead of AS's AscPtr. + +use std::collections::BTreeMap; + +use super::entity::deserialize_entity_data; +use crate::gas_rules::{GAS_COST_LOAD, GAS_COST_STORE}; +use crate::module::{WasmInstanceContext, WasmInstanceData}; +use graph::prelude::*; +use graph::runtime::gas::Gas; +use wasmtime::{AsContext, AsContextMut, Caller, Linker, Memory}; + +/// Read bytes from WASM memory with gas metering. +fn read_bytes_with_gas( + memory: &Memory, + store: impl AsContext, + ptr: u32, + len: u32, + gas: &graph::runtime::gas::GasCounter, +) -> Result, anyhow::Error> { + // Charge gas for memory read + gas.consume_host_fn_with_metrics(Gas::new(GAS_COST_LOAD as u64 * len as u64), "rust_abi_read")?; + + let data = memory.data(&store); + let start = ptr as usize; + let end = start + len as usize; + + if end > data.len() { + anyhow::bail!( + "memory access out of bounds: {}..{} (memory size: {})", + start, + end, + data.len() + ); + } + + Ok(data[start..end].to_vec()) +} + +/// Read a string from WASM memory. +fn read_string_with_gas( + memory: &Memory, + store: impl AsContext, + ptr: u32, + len: u32, + gas: &graph::runtime::gas::GasCounter, +) -> Result { + let bytes = read_bytes_with_gas(memory, store, ptr, len, gas)?; + String::from_utf8(bytes).map_err(|e| anyhow::anyhow!("invalid UTF-8: {}", e)) +} + +/// Write bytes to WASM memory with gas metering. +fn write_bytes_with_gas( + memory: &Memory, + mut store: impl AsContextMut, + ptr: u32, + data: &[u8], + gas: &graph::runtime::gas::GasCounter, +) -> Result<(), anyhow::Error> { + // Charge gas for memory write + gas.consume_host_fn_with_metrics( + Gas::new(GAS_COST_STORE as u64 * data.len() as u64), + "rust_abi_write", + )?; + + let mem_data = memory.data_mut(&mut store); + let start = ptr as usize; + let end = start + data.len(); + + if end > mem_data.len() { + anyhow::bail!( + "memory access out of bounds: {}..{} (memory size: {})", + start, + end, + mem_data.len() + ); + } + + mem_data[start..end].copy_from_slice(data); + Ok(()) +} + +/// Get memory from a WASM instance. +fn get_memory(caller: &mut Caller<'_, WasmInstanceData>) -> Result { + caller + .get_export("memory") + .and_then(|e| e.into_memory()) + .ok_or_else(|| anyhow::anyhow!("failed to get WASM memory export")) +} + +/// Deserialize a Vec from TLV format. +/// +/// Format: [count: u32] [str_len: u32, str_bytes]... +fn deserialize_string_vec(bytes: &[u8]) -> Result, anyhow::Error> { + if bytes.len() < 4 { + return Ok(Vec::new()); + } + + let count = u32::from_le_bytes(bytes[0..4].try_into()?) as usize; + let mut result = Vec::with_capacity(count); + let mut pos = 4; + + for _ in 0..count { + if pos + 4 > bytes.len() { + anyhow::bail!("truncated string vec data"); + } + let str_len = u32::from_le_bytes(bytes[pos..pos + 4].try_into()?) as usize; + pos += 4; + + if pos + str_len > bytes.len() { + anyhow::bail!("truncated string data"); + } + let s = String::from_utf8(bytes[pos..pos + str_len].to_vec())?; + pos += str_len; + + result.push(s); + } + + Ok(result) +} + +/// Link Rust ABI host functions to a wasmtime Linker. +/// +/// This registers all host functions in the `graphite` namespace +/// with ptr+len calling convention. Only links functions that the +/// module actually imports. +pub fn link_rust_host_functions( + linker: &mut Linker, + import_name_to_modules: &BTreeMap>, +) -> anyhow::Result<()> { + // Helper to check if a function is imported from graphite namespace + let is_graphite_import = |name: &str| -> bool { + import_name_to_modules + .get(name) + .map(|modules| modules.iter().any(|m| m == "graphite")) + .unwrap_or(false) + }; + + // ========== Store Operations ========== + + if is_graphite_import("store_set") { + linker.func_wrap_async( + "graphite", + "store_set", + |mut caller: Caller<'_, WasmInstanceData>, + (entity_type_ptr, entity_type_len, id_ptr, id_len, data_ptr, data_len): ( + u32, + u32, + u32, + u32, + u32, + u32, + )| { + Box::new(async move { + let memory = get_memory(&mut caller)?; + let gas = caller.data().gas.cheap_clone(); + + let entity_type = read_string_with_gas( + &memory, + &caller, + entity_type_ptr, + entity_type_len, + &gas, + )?; + let id = read_string_with_gas(&memory, &caller, id_ptr, id_len, &gas)?; + let data_bytes = + read_bytes_with_gas(&memory, &caller, data_ptr, data_len, &gas)?; + + let entity_data = deserialize_entity_data(&data_bytes) + .map_err(|e| anyhow::anyhow!("failed to deserialize entity: {}", e))?; + + // Call the actual store_set through WasmInstanceContext + let mut ctx = std::pin::pin!(WasmInstanceContext::new(&mut caller)); + ctx.rust_store_set(&gas, &entity_type, &id, entity_data) + .await?; + + Ok(()) + }) + }, + )?; + } + + if is_graphite_import("store_get") { + linker.func_wrap_async( + "graphite", + "store_get", + |mut caller: Caller<'_, WasmInstanceData>, + (entity_type_ptr, entity_type_len, id_ptr, id_len, out_ptr, out_cap): ( + u32, + u32, + u32, + u32, + u32, + u32, + )| { + Box::new(async move { + let memory = get_memory(&mut caller)?; + let gas = caller.data().gas.cheap_clone(); + + let entity_type = read_string_with_gas( + &memory, + &caller, + entity_type_ptr, + entity_type_len, + &gas, + )?; + let id = read_string_with_gas(&memory, &caller, id_ptr, id_len, &gas)?; + + // Call the actual store_get through WasmInstanceContext + let mut ctx = std::pin::pin!(WasmInstanceContext::new(&mut caller)); + let result = ctx.rust_store_get(&gas, &entity_type, &id).await?; + + match result { + Some(bytes) => { + if bytes.len() > out_cap as usize { + // Buffer too small, return required size as error indicator + Ok(u32::MAX) + } else { + let memory = get_memory(&mut caller)?; + write_bytes_with_gas(&memory, &mut caller, out_ptr, &bytes, &gas)?; + Ok(bytes.len() as u32) + } + } + None => Ok(0), // Not found + } + }) + }, + )?; + } + + if is_graphite_import("store_remove") { + linker.func_wrap_async( + "graphite", + "store_remove", + |mut caller: Caller<'_, WasmInstanceData>, + (entity_type_ptr, entity_type_len, id_ptr, id_len): (u32, u32, u32, u32)| { + Box::new(async move { + let memory = get_memory(&mut caller)?; + let gas = caller.data().gas.cheap_clone(); + + let entity_type = read_string_with_gas( + &memory, + &caller, + entity_type_ptr, + entity_type_len, + &gas, + )?; + let id = read_string_with_gas(&memory, &caller, id_ptr, id_len, &gas)?; + + let mut ctx = std::pin::pin!(WasmInstanceContext::new(&mut caller)); + ctx.rust_store_remove(&gas, &entity_type, &id).await?; + + Ok(()) + }) + }, + )?; + } + + // ========== Crypto Operations ========== + + if is_graphite_import("crypto_keccak256") { + linker.func_wrap( + "graphite", + "crypto_keccak256", + |mut caller: Caller<'_, WasmInstanceData>, + input_ptr: u32, + input_len: u32, + out_ptr: u32| + -> anyhow::Result<()> { + let memory = get_memory(&mut caller)?; + let gas = caller.data().gas.cheap_clone(); + + let input = read_bytes_with_gas(&memory, &caller, input_ptr, input_len, &gas)?; + let hash = tiny_keccak::keccak256(&input); + + write_bytes_with_gas(&memory, &mut caller, out_ptr, &hash, &gas)?; + Ok(()) + }, + )?; + } + + // ========== Logging ========== + + if is_graphite_import("log_log") { + linker.func_wrap_async( + "graphite", + "log_log", + |mut caller: Caller<'_, WasmInstanceData>, + (level, message_ptr, message_len): (u32, u32, u32)| { + Box::new(async move { + let memory = get_memory(&mut caller)?; + let gas = caller.data().gas.cheap_clone(); + + let message = + read_string_with_gas(&memory, &caller, message_ptr, message_len, &gas)?; + + let mut ctx = std::pin::pin!(WasmInstanceContext::new(&mut caller)); + ctx.rust_log(&gas, level, &message).await?; + + Ok(()) + }) + }, + )?; + } + + // ========== Data Source Operations ========== + + if is_graphite_import("data_source_address") { + linker.func_wrap_async( + "graphite", + "data_source_address", + |mut caller: Caller<'_, WasmInstanceData>, (out_ptr,): (u32,)| { + Box::new(async move { + let gas = caller.data().gas.cheap_clone(); + + let mut ctx = std::pin::pin!(WasmInstanceContext::new(&mut caller)); + let address = ctx.rust_data_source_address(&gas).await?; + + let memory = get_memory(&mut caller)?; + write_bytes_with_gas(&memory, &mut caller, out_ptr, &address, &gas)?; + Ok(()) + }) + }, + )?; + } + + if is_graphite_import("data_source_network") { + linker.func_wrap_async( + "graphite", + "data_source_network", + |mut caller: Caller<'_, WasmInstanceData>, (out_ptr, out_cap): (u32, u32)| { + Box::new(async move { + let gas = caller.data().gas.cheap_clone(); + + let mut ctx = std::pin::pin!(WasmInstanceContext::new(&mut caller)); + let network = ctx.rust_data_source_network(&gas).await?; + + let bytes = network.as_bytes(); + if bytes.len() > out_cap as usize { + return Ok(u32::MAX); + } + + let memory = get_memory(&mut caller)?; + write_bytes_with_gas(&memory, &mut caller, out_ptr, bytes, &gas)?; + Ok(bytes.len() as u32) + }) + }, + )?; + } + + // ========== Dynamic Data Sources ========== + + if is_graphite_import("data_source_create") { + linker.func_wrap_async( + "graphite", + "data_source_create", + |mut caller: Caller<'_, WasmInstanceData>, + (name_ptr, name_len, params_ptr, params_len): (u32, u32, u32, u32)| { + Box::new(async move { + let memory = get_memory(&mut caller)?; + let gas = caller.data().gas.cheap_clone(); + + let name = read_string_with_gas(&memory, &caller, name_ptr, name_len, &gas)?; + let params_bytes = + read_bytes_with_gas(&memory, &caller, params_ptr, params_len, &gas)?; + + // Deserialize params (Vec in TLV format) + let params = deserialize_string_vec(¶ms_bytes)?; + + let mut ctx = std::pin::pin!(WasmInstanceContext::new(&mut caller)); + ctx.rust_data_source_create(&gas, &name, params).await?; + + Ok(()) + }) + }, + )?; + } + + // ========== IPFS Operations ========== + + if is_graphite_import("ipfs_cat") { + linker.func_wrap_async( + "graphite", + "ipfs_cat", + |mut caller: Caller<'_, WasmInstanceData>, + (hash_ptr, hash_len, out_ptr, out_cap): (u32, u32, u32, u32)| { + Box::new(async move { + let memory = get_memory(&mut caller)?; + let gas = caller.data().gas.cheap_clone(); + + let hash = read_string_with_gas(&memory, &caller, hash_ptr, hash_len, &gas)?; + + let mut ctx = std::pin::pin!(WasmInstanceContext::new(&mut caller)); + let result = ctx.rust_ipfs_cat(&gas, &hash).await; + + match result { + Ok(bytes) => { + if bytes.len() > out_cap as usize { + Ok(u32::MAX) // Buffer too small + } else { + let memory = get_memory(&mut caller)?; + write_bytes_with_gas(&memory, &mut caller, out_ptr, &bytes, &gas)?; + Ok(bytes.len() as u32) + } + } + Err(e) => { + // Propagate reorg signals as traps so the block can be retried. + if let graph::runtime::HostExportError::PossibleReorg(_) = &e { + caller.data_mut().possible_reorg = true; + } + Err(anyhow::anyhow!("ipfs_cat failed: {}", e)) + } + } + }) + }, + )?; + } + + // ========== Ethereum Call ========== + + // Gas cost for ethereum_call - matches the AS implementation + const ETHEREUM_CALL_GAS: Gas = Gas::new(5_000_000_000); + + if is_graphite_import("ethereum_call") { + linker.func_wrap_async( + "graphite", + "ethereum_call", + |mut caller: Caller<'_, WasmInstanceData>, + (addr_ptr, addr_len, data_ptr, data_len, out_ptr, out_cap): ( + u32, + u32, + u32, + u32, + u32, + u32, + )| { + Box::new(async move { + let memory = get_memory(&mut caller)?; + let gas = caller.data().gas.cheap_clone(); + + // Consume gas for ethereum_call + gas.consume_host_fn_with_metrics(ETHEREUM_CALL_GAS, "ethereum_call")?; + + // Read address (should be 20 bytes) + let addr_bytes = + read_bytes_with_gas(&memory, &caller, addr_ptr, addr_len, &gas)?; + if addr_bytes.len() != 20 { + anyhow::bail!( + "ethereum_call: address must be 20 bytes, got {}", + addr_bytes.len() + ); + } + let mut address = [0u8; 20]; + address.copy_from_slice(&addr_bytes); + + // Read calldata + let calldata = read_bytes_with_gas(&memory, &caller, data_ptr, data_len, &gas)?; + + // Get the raw_eth_call capability from ctx + let raw_eth_call = caller + .data() + .ctx + .raw_eth_call + .as_ref() + .ok_or_else(|| { + anyhow::anyhow!( + "ethereum_call is not available for this chain/data source" + ) + })? + .cheap_clone(); + + let block_ptr = caller.data().ctx.block_ptr.cheap_clone(); + + // Make the call + let result = raw_eth_call + .call(address, &calldata, &block_ptr, None) + .await; + + match result { + Ok(Some(bytes)) => { + if bytes.len() > out_cap as usize { + // Buffer too small + Ok(u32::MAX) + } else { + let memory = get_memory(&mut caller)?; + write_bytes_with_gas(&memory, &mut caller, out_ptr, &bytes, &gas)?; + Ok(bytes.len() as u32) + } + } + Ok(None) => { + // Call reverted - return 0 to indicate null/revert + Ok(0) + } + Err(e) => { + // Mark as possible reorg if appropriate + if let graph::runtime::HostExportError::PossibleReorg(_) = &e { + caller.data_mut().possible_reorg = true; + } + Err(anyhow::anyhow!("ethereum_call failed: {}", e)) + } + } + }) + }, + )?; + } + + // ========== Abort ========== + + if is_graphite_import("abort") { + linker.func_wrap( + "graphite", + "abort", + |mut caller: Caller<'_, WasmInstanceData>, + message_ptr: u32, + message_len: u32, + _file_ptr: u32, + _file_len: u32, + line: u32| + -> anyhow::Result<()> { + let memory = get_memory(&mut caller)?; + let gas = caller.data().gas.cheap_clone(); + + let message = + read_string_with_gas(&memory, &caller, message_ptr, message_len, &gas) + .unwrap_or_else(|_| "".to_string()); + + // Mark as deterministic trap + caller.data_mut().deterministic_host_trap = true; + + Err(anyhow::anyhow!("abort at line {}: {}", line, message)) + }, + )?; + } + + Ok(()) +} + +/// Check if a WASM module uses Rust ABI by looking for graphite namespace imports. +pub fn is_rust_module(import_name_to_modules: &BTreeMap>) -> bool { + import_name_to_modules + .values() + .any(|modules| modules.iter().any(|m| m == "graphite")) +} diff --git a/runtime/wasm/src/rust_abi/mod.rs b/runtime/wasm/src/rust_abi/mod.rs new file mode 100644 index 00000000000..0a1979a552d --- /dev/null +++ b/runtime/wasm/src/rust_abi/mod.rs @@ -0,0 +1,40 @@ +//! Rust ABI for WASM subgraph modules. +//! +//! This module provides serialization and host function bindings for +//! Rust-compiled WASM subgraphs, as an alternative to the AssemblyScript +//! ABI in `asc_abi/`. +//! +//! # Protocol +//! +//! - Strings: UTF-8 bytes, passed as (ptr, len) +//! - Entities: TLV (Type-Length-Value) format +//! - Host functions use `graphite` import namespace +//! - Handler signature: `fn(event_ptr: u32, event_len: u32) -> u32` + +mod entity; +mod host; +mod trigger; +mod types; + +pub use entity::{deserialize_entity_data, serialize_entity, EntityData}; +pub use host::{is_rust_module, link_rust_host_functions}; +pub use trigger::{RustBlockTrigger, RustCallTrigger, RustLogTrigger, ToRustBytes}; +pub use types::{FromRustWasm, ToRustWasm, ValueTag}; + +/// Language enum for dispatch. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum MappingLanguage { + AssemblyScript, + Rust, +} + +impl MappingLanguage { + /// Parse from manifest mapping.kind string. + pub fn from_kind(kind: &str) -> Option { + match kind { + "wasm/assemblyscript" => Some(Self::AssemblyScript), + "wasm/rust" => Some(Self::Rust), + _ => None, + } + } +} diff --git a/runtime/wasm/src/rust_abi/trigger.rs b/runtime/wasm/src/rust_abi/trigger.rs new file mode 100644 index 00000000000..f320e8d6518 --- /dev/null +++ b/runtime/wasm/src/rust_abi/trigger.rs @@ -0,0 +1,247 @@ +//! Trigger serialization for Rust WASM modules. +//! +//! Provides the `ToRustBytes` trait for serializing blockchain triggers +//! to the TLV format expected by Graphite SDK handlers. + +use super::types::ToRustWasm; +use std::io::{self, Write}; + +/// Trait for serializing trigger data to Rust WASM format. +/// +/// Implemented by chain-specific trigger types (e.g., Ethereum MappingTrigger). +/// The serialized format matches what Graphite SDK's `FromWasmBytes` expects. +pub trait ToRustBytes { + /// Serialize to TLV bytes for Rust handlers. + fn to_rust_bytes(&self) -> Vec; +} + +/// Log trigger data in a format suitable for serialization. +/// +/// This struct provides a chain-agnostic representation of a log trigger +/// that can be serialized for Rust WASM handlers. +#[derive(Debug, Clone)] +pub struct RustLogTrigger { + /// Contract address that emitted the log (20 bytes) + pub address: [u8; 20], + /// Transaction hash (32 bytes) + pub tx_hash: [u8; 32], + /// Log index within the block + pub log_index: u64, + /// Block number + pub block_number: u64, + /// Block timestamp (Unix seconds) + pub block_timestamp: u64, + /// Log topics (event selector + indexed params) + pub topics: Vec<[u8; 32]>, + /// ABI-encoded non-indexed event data + pub data: Vec, +} + +impl ToRustBytes for RustLogTrigger { + fn to_rust_bytes(&self) -> Vec { + let mut buf = Vec::new(); + self.write_to(&mut buf).expect("write to vec cannot fail"); + buf + } +} + +impl ToRustWasm for RustLogTrigger { + fn write_to(&self, writer: &mut W) -> io::Result<()> { + // Fixed-size fields first (no length prefix) + writer.write_all(&self.address)?; // 20 bytes + writer.write_all(&self.tx_hash)?; // 32 bytes + writer.write_all(&self.log_index.to_le_bytes())?; // 8 bytes + writer.write_all(&self.block_number.to_le_bytes())?; // 8 bytes + writer.write_all(&self.block_timestamp.to_le_bytes())?; // 8 bytes + + // Topics array: count + data + writer.write_all(&(self.topics.len() as u32).to_le_bytes())?; + for topic in &self.topics { + writer.write_all(topic)?; // 32 bytes each + } + + // Data: length + bytes + writer.write_all(&(self.data.len() as u32).to_le_bytes())?; + writer.write_all(&self.data)?; + + Ok(()) + } +} + +/// Call trigger data for Rust handlers. +#[derive(Debug, Clone)] +pub struct RustCallTrigger { + /// Contract address being called (20 bytes) + pub to: [u8; 20], + /// Caller address (20 bytes) + pub from: [u8; 20], + /// Transaction hash (32 bytes) + pub tx_hash: [u8; 32], + /// Block number + pub block_number: u64, + /// Block timestamp (Unix seconds) + pub block_timestamp: u64, + /// Block hash (32 bytes) + pub block_hash: [u8; 32], + /// Call input data + pub input: Vec, + /// Call output data + pub output: Vec, +} + +impl ToRustBytes for RustCallTrigger { + fn to_rust_bytes(&self) -> Vec { + let mut buf = Vec::new(); + self.write_to(&mut buf).expect("write to vec cannot fail"); + buf + } +} + +impl ToRustWasm for RustCallTrigger { + fn write_to(&self, writer: &mut W) -> io::Result<()> { + // Fixed-size fields + writer.write_all(&self.to)?; // 20 bytes + writer.write_all(&self.from)?; // 20 bytes + writer.write_all(&self.tx_hash)?; // 32 bytes + writer.write_all(&self.block_number.to_le_bytes())?; // 8 bytes + writer.write_all(&self.block_timestamp.to_le_bytes())?; // 8 bytes + writer.write_all(&self.block_hash)?; // 32 bytes + + // Input: length + bytes + writer.write_all(&(self.input.len() as u32).to_le_bytes())?; + writer.write_all(&self.input)?; + + // Output: length + bytes + writer.write_all(&(self.output.len() as u32).to_le_bytes())?; + writer.write_all(&self.output)?; + + Ok(()) + } +} + +/// Block trigger data for Rust handlers. +#[derive(Debug, Clone)] +pub struct RustBlockTrigger { + /// Block hash (32 bytes) + pub hash: [u8; 32], + /// Parent block hash (32 bytes) + pub parent_hash: [u8; 32], + /// Block number + pub number: u64, + /// Block timestamp (Unix seconds) + pub timestamp: u64, + /// Block author/miner address (20 bytes) + pub author: [u8; 20], + /// Gas used in the block + pub gas_used: u64, + /// Gas limit for the block + pub gas_limit: u64, + /// Block difficulty (32 bytes, big-endian U256) + pub difficulty: [u8; 32], + /// Block base fee per gas (0 if pre-EIP-1559) + pub base_fee_per_gas: u64, +} + +impl ToRustBytes for RustBlockTrigger { + fn to_rust_bytes(&self) -> Vec { + let mut buf = Vec::new(); + self.write_to(&mut buf).expect("write to vec cannot fail"); + buf + } +} + +impl ToRustWasm for RustBlockTrigger { + fn write_to(&self, writer: &mut W) -> io::Result<()> { + writer.write_all(&self.hash)?; // 32 bytes + writer.write_all(&self.parent_hash)?; // 32 bytes + writer.write_all(&self.number.to_le_bytes())?; // 8 bytes + writer.write_all(&self.timestamp.to_le_bytes())?; // 8 bytes + writer.write_all(&self.author)?; // 20 bytes + writer.write_all(&self.gas_used.to_le_bytes())?; // 8 bytes + writer.write_all(&self.gas_limit.to_le_bytes())?; // 8 bytes + writer.write_all(&self.difficulty)?; // 32 bytes + writer.write_all(&self.base_fee_per_gas.to_le_bytes())?; // 8 bytes + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn serialize_log_trigger() { + let trigger = RustLogTrigger { + address: [0xde; 20], + tx_hash: [0xab; 32], + log_index: 42, + block_number: 12345678, + block_timestamp: 1700000000, + topics: vec![[0x11; 32], [0x22; 32]], + data: vec![1, 2, 3, 4], + }; + + let bytes = trigger.to_rust_bytes(); + + // Verify structure: + // 20 (address) + 32 (tx_hash) + 8*3 (log_index, block_number, timestamp) + // + 4 (topics count) + 64 (2 topics) + 4 (data len) + 4 (data) + assert_eq!(bytes.len(), 20 + 32 + 24 + 4 + 64 + 4 + 4); + + // Check address + assert_eq!(&bytes[0..20], &[0xde; 20]); + + // Check topics count + let topics_offset = 20 + 32 + 24; + let topics_count = + u32::from_le_bytes(bytes[topics_offset..topics_offset + 4].try_into().unwrap()); + assert_eq!(topics_count, 2); + } + + #[test] + fn serialize_call_trigger() { + let trigger = RustCallTrigger { + to: [0xaa; 20], + from: [0xbb; 20], + tx_hash: [0xcc; 32], + block_number: 100, + block_timestamp: 1700000000, + block_hash: [0xdd; 32], + input: vec![0x12, 0x34, 0x56, 0x78], + output: vec![0xab, 0xcd], + }; + + let bytes = trigger.to_rust_bytes(); + + // Fixed: 20+20+32+8+8+32 = 120, plus 4+4 + 4+2 = 14 + assert_eq!(bytes.len(), 120 + 14); + + // Check addresses + assert_eq!(&bytes[0..20], &[0xaa; 20]); + assert_eq!(&bytes[20..40], &[0xbb; 20]); + } + + #[test] + fn serialize_block_trigger() { + let trigger = RustBlockTrigger { + hash: [0x11; 32], + parent_hash: [0x22; 32], + number: 12345678, + timestamp: 1700000000, + author: [0x33; 20], + gas_used: 21000, + gas_limit: 30000000, + difficulty: [0x00; 32], + base_fee_per_gas: 1000000000, + }; + + let bytes = trigger.to_rust_bytes(); + + // 32+32+8+8+20+8+8+32+8 = 156 bytes + assert_eq!(bytes.len(), 156); + + // Check hash + assert_eq!(&bytes[0..32], &[0x11; 32]); + assert_eq!(&bytes[32..64], &[0x22; 32]); + } +} diff --git a/runtime/wasm/src/rust_abi/types.rs b/runtime/wasm/src/rust_abi/types.rs new file mode 100644 index 00000000000..5a0fadb74be --- /dev/null +++ b/runtime/wasm/src/rust_abi/types.rs @@ -0,0 +1,267 @@ +//! Rust WASM serialization types. +//! +//! Defines traits and constants for serializing data between +//! graph-node and Rust WASM modules. + +use graph::prelude::*; +use std::io::{self, Read, Write}; +use std::str::FromStr; + +/// Canonical TLV value tag bytes. +/// +/// These are the single source of truth for the tag byte of each `Value` +/// variant on the wire. They match the table in +/// `docs/rust-abi-spec.md` section 4.6 one-for-one. Changing any value in +/// this module is a breaking ABI change and requires an `apiVersion` bump. +pub mod tags { + pub const NULL: u8 = 0x00; + pub const STRING: u8 = 0x01; + pub const INT: u8 = 0x02; + pub const INT8: u8 = 0x03; + pub const BIG_INT: u8 = 0x04; + pub const BIG_DECIMAL: u8 = 0x05; + pub const BOOL: u8 = 0x06; + pub const BYTES: u8 = 0x07; + pub const ADDRESS: u8 = 0x08; + pub const ARRAY: u8 = 0x09; +} + +/// Value type tags for TLV serialization. +/// +/// Discriminants are pulled from the [`tags`] module so there is a single +/// authoritative definition of each on-wire byte. +#[repr(u8)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ValueTag { + Null = tags::NULL, + String = tags::STRING, + Int = tags::INT, + Int8 = tags::INT8, + BigInt = tags::BIG_INT, + BigDecimal = tags::BIG_DECIMAL, + Bool = tags::BOOL, + Bytes = tags::BYTES, + Address = tags::ADDRESS, + Array = tags::ARRAY, +} + +impl ValueTag { + pub fn from_u8(v: u8) -> Option { + match v { + tags::NULL => Some(Self::Null), + tags::STRING => Some(Self::String), + tags::INT => Some(Self::Int), + tags::INT8 => Some(Self::Int8), + tags::BIG_INT => Some(Self::BigInt), + tags::BIG_DECIMAL => Some(Self::BigDecimal), + tags::BOOL => Some(Self::Bool), + tags::BYTES => Some(Self::Bytes), + tags::ADDRESS => Some(Self::Address), + tags::ARRAY => Some(Self::Array), + _ => None, + } + } +} + +/// Trait for types that can be serialized to Rust WASM format. +pub trait ToRustWasm { + fn write_to(&self, writer: &mut W) -> io::Result<()>; + + fn to_bytes(&self) -> Vec { + let mut buf = Vec::new(); + self.write_to(&mut buf).expect("write to vec cannot fail"); + buf + } +} + +/// Trait for types that can be deserialized from Rust WASM format. +pub trait FromRustWasm: Sized { + fn read_from(reader: &mut R) -> io::Result; + + fn from_bytes(bytes: &[u8]) -> io::Result { + Self::read_from(&mut &bytes[..]) + } +} + +// ============================================================================ +// Primitive implementations +// ============================================================================ + +impl ToRustWasm for String { + fn write_to(&self, writer: &mut W) -> io::Result<()> { + let bytes = self.as_bytes(); + writer.write_all(&(bytes.len() as u32).to_le_bytes())?; + writer.write_all(bytes) + } +} + +impl FromRustWasm for String { + fn read_from(reader: &mut R) -> io::Result { + let mut len_buf = [0u8; 4]; + reader.read_exact(&mut len_buf)?; + let len = u32::from_le_bytes(len_buf) as usize; + + let mut buf = vec![0u8; len]; + reader.read_exact(&mut buf)?; + + String::from_utf8(buf).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) + } +} + +impl ToRustWasm for i32 { + fn write_to(&self, writer: &mut W) -> io::Result<()> { + writer.write_all(&self.to_le_bytes()) + } +} + +impl FromRustWasm for i32 { + fn read_from(reader: &mut R) -> io::Result { + let mut buf = [0u8; 4]; + reader.read_exact(&mut buf)?; + Ok(i32::from_le_bytes(buf)) + } +} + +impl ToRustWasm for i64 { + fn write_to(&self, writer: &mut W) -> io::Result<()> { + writer.write_all(&self.to_le_bytes()) + } +} + +impl FromRustWasm for i64 { + fn read_from(reader: &mut R) -> io::Result { + let mut buf = [0u8; 8]; + reader.read_exact(&mut buf)?; + Ok(i64::from_le_bytes(buf)) + } +} + +impl ToRustWasm for bool { + fn write_to(&self, writer: &mut W) -> io::Result<()> { + writer.write_all(&[if *self { 1 } else { 0 }]) + } +} + +impl FromRustWasm for bool { + fn read_from(reader: &mut R) -> io::Result { + let mut buf = [0u8; 1]; + reader.read_exact(&mut buf)?; + Ok(buf[0] != 0) + } +} + +impl ToRustWasm for Vec { + fn write_to(&self, writer: &mut W) -> io::Result<()> { + writer.write_all(&(self.len() as u32).to_le_bytes())?; + writer.write_all(self) + } +} + +impl FromRustWasm for Vec { + fn read_from(reader: &mut R) -> io::Result { + let mut len_buf = [0u8; 4]; + reader.read_exact(&mut len_buf)?; + let len = u32::from_le_bytes(len_buf) as usize; + + let mut buf = vec![0u8; len]; + reader.read_exact(&mut buf)?; + Ok(buf) + } +} + +impl ToRustWasm for [u8; 20] { + fn write_to(&self, writer: &mut W) -> io::Result<()> { + writer.write_all(self) + } +} + +impl FromRustWasm for [u8; 20] { + fn read_from(reader: &mut R) -> io::Result { + let mut buf = [0u8; 20]; + reader.read_exact(&mut buf)?; + Ok(buf) + } +} + +impl ToRustWasm for [u8; 32] { + fn write_to(&self, writer: &mut W) -> io::Result<()> { + writer.write_all(self) + } +} + +impl FromRustWasm for [u8; 32] { + fn read_from(reader: &mut R) -> io::Result { + let mut buf = [0u8; 32]; + reader.read_exact(&mut buf)?; + Ok(buf) + } +} + +// ============================================================================ +// Graph-specific type implementations +// ============================================================================ + +impl ToRustWasm for BigInt { + fn write_to(&self, writer: &mut W) -> io::Result<()> { + let bytes = self.to_signed_bytes_le(); + writer.write_all(&(bytes.len() as u32).to_le_bytes())?; + writer.write_all(&bytes) + } +} + +impl FromRustWasm for BigInt { + fn read_from(reader: &mut R) -> io::Result { + let mut len_buf = [0u8; 4]; + reader.read_exact(&mut len_buf)?; + let len = u32::from_le_bytes(len_buf) as usize; + + let mut buf = vec![0u8; len]; + reader.read_exact(&mut buf)?; + BigInt::from_signed_bytes_le(&buf) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) + } +} + +impl ToRustWasm for BigDecimal { + fn write_to(&self, writer: &mut W) -> io::Result<()> { + // Serialize as string for simplicity and accuracy + let s = self.to_string(); + s.write_to(writer) + } +} + +impl FromRustWasm for BigDecimal { + fn read_from(reader: &mut R) -> io::Result { + let s = String::read_from(reader)?; + BigDecimal::from_str(&s).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn string_roundtrip() { + let s = "hello world".to_string(); + let bytes = s.to_bytes(); + let recovered = String::from_bytes(&bytes).unwrap(); + assert_eq!(s, recovered); + } + + #[test] + fn bigint_roundtrip() { + let n = BigInt::from(12345678901234567890_u128); + let bytes = n.to_bytes(); + let recovered = BigInt::from_bytes(&bytes).unwrap(); + assert_eq!(n, recovered); + } + + #[test] + fn negative_bigint_roundtrip() { + let n = BigInt::from(-999999999999_i64); + let bytes = n.to_bytes(); + let recovered = BigInt::from_bytes(&bytes).unwrap(); + assert_eq!(n, recovered); + } +}