diff --git a/Cargo.lock b/Cargo.lock index 17573acd02c..b2ae24e0169 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "Inflector" @@ -2073,6 +2073,7 @@ dependencies = [ "never", "parity-wasm", "semver", + "serde_yaml", "uuid", "wasm-instrument", "wasmtime", diff --git a/graph/src/runtime/gas/costs.rs b/graph/src/runtime/gas/costs.rs index 6436fc2102d..06decdf03aa 100644 --- a/graph/src/runtime/gas/costs.rs +++ b/graph/src/runtime/gas/costs.rs @@ -83,3 +83,10 @@ pub const JSON_FROM_BYTES: GasOp = GasOp { base_cost: DEFAULT_BASE_COST, size_mult: DEFAULT_GAS_PER_BYTE * 100, }; + +// Deeply nested YAML can take up more than 100 times the memory of the serialized format. +// Multiplying the size cost by 100 accounts for this. +pub const YAML_FROM_BYTES: GasOp = GasOp { + base_cost: DEFAULT_BASE_COST, + size_mult: DEFAULT_GAS_PER_BYTE * 100, +}; diff --git a/graph/src/runtime/mod.rs b/graph/src/runtime/mod.rs index f015e1e9563..5622d37c100 100644 --- a/graph/src/runtime/mod.rs +++ b/graph/src/runtime/mod.rs @@ -371,7 +371,17 @@ pub enum IndexForAscTypeId { // Subgraph Data Source types AscEntityTrigger = 4500, - // Reserved discriminant space for a future blockchain type IDs: [4,500, 5,499] + // Reserved discriminant space for YAML type IDs: [5,500, 6,499] + YamlValue = 5500, + YamlTaggedValue = 5501, + YamlTypedMapEntryValueValue = 5502, + YamlTypedMapValueValue = 5503, + YamlArrayValue = 5504, + YamlArrayTypedMapEntryValueValue = 5505, + YamlWrappedValue = 5506, + YamlResultValueBool = 5507, + + // Reserved discriminant space for a future blockchain type IDs: [6,500, 7,499] // // Generated with the following shell script: // diff --git a/runtime/test/src/test.rs b/runtime/test/src/test.rs index bad91ef2158..53a84aec5f1 100644 --- a/runtime/test/src/test.rs +++ b/runtime/test/src/test.rs @@ -1698,3 +1698,71 @@ async fn test_store_ts() { "Cannot get entity of type `Stats`. The type must be an @entity type", ); } + +async fn test_yaml_parsing(api_version: Version, gas_used: u64) { + let mut module = test_module( + "yamlParsing", + mock_data_source( + &wasm_file_path("yaml_parsing.wasm", api_version.clone()), + api_version.clone(), + ), + api_version, + ) + .await; + + let mut test = |input: &str, expected: &str| { + let ptr: AscPtr = module.invoke_export1("handleYaml", input.as_bytes()); + let resp: String = module.asc_get(ptr).unwrap(); + assert_eq!(resp, expected, "failed on input: {input}"); + }; + + // Test invalid YAML; + test("{a: 1, - b: 2}", "error"); + + // Test size limit; + test(&"x".repeat(10_000_0001), "error"); + + // Test nulls; + test("null", "(0) null"); + + // Test booleans; + test("false", "(1) false"); + test("true", "(1) true"); + + // Test numbers; + test("12345", "(2) 12345"); + test("12345.6789", "(2) 12345.6789"); + + // Test strings; + test("aa bb cc", "(3) aa bb cc"); + test("\"aa bb cc\"", "(3) aa bb cc"); + + // Test arrays; + test("[1, 2, 3, 4]", "(4) [(2) 1, (2) 2, (2) 3, (2) 4]"); + test("- 1\n- 2\n- 3\n- 4", "(4) [(2) 1, (2) 2, (2) 3, (2) 4]"); + + // Test objects; + test("{a: 1, b: 2, c: 3}", "(5) {a: (2) 1, b: (2) 2, c: (2) 3}"); + test("a: 1\nb: 2\nc: 3", "(5) {a: (2) 1, b: (2) 2, c: (2) 3}"); + + // Test tagged values; + test("!AA bb cc", "(6) !AA (3) bb cc"); + + // Test nesting; + test( + "aa:\n bb:\n - cc: !DD ee", + "(5) {aa: (5) {bb: (4) [(5) {cc: (6) !DD (3) ee}]}}", + ); + + assert_eq!(module.gas_used(), gas_used, "gas used"); +} + +#[tokio::test] +async fn yaml_parsing_v0_0_4() { + test_yaml_parsing(API_VERSION_0_0_4, 10462217077171).await; +} + +#[tokio::test] +async fn yaml_parsing_v0_0_5() { + test_yaml_parsing(API_VERSION_0_0_5, 10462245390665).await; +} diff --git a/runtime/test/wasm_test/api_version_0_0_4/yaml_parsing.ts b/runtime/test/wasm_test/api_version_0_0_4/yaml_parsing.ts new file mode 100644 index 00000000000..b3efc9ba205 --- /dev/null +++ b/runtime/test/wasm_test/api_version_0_0_4/yaml_parsing.ts @@ -0,0 +1,20 @@ +import "allocator/arena"; + +import {Bytes, Result} from "../api_version_0_0_5/common/types"; +import {debug, YAMLValue} from "../api_version_0_0_5/common/yaml"; + +export {memory}; + +declare namespace yaml { + function try_fromBytes(data: Bytes): Result; +} + +export function handleYaml(data: Bytes): string { + let result = yaml.try_fromBytes(data); + + if (result.isError) { + return "error"; + } + + return debug(result.value); +} diff --git a/runtime/test/wasm_test/api_version_0_0_4/yaml_parsing.wasm b/runtime/test/wasm_test/api_version_0_0_4/yaml_parsing.wasm new file mode 100644 index 00000000000..cb132344ce3 Binary files /dev/null and b/runtime/test/wasm_test/api_version_0_0_4/yaml_parsing.wasm differ diff --git a/runtime/test/wasm_test/api_version_0_0_5/common/yaml.ts b/runtime/test/wasm_test/api_version_0_0_5/common/yaml.ts new file mode 100644 index 00000000000..135635475f1 --- /dev/null +++ b/runtime/test/wasm_test/api_version_0_0_5/common/yaml.ts @@ -0,0 +1,139 @@ +import {TypedMap} from './types'; + +export enum YAMLValueKind { + NULL = 0, + BOOL = 1, + NUMBER = 2, + STRING = 3, + ARRAY = 4, + OBJECT = 5, + TAGGED = 6, +} + +export class YAMLValue { + kind: YAMLValueKind; + data: u64; + + isBool(): boolean { + return this.kind == YAMLValueKind.BOOL; + } + + isNumber(): boolean { + return this.kind == YAMLValueKind.NUMBER; + } + + isString(): boolean { + return this.kind == YAMLValueKind.STRING; + } + + isArray(): boolean { + return this.kind == YAMLValueKind.ARRAY; + } + + isObject(): boolean { + return this.kind == YAMLValueKind.OBJECT; + } + + isTagged(): boolean { + return this.kind == YAMLValueKind.TAGGED; + } + + + toBool(): boolean { + assert(this.isBool(), 'YAML value is not a boolean'); + return this.data != 0; + } + + toNumber(): string { + assert(this.isNumber(), 'YAML value is not a number'); + return changetype(this.data as usize); + } + + toString(): string { + assert(this.isString(), 'YAML value is not a string'); + return changetype(this.data as usize); + } + + toArray(): Array { + assert(this.isArray(), 'YAML value is not an array'); + return changetype>(this.data as usize); + } + + toObject(): TypedMap { + assert(this.isObject(), 'YAML value is not an object'); + return changetype>(this.data as usize); + } + + toTagged(): YAMLTaggedValue { + assert(this.isTagged(), 'YAML value is not tagged'); + return changetype(this.data as usize); + } +} + +export class YAMLTaggedValue { + tag: string; + value: YAMLValue; +} + + +export function debug(value: YAMLValue): string { + return "(" + value.kind.toString() + ") " + debug_value(value); +} + +function debug_value(value: YAMLValue): string { + switch (value.kind) { + case YAMLValueKind.NULL: + return "null"; + case YAMLValueKind.BOOL: + return value.toBool() ? "true" : "false"; + case YAMLValueKind.NUMBER: + return value.toNumber(); + case YAMLValueKind.STRING: + return value.toString(); + case YAMLValueKind.ARRAY: { + let arr = value.toArray(); + + let s = "["; + for (let i = 0; i < arr.length; i++) { + if (i > 0) { + s += ", "; + } + s += debug(arr[i]); + } + s += "]"; + + return s; + } + case YAMLValueKind.OBJECT: { + let arr = value.toObject().entries.sort((a, b) => { + if (a.key.toString() < b.key.toString()) { + return -1; + } + + if (a.key.toString() > b.key.toString()) { + return 1; + } + + return 0; + }); + + let s = "{"; + for (let i = 0; i < arr.length; i++) { + if (i > 0) { + s += ", "; + } + s += debug_value(arr[i].key) + ": " + debug(arr[i].value); + } + s += "}"; + + return s; + } + case YAMLValueKind.TAGGED: { + let tagged = value.toTagged(); + + return tagged.tag + " " + debug(tagged.value); + } + default: + return "undefined"; + } +} diff --git a/runtime/test/wasm_test/api_version_0_0_5/yaml_parsing.ts b/runtime/test/wasm_test/api_version_0_0_5/yaml_parsing.ts new file mode 100644 index 00000000000..c89eb611bb2 --- /dev/null +++ b/runtime/test/wasm_test/api_version_0_0_5/yaml_parsing.ts @@ -0,0 +1,62 @@ +import {debug, YAMLValue, YAMLTaggedValue} from './common/yaml'; +import {Bytes, Result, TypedMap, TypedMapEntry, Wrapped} from './common/types'; + +enum TypeId { + STRING = 0, + UINT8_ARRAY = 6, + + YamlValue = 5500, + YamlTaggedValue = 5501, + YamlTypedMapEntryValueValue = 5502, + YamlTypedMapValueValue = 5503, + YamlArrayValue = 5504, + YamlArrayTypedMapEntryValueValue = 5505, + YamlWrappedValue = 5506, + YamlResultValueBool = 5507, +} + +export function id_of_type(type_id_index: TypeId): usize { + switch (type_id_index) { + case TypeId.STRING: + return idof(); + case TypeId.UINT8_ARRAY: + return idof(); + + case TypeId.YamlValue: + return idof(); + case TypeId.YamlTaggedValue: + return idof(); + case TypeId.YamlTypedMapEntryValueValue: + return idof>(); + case TypeId.YamlTypedMapValueValue: + return idof>(); + case TypeId.YamlArrayValue: + return idof>(); + case TypeId.YamlArrayTypedMapEntryValueValue: + return idof>>(); + case TypeId.YamlWrappedValue: + return idof>(); + case TypeId.YamlResultValueBool: + return idof>(); + default: + return 0; + } +} + +export function allocate(n: usize): usize { + return __alloc(n); +} + +declare namespace yaml { + function try_fromBytes(data: Bytes): Result; +} + +export function handleYaml(data: Bytes): string { + let result = yaml.try_fromBytes(data); + + if (result.isError) { + return "error"; + } + + return debug(result.value); +} diff --git a/runtime/test/wasm_test/api_version_0_0_5/yaml_parsing.wasm b/runtime/test/wasm_test/api_version_0_0_5/yaml_parsing.wasm new file mode 100644 index 00000000000..131ded5d04c Binary files /dev/null and b/runtime/test/wasm_test/api_version_0_0_5/yaml_parsing.wasm differ diff --git a/runtime/wasm/Cargo.toml b/runtime/wasm/Cargo.toml index 0e6e5d64100..3e74e9f985e 100644 --- a/runtime/wasm/Cargo.toml +++ b/runtime/wasm/Cargo.toml @@ -20,3 +20,5 @@ wasm-instrument = { version = "0.2.0", features = ["std", "sign_ext"] } # AssemblyScript uses sign extensions parity-wasm = { version = "0.45", features = ["std", "sign_ext"] } + +serde_yaml = { workspace = true } diff --git a/runtime/wasm/src/asc_abi/class.rs b/runtime/wasm/src/asc_abi/class.rs index 366ff844b08..1fae1ad9ce0 100644 --- a/runtime/wasm/src/asc_abi/class.rs +++ b/runtime/wasm/src/asc_abi/class.rs @@ -398,6 +398,17 @@ impl AscIndexId for Array> { const INDEX_ASC_TYPE_ID: IndexForAscTypeId = IndexForAscTypeId::ArrayBigDecimal; } +impl AscIndexId for Array>> { + const INDEX_ASC_TYPE_ID: IndexForAscTypeId = IndexForAscTypeId::YamlArrayValue; +} + +impl AscIndexId + for Array, AscEnum>>> +{ + const INDEX_ASC_TYPE_ID: IndexForAscTypeId = + IndexForAscTypeId::YamlArrayTypedMapEntryValueValue; +} + /// Represents any `AscValue` since they all fit in 64 bits. #[repr(C)] #[derive(Copy, Clone, Default)] @@ -505,6 +516,10 @@ impl AscIndexId for AscEnum { const INDEX_ASC_TYPE_ID: IndexForAscTypeId = IndexForAscTypeId::JsonValue; } +impl AscIndexId for AscEnum { + const INDEX_ASC_TYPE_ID: IndexForAscTypeId = IndexForAscTypeId::YamlValue; +} + pub type AscEnumArray = AscPtr>>>; #[repr(u32)] @@ -613,6 +628,10 @@ impl AscIndexId for AscTypedMapEntry> { const INDEX_ASC_TYPE_ID: IndexForAscTypeId = IndexForAscTypeId::TypedMapEntryStringJsonValue; } +impl AscIndexId for AscTypedMapEntry, AscEnum> { + const INDEX_ASC_TYPE_ID: IndexForAscTypeId = IndexForAscTypeId::YamlTypedMapEntryValueValue; +} + pub(crate) type AscTypedMapEntryArray = Array>>; #[repr(C)] @@ -638,6 +657,10 @@ impl AscIndexId for AscTypedMap, AscEnum> { + const INDEX_ASC_TYPE_ID: IndexForAscTypeId = IndexForAscTypeId::YamlTypedMapValueValue; +} + pub type AscEntity = AscTypedMap>; pub(crate) type AscJson = AscTypedMap>; @@ -725,6 +748,10 @@ impl AscIndexId for AscResult>, bool> { const INDEX_ASC_TYPE_ID: IndexForAscTypeId = IndexForAscTypeId::ResultJsonValueBool; } +impl AscIndexId for AscResult>, bool> { + const INDEX_ASC_TYPE_ID: IndexForAscTypeId = IndexForAscTypeId::YamlResultValueBool; +} + #[repr(C)] #[derive(AscType, Copy, Clone)] pub struct AscWrapped { @@ -742,3 +769,54 @@ impl AscIndexId for AscWrapped { impl AscIndexId for AscWrapped>> { const INDEX_ASC_TYPE_ID: IndexForAscTypeId = IndexForAscTypeId::WrappedJsonValue; } + +impl AscIndexId for AscWrapped>> { + const INDEX_ASC_TYPE_ID: IndexForAscTypeId = IndexForAscTypeId::YamlWrappedValue; +} + +#[repr(u32)] +#[derive(AscType, Clone, Copy)] +pub enum YamlValueKind { + Null, + Bool, + Number, + String, + Array, + Object, + Tagged, +} + +impl Default for YamlValueKind { + fn default() -> Self { + YamlValueKind::Null + } +} + +impl AscValue for YamlValueKind {} + +impl YamlValueKind { + pub(crate) fn get_kind(value: &serde_yaml::Value) -> Self { + use serde_yaml::Value; + + match value { + Value::Null => Self::Null, + Value::Bool(_) => Self::Bool, + Value::Number(_) => Self::Number, + Value::String(_) => Self::String, + Value::Sequence(_) => Self::Array, + Value::Mapping(_) => Self::Object, + Value::Tagged(_) => Self::Tagged, + } + } +} + +#[repr(C)] +#[derive(AscType)] +pub struct AscYamlTaggedValue { + pub tag: AscPtr, + pub value: AscPtr>, +} + +impl AscIndexId for AscYamlTaggedValue { + const INDEX_ASC_TYPE_ID: IndexForAscTypeId = IndexForAscTypeId::YamlTaggedValue; +} diff --git a/runtime/wasm/src/host_exports.rs b/runtime/wasm/src/host_exports.rs index b9735cc1e0d..bd1c8706c4a 100644 --- a/runtime/wasm/src/host_exports.rs +++ b/runtime/wasm/src/host_exports.rs @@ -1236,6 +1236,36 @@ impl HostExports { .map(|mut tokens| tokens.pop().unwrap()) .context("Failed to decode") } + + pub(crate) fn yaml_from_bytes( + &self, + bytes: &[u8], + gas: &GasCounter, + state: &mut BlockState, + ) -> Result { + const YAML_MAX_SIZE_BYTES: usize = 10_000_000; + + Self::track_gas_and_ops( + gas, + state, + gas::YAML_FROM_BYTES.with_args(complexity::Size, bytes), + "yaml_from_bytes", + )?; + + if bytes.len() > YAML_MAX_SIZE_BYTES { + return Err(DeterministicHostError::Other( + anyhow!( + "YAML size exceeds max size of {} bytes", + YAML_MAX_SIZE_BYTES + ) + .into(), + )); + } + + serde_yaml::from_slice(bytes) + .context("failed to parse YAML from bytes") + .map_err(DeterministicHostError::from) + } } fn string_to_h160(string: &str) -> Result { diff --git a/runtime/wasm/src/module/context.rs b/runtime/wasm/src/module/context.rs index ddf8eba3f1d..03cbf244c23 100644 --- a/runtime/wasm/src/module/context.rs +++ b/runtime/wasm/src/module/context.rs @@ -1188,4 +1188,64 @@ impl WasmInstanceContext<'_> { "`box.profile` has been removed." ))) } + + /// function yaml.fromBytes(bytes: Bytes): YAMLValue + pub fn yaml_from_bytes( + &mut self, + gas: &GasCounter, + bytes_ptr: AscPtr, + ) -> Result>, HostExportError> { + let bytes: Vec = asc_get(self, bytes_ptr, gas)?; + let host_exports = self.as_ref().ctx.host_exports.cheap_clone(); + let ctx = &mut self.as_mut().ctx; + + let yaml_value = host_exports + .yaml_from_bytes(&bytes, gas, &mut ctx.state) + .inspect_err(|_| { + debug!( + &self.as_ref().ctx.logger, + "Failed to parse YAML from byte array"; + "bytes" => truncate_yaml_bytes_for_logging(&bytes), + ); + })?; + + asc_new(self, &yaml_value, gas) + } + + /// function yaml.try_fromBytes(bytes: Bytes): Result + pub fn yaml_try_from_bytes( + &mut self, + gas: &GasCounter, + bytes_ptr: AscPtr, + ) -> Result>, bool>>, HostExportError> { + let bytes: Vec = asc_get(self, bytes_ptr, gas)?; + let host_exports = self.as_ref().ctx.host_exports.cheap_clone(); + let ctx = &mut self.as_mut().ctx; + + let result = host_exports + .yaml_from_bytes(&bytes, gas, &mut ctx.state) + .map_err(|err| { + warn!( + &self.as_ref().ctx.logger, + "Failed to parse YAML from byte array"; + "bytes" => truncate_yaml_bytes_for_logging(&bytes), + "error" => format!("{:#}", err), + ); + + true + }); + + asc_new(self, &result, gas) + } +} + +/// For debugging, it might be useful to know exactly which bytes could not be parsed as YAML, but +/// since we can parse large YAML documents, even one bad mapping could produce terabytes of logs. +/// To avoid this, we only log the first 1024 bytes of the failed YAML source. +fn truncate_yaml_bytes_for_logging(bytes: &[u8]) -> String { + if bytes.len() > 1024 { + return format!("(truncated) 0x{}", hex::encode(&bytes[..1024])); + } + + format!("0x{}", hex::encode(bytes)) } diff --git a/runtime/wasm/src/module/instance.rs b/runtime/wasm/src/module/instance.rs index 55d3e8574d2..63845e81c60 100644 --- a/runtime/wasm/src/module/instance.rs +++ b/runtime/wasm/src/module/instance.rs @@ -468,6 +468,9 @@ impl WasmInstance { link!("json.toF64", json_to_f64, ptr); link!("json.toBigInt", json_to_big_int, ptr); + link!("yaml.fromBytes", yaml_from_bytes, ptr); + link!("yaml.try_fromBytes", yaml_try_from_bytes, ptr); + link!("crypto.keccak256", crypto_keccak_256, ptr); link!("bigInt.plus", big_int_plus, x_ptr, y_ptr); diff --git a/runtime/wasm/src/to_from/external.rs b/runtime/wasm/src/to_from/external.rs index 30740e77696..9bbe0298abc 100644 --- a/runtime/wasm/src/to_from/external.rs +++ b/runtime/wasm/src/to_from/external.rs @@ -506,3 +506,54 @@ impl ToAscObj for EntitySourceOperation { impl AscIndexId for AscEntityTrigger { const INDEX_ASC_TYPE_ID: IndexForAscTypeId = IndexForAscTypeId::AscEntityTrigger; } + +impl ToAscObj> for serde_yaml::Value { + fn to_asc_obj( + &self, + heap: &mut H, + gas: &GasCounter, + ) -> Result, HostExportError> { + use serde_yaml::Value; + + let payload = match self { + Value::Null => EnumPayload(0), + Value::Bool(val) => EnumPayload::from(*val), + Value::Number(val) => asc_new(heap, &val.to_string(), gas)?.into(), + Value::String(val) => asc_new(heap, val, gas)?.into(), + Value::Sequence(val) => asc_new(heap, val.as_slice(), gas)?.into(), + Value::Mapping(val) => asc_new(heap, val, gas)?.into(), + Value::Tagged(val) => asc_new(heap, val.as_ref(), gas)?.into(), + }; + + Ok(AscEnum { + kind: YamlValueKind::get_kind(self), + _padding: 0, + payload, + }) + } +} + +impl ToAscObj, AscEnum>> for serde_yaml::Mapping { + fn to_asc_obj( + &self, + heap: &mut H, + gas: &GasCounter, + ) -> Result, AscEnum>, HostExportError> { + Ok(AscTypedMap { + entries: asc_new(heap, &*self.iter().collect::>(), gas)?, + }) + } +} + +impl ToAscObj for serde_yaml::value::TaggedValue { + fn to_asc_obj( + &self, + heap: &mut H, + gas: &GasCounter, + ) -> Result { + Ok(AscYamlTaggedValue { + tag: asc_new(heap, &self.tag.to_string(), gas)?, + value: asc_new(heap, &self.value, gas)?, + }) + } +}