From 0c3fe9f5e8b1602b6f1c7d3cbd97431723953b46 Mon Sep 17 00:00:00 2001 From: Eric Buehler Date: Sat, 26 Apr 2025 13:15:38 -0400 Subject: [PATCH 01/17] Add features and dep --- Cargo.lock | 1355 +++++++++++++++++++++++++++++++++++- Cargo.toml | 1 + README.md | 5 + constensor-core/Cargo.toml | 5 +- 4 files changed, 1335 insertions(+), 31 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a77dcf9..58d2471 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,18 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy 0.7.35", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -17,6 +29,15 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anes" version = "0.1.6" @@ -38,12 +59,54 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "ash" +version = "0.38.0+1.3.281" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bb44936d800fea8f016d7f2311c6a4f97aebd5dc86f09906139ec848cf3a46f" +dependencies = [ + "libloading", +] + +[[package]] +name = "async-channel" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89b47800b0be77592da0afd425cc03468052844aff33b84e33cc696f64e77b6a" +dependencies = [ + "concurrent-queue", + "event-listener-strategy", + "futures-core", + "pin-project-lite", +] + [[package]] name = "autocfg" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bitflags" version = "1.3.2" @@ -52,9 +115,18 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.5.0" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" +dependencies = [ + "serde", +] + +[[package]] +name = "block" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" [[package]] name = "bumpalo" @@ -104,7 +176,7 @@ dependencies = [ "rand_distr 0.4.3", "rayon", "safetensors", - "thiserror", + "thiserror 1.0.61", "ug", "yoke", "zip", @@ -122,6 +194,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "ciborium" version = "0.2.2" @@ -174,13 +252,34 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" +[[package]] +name = "codespan-reporting" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe6d2e5af09e8c8ad56c969f2157a3d4238cebc7c55f0a517728c38f7b200f81" +dependencies = [ + "serde", + "termcolor", + "unicode-width", +] + +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "constensor-core" version = "0.1.1" dependencies = [ "candle-core", "criterion", - "cudarc", + "cubecl", + "cudarc 0.16.1", "dirs", "gemm 0.18.2", "half", @@ -189,7 +288,34 @@ dependencies = [ "rand 0.9.1", "rand_distr 0.5.1", "rayon", - "thiserror", + "thiserror 1.0.61", +] + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "core-graphics-types" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45390e6114f68f718cc7a830514a96f903cccd70d02a8f6d9f643ac4ba45afaf" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "libc", ] [[package]] @@ -268,6 +394,273 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +[[package]] +name = "cubecl" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1e438056cf7c25b3adde38240b89842e1c924b8e914731c82ad81161d23e6ff" +dependencies = [ + "cubecl-core", + "cubecl-cuda", + "cubecl-hip", + "cubecl-linalg", + "cubecl-runtime", + "cubecl-std", + "cubecl-wgpu", + "half", +] + +[[package]] +name = "cubecl-common" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79251bfc7f067ac9038232fe38a317adc2f31cb2fc3800e69fd409ccac7abc1f" +dependencies = [ + "bytemuck", + "derive-new", + "derive_more", + "dirs", + "embassy-futures", + "futures-lite", + "half", + "hashbrown 0.14.5", + "log", + "num-traits", + "portable-atomic", + "rand 0.9.1", + "sanitize-filename", + "serde", + "serde_json", + "spin", +] + +[[package]] +name = "cubecl-core" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b03bf4211cdbd68bb0fb8291e0ed825c13da0d1ac01b7c02dce3cee44a6138be" +dependencies = [ + "bitflags 2.9.0", + "bytemuck", + "cubecl-common", + "cubecl-ir", + "cubecl-macros", + "cubecl-runtime", + "derive-new", + "derive_more", + "half", + "hashbrown 0.14.5", + "log", + "num-traits", + "paste", + "serde", + "serde_json", + "variadics_please", +] + +[[package]] +name = "cubecl-cpp" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5eef85cbcc34be7e25fc9d39edf99ed68559862dbf25c1877ebdf4a9595d31b" +dependencies = [ + "bytemuck", + "cubecl-common", + "cubecl-core", + "cubecl-runtime", + "derive-new", + "half", + "log", +] + +[[package]] +name = "cubecl-cuda" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71e091e4e3a3900faff440aec4053805ec4456f94f4acc4afe8e6b27519c6d16" +dependencies = [ + "bytemuck", + "cubecl-common", + "cubecl-core", + "cubecl-cpp", + "cubecl-runtime", + "cudarc 0.13.9", + "derive-new", + "half", + "log", + "serde", +] + +[[package]] +name = "cubecl-hip" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2f8c00207517de61cccdc4ca2724bc1db9dab94840beaf4329e43cead3bc4a" +dependencies = [ + "bytemuck", + "cubecl-common", + "cubecl-core", + "cubecl-cpp", + "cubecl-hip-sys", + "cubecl-runtime", + "derive-new", + "half", + "log", + "paste", + "serde", +] + +[[package]] +name = "cubecl-hip-sys" +version = "6.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7557762176858fa0357504025f09ae6e979c3547776ff8b6a1025ef0702450" +dependencies = [ + "libc", +] + +[[package]] +name = "cubecl-ir" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e096d77646590f0180ed4ce1aa7df4ecc7219f3c4616e9fe72d93ab63a352855" +dependencies = [ + "cubecl-common", + "cubecl-macros-internal", + "derive_more", + "float-ord", + "fnv", + "half", + "hashbrown 0.14.5", + "num-traits", + "portable-atomic", + "serde", + "variadics_please", +] + +[[package]] +name = "cubecl-linalg" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75aacf86f6004c274e63589aed55c5edcbcdf1b292eaf4ce2c1688c04c41a194" +dependencies = [ + "bytemuck", + "cubecl-common", + "cubecl-core", + "cubecl-reduce", + "cubecl-runtime", + "cubecl-std", + "half", + "serde", +] + +[[package]] +name = "cubecl-macros" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd74622b5c8cb161e3f7fa0b2b751784ef89ab45acfa355f511eb2219dde337e" +dependencies = [ + "cubecl-common", + "darling", + "derive-new", + "ident_case", + "prettyplease", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "cubecl-macros-internal" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a89898212c1eaba0e2f0dffcadc9790b20b75d2ec8836da084370b043be2623" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "cubecl-reduce" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7afbdfe03e7e3ca71f61890ebebc6b4390494204b545e6f6bf51a43755449073" +dependencies = [ + "cubecl-core", + "cubecl-runtime", + "cubecl-std", + "num-traits", + "serde", +] + +[[package]] +name = "cubecl-runtime" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "385234520c9e392382737f32ad372b05f345656eb798ba00b72d2722c68b698c" +dependencies = [ + "async-channel", + "bytemuck", + "cfg-if", + "cfg_aliases", + "cubecl-common", + "cubecl-ir", + "derive-new", + "hashbrown 0.14.5", + "log", + "md5", + "serde", + "serde_json", + "spin", + "variadics_please", + "wasm-bindgen-futures", +] + +[[package]] +name = "cubecl-std" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38868eea6fdc183feb3c46bcf5e666c78e6cf0ddca2c4f3a877785cc0eabd71e" +dependencies = [ + "cubecl-core", + "cubecl-runtime", + "half", + "serde", +] + +[[package]] +name = "cubecl-wgpu" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77fa2dcfaa6d75cfbc5ff05cafe99ec4a7fb7c0fa7197917e0fd20f5b90979fe" +dependencies = [ + "async-channel", + "bytemuck", + "cfg-if", + "cfg_aliases", + "cubecl-common", + "cubecl-core", + "cubecl-cpp", + "cubecl-runtime", + "derive-new", + "derive_more", + "hashbrown 0.14.5", + "log", + "web-time", + "wgpu", +] + +[[package]] +name = "cudarc" +version = "0.13.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "486c221362668c63a1636cfa51463b09574433b39029326cff40864b3ba12b6e" +dependencies = [ + "libloading", +] + [[package]] name = "cudarc" version = "0.16.1" @@ -278,6 +671,52 @@ dependencies = [ "libloading", ] +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn", +] + +[[package]] +name = "derive-new" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d150dea618e920167e5973d70ae6ece4385b7164e0d799fe7c122dd0a5d912ad" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "derive_arbitrary" version = "1.4.1" @@ -289,6 +728,27 @@ dependencies = [ "syn", ] +[[package]] +name = "derive_more" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "unicode-xid", +] + [[package]] name = "dirs" version = "5.0.1" @@ -321,6 +781,15 @@ dependencies = [ "syn", ] +[[package]] +name = "document-features" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95249b50c6c185bee49034bcb378a49dc2b5dff0be90ff6616d31d64febab05d" +dependencies = [ + "litrs", +] + [[package]] name = "dyn-stack" version = "0.10.0" @@ -346,6 +815,12 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" +[[package]] +name = "embassy-futures" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f878075b9794c1e4ac788c95b728f26aa6366d32eeb10c7051389f898f7d067" + [[package]] name = "enum-as-inner" version = "0.6.1" @@ -364,18 +839,109 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "event-listener" +version = "5.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3492acde4c3fc54c845eaab3eed8bd00c7a7d881f78bfc801e43a93dec1331ae" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "event-listener-strategy" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" +dependencies = [ + "event-listener", + "pin-project-lite", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + [[package]] name = "fixedbitset" version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" +[[package]] +name = "float-ord" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce81f49ae8a0482e4c55ea62ebbd7e5a686af544c00b9d090bba3ff9be97b3d" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "foldhash" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foreign-types" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" +dependencies = [ + "foreign-types-macros", + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-macros" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "foreign-types-shared" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b" + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-lite" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5edaec856126859abb19ed65f39e90fea3a9574b9707f13539acf4abf7eb532" +dependencies = [ + "fastrand", + "futures-core", + "futures-io", + "parking", + "pin-project-lite", +] + [[package]] name = "gemm" version = "0.17.1" @@ -636,18 +1202,113 @@ dependencies = [ "wasi 0.14.2+wasi-0.2.4", ] +[[package]] +name = "gl_generator" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a95dfc23a2b4a9a2f5ab41d194f8bfda3cabec42af4e39f08c339eb2a0c124d" +dependencies = [ + "khronos_api", + "log", + "xml-rs", +] + +[[package]] +name = "glow" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5e5ea60d70410161c8bf5da3fdfeaa1c72ed2c15f8bbb9d19fe3a4fad085f08" +dependencies = [ + "js-sys", + "slotmap", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "glutin_wgl_sys" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c4ee00b289aba7a9e5306d57c2d05499b2e5dc427f84ac708bd2c090212cf3e" +dependencies = [ + "gl_generator", +] + +[[package]] +name = "gpu-alloc" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbcd2dba93594b227a1f57ee09b8b9da8892c34d55aa332e034a228d0fe6a171" +dependencies = [ + "bitflags 2.9.0", + "gpu-alloc-types", +] + +[[package]] +name = "gpu-alloc-types" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98ff03b468aa837d70984d55f5d3f846f6ec31fe34bbb97c4f85219caeee1ca4" +dependencies = [ + "bitflags 2.9.0", +] + +[[package]] +name = "gpu-allocator" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c151a2a5ef800297b4e79efa4f4bec035c5f51d5ae587287c9b952bdf734cacd" +dependencies = [ + "log", + "presser", + "thiserror 1.0.61", + "windows", +] + +[[package]] +name = "gpu-descriptor" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcf29e94d6d243368b7a56caa16bc213e4f9f8ed38c4d9557069527b5d5281ca" +dependencies = [ + "bitflags 2.9.0", + "gpu-descriptor-types", + "hashbrown 0.15.2", +] + +[[package]] +name = "gpu-descriptor-types" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdf242682df893b86f33a73828fb09ca4b2d3bb6cc95249707fc684d27484b91" +dependencies = [ + "bitflags 2.9.0", +] + [[package]] name = "half" -version = "2.4.1" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" dependencies = [ "bytemuck", "cfg-if", "crunchy", "num-traits", - "rand 0.8.5", - "rand_distr 0.4.3", + "rand 0.9.1", + "rand_distr 0.5.1", + "serde", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", + "serde", ] [[package]] @@ -679,6 +1340,18 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbd780fe5cc30f81464441920d82ac8740e2e46b29a6fad543ddd075229ce37e" +[[package]] +name = "hexf-parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfa686283ad6dd069f105e5ab091b04c62850d3e4cf5d67debad1933f55023df" + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "indexmap" version = "2.9.0" @@ -686,7 +1359,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.15.2", ] [[package]] @@ -715,6 +1388,12 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jni-sys" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" + [[package]] name = "js-sys" version = "0.3.77" @@ -725,11 +1404,34 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "khronos-egl" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6aae1df220ece3c0ada96b8153459b67eebe9ae9212258bb0134ae60416fdf76" +dependencies = [ + "libc", + "libloading", + "pkg-config", +] + +[[package]] +name = "khronos_api" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2db585e1d738fc771bf08a151420d3ed193d9d895a36df7f6f8a9456b911ddc" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" -version = "0.2.155" +version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "libloading" @@ -753,16 +1455,47 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.9.0", "libc", ] +[[package]] +name = "litrs" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ce301924b7887e9d637144fdade93f9dfff9b60981d4ac161db09720d39aa5" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +[[package]] +name = "malloc_buf" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb" +dependencies = [ + "libc", +] + +[[package]] +name = "md5" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" + [[package]] name = "memchr" version = "2.7.4" @@ -779,6 +1512,55 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "metal" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f569fb946490b5743ad69813cb19629130ce9374034abe31614a36402d18f99e" +dependencies = [ + "bitflags 2.9.0", + "block", + "core-graphics-types", + "foreign-types", + "log", + "objc", + "paste", +] + +[[package]] +name = "naga" +version = "25.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b977c445f26e49757f9aca3631c3b8b836942cb278d69a92e7b80d3b24da632" +dependencies = [ + "arrayvec", + "bit-set", + "bitflags 2.9.0", + "cfg_aliases", + "codespan-reporting", + "half", + "hashbrown 0.15.2", + "hexf-parse", + "indexmap", + "log", + "num-traits", + "once_cell", + "rustc-hash", + "spirv", + "strum", + "thiserror 2.0.12", + "unicode-ident", +] + +[[package]] +name = "ndk-sys" +version = "0.5.0+25.2.9519653" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c196769dd60fd4f363e11d948139556a344e79d451aeb2fa2fd040738ef7691" +dependencies = [ + "jni-sys", +] + [[package]] name = "num" version = "0.4.3" @@ -885,6 +1667,15 @@ dependencies = [ "syn", ] +[[package]] +name = "objc" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1" +dependencies = [ + "malloc_buf", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -903,6 +1694,44 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "ordered-float" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" +dependencies = [ + "num-traits", +] + +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + [[package]] name = "paste" version = "1.0.15" @@ -916,7 +1745,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a98c6720655620a521dcc722d0ad66cd8afd5d86e34a89ef691c50b7b24de06" dependencies = [ "fixedbitset", - "hashbrown", + "hashbrown 0.15.2", "indexmap", "serde", ] @@ -927,6 +1756,12 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + [[package]] name = "plotters" version = "0.3.7" @@ -955,13 +1790,38 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "portable-atomic" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" +dependencies = [ + "serde", +] + [[package]] name = "ppv-lite86" version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ - "zerocopy", + "zerocopy 0.8.24", +] + +[[package]] +name = "presser" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8cf8e6a8aa66ce33f63993ffc4ea4271eb5b0530a9002db8455ea6050c77bfa" + +[[package]] +name = "prettyplease" +version = "0.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "664ec5419c51e34154eec046ebcba56312d5a2fc3b09a06da188e1ad21afadf6" +dependencies = [ + "proc-macro2", + "syn", ] [[package]] @@ -975,13 +1835,19 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.85" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] +[[package]] +name = "profiling" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afbdc74edc00b6f6a218ca6a5364d6226a259d4b8ea1af4a0ea063f27e179f4d" + [[package]] name = "pulp" version = "0.18.22" @@ -1102,6 +1968,12 @@ dependencies = [ "rand 0.9.1", ] +[[package]] +name = "range-alloc" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d6831663a5098ea164f89cff59c6284e95f4e3c76ce9848d4529f5ccca9bde" + [[package]] name = "raw-cpuid" version = "10.7.0" @@ -1117,9 +1989,15 @@ version = "11.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c6df7ab838ed27997ba19a4664507e6f82b41fe6e20be42929332156e5e85146" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.9.0", ] +[[package]] +name = "raw-window-handle" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539" + [[package]] name = "rayon" version = "1.10.0" @@ -1146,6 +2024,15 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03251193000f4bd3b042892be858ee50e8b3719f2b08e5833ac4353724632430" +[[package]] +name = "redox_syscall" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2f103c6d277498fbceb16e84d317e2a400f160f46904d5f5410848c829511a3" +dependencies = [ + "bitflags 2.9.0", +] + [[package]] name = "redox_users" version = "0.4.5" @@ -1154,7 +2041,7 @@ checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" dependencies = [ "getrandom 0.2.15", "libredox", - "thiserror", + "thiserror 1.0.61", ] [[package]] @@ -1186,6 +2073,18 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "renderdoc-sys" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b30a45b0cd0bcca8037f3d0dc3421eaf95327a17cad11964fb8179b4fc4832" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustversion" version = "1.0.20" @@ -1217,6 +2116,22 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "sanitize-filename" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ed72fbaf78e6f2d41744923916966c4fbe3d7c74e3037a8ee482f1115572603" +dependencies = [ + "lazy_static", + "regex", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "seq-macro" version = "0.3.6" @@ -1255,17 +2170,85 @@ dependencies = [ "serde", ] +[[package]] +name = "slotmap" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbff4acf519f630b3a3ddcfaea6c06b42174d9a44bc70c620e9ed1649d58b82a" +dependencies = [ + "version_check", +] + +[[package]] +name = "smallvec" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", + "portable-atomic", +] + +[[package]] +name = "spirv" +version = "0.3.0+sdk-1.3.268.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eda41003dc44290527a59b13432d4a0379379fa074b70174882adfbdfd917844" +dependencies = [ + "bitflags 2.9.0", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + [[package]] name = "syn" -version = "2.0.66" +version = "2.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" dependencies = [ "proc-macro2", "quote", @@ -1289,11 +2272,11 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.9.0", "byteorder", "enum-as-inner", "libc", - "thiserror", + "thiserror 1.0.61", "walkdir", ] @@ -1303,21 +2286,39 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.9.0", "byteorder", "enum-as-inner", "libc", - "thiserror", + "thiserror 1.0.61", "walkdir", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "thiserror" version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.61", +] + +[[package]] +name = "thiserror" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" +dependencies = [ + "thiserror-impl 2.0.12", ] [[package]] @@ -1331,6 +2332,17 @@ dependencies = [ "syn", ] +[[package]] +name = "thiserror-impl" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tinytemplate" version = "1.2.1" @@ -1405,7 +2417,7 @@ dependencies = [ "rayon", "safetensors", "serde", - "thiserror", + "thiserror 1.0.61", "tracing", "yoke", ] @@ -1416,6 +2428,29 @@ version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +[[package]] +name = "unicode-width" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "variadics_please" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41b6d82be61465f97d42bd1d15bf20f3b0a3a0905018f38f9d6f6962055b0b5c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "version_check" version = "0.9.5" @@ -1473,6 +2508,19 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" +dependencies = [ + "cfg-if", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.100" @@ -1515,6 +2563,163 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "wgpu" +version = "25.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca6049eb2014a0e0d8689f9b787605dd71d5bbfdc74095ead499f3cff705c229" +dependencies = [ + "arrayvec", + "bitflags 2.9.0", + "cfg_aliases", + "document-features", + "hashbrown 0.15.2", + "js-sys", + "log", + "naga", + "parking_lot", + "portable-atomic", + "profiling", + "raw-window-handle", + "smallvec", + "static_assertions", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "wgpu-core", + "wgpu-hal", + "wgpu-types", +] + +[[package]] +name = "wgpu-core" +version = "25.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a19813e647da7aa3cdaa84f5846e2c64114970ea7c86b1e6aae8be08091f4bdc" +dependencies = [ + "arrayvec", + "bit-set", + "bit-vec", + "bitflags 2.9.0", + "cfg_aliases", + "document-features", + "hashbrown 0.15.2", + "indexmap", + "log", + "naga", + "once_cell", + "parking_lot", + "portable-atomic", + "profiling", + "raw-window-handle", + "rustc-hash", + "smallvec", + "thiserror 2.0.12", + "wgpu-core-deps-apple", + "wgpu-core-deps-emscripten", + "wgpu-core-deps-windows-linux-android", + "wgpu-hal", + "wgpu-types", +] + +[[package]] +name = "wgpu-core-deps-apple" +version = "25.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfd488b3239b6b7b185c3b045c39ca6bf8af34467a4c5de4e0b1a564135d093d" +dependencies = [ + "wgpu-hal", +] + +[[package]] +name = "wgpu-core-deps-emscripten" +version = "25.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f09ad7aceb3818e52539acc679f049d3475775586f3f4e311c30165cf2c00445" +dependencies = [ + "wgpu-hal", +] + +[[package]] +name = "wgpu-core-deps-windows-linux-android" +version = "25.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cba5fb5f7f9c98baa7c889d444f63ace25574833df56f5b817985f641af58e46" +dependencies = [ + "wgpu-hal", +] + +[[package]] +name = "wgpu-hal" +version = "25.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb7c4a1dc42ff14c23c9b11ebf1ee85cde661a9b1cf0392f79c1faca5bc559fb" +dependencies = [ + "android_system_properties", + "arrayvec", + "ash", + "bit-set", + "bitflags 2.9.0", + "block", + "bytemuck", + "cfg-if", + "cfg_aliases", + "core-graphics-types", + "glow", + "glutin_wgl_sys", + "gpu-alloc", + "gpu-allocator", + "gpu-descriptor", + "hashbrown 0.15.2", + "js-sys", + "khronos-egl", + "libc", + "libloading", + "log", + "metal", + "naga", + "ndk-sys", + "objc", + "ordered-float", + "parking_lot", + "portable-atomic", + "profiling", + "range-alloc", + "raw-window-handle", + "renderdoc-sys", + "smallvec", + "thiserror 2.0.12", + "wasm-bindgen", + "web-sys", + "wgpu-types", + "windows", + "windows-core", +] + +[[package]] +name = "wgpu-types" +version = "25.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2aa49460c2a8ee8edba3fca54325540d904dd85b2e086ada762767e17d06e8bc" +dependencies = [ + "bitflags 2.9.0", + "bytemuck", + "js-sys", + "log", + "thiserror 2.0.12", + "web-sys", +] + [[package]] name = "winapi-util" version = "0.1.9" @@ -1524,6 +2729,70 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "windows" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" +dependencies = [ + "windows-core", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-core" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-result", + "windows-strings", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-implement" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets 0.52.6", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -1678,9 +2947,15 @@ version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.9.0", ] +[[package]] +name = "xml-rs" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a62ce76d9b56901b19a74f19431b0d8b3bc7ca4ad685a746dfd78ca8f4fc6bda" + [[package]] name = "yoke" version = "0.7.5" @@ -1705,13 +2980,33 @@ dependencies = [ "synstructure", ] +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "zerocopy-derive 0.7.35", +] + [[package]] name = "zerocopy" version = "0.8.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879" dependencies = [ - "zerocopy-derive", + "zerocopy-derive 0.8.24", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -1758,5 +3053,5 @@ dependencies = [ "displaydoc", "indexmap", "num_enum", - "thiserror", + "thiserror 1.0.61", ] diff --git a/Cargo.toml b/Cargo.toml index c4078c6..dd3fc28 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,3 +24,4 @@ gemm = "0.18" num_cpus = "1.16.0" rand = "0.9.1" rand_distr = "0.5.1" +cubecl = "0.5.0" diff --git a/README.md b/README.md index 1df586c..faafcc0 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,11 @@ Experimental machine learning framework featuring a graph-based JIT compiler. - Multi-device support (CPU, optional CUDA) - Graph visualization (requires Graphviz) - Zero-cost abstractions with idiomatic Rust API +- Broad GPU support: + - CUDA (NVIDIA): `--features cuda` + - HIP (AMD): `--features hip` + - Metal (Apple): `--features metal` + - WGPU (recommended for [all others](https://github.com/gfx-rs/wgpu?tab=readme-ov-file#supported-platforms)): `--features wgpu` ## Installation diff --git a/constensor-core/Cargo.toml b/constensor-core/Cargo.toml index 122fa0d..1410e59 100644 --- a/constensor-core/Cargo.toml +++ b/constensor-core/Cargo.toml @@ -20,11 +20,14 @@ gemm.workspace = true num_cpus.workspace = true rand.workspace = true rand_distr.workspace = true +cubecl = { workspace = true, features = ["compilation-cache", "wgpu"] } [features] default = [] all = ["cuda", "half", "bfloat"] -cuda = ["cudarc"] +cuda = ["cudarc", "cubecl/cuda"] +hip = ["cubecl/hip"] +metal = ["cubecl/wgpu-msl"] half = ["dep:half"] bfloat = ["dep:half"] slow_integral_fma_cuda = [] From 66a46984855101b264e68173c54a408e311303d5 Mon Sep 17 00:00:00 2001 From: Eric Buehler Date: Sat, 26 Apr 2025 13:16:20 -0400 Subject: [PATCH 02/17] Sum --- constensor-core/src/cpu_storage/pool.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/constensor-core/src/cpu_storage/pool.rs b/constensor-core/src/cpu_storage/pool.rs index 84154b2..35ce628 100644 --- a/constensor-core/src/cpu_storage/pool.rs +++ b/constensor-core/src/cpu_storage/pool.rs @@ -128,7 +128,7 @@ impl BufferPool { self.pool .iter() .map(|b| b.capacity() * size_of::()) - .sum() + .sum::() ); buf @@ -161,7 +161,7 @@ impl BufferPool { self.pool .iter() .map(|b| b.capacity() * size_of::()) - .sum() + .sum::() ); self.trim_excess(); @@ -195,7 +195,7 @@ impl BufferPool { self.pool .iter() .map(|b| b.capacity() * size_of::()) - .sum() + .sum::() ); } From a85b9512479f8be70252409446a92aca4ca7bb2f Mon Sep 17 00:00:00 2001 From: Eric Buehler Date: Sat, 26 Apr 2025 13:17:41 -0400 Subject: [PATCH 03/17] Update defaults --- constensor-core/Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/constensor-core/Cargo.toml b/constensor-core/Cargo.toml index 1410e59..1950b5d 100644 --- a/constensor-core/Cargo.toml +++ b/constensor-core/Cargo.toml @@ -23,8 +23,7 @@ rand_distr.workspace = true cubecl = { workspace = true, features = ["compilation-cache", "wgpu"] } [features] -default = [] -all = ["cuda", "half", "bfloat"] +default = ["half", "bfloat"] cuda = ["cudarc", "cubecl/cuda"] hip = ["cubecl/hip"] metal = ["cubecl/wgpu-msl"] From 8d1bb4c031620661df1fd13127014df75a33c000 Mon Sep 17 00:00:00 2001 From: Eric Buehler Date: Sat, 26 Apr 2025 13:21:12 -0400 Subject: [PATCH 04/17] Backend refactoring --- .../src/{cpu_storage => backends/cpu_backend}/mod.rs | 0 .../src/{cpu_storage => backends/cpu_backend}/pool.rs | 0 constensor-core/src/{ => backends}/cuda_backend/error.rs | 0 constensor-core/src/{ => backends}/cuda_backend/mod.rs | 0 constensor-core/src/{ => backends}/cuda_backend/util.rs | 0 constensor-core/src/backends/mod.rs | 3 +++ constensor-core/src/device.rs | 2 +- constensor-core/src/lib.rs | 5 ++--- constensor-core/src/storage.rs | 2 +- 9 files changed, 7 insertions(+), 5 deletions(-) rename constensor-core/src/{cpu_storage => backends/cpu_backend}/mod.rs (100%) rename constensor-core/src/{cpu_storage => backends/cpu_backend}/pool.rs (100%) rename constensor-core/src/{ => backends}/cuda_backend/error.rs (100%) rename constensor-core/src/{ => backends}/cuda_backend/mod.rs (100%) rename constensor-core/src/{ => backends}/cuda_backend/util.rs (100%) create mode 100644 constensor-core/src/backends/mod.rs diff --git a/constensor-core/src/cpu_storage/mod.rs b/constensor-core/src/backends/cpu_backend/mod.rs similarity index 100% rename from constensor-core/src/cpu_storage/mod.rs rename to constensor-core/src/backends/cpu_backend/mod.rs diff --git a/constensor-core/src/cpu_storage/pool.rs b/constensor-core/src/backends/cpu_backend/pool.rs similarity index 100% rename from constensor-core/src/cpu_storage/pool.rs rename to constensor-core/src/backends/cpu_backend/pool.rs diff --git a/constensor-core/src/cuda_backend/error.rs b/constensor-core/src/backends/cuda_backend/error.rs similarity index 100% rename from constensor-core/src/cuda_backend/error.rs rename to constensor-core/src/backends/cuda_backend/error.rs diff --git a/constensor-core/src/cuda_backend/mod.rs b/constensor-core/src/backends/cuda_backend/mod.rs similarity index 100% rename from constensor-core/src/cuda_backend/mod.rs rename to constensor-core/src/backends/cuda_backend/mod.rs diff --git a/constensor-core/src/cuda_backend/util.rs b/constensor-core/src/backends/cuda_backend/util.rs similarity index 100% rename from constensor-core/src/cuda_backend/util.rs rename to constensor-core/src/backends/cuda_backend/util.rs diff --git a/constensor-core/src/backends/mod.rs b/constensor-core/src/backends/mod.rs new file mode 100644 index 0000000..c839b04 --- /dev/null +++ b/constensor-core/src/backends/mod.rs @@ -0,0 +1,3 @@ +pub mod cpu_backend; +#[cfg(feature = "cuda")] +pub mod cuda_backend; diff --git a/constensor-core/src/device.rs b/constensor-core/src/device.rs index fe03c45..c144d70 100644 --- a/constensor-core/src/device.rs +++ b/constensor-core/src/device.rs @@ -1,7 +1,7 @@ #[cfg(feature = "cuda")] use crate::cuda_backend::CudaDevice; use crate::{ - cpu_storage::CpuDevice, + cpu_backend::CpuDevice, storage::{BackendDevice, Storage}, CompiledGraph, DType, GraphNode, Result, Shape, }; diff --git a/constensor-core/src/lib.rs b/constensor-core/src/lib.rs index 0733715..109b404 100644 --- a/constensor-core/src/lib.rs +++ b/constensor-core/src/lib.rs @@ -43,9 +43,8 @@ //! assert_eq!(tensor.data().unwrap().to_vec(), vec![vec![9.0; 4]; 3],); //! ``` -mod cpu_storage; -#[cfg(feature = "cuda")] -mod cuda_backend; +mod backends; +use backends::*; mod device; mod dtype; mod error; diff --git a/constensor-core/src/storage.rs b/constensor-core/src/storage.rs index d274f07..612715e 100644 --- a/constensor-core/src/storage.rs +++ b/constensor-core/src/storage.rs @@ -2,7 +2,7 @@ use std::borrow::Cow; #[cfg(feature = "cuda")] use crate::cuda_backend::CudaStorage; -use crate::{cpu_storage::CpuStorage, device::Dev, CompiledGraph, DType, GraphNode, Result, Shape}; +use crate::{cpu_backend::CpuStorage, device::Dev, CompiledGraph, DType, GraphNode, Result, Shape}; pub enum Storage { #[cfg(feature = "cuda")] From 59b81c5a752ae75d6e0b329448cbd29b7f6f4306 Mon Sep 17 00:00:00 2001 From: Eric Buehler Date: Sat, 26 Apr 2025 15:51:41 -0400 Subject: [PATCH 05/17] Initial kernel launch --- Cargo.lock | 602 ++---------------- constensor-core/Cargo.toml | 5 - constensor-core/benches/cpu_graph.rs | 68 +- constensor-core/examples/test/main.rs | 17 + constensor-core/src/backends/mod.rs | 1 + .../src/backends/wgpu_backend/mod.rs | 130 ++++ constensor-core/src/device.rs | 12 + constensor-core/src/graph.rs | 3 +- constensor-core/src/lib.rs | 2 +- constensor-core/src/storage.rs | 8 +- constensor-core/src/tensor/concretetensor.rs | 3 +- graph.png | Bin 4348 -> 15130 bytes 12 files changed, 244 insertions(+), 607 deletions(-) create mode 100644 constensor-core/examples/test/main.rs create mode 100644 constensor-core/src/backends/wgpu_backend/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 58d2471..a3ce9a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -50,15 +50,6 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" -[[package]] -name = "arbitrary" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" -dependencies = [ - "derive_arbitrary", -] - [[package]] name = "arrayvec" version = "0.7.6" @@ -160,28 +151,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" -[[package]] -name = "candle-core" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef79e00300acaaa6b0b33168e2ed5050f06125d1add87e7103c40a119083581" -dependencies = [ - "byteorder", - "gemm 0.17.1", - "half", - "memmap2", - "num-traits", - "num_cpus", - "rand 0.8.5", - "rand_distr 0.4.3", - "rayon", - "safetensors", - "thiserror 1.0.61", - "ug", - "yoke", - "zip", -] - [[package]] name = "cast" version = "0.3.0" @@ -276,17 +245,16 @@ dependencies = [ name = "constensor-core" version = "0.1.1" dependencies = [ - "candle-core", "criterion", "cubecl", "cudarc 0.16.1", "dirs", - "gemm 0.18.2", + "gemm", "half", "num_cpus", "petgraph", - "rand 0.9.1", - "rand_distr 0.5.1", + "rand", + "rand_distr", "rayon", "thiserror 1.0.61", ] @@ -318,15 +286,6 @@ dependencies = [ "libc", ] -[[package]] -name = "crc32fast" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" -dependencies = [ - "cfg-if", -] - [[package]] name = "criterion" version = "0.5.1" @@ -427,7 +386,7 @@ dependencies = [ "log", "num-traits", "portable-atomic", - "rand 0.9.1", + "rand", "sanitize-filename", "serde", "serde_json", @@ -717,17 +676,6 @@ dependencies = [ "syn", ] -[[package]] -name = "derive_arbitrary" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "derive_more" version = "1.0.0" @@ -770,17 +718,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "displaydoc" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "document-features" version = "0.2.11" @@ -790,16 +727,6 @@ dependencies = [ "litrs", ] -[[package]] -name = "dyn-stack" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e53799688f5632f364f8fb387488dd05db9fe45db7011be066fc20e7027f8b" -dependencies = [ - "bytemuck", - "reborrow", -] - [[package]] name = "dyn-stack" version = "0.13.0" @@ -942,58 +869,23 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "gemm" -version = "0.17.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ab24cc62135b40090e31a76a9b2766a501979f3070fa27f689c27ec04377d32" -dependencies = [ - "dyn-stack 0.10.0", - "gemm-c32 0.17.1", - "gemm-c64 0.17.1", - "gemm-common 0.17.1", - "gemm-f16 0.17.1", - "gemm-f32 0.17.1", - "gemm-f64 0.17.1", - "num-complex", - "num-traits", - "paste", - "raw-cpuid 10.7.0", - "seq-macro", -] - [[package]] name = "gemm" version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab96b703d31950f1aeddded248bc95543c9efc7ac9c4a21fda8703a83ee35451" dependencies = [ - "dyn-stack 0.13.0", - "gemm-c32 0.18.2", - "gemm-c64 0.18.2", - "gemm-common 0.18.2", - "gemm-f16 0.18.2", - "gemm-f32 0.18.2", - "gemm-f64 0.18.2", + "dyn-stack", + "gemm-c32", + "gemm-c64", + "gemm-common", + "gemm-f16", + "gemm-f32", + "gemm-f64", "num-complex", "num-traits", "paste", - "raw-cpuid 11.5.0", - "seq-macro", -] - -[[package]] -name = "gemm-c32" -version = "0.17.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9c030d0b983d1e34a546b86e08f600c11696fde16199f971cd46c12e67512c0" -dependencies = [ - "dyn-stack 0.10.0", - "gemm-common 0.17.1", - "num-complex", - "num-traits", - "paste", - "raw-cpuid 10.7.0", + "raw-cpuid", "seq-macro", ] @@ -1003,27 +895,12 @@ version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6db9fd9f40421d00eea9dd0770045a5603b8d684654816637732463f4073847" dependencies = [ - "dyn-stack 0.13.0", - "gemm-common 0.18.2", + "dyn-stack", + "gemm-common", "num-complex", "num-traits", "paste", - "raw-cpuid 11.5.0", - "seq-macro", -] - -[[package]] -name = "gemm-c64" -version = "0.17.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbb5f2e79fefb9693d18e1066a557b4546cd334b226beadc68b11a8f9431852a" -dependencies = [ - "dyn-stack 0.10.0", - "gemm-common 0.17.1", - "num-complex", - "num-traits", - "paste", - "raw-cpuid 10.7.0", + "raw-cpuid", "seq-macro", ] @@ -1033,33 +910,13 @@ version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfcad8a3d35a43758330b635d02edad980c1e143dc2f21e6fd25f9e4eada8edf" dependencies = [ - "dyn-stack 0.13.0", - "gemm-common 0.18.2", - "num-complex", - "num-traits", - "paste", - "raw-cpuid 11.5.0", - "seq-macro", -] - -[[package]] -name = "gemm-common" -version = "0.17.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2e7ea062c987abcd8db95db917b4ffb4ecdfd0668471d8dc54734fdff2354e8" -dependencies = [ - "bytemuck", - "dyn-stack 0.10.0", - "half", + "dyn-stack", + "gemm-common", "num-complex", "num-traits", - "once_cell", "paste", - "pulp 0.18.22", - "raw-cpuid 10.7.0", - "rayon", + "raw-cpuid", "seq-macro", - "sysctl 0.5.5", ] [[package]] @@ -1069,36 +926,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a352d4a69cbe938b9e2a9cb7a3a63b7e72f9349174a2752a558a8a563510d0f3" dependencies = [ "bytemuck", - "dyn-stack 0.13.0", + "dyn-stack", "half", "libm", "num-complex", "num-traits", "once_cell", "paste", - "pulp 0.21.4", - "raw-cpuid 11.5.0", - "rayon", - "seq-macro", - "sysctl 0.6.0", -] - -[[package]] -name = "gemm-f16" -version = "0.17.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ca4c06b9b11952071d317604acb332e924e817bd891bec8dfb494168c7cedd4" -dependencies = [ - "dyn-stack 0.10.0", - "gemm-common 0.17.1", - "gemm-f32 0.17.1", - "half", - "num-complex", - "num-traits", - "paste", - "raw-cpuid 10.7.0", + "pulp", + "raw-cpuid", "rayon", "seq-macro", + "sysctl", ] [[package]] @@ -1107,60 +946,30 @@ version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cff95ae3259432f3c3410eaa919033cd03791d81cebd18018393dc147952e109" dependencies = [ - "dyn-stack 0.13.0", - "gemm-common 0.18.2", - "gemm-f32 0.18.2", + "dyn-stack", + "gemm-common", + "gemm-f32", "half", "num-complex", "num-traits", "paste", - "raw-cpuid 11.5.0", + "raw-cpuid", "rayon", "seq-macro", ] -[[package]] -name = "gemm-f32" -version = "0.17.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9a69f51aaefbd9cf12d18faf273d3e982d9d711f60775645ed5c8047b4ae113" -dependencies = [ - "dyn-stack 0.10.0", - "gemm-common 0.17.1", - "num-complex", - "num-traits", - "paste", - "raw-cpuid 10.7.0", - "seq-macro", -] - [[package]] name = "gemm-f32" version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc8d3d4385393304f407392f754cd2dc4b315d05063f62cf09f47b58de276864" dependencies = [ - "dyn-stack 0.13.0", - "gemm-common 0.18.2", - "num-complex", - "num-traits", - "paste", - "raw-cpuid 11.5.0", - "seq-macro", -] - -[[package]] -name = "gemm-f64" -version = "0.17.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa397a48544fadf0b81ec8741e5c0fba0043008113f71f2034def1935645d2b0" -dependencies = [ - "dyn-stack 0.10.0", - "gemm-common 0.17.1", + "dyn-stack", + "gemm-common", "num-complex", "num-traits", "paste", - "raw-cpuid 10.7.0", + "raw-cpuid", "seq-macro", ] @@ -1170,12 +979,12 @@ version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35b2a4f76ce4b8b16eadc11ccf2e083252d8237c1b589558a49b0183545015bd" dependencies = [ - "dyn-stack 0.13.0", - "gemm-common 0.18.2", + "dyn-stack", + "gemm-common", "num-complex", "num-traits", "paste", - "raw-cpuid 11.5.0", + "raw-cpuid", "seq-macro", ] @@ -1295,8 +1104,8 @@ dependencies = [ "cfg-if", "crunchy", "num-traits", - "rand 0.9.1", - "rand_distr 0.5.1", + "rand", + "rand_distr", "serde", ] @@ -1502,16 +1311,6 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" -[[package]] -name = "memmap2" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" -dependencies = [ - "libc", - "stable_deref_trait", -] - [[package]] name = "metal" version = "0.31.0" @@ -1561,30 +1360,6 @@ dependencies = [ "jni-sys", ] -[[package]] -name = "num" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - -[[package]] -name = "num-bigint" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" -dependencies = [ - "num-integer", - "num-traits", -] - [[package]] name = "num-complex" version = "0.4.6" @@ -1595,37 +1370,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-integer" -version = "0.1.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.19" @@ -1646,27 +1390,6 @@ dependencies = [ "libc", ] -[[package]] -name = "num_enum" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e613fc340b2220f734a8595782c551f1250e969d87d3be1ae0579e8d4065179" -dependencies = [ - "num_enum_derive", -] - -[[package]] -name = "num_enum_derive" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56" -dependencies = [ - "proc-macro-crate", - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "objc" version = "0.2.7" @@ -1824,15 +1547,6 @@ dependencies = [ "syn", ] -[[package]] -name = "proc-macro-crate" -version = "3.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35" -dependencies = [ - "toml_edit", -] - [[package]] name = "proc-macro2" version = "1.0.95" @@ -1848,18 +1562,6 @@ version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "afbdc74edc00b6f6a218ca6a5364d6226a259d4b8ea1af4a0ea063f27e179f4d" -[[package]] -name = "pulp" -version = "0.18.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0a01a0dc67cf4558d279f0c25b0962bd08fc6dec0137699eae304103e882fe6" -dependencies = [ - "bytemuck", - "libm", - "num-complex", - "reborrow", -] - [[package]] name = "pulp" version = "0.21.4" @@ -1889,35 +1591,14 @@ version = "5.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.4", -] - [[package]] name = "rand" version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" dependencies = [ - "rand_chacha 0.9.0", - "rand_core 0.9.3", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core 0.6.4", + "rand_chacha", + "rand_core", ] [[package]] @@ -1927,16 +1608,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core 0.9.3", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom 0.2.15", + "rand_core", ] [[package]] @@ -1948,16 +1620,6 @@ dependencies = [ "getrandom 0.3.2", ] -[[package]] -name = "rand_distr" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" -dependencies = [ - "num-traits", - "rand 0.8.5", -] - [[package]] name = "rand_distr" version = "0.5.1" @@ -1965,7 +1627,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" dependencies = [ "num-traits", - "rand 0.9.1", + "rand", ] [[package]] @@ -1974,15 +1636,6 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3d6831663a5098ea164f89cff59c6284e95f4e3c76ce9848d4529f5ccca9bde" -[[package]] -name = "raw-cpuid" -version = "10.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" -dependencies = [ - "bitflags 1.3.2", -] - [[package]] name = "raw-cpuid" version = "11.5.0" @@ -2097,16 +1750,6 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" -[[package]] -name = "safetensors" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44560c11236a6130a46ce36c836a62936dc81ebf8c36a37947423571be0e55b6" -dependencies = [ - "serde", - "serde_json", -] - [[package]] name = "same-file" version = "1.0.6" @@ -2204,12 +1847,6 @@ dependencies = [ "bitflags 2.9.0", ] -[[package]] -name = "stable_deref_trait" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" - [[package]] name = "static_assertions" version = "1.1.0" @@ -2255,31 +1892,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "synstructure" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "sysctl" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea" -dependencies = [ - "bitflags 2.9.0", - "byteorder", - "enum-as-inner", - "libc", - "thiserror 1.0.61", - "walkdir", -] - [[package]] name = "sysctl" version = "0.6.0" @@ -2353,75 +1965,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "toml_datetime" -version = "0.6.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3" - -[[package]] -name = "toml_edit" -version = "0.22.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10558ed0bd2a1562e630926a2d1f0b98c827da99fabd3fe20920a59642504485" -dependencies = [ - "indexmap", - "toml_datetime", - "winnow", -] - -[[package]] -name = "tracing" -version = "0.1.41" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" -dependencies = [ - "pin-project-lite", - "tracing-attributes", - "tracing-core", -] - -[[package]] -name = "tracing-attributes" -version = "0.1.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "tracing-core" -version = "0.1.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" -dependencies = [ - "once_cell", -] - -[[package]] -name = "ug" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03719c61a91b51541f076dfdba45caacf750b230cefaa4b32d6f5411c3f7f437" -dependencies = [ - "gemm 0.18.2", - "half", - "libloading", - "memmap2", - "num", - "num-traits", - "num_cpus", - "rayon", - "safetensors", - "serde", - "thiserror 1.0.61", - "tracing", - "yoke", -] - [[package]] name = "unicode-ident" version = "1.0.12" @@ -2932,15 +2475,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" -[[package]] -name = "winnow" -version = "0.7.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6cb8234a863ea0e8cd7284fcdd4f145233eb00fee02bbdd9861aec44e6477bc5" -dependencies = [ - "memchr", -] - [[package]] name = "wit-bindgen-rt" version = "0.39.0" @@ -2956,30 +2490,6 @@ version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a62ce76d9b56901b19a74f19431b0d8b3bc7ca4ad685a746dfd78ca8f4fc6bda" -[[package]] -name = "yoke" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" -dependencies = [ - "serde", - "stable_deref_trait", - "yoke-derive", - "zerofrom", -] - -[[package]] -name = "yoke-derive" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "synstructure", -] - [[package]] name = "zerocopy" version = "0.7.35" @@ -3019,39 +2529,3 @@ dependencies = [ "quote", "syn", ] - -[[package]] -name = "zerofrom" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" -dependencies = [ - "zerofrom-derive", -] - -[[package]] -name = "zerofrom-derive" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "synstructure", -] - -[[package]] -name = "zip" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cc23c04387f4da0374be4533ad1208cbb091d5c11d070dfef13676ad6497164" -dependencies = [ - "arbitrary", - "crc32fast", - "crossbeam-utils", - "displaydoc", - "indexmap", - "num_enum", - "thiserror 1.0.61", -] diff --git a/constensor-core/Cargo.toml b/constensor-core/Cargo.toml index 1950b5d..0a33b91 100644 --- a/constensor-core/Cargo.toml +++ b/constensor-core/Cargo.toml @@ -31,13 +31,8 @@ half = ["dep:half"] bfloat = ["dep:half"] slow_integral_fma_cuda = [] -[[example]] -name = "hello_world" -required-features = [] - [dev-dependencies] criterion = "0.5" -candle-core = "0.8" [[bench]] name = "cpu_graph" diff --git a/constensor-core/benches/cpu_graph.rs b/constensor-core/benches/cpu_graph.rs index 4e62613..129b361 100644 --- a/constensor-core/benches/cpu_graph.rs +++ b/constensor-core/benches/cpu_graph.rs @@ -1,4 +1,4 @@ -use candle_core::{Device, Tensor}; +// use candle_core::{Device, Tensor}; use constensor_core::{Cpu, Graph, GraphTensor, R3}; use criterion::{criterion_group, criterion_main, Criterion}; @@ -44,46 +44,46 @@ fn bench_cpu_graph_matmul_256(c: &mut Criterion) { }); } -fn bench_candle_matmul_64(c: &mut Criterion) { - const N: usize = 64; - let a = Tensor::rand(0f32, 1f32, &[1, N, N], &Device::Cpu).unwrap(); - let b = Tensor::rand(0f32, 1f32, &[1, N, N], &Device::Cpu).unwrap(); - c.bench_function("candle_matmul_64x64", |bencher| { - bencher.iter(|| { - let _ = a.matmul(&b).unwrap(); - }); - }); -} +// fn bench_candle_matmul_64(c: &mut Criterion) { +// const N: usize = 64; +// let a = Tensor::rand(0f32, 1f32, &[1, N, N], &Device::Cpu).unwrap(); +// let b = Tensor::rand(0f32, 1f32, &[1, N, N], &Device::Cpu).unwrap(); +// c.bench_function("candle_matmul_64x64", |bencher| { +// bencher.iter(|| { +// let _ = a.matmul(&b).unwrap(); +// }); +// }); +// } -fn bench_candle_matmul_128(c: &mut Criterion) { - const N: usize = 128; - let a = Tensor::rand(0f32, 1f32, &[1, N, N], &Device::Cpu).unwrap(); - let b = Tensor::rand(0f32, 1f32, &[1, N, N], &Device::Cpu).unwrap(); - c.bench_function("candle_matmul_128x128", |bencher| { - bencher.iter(|| { - let _ = a.matmul(&b).unwrap(); - }); - }); -} +// fn bench_candle_matmul_128(c: &mut Criterion) { +// const N: usize = 128; +// let a = Tensor::rand(0f32, 1f32, &[1, N, N], &Device::Cpu).unwrap(); +// let b = Tensor::rand(0f32, 1f32, &[1, N, N], &Device::Cpu).unwrap(); +// c.bench_function("candle_matmul_128x128", |bencher| { +// bencher.iter(|| { +// let _ = a.matmul(&b).unwrap(); +// }); +// }); +// } -fn bench_candle_matmul_256(c: &mut Criterion) { - const N: usize = 256; - let a = Tensor::rand(0f32, 1f32, &[1, N, N], &Device::Cpu).unwrap(); - let b = Tensor::rand(0f32, 1f32, &[1, N, N], &Device::Cpu).unwrap(); - c.bench_function("candle_matmul_256x256", |bencher| { - bencher.iter(|| { - let _ = a.matmul(&b).unwrap(); - }); - }); -} +// fn bench_candle_matmul_256(c: &mut Criterion) { +// const N: usize = 256; +// let a = Tensor::rand(0f32, 1f32, &[1, N, N], &Device::Cpu).unwrap(); +// let b = Tensor::rand(0f32, 1f32, &[1, N, N], &Device::Cpu).unwrap(); +// c.bench_function("candle_matmul_256x256", |bencher| { +// bencher.iter(|| { +// let _ = a.matmul(&b).unwrap(); +// }); +// }); +// } criterion_group!( benches, bench_cpu_graph_matmul_64, bench_cpu_graph_matmul_128, bench_cpu_graph_matmul_256, - bench_candle_matmul_64, - bench_candle_matmul_128, - bench_candle_matmul_256 + // bench_candle_matmul_64, + // bench_candle_matmul_128, + // bench_candle_matmul_256 ); criterion_main!(benches); diff --git a/constensor-core/examples/test/main.rs b/constensor-core/examples/test/main.rs new file mode 100644 index 0000000..6b57b8f --- /dev/null +++ b/constensor-core/examples/test/main.rs @@ -0,0 +1,17 @@ +use constensor_core::{Graph, GraphTensor, Tensor, Wgpu, R2}; + +fn main() { + let mut graph: Graph = Graph::empty(); + let a = GraphTensor::, f32, Wgpu>::fill(&mut graph, 1.0); + let b = GraphTensor::, f32, Wgpu>::fill(&mut graph, 2.0); + let res = a + b; + + graph.visualize("graph.png").unwrap(); + + let compiled: constensor_core::CompiledGraph, f32, Wgpu> = graph.compile().unwrap(); + let res = compiled.run().unwrap(); + + let tensor: Tensor, f32, Wgpu> = res; + + assert_eq!(tensor.data().unwrap().to_vec(), vec![vec![9.0; 4]; 3],); +} diff --git a/constensor-core/src/backends/mod.rs b/constensor-core/src/backends/mod.rs index c839b04..26672b2 100644 --- a/constensor-core/src/backends/mod.rs +++ b/constensor-core/src/backends/mod.rs @@ -1,3 +1,4 @@ pub mod cpu_backend; #[cfg(feature = "cuda")] pub mod cuda_backend; +pub mod wgpu_backend; diff --git a/constensor-core/src/backends/wgpu_backend/mod.rs b/constensor-core/src/backends/wgpu_backend/mod.rs new file mode 100644 index 0000000..98af1d1 --- /dev/null +++ b/constensor-core/src/backends/wgpu_backend/mod.rs @@ -0,0 +1,130 @@ +use std::{borrow::Cow, marker::PhantomData}; + +use cubecl::{cube, prelude::*, wgpu::WgpuRuntime}; + +use crate::{ + device::Dev, + graph::BinaryOpType, + storage::{BackendDevice, BackendStorage, Storage}, + CompiledGraph, DType, GraphNode, Result, Shape, +}; + +use super::cpu_backend::CpuStorage; + +type RT = WgpuRuntime; + +pub struct WgpuDevice; + +pub struct WgpuStorage { + ghost: PhantomData, +} + +impl BackendStorage for WgpuStorage { + fn to_cpu_storage(&self) -> Result>> { + todo!() + } + + fn cast(&self) -> Result> { + todo!() + } +} + +#[cube(launch_unchecked)] +fn binary( + a: &Sequence>, + b: &Sequence>, + out: &mut Sequence>, + #[comptime] numel: u32, + #[comptime] ops: Sequence, +) { + if ABSOLUTE_POS < numel { + #[unroll] + for index in 0..ops.len() { + let op = comptime! { ops.index(index.clone()) }; + let a = a.index(index); + let b = b.index(index); + let o = out.index_mut(index); + + match op { + BinaryOpType::Add => o[ABSOLUTE_POS] = a[ABSOLUTE_POS] + b[ABSOLUTE_POS], + BinaryOpType::Sub => o[ABSOLUTE_POS] = a[ABSOLUTE_POS] - b[ABSOLUTE_POS], + BinaryOpType::Mul => o[ABSOLUTE_POS] = a[ABSOLUTE_POS] * b[ABSOLUTE_POS], + BinaryOpType::Div => o[ABSOLUTE_POS] = a[ABSOLUTE_POS] / b[ABSOLUTE_POS], + } + } + } +} + +impl BackendDevice for WgpuDevice { + type Storage = WgpuStorage; + + fn compile( + &self, + graph: Vec>, + ) -> Result> { + #[cfg(any(feature = "cuda", feature = "hip"))] + let device = cubecl::wgpu::WgpuDevice::DiscreteGpu(0); + #[cfg(feature = "metal")] + let device = cubecl::wgpu::WgpuDevice::IntegratedGpu(0); + #[cfg(not(any(feature = "cuda", feature = "hip", feature = "metal")))] + let device = cubecl::wgpu::WgpuDevice::DefaultDevice; + + let client = RT::client(&device); + + let a = &[1., 2., 3., 4., 5., 6., 7., 8.]; + let b = &[1., 2., 3., 4., 5., 6., 7., 8.]; + let vectorization = 4; + let output_handle = client.empty(a.len() * core::mem::size_of::()); + let a_handle = client.create(f32::as_bytes(a)); + let b_handle = client.create(f32::as_bytes(b)); + + unsafe { + let mut a_seq = SequenceArg::new(); + a_seq.push(ArrayArg::from_raw_parts::( + &a_handle, + a.len(), + vectorization as u8, + )); + + let mut b_seq = SequenceArg::new(); + b_seq.push(ArrayArg::from_raw_parts::( + &b_handle, + b.len(), + vectorization as u8, + )); + + let mut out_seq = SequenceArg::new(); + out_seq.push(ArrayArg::from_raw_parts::( + &output_handle, + a.len(), + vectorization as u8, + )); + + let mut ops = Sequence::new(); + ops.push(BinaryOpType::Add); + binary::launch_unchecked::( + &client, + CubeCount::Static(vectorization, 1, 1), + CubeDim::new((a.len() as u32).div_ceil(vectorization), 1, 1), + a_seq, + b_seq, + out_seq, + a.len() as u32, + ops, + ) + }; + + let bytes = client.read_one(output_handle.binding()); + let output = f32::from_bytes(&bytes); + + println!("Executed runtime {:?} => {output:?}", RT::name(&client)); + todo!() + } + + fn run_graph( + &self, + graph: &CompiledGraph, + ) -> Result> { + todo!() + } +} diff --git a/constensor-core/src/device.rs b/constensor-core/src/device.rs index c144d70..d18011c 100644 --- a/constensor-core/src/device.rs +++ b/constensor-core/src/device.rs @@ -1,6 +1,7 @@ #[cfg(feature = "cuda")] use crate::cuda_backend::CudaDevice; use crate::{ + backends::wgpu_backend::WgpuDevice, cpu_backend::CpuDevice, storage::{BackendDevice, Storage}, CompiledGraph, DType, GraphNode, Result, Shape, @@ -19,6 +20,14 @@ impl Dev for Cpu { Ok(Device::Cpu) } } +#[derive(Clone)] +pub struct Wgpu; + +impl Dev for Wgpu { + fn resolve() -> Result { + Ok(Device::Wgpu) + } +} #[cfg(feature = "cuda")] #[derive(Clone)] @@ -68,6 +77,7 @@ pub enum Device { #[cfg(feature = "cuda")] Cuda(CudaDevice), Cpu, + Wgpu, } impl Device { @@ -79,6 +89,7 @@ impl Device { #[cfg(feature = "cuda")] Self::Cuda(cuda) => Ok(Storage::Cuda(cuda.run_graph::(graph)?)), Self::Cpu => Ok(Storage::Cpu(CpuDevice.run_graph::(graph)?)), + Self::Wgpu => Ok(Storage::Wgpu(WgpuDevice.run_graph::(graph)?)), } } @@ -90,6 +101,7 @@ impl Device { #[cfg(feature = "cuda")] Self::Cuda(cuda) => cuda.compile::(graph), Self::Cpu => CpuDevice.compile::(graph), + Self::Wgpu => WgpuDevice.compile::(graph), } } } diff --git a/constensor-core/src/graph.rs b/constensor-core/src/graph.rs index 6a9bd68..9f7187f 100644 --- a/constensor-core/src/graph.rs +++ b/constensor-core/src/graph.rs @@ -13,6 +13,7 @@ use std::{ use crate::{device::Dev, tensor::concretetensor::from_storage, DType, Result, Shape, Tensor}; +use cubecl::CubeType; use petgraph::Graph as PetGraph; use petgraph::{dot::Dot, graph::NodeIndex}; @@ -679,7 +680,7 @@ impl CompiledGraph { } } -#[derive(PartialEq, Debug, Clone, Copy)] +#[derive(CubeType, Clone, Copy, Debug, Hash, PartialEq, Eq)] pub enum BinaryOpType { Add, Div, diff --git a/constensor-core/src/lib.rs b/constensor-core/src/lib.rs index 109b404..cff8235 100644 --- a/constensor-core/src/lib.rs +++ b/constensor-core/src/lib.rs @@ -55,7 +55,7 @@ mod tensor; #[cfg(feature = "cuda")] pub use device::Cuda; -pub use device::{BestDevice, Cpu}; +pub use device::{BestDevice, Cpu, Wgpu}; pub use dtype::DType; pub use error::{Context, Error, Result}; pub use graph::{CompiledGraph, Graph, GraphNode, Op}; diff --git a/constensor-core/src/storage.rs b/constensor-core/src/storage.rs index 612715e..9b62793 100644 --- a/constensor-core/src/storage.rs +++ b/constensor-core/src/storage.rs @@ -2,18 +2,23 @@ use std::borrow::Cow; #[cfg(feature = "cuda")] use crate::cuda_backend::CudaStorage; -use crate::{cpu_backend::CpuStorage, device::Dev, CompiledGraph, DType, GraphNode, Result, Shape}; +use crate::{ + backends::wgpu_backend::WgpuStorage, cpu_backend::CpuStorage, device::Dev, CompiledGraph, + DType, GraphNode, Result, Shape, +}; pub enum Storage { #[cfg(feature = "cuda")] Cuda(CudaStorage), Cpu(CpuStorage), + Wgpu(WgpuStorage), } impl Storage { pub(crate) fn to_cpu_storage(&self) -> Result>> { match self { Self::Cpu(cpu) => cpu.to_cpu_storage(), + Self::Wgpu(wgpu) => wgpu.to_cpu_storage(), #[cfg(feature = "cuda")] Self::Cuda(cuda) => cuda.to_cpu_storage(), } @@ -22,6 +27,7 @@ impl Storage { pub(crate) fn cast(&self) -> Result> { match self { Self::Cpu(cpu) => cpu.cast::(), + Self::Wgpu(wgpu) => wgpu.cast::(), #[cfg(feature = "cuda")] Self::Cuda(cuda) => cuda.cast::(), } diff --git a/constensor-core/src/tensor/concretetensor.rs b/constensor-core/src/tensor/concretetensor.rs index b925b36..dbf3946 100644 --- a/constensor-core/src/tensor/concretetensor.rs +++ b/constensor-core/src/tensor/concretetensor.rs @@ -1,5 +1,5 @@ use crate::{ - device::{Cpu, Dev}, + device::{Cpu, Dev, Wgpu}, storage::Storage, DType, Result, Shape, R1, R2, R3, }; @@ -108,6 +108,7 @@ macro_rules! tensor_api { } tensor_api!(Cpu); +tensor_api!(Wgpu); #[cfg(feature = "cuda")] tensor_api!(Cuda<0>); diff --git a/graph.png b/graph.png index 584fddd098184ce1d05f049f84849596ba5668de..85ca980c3bb9a18f5639096a50b12e3886e682a1 100644 GIT binary patch literal 15130 zcmZv@2RN5|{6GA)XBo-NthA8Ouw_R^nHhx;W$$d2RT@%AsK`n(v&qOxWtEwoGO|iY zspoZ{-|u;z>wjJUbI#Q{m*e}r@B4GVKkxTzeWLYsHMY=m)00S~EhjWpPm@SwqWJGZ zT59|mD?Vp~e^J|LYp9a`5r03f$%-S91V|@Tj~n_W&!=2AWiwe_d!(dH4n@013pa1^wwX}YKTk<@xG1}vR76WT zKs8`{$*1ynb;z*~ml*a6cpY|A?C4<{HY%G9p7WUfP-9f~o`snutlmnzl4jqB+f6jP1hmAG*n)zsFXXxzg#f38c zz1K8%O$1+Za8NpTj(>1)kU3fv|AQ^<)wXJ=~`*%l6I{8<`Tv9+}oP*mjq_3PJO z5fRl>r&zxK_;Jd@A|@?O=w3pC3cj?qwpLq;lbhRP>)W?)88#CXsL7p0u^Idy<_^%fu(Abo@Bk^&2;06A~gPCT#GX z=|x4XMn*=KX~#7*B0X({g@wfr9}a);fGg(aO=}mItp)}Lrf1I7zkk1#nwlEFH9%Y0 z)6@9ojn=1{01i4jI!$eD6?Jv$wVdqiu{S?wdLlhJ+1c5}j~t1>eQm7FP~fkH^9~NQ zg{F*QN}&ot8*W321I7MDUyX2YgM(%tK77Dp5$lzfcE$7h)YMe+q=<;f>#H~7c_PF_w3nIUS3|;+sjTKrbL(5=D>j06SDb}gLT`s*t>VrtUR%B4l+Sr zUS7u{yS$!A;7LqHzI@Bmd!#_TBysW3_;~h}pFg*Yoz+`?&bN5EZHjnVZs}L>f?P^E zSE|a(6Frl4IKH){WE2(_UU?@iBV*HgN6^i~<4S31DXUqrf##_*S@$14ym6`Tg`lu7 zb#!#Jrk0knu`xS|s0a#bM*Vdfsi5c2pFhsXXv$wV-YX`iZf`FvAt|{%Mp0g#S6W(H zKxleyu5n<1OH546&E35|ie=~6!orK9mJe}vG+0%qO4!o& zJywZeoQbVriB`Sx^y#i$!onVpWPKMNKYdDRW@g5D#EG-Ix_WGCD%^dtx&H0j7}vJp z)RGb@Rdw}=jh}PnKJ(lQBQ*>UPEu=YYcHgA*?JB?KiVz)^z=U5*qy}0R_U4bb-$-s zSy!XP-NuxNTRVI9ET^PBqlKlV(RlS_hoc?JUqpn_+S;0S%NDiEm!%oEY&jIT?iRx( zNzKQ{$0_S0^4jJQ|98WPKYs#M)zkzfBv{K_N1OA^%kI^N!g zg@lBdxw)?w6o~KIwM$7wg<`VhcFU&<&qD#LPMkbEKST}eTHn5X-4?gU{rl$=pDMi> z$iqlG)nmq--nX~kkY~G0?NPDAcj41<2Zz1NDk>~oT+yg`Mm{-C&o6K5@kQgav(H4N zEf-6KcJ6fl)_;hSntG!CDvg_uPsF1~{AUA~)nYggrI{!c&7Y*Fr)OqokLbzLo5TmR z^Yg#%=+Disz0PsgFU&Ps3!8}M)t~PSU$bLkV#F`?io8vh-WDgUb9&&n-f%cJB=J7+ zL0U#eRXj5F)*Vq5o?mT7sxPBPG);^jk6w`k?~|of7&yds#cHaea#r3P$k)e%VL5B3y!V~Xxraxbxo>h-(A(QvLP8=f zoPneJf#ikk+S*jt%koLDU#psxx$th`6b~iciHxL~A1IF8#d^ZTFyT{#@An zR?Va%&fH`I*iiJewA1c;G-@WS?=JD4GcGDF-ie3ypS*c=ue5Z9q|auc?g25eTf#a?sw9VR z{e^6-tleB`oR&oI-<&)5PY6pGw6XFezjou<(3X@p?d>y-Op3dlot-6)9?e>;{e8<) zz$me~SzRV@T^y|?HYthw-6O?E9-f|$E+p`Jnq_2Ulm!0rIBjCG=itGE8$UV}A8Amk zwB3u3@4oOTNL@xo=JWKl=GTr#U(LHEW80^^md1{wZ|=rs7<_q&7nz@cl7r@ooY`oaCF=^dY6;;>5CV)YX5xK?*903w}ysB<>ept z0lr;6Sw`p1=^Ze6u`5hT=?2Ha%}eJr9xmnaSyMt>{PClZzZH{O zT>G`Lz86#j))uDBx1ui1YWxrT#y-!<88>H5kBe=yU71QacNcey(l7Nc|L2rtq_3}p zkt7@t5TJG9#O2@&#`|i*aa~xW69UYgsD{fuf(h+2HESa~(XM5@rkP!3INywSDE`}3 zXnOKs?Yhv%-d=^=^_~=CYil%h=`IxL45~FH>+X|or-7nyla`mZZLHlNK0F-_{(UoH zgX_&#)H)D>Djw*v@$FpitlbIijT!l23ecr1}9+g_^qh{+A`^1G!FNy*Jk z9P2BNW%@idHQs*k)wQZBSubyI78aJUs3_XkEiGjmWF(-0d-w0xH#SmiY;17Kcff+$KA{yYcZ6qapuzRU)?FGPA4j(2XB^*4bjyEA9DmwI$?@is%5YM4Qht~i8UL0RsWa8yb5zCjv$Sl6l(HzMi z9JMwQkl#K>VK};geN-5*=7KJm_V_Ug@5PvwZC$GJ;V@(+P5|N5u20-};hrNZe)+pCu_qp!RL6+hMe;&V74rWuSLL@XL?(a%fG&!! z7LakDpmZK8y@@ucnQM|SER+W5^L=$SGj5L_Ygc;9l&sG;0n*CPzH?=j-`vS5C>(!% zpW1a_tHSQdlP6MkZ`jC{P;!?a-k!tjmU14HLhDS!!o;GlZRg-P@O-qy(V%Kj#L@(b5w6X?Xa)%(&70d9i)_qE}W{TD!aNN=Zr4k}9gI%4Sk(7o2?a zU;ULC=JR3CL6kt-@K#l4V zeJQ_2oirV%r*L`pkhgDDds~~JsHiB~sDq=oM=_g?>a}yhzo>4oi=Mt1HJZseLmKbz9%BtZrXUw=Dp%(gw<0+?IhpP7RBXG+b5tOq-P+ZaY3x*Y^iGXiV-4X9 z77yZ;Uze7ATbb!;ZEtV<^XrHC^W3~O)}*NDXvbBi#nD%a+}zv%5N||_OG>VVg^?-z z{wC>iMq?3&Si;DFA{d>#&Z-?aM<;Umderw)KN>WxSjWFEb1i&U3*G3}k z+qW-;0$vudKHBq+o>P! z>bLoBVt6k2Y7D050)ePw#wYZ;FOJA($SqyIyI|$3(>mpWF<^sUl@~v=&*$ss&(}bo zA&eoYdLiEXkzLE^I{8_Pwxde0m~ou*O*%IE$ntIq66r{>Sequipygepy)_3_EcNiHn{A zdnqg}E#He(UI(7Zkmm|YUaoeQ=aU28CI%K3pQfxI-i(Pc?EOOeJO84vkmcbK=Y}ULk*R|{i~D>`EG!rwSNxzd zb29_$*1be#oBYzBI^ZJ_YauZ3|D%)d3gn1tVp%-MI*RY zQE~ak2{W^AjyHj~_vj|;UoZb^_h)^1zV`SC>(;GXaT9*)OV)d`{k@p#TU)aS66LUv zByqnbtMDlub-Rlf2lxH-Wd|6Fx_NW{`@_$~D0`nOf1|RpQYvd9;?dF_NhT z<^8DQ6|RfRIQ;Z8ua>LcTpc>*^NrKP!=uvBRO7Lj79ZLQ2{RP!PQG}d)S>gkhu7E- zm@-caBZ26mK#fJ8OfFmg*x=d@&8qLhi9EzD`^Ywj7qO_;5Z%0s*rdCF@DsNF^H$E1)%+1_p7ZF2jRuKP-njFiB1T2<|5n05aka`Qv`0eilpXFw>o3 zF<3M+(jaGZS=uXn-u`#78pZPREMwiWJ zM!B>kH4TjeP8__vIv8o0qUNQYcpn6E7Ui4FK9xR=X6~PAKWm7=)!N(~VgD{EqeNzI zcDD81yPM51T!8T@!da4Jtm!E_eu3` zh!RGE%7Du1*o%&9J~waP^jaLLG48uyYkTLU@Fn-*;VRz~+S<9r2BO}a(W=Um%{O;4 zFx(Ie3Ho}1m6cT!1N_hCrnz%|Ds6FJ$Io@G_|z+NE^qKK21+o!5pnrb}il0Gc^BV zU&PQYUfEQmm4R68hdUC$!h(L!m(>WxHo3mqR7aWUYgrQXY4i8!KVvP+4k3B@6gfG$ z>~pVzQGu^> z8i3oi_LndnB{}y|mmQYnZegw`VvczHUUg2Ays#BNPLlE7pOKk~Z{hVdQ@yPl`Y3oK z8U;u^+(_ULo!%d(PoZBpj@AYdRj>Ty4!ek{5}_B37RkJRd8x0wvkx5nF_`k&bsE*- z*zD}RNeV$S_-YSd-;iFfh|;A%-CR0g#FpEH__XJ=fx%9GetwzYKgSMBNqIFdaCIC9 z-f#v6hv1`$>#~bk^q&_Kad&qwaT{00@23NTcA}Z&;9a<$sY%5^B0m0WeVv)aE$=Ut zBJG)3EGCjNS(0?8&6bUgO%=DZ=kVdfvwiveLEqc<+)GUCfACQ}^*EZ==Ehw9=earU zix)52T3a6n#>)8g$)YyH8$ui%LGxH|G(FA9`L9V((a;p%$Ydob9mF30wPD}3y1LMk zmoFXbuTZhEuyhR=dB5oKPyY7J$*jsp^o*GqD+$dc4eN5+!otGDXdpM|`SYjg8n^F4 zVVIenCFsyhPgYNkL6-kJ%6FLOKyi<|(lxe`FhGSuLPBH$R-d1aD_LO&TMUnk?76=E z0NeW?=wd!oCE2!Qr}}DHL=)Oswr>wdtC=Vp4Q>P~A0HbFb?nKk@9X2(CVVn9EbO?6 z)^s7Ed`U^Mafn&efiR*0|EIhB`enWPZNMV5Y(x{18qoHCYD&2~1!Po6KH>TK2LpqG ztk0isEPL&80>nb>Y;{y;r#>YOO*(i2bS?rzLfoj5Ti_18C+2H*TSc`zcvA)d10saj z7i>G_Cn84bvVMQjv6&JqK zrVqy^C-3-!xkF1wcU)DK5^sqRx}X->VBm5dzQ}g4ji-_VQUFvqbw9sjgwn&oLGwhP z7S#M?ip+Kp9T)cW+#9FxV(vooy%rw+-zpqDOGq)GdC4g$;}GCZrK=Mh4A1kfp=H(U z`t|F~92}9;(+(D|f_b~Tx-iiN(fXm)QJp(?j?lzpp(mZc~`(TA_u#MGb&Y=#dwE9UUD36*#3Fn6WZrlap7` zNWTvhj`capx8W_a@bcb7IgSHY0@NONb`}L!8JnJt5>$yGz7fh|7)am7+G0n>#=;A< zp`9Rh5Klkt5flu&k#BqH5(5_D{}ly~IGL6y+8d=|K0{ZUa1xV|Nn!}Y-o6RZ%k#+B zZ{HXJA|Qi*f4=dHkU-HrPoF-0=gyr_s7D~HJ9qA+fTR`?5t01v@PMVk0_}FCm&v_x z=wcHK!&QW2tE^1kSEuvj{KpR;tgNlcARhJ)4AethhJG%7=ujBQz{!&*t!!+%$Jd8D zw6XmNG2YZPs-Zy{0A;77gh27ZhxYin3y0pG3jGJZ!1|DkKOgY zH|cNY)2D971po|>1r$TZy?JxX|4R0ro}P(GNu^0+FLNE8oCwv2PtGs&$`x`pc6K3I z*@wktknhnINPhQPdPYW4iz_m9!=M7Yem%*f@0|6K8xBLc?_z30+vv(B#vHwSbGWh~ zq20p|zyOI-72@RNRNBRQT!g)`u8str(b?5?N?Z#6Z-L*$z<{tjTr3T`Nztn0(0##f zHFvQ>6Neg44lTa5qr;Vb{KMC21Tjq}W?U|JBqfNsW2m8uX?%~-dja2uDR*B(3DF-{a|iIQomw$G-S7AGhQ5J;ijoouT6uwWBX#YR_`SNjyR|_k3xj_9 z)MnszduElt&CeI&|2p*-JQ><0WakwFiDHLh@Q&ZB^D2+P>*IUUts5J~BBI10HGKa3 zc?=VKc)5AcBo`jAzNv{SI5@b!CgA9(hrf)IU;jri-ETu>OfZJ(pawQWLE|WJ=n|Tq znHdN9nK0MN50w_(ZRO|~4fMlqUgpvSjk1bBCPYotogxUHwb?(Qh9} z(qQnFdw$(2xx}aB<8uVU{_;ZAq79H|VEc?{!9J!pPiID2lBGSDfP|JrBbRr8DWX3; ze*8FeIM-0@TWY)n6rlRsihrZz16Bo&9ZPhTj`?Z@d=eEERR2Ppx zuyU1IqTu2C9%d@ijBT?FWo!~}B(UAOj6T2}dt5BrWznd`bnZecPCL-}G%r2&D$H3?S|7&B*`41uj?)n8y;0Da)s zkFNv&B0}(rws-dSHp3EO;p2O6|0b97QQ&tvdIpBVkbl8n3JfW~lo(R_`uhI)@#x=c z#lOo;5W|5y$;P1m5Sm=!r9Lruf%ws~BEBsAj#kFf_wQFSGlgM)^lv%G2fT1CH}^oq zR^IEF#>DUavu;!|Qns|@jb!3icXAQ|m3dQ^eD)m(_9^rpyy7L%@MX%kjyas@ou*Lz zdtX@Ifb)fIp%wgZ(--x{toUb%;^9i}m4fjn^g;>khpZbY(W9Wy6vuRHea7Pu=2fO{ z%JtAt6823}V?^XLO#|UaPlRHC_BEjV9dm_RM{8dZ=J9Nz;M~vCkMg#HO9ge|2l|)xT5}Wv{j5gi%U$ zwHz$TvAnX82H=_YvShw?Yy$v+7|4n+1RwqPn`&s)NAf~Tqw5WauxR0w@($@>eBg!6#xCwlM7sDu1k-89u^k1 zw7h&ubMkMAv%e6;ssHU*geONf?Z>lHS~|JK#)|=vp!N0ZYa^GxrW|pV{J)8vLT{kS z^UC@#lQw`Vhe~wxRG?>HquWLpIu|cps+i(+zB=;3NAB)hf3smq_yG|0X|SeE(8LEz zZvKz$`~(dm@F5S$YOF5olfU=RFaQ2taPBeC zJaM8v>rwFa4u#*0si~>-b25X}H5v2k5kE^RxvG2@BcLl8KQ~=|TLeeoQ=6n!9jU*+ zzYe6yeDrkoeSF0>qGs3DBBB2&{Q4YcKlkV(HKYww0AOHNLe>BQehfd!q$Fj39iE#J zLK}QOD{yMRUsDOou(C?bdV6}dVZhX-pILnrv^pTUHX7?($pcMB-NIsr%W&mK8}*c| zm-VR3LbM}H@4EVWvY|3p4eS62jR9&yIh28;M(4d^nr?6yVC7~|Uc4h3YHDi5Jj?Pa z5XuSeL;z<$evq1dWRFqWLP9Ve1S>2?&aiUnMGi+Tz6b=UXTH(o_cg_LJ-*u+R$t3Q z*V<@Yx6ILRm|MqH`0c}^OeDat*0iMkEMH10_vt(c1jda74_g+^GB3c*>b9W@*;tM*a8i-$W@#R`!Eu-pz2$5CEI#e~nKx-PpxUJN@-h z$iltN6uHZHckuGUX!`!dZ^(Wmh%WWTKP>`0E)@o~RWTU@XO(-_@1OP+S z?c0&iphrDqQl38-DX*-YfUDIA*@jp-v;L)mqx#S>IXF0~SGu*=XR;olf97D0L#5+F z36}@`E>*rluYUU)9-0CLhp&vpa$R+mPI*)6JY@DZyK%Y#kE~7Xs6%)Hk+ao=%0WMe zZd>3rb2{~&JdaQV(Q5&p!>6#Grp5MKV21UHq;F0CiaA>og_sTQtO1S@q31)4atkf31bE5|%O(*jor5c;^u3yXR*^vlKr$uRc;93G?6VM;dRb&pL=IfDXf?V^eA>B70hY`8_3+S-Ly_2k&<`%iIQJqS2T1!_BiJzG#(8d2##*Kro011`ohRlANaz(-gu2k#mr zrhp0XX3>VSRqYsSt#oq}#>U6P5C&QQ_3NdX`u_Vu$BvoWorYF=9~Jqe$@}8P+b4Z7 zwh8J9$a{a^&H9~qKp~i!!J9u#Tvq+GYhD0S2*Dbo@qS0gz;LN63}${E7|w-`RN{U6 z+j;elFyhhf93Ana0H$GWw<`?rbpaT3&zh@y1& z@K8pdju$yYPO@M%cyKb}&K+ij40qx^1h@&X(PQLA9KO^`m;^9J#?J6wJ=p*GG&R%+ zn3lIer@%3l&CPj$^E->=FiR=y}6#-^izegKbXL=+Fh{i>` zb*nmt$mNfX`n>%NJ&apHfry>1pr9aXZ}%cQTM69tS#IvBGiT^>4C$j)yPv>Pm=1_!x;whLaq42N1ly@i|7)APp* zr?b={D2;81@Ti_V$v81NiL}`j-0C0rzS((sR3uQzy7%vI$yOkg)dj=C)ZDxge*xw@$B}6$O79b~VqP- z@KDRk%hygl6TybWFla$uph6GD2(=gKGjaR3oPGfT=il7hHx1`<)PpOpF**Jjsw;kMHDWECiotU55Dte|&P10(ven$G$}r z(i3?@Y}F|b=< zMV=qcXfQM}nf#Dr0Lyr80>OR&M!~WgX!rw3ii(Op<1_xJm14xwtE-a<0iO_j zb91#9tAA{Tu1G|6qOoz~&{XKcm5T}r9s}X)2L;KG7WqrGVMbAX`1sKpHfn!~8i&^=A?q?(_-=DXPF9j7~=c>VaIQ@i`8)ZGEQ$8mcP9*lhTO7U#)FT0b;$5`N4 zG-6O=>3_vH>kDD49djEaVMAHl+Fm1SWq$Cb-n+f89ryuw=qzgdx98WaawB9CyfL%q z&%A5Mq&ce!m4MKYu4Pp zzUQS4S0-YFgq&z@Tox&v_jL@JlDY%=)mOmWcwTL;nwLe#*XfI`uCA7QOwl8MK`_l^ z#gJ6j6T{p`0!T4pS?Q+;B@*ipytz?7 zFi>%(qyq~~+%ulE8Nhx(P^Yb}4F-{hjg1h(dt);*bg16x>1nSOaWWDVTEhE(^(y3H zSw-M4350jdlQaI7lp3(ZXaj3Jj_j|?r}BC#1KSd=BU4XmfY}HO;DsJvXW$A-DZhGgMkf%l$QbY3b+bAVH7meKw7&%qG4r}H~dAAvTbPS zp>C?&fQu-?#3hg};GkqdW!^6)mI-{l0lf$J9t{{E6POGVOPCdiqrU4sk#TyzsHn5| z5LgzGqDvADF&o+!2YVHX*>r5v12qeLN)n!C^Pdq-AdHd9px;Lh{_H?uKF`aGj+(|6 zq6~%5pRM2A+8Tu}_RzTsH$aZ4^YN54JG=nG{lN#{^#%bU68;Dz^7)ZPJY(z7P}Rqu zSo{vT@y*T68Ek0!_Q7tSHxt;ZsONp6qDW#@9db%F4j3qND z#JJ;_{*#lJ==ceTkEEoeROdNI2y6t&0!S!1#%NLDu^aK7rL|Z8QrcOE32)@jpFagw z^;f=jmtlvAoGGG21N&4|T9Q&kWHHdV-4Vh&JG=NELuV^~pJ;4Id;G|(hvzCgPR+Y5Wv3_s|&-qV}T0){&=FJStH{p z+G}TJ6^bw1DJVz=ubxCgDEB--Dk&6E$H0x5tYco&ca+~0!igeE^~cu^*PcT!BQUiY z85x%bE7}gr~mDYj=JZOrE8M@8)w5C}b~qeDl6Ef#^;jf$lG zTLDsu&%Bu~1qtoU?^*A^KVKCoL;o({4k1!^Cm^w)RS}F(7>uO5t4ojsalXY_5;8IhxM)gf_YQ7NI&Hnobz>M_nxK4dn%WIB zkpzq#Od@3y6Att&6*Dt#IA`wYY>ZqIlq7M7P5~myhD|?F8UP69zP4bA3?G&1H3VZw zw@>%*%la^$3tXl`Ya=7UkJbA)SW@d4f@w>n?xJXymzVtzDM2MLb8<#uZq~v7`rmj9 z+p6bS9og&GuiaklW~Bo~563bdxMyB$e^Yr$6%jn>?dc^YY=BqJF0yt{va%v!#xb*C zm*$&QZhPza@@l5Wq)wxVQ8P?gPi8JIE&@6NuO5Ob4DFcAwR)MFRKnSj+qMtU_!RRa zwyn${ofM?C#M+{w?>XGlA;peKgL(TxYJ_WCC)FV}^ zCmMLx3VA8`IWgRa>CpyW;|+-0yrxCCBD<**Td)U6&u~8#$>p@IkI$;X;ZLtZ_(|od z!;_UsoC<-*NCHQXCRB04>Ij$68}WsVN@(G!($dBIQUfkte5NIn2WJ;3Xh}4EsNUsu zSsW^E92FCBQh3r_tE%wj%ciCAM&zN~5IESn*a9@!7_mhKs2Sg7E}C%nF8SfZFSBI) zz~>ws9qR!S|NrDnd|#x@@b)hygKaS02?+C_;F6&t zC4CJsSh>DnpT&_aI1T}|sigcb1ZbqKUw@x!Kjby7?{eAFGDW!pO+T%+mJbXCo+ueu z&?!U16^p?nI5IR#t!Hw%AztDv3PF?Km)&!rL5H-3TxMDuD6Q~kNd+|RyJ!}1bO89= zrSuQ3K}dyvH|7pPBk&YFjaUdt$y$NP;*uVhmd2Y4-`DEu?~ek3A%(&K?ae=T8>ya2 zBv=izb8$)ptTzHbM^NT*!3(%UP>4P)0xn_7HFR_^k;GA17i2^McftiPT*4+)5n(@% z09S`d?#UNtYpw%{SRt1uqM@WQ_H*3sXa?fmh?42R#WXx~rmbwpEkv*G0J)F}fY>0c zn1Ky=H!EvT%~Asck>4FGzQAOnuxC%?|0fGwxq5X5Nb3U-H@Omj;1U@Q5OgBVp@p*= zpH{-r8(sJjF%<^7HY;&png3J~2X{vevAm~{eea$WHs~ya-GR59jub|;&-|bofut@6 z1Q@_yHu*B$l}-f0G15AQ`O?*IrrlQ9n66msc>s=b7iVPNB}>Nv4nj`Z`R#D(t-A>c z8t{IkE`Hd%f8V}w^R0_GU}Iot$YyupLYbL8GH$jmF1ql&*m!ufu`5O6kq+ZVtNT*1 zg5xv^ux$6r%Wqb!_B=o~^G;r#XhlWEDX-~e#}8yA;b2;(AXV)eN-7DsOYhw|Zq z+N?}>frJO!pFxE(^2(Ha4A6lTP!F|*&HhbU0&s^7+89od%q=O%#KDM);gw|vA%J1v zyn5|kmrr7k|5nr(jDf8%@Br_3B3tcxVn4p`WSr>us?67IsUs_eRl9gD06j~aHsV3 zLm@WVj5mZQBKPue`|!=Z2M#pCg}6n;OQDE@tv1OnR$uiUxlDtz0^{aqUE$i<86e02 z=0v2v%iYGwQC#9UjZ%$JK4jW%&kF_CUJ4+@2rJYh#KFSKN{*NPHt8?}1G`8ac9nU| z_4;WE@TNGN6x)G$2BED95cf%X`c)icIgjvHBGNKE_Dh-`9uj}oCz&9TYz0Xpk$_4? zObbZG_U~+J&i@77F+$fO9xx2lf&zE}EIb0LknqXGP#6~kL$uJBRsV`)5kU&UU?x#) zu-5As4ptNC5NqOMi2LyT&7p=_B!mV*V07>>1Y>UzR*P_4eOntH0?$o}htA_P)mBhs zK>pe(A6aC8Vi62LneGJA2FC?R2}9H=>@_sDOXogbmCjpGf@I+9p$1+G z-4c>|B-jpOzQk98S739oU8UY~1wS0UYZL)Sxogj!okxzaLt!F<4?yPJJUmbG^O=Y= z12!3P5C~ZE2|SQ8+vXk&LD_C}S)8x>R2%eAmSu8s5>6Ne6bR%=2~LlvAT9v;fjH^q z;xe-H!gOC>-%dy(h#P3?>Z+r5ux$v)A$F}8*7ny=4;3&rZI5@s?A)y)-t3XOt=g(6R;1Or~AW;18`9Vx*1{gf|FF!F6j(bvG&Iv9_$U4Mf z!Ry!W#KkoR;XujqSUrUc4}R~MtZZXf7t5_%w?3_5t`t+U%0pG~r$8vI_05~p#|Dav zitgZu9FB|G$Lju19-#{g5OM)-(YpYKA7^FJ1O){lKnVld;nJl8Cwm~3o#sq6HZt0` z>$3BP=zT3gh)Ku=0NH>~6Q>jjvtM&pa{9#+2`^tBhJIuP(lQo`qc9KcK3lIGOYE_c zo6Wd?@7~@OhooR_q!{oygaiPt3H2wK;&y}7Q7I|6-_LN&Ls^32w(?HJ;78dr|8u7E z3Z$eR);+GcEgaZt{P5wPXR1!f8P}Y?#{W4YTCkUikvct~{{K8MO43~ty@Dg9gPJ4% aC>9@BnzO9Epu}NU(g`(P)fdWESNQzU>K9Ccn=>Z+H#p3UPkES zDNq`-HQkCX&Tp^kAlSTRv?*UU6^OT~@AnP0gFnzp%uIwkjL~V!HvZLkb$Pu%^4HQe z!y3ode(3(4qRmr`DF$V79ttO$K_;{R{Pyp^Qb8(Uh+ z>g#DlP^6@>xNc3XBB-SMzqhv3e0^no$B+$ zdRo`e5N#+F6CI5gqt3I3#q!W5u6_H4E0N<)&lQ(vuB(=sni{abhI4p$XlQC0lbK1o zVJL#qhq1A-ZEPqgDys9--W8(%*xyeW0xMF$$Hl3xsp)m9cR(OMWrrWoZT3AVZ*G=v zY;G>aU{gVQB1#LjdT-$`C` zDltygjGMEwGq;KgNBGqVlZ=dvp`|5>d_IMjm)F_ZSYO-oAzWgHTs zys)%H2cb>w%g)W!@&C14GByTBKyMaJ9zMJ`kj7C)K*l(}HBpv0iZrpXVCJ~v9pUEg zp1N|$q($>Wy`(srh$<6AmM^cyy2TiYtdP!=Jy5l>vfBC>OGZyer(tGh_TTq|t!OG( zF)#=?(Es?UH$gTUeLtB|oR>3Q8#gjyw%Sr-VPPS9Atupoa(Y@*JB^(uQBxfBfhGoO~n3~6g?YebH+vdSXKNn+I(+k6Fu zQNz8JK@PaXLqjEXbyB!$y}iADe-D55dJo5Bycqd;yx>xl0SahgetsMzR#jJ5DEGtD z2WlrLC-hug57;T=`Dsa3Is<*V;knFCJG;BCBQM3B9Ubr5+1VX$7KNX-yy+$rx^*k| ziHC>la;wkr(HB)VcWIot+(IQYtWFE~m7M&hXi@+XMszG(}%t+lOB@h1=yZPk;(eN=f1O z_4QQ@IrH{P?y^yPPz1x=&l1S!=ulDt=N$fB_E?TpS6`o>G5KEbpI=ym49Ot!P2egq z1x2h@SudEJwXLnkmpR`B;wWTgWuGiHQ-r{{NJvN^5g^YwN0{lUX&e{cKiUGIV`aq( z>>`}ArsI2=$_7<`JL801Y})_xXW)2yielqY`1Rk-qFufD1{W$yPUVDTCdrRWOG_5r zGB)jgBsr4yT84(yBBG-8hyhCd0!4;0?esWPkyulazN?Er6r2z#&*eKp^rZOs_&t-8 zv?dk$kybTYy1Jyaw6w*BLZFb27wyU`D~ZD|4~Q!C(YTPu9v*$$Q?GrFw~Vs0vwNnd z=*qNGcV?O%xws&1uCD@)w`e*$I~_bdDIu*s-wA&_Re1I4)%i4+MsEs>>vLSgM))|v$u%}mCemBqoSjQ!OhMxEFR0P8Q>Y)pvEP;YKy|o6G2vQ zfg?ZQA}KL(F#zs@;{qlB(<;?C(l#6T^XJdAU%YS-O1{2(97n-6I5d>FxVV_Ixk4sp z*O_Z$W0R1uHC<(bA5m-DQIR$9@nh8Gzk^j5f&CE=e63&`L3$CIN~e>HsmbR&&a}Z-^a&C@$XAX`&H~CJFjzu z(0q#*7l<13`Il>^m5LK;8hD{{B7>+IxhwM{bA2g@exWi8q`i-b<$SoVrDY)RwmbRk z=(-=*&US;V9IK|bHe!1clVVV1`t9uCI{-CG1W3eB@mygEWK0s=Q0SaP2b=tpi1_$; z_^|_cPa3;I)ZyXN{^rn&=haqCx`Myjp;yxM|MmAr^*Qkxx;vkEv&1WK=Jk@MFXCOC@V)XrMTMi9*d_E0Q&N{rvp6Z{J4MD<~*Le%V9- zAV{hCa3wwekBT=wqjmK3rt)je)gauA$*H>wWcY-Hyu7^M&?ZQvVu34hlT|8qsYRn~ z0s`TLg(UPPGa*2cf%)5Rpt?B4MgwYt2Rxqp8XFs{7}$aT(9Eo&rFL=A%GBKadn$Bw zydGEpQFC>zO1moXU+r!4-ON3@K8~-aPVN&D5t+=NHBb{57l$}Jp7xLfH7n&lE4C5R zm&6cs1Omgy&yUZ;!}D)4z>Cq1iFATapkA)riW0p4{95sRC%U|{vi|K`6rCWvxTGYq zUN!WbzNf)H?0j%!1TVr9cZU=jdM#T0wovdvVR3gir`qoegb=ezkX%_Z)3!V{v(j_1 z2$u)r#EF-4y(=pmEuJg5`%k*f^z~EV?HJ6X4?8t)-l$t!TSIYt?y$4NLYOBeCV=G@ zAHq~jMHt3jDd0hJdKdgw#-`{`m=OO2jFiLlCwTq32lD!8|D8+^>@GwHNN6gzR{$DYOfwb)tpHFcV_z^&!wWA7Dg5^W6PUae&hiHN3owN>HY;-*y z5-d7?9h?%SUczl|ZXR@V^(T0E)>nDl+&=W4jYLjnRu<7*2aUd|DZOp0ccj9cTN6J@ zw+hh4!2SvlJVD1>8NJ6q=1xAA3!36o4p39(Ob2#e{66C@HLiLLD*eaS*4g`^L=`eX z494o}xb+KrH~RcemX<7_cv(6Z`W;r zw|FVzHNJ-aSqbM&fGH@TyDtw%cwBdX3}rP29&LbMkLa10Ow`mMKnfY%y?fUy=~zO* z3a}6W^@LCB<-vvyvx}qY^$;m09&&kNyxm?pQhuVRG5{>wO+NRY;0u-^JCYYtu5x19RuNXcTLnvhi; z0C*>Xu1jspUSra-cRu>tcRMR93$RWk00nLl5mKPQb8ovdo`rNv+Xi?D3kj*Yxrx-; zv_-K&J=JQ=_<%?U;Fqf!4x=kaYQ{ZB+w-ai1u2ddsiupX*8=uqVy7gcO~eO|6H`)Z zIXD#GHzX7i6Vn72)!KhZ1KN4eS!RBIo?A?e+;?-VFZg$ti`3I`9rEAm-=DU;mZ*QaP6g4$9#LMgV9MUUsl{aT3BP;8ZSWjU_Mr3=t;(ma#4nHl@ z^+!)n&s~zrT|qts=3GKVMC1h+@9_W43wk0;OG_ZW2y#YZb8?tMLPFLyH;K5oxB>zL z@AA`1PsP|jbaQhvw6UpfP!9!oOPBLwV}meJGYTLlc=p-FMS8hArzLJs1}HJ*fPDzi zwt!kRz@>*rN9a03AOP+tD#~0qx3;zQ_VvXsINuYZR~Mo`Re}c<%R!;C`L!T(H3YJr zEs2(i)_Hu+l?MFr8Sfit5>-_{l-S61s-`@_VNnO7Vxml25n8(YJ1L2@v8hQqaN&tO z;1eEtQZzOfsG92PY6lMw3IMbcIb>idK)(Z>2aTPcUh){Dk!|Fmt Date: Sat, 26 Apr 2025 16:23:26 -0400 Subject: [PATCH 06/17] Dispatch for binary kernel --- .../src/backends/cpu_backend/mod.rs | 46 +--------- .../src/backends/cuda_backend/mod.rs | 43 +-------- constensor-core/src/backends/mod.rs | 1 + constensor-core/src/backends/scheduler.rs | 51 ++++++++++ .../src/backends/wgpu_backend/mod.rs | 92 +++++++------------ constensor-core/src/dtype/cubecl.rs | 51 ++++++++++ constensor-core/src/dtype/mod.rs | 1 + constensor-core/src/graph.rs | 6 ++ 8 files changed, 149 insertions(+), 142 deletions(-) create mode 100644 constensor-core/src/backends/scheduler.rs create mode 100644 constensor-core/src/dtype/cubecl.rs diff --git a/constensor-core/src/backends/cpu_backend/mod.rs b/constensor-core/src/backends/cpu_backend/mod.rs index 4b62f0a..1b21a8c 100644 --- a/constensor-core/src/backends/cpu_backend/mod.rs +++ b/constensor-core/src/backends/cpu_backend/mod.rs @@ -1,5 +1,4 @@ -use petgraph::algo::toposort; -use petgraph::graphmap::DiGraphMap; +use super::scheduler::topo_order; use std::{borrow::Cow, marker::PhantomData}; use pool::{BufferPool, PooledBuffer}; @@ -43,47 +42,8 @@ impl BackendDevice for CpuDevice { &self, graph: Vec>, ) -> Result> { - // Build a dependency graph of tensor indices - let mut dep_graph = DiGraphMap::::new(); - for id in 0..graph.len() { - dep_graph.add_node(id); - } - - for node in graph.iter() { - let idx = node.id.get(); - match &node.op { - Op::BinaryOp { l_id, r_id, .. } => { - dep_graph.add_edge(l_id.get(), idx, ()); - dep_graph.add_edge(r_id.get(), idx, ()); - } - Op::UnaryOp { v_id, .. } => { - dep_graph.add_edge(v_id.get(), idx, ()); - } - Op::FusedMulAdd { a_id, b_id, c_id } => { - dep_graph.add_edge(a_id.get(), idx, ()); - dep_graph.add_edge(b_id.get(), idx, ()); - dep_graph.add_edge(c_id.get(), idx, ()); - } - Op::MatMul { - l_id, r_id, o_id, .. - } => { - dep_graph.add_edge(l_id.get(), idx, ()); - dep_graph.add_edge(r_id.get(), idx, ()); - if let Some(o_id) = o_id { - dep_graph.add_edge(o_id.get(), idx, ()); - } - } - Op::Permute { v_id } => { - dep_graph.add_edge(v_id.get(), idx, ()); - } - // NoOp, Fill/Arange, Rand/Randn don’t create incoming edges - Op::NoOp | Op::Fill { .. } | Op::Arange { .. } | Op::Rand | Op::Randn { .. } => {} - } - } - - // Compute topological order - let order = toposort(&dep_graph, None).expect("Cycle detected in graph!"); - + // Compute topological order using shared scheduler + let order = topo_order(&graph); Ok(CompiledGraph::Cpu { order, graph, diff --git a/constensor-core/src/backends/cuda_backend/mod.rs b/constensor-core/src/backends/cuda_backend/mod.rs index 0675220..5ec5a0f 100644 --- a/constensor-core/src/backends/cuda_backend/mod.rs +++ b/constensor-core/src/backends/cuda_backend/mod.rs @@ -1,3 +1,4 @@ +use super::scheduler::topo_order; use cudarc::{ cublas::CudaBlas, driver::{ @@ -6,7 +7,6 @@ use cudarc::{ nvrtc::{CompileOptions, Ptx}, }; use error::WrapErr; -use petgraph::{algo::toposort, prelude::DiGraphMap}; use std::sync::{ atomic::{AtomicUsize, Ordering}, Arc, Mutex, RwLock, @@ -474,45 +474,8 @@ impl BackendDevice for CudaDevice { &self, graph: Vec>, ) -> Result> { - // Build a dependency graph of tensor indices - let mut dep_graph = DiGraphMap::::new(); - for idx in graph.len() { - dep_graph.add_node(idx); - } - - for (idx, node) in graph.iter().enumerate() { - match &node.op { - Op::BinaryOp { l_id, r_id, .. } => { - dep_graph.add_edge(l_id.get(), idx, ()); - dep_graph.add_edge(r_id.get(), idx, ()); - } - Op::UnaryOp { v_id, .. } => { - dep_graph.add_edge(v_id.get(), idx, ()); - } - Op::FusedMulAdd { a_id, b_id, c_id } => { - dep_graph.add_edge(a_id.get(), idx, ()); - dep_graph.add_edge(b_id.get(), idx, ()); - dep_graph.add_edge(c_id.get(), idx, ()); - } - Op::MatMul { - l_id, r_id, o_id, .. - } => { - dep_graph.add_edge(l_id.get(), idx, ()); - dep_graph.add_edge(r_id.get(), idx, ()); - if let Some(o_id) = o_id { - dep_graph.add_edge(o_id.get(), idx, ()); - } - } - Op::Permute { v_id } => { - dep_graph.add_edge(v_id.get(), idx, ()); - } - // These don’t create incoming edges - Op::NoOp | Op::Fill { .. } | Op::Rand | Op::Randn { .. } | Op::Arange { .. } => {} - } - } - - // Compute topological order - let order = toposort(&dep_graph, None).expect("Cycle detected in graph!"); + // Compute topological order using shared scheduler + let order = topo_order(&graph); // New kernel and grouping logic with matmul input tracking let mut kernels = Vec::>::new(); diff --git a/constensor-core/src/backends/mod.rs b/constensor-core/src/backends/mod.rs index 26672b2..784a95a 100644 --- a/constensor-core/src/backends/mod.rs +++ b/constensor-core/src/backends/mod.rs @@ -1,4 +1,5 @@ pub mod cpu_backend; #[cfg(feature = "cuda")] pub mod cuda_backend; +pub mod scheduler; pub mod wgpu_backend; diff --git a/constensor-core/src/backends/scheduler.rs b/constensor-core/src/backends/scheduler.rs new file mode 100644 index 0000000..28b4b01 --- /dev/null +++ b/constensor-core/src/backends/scheduler.rs @@ -0,0 +1,51 @@ +use petgraph::algo::toposort; +use petgraph::graphmap::DiGraphMap; +use std::marker::PhantomData; + +use crate::{DType, GraphNode, Op}; + +/// Compute a topological ordering of the computation graph nodes. +/// +/// # Panics +/// Panics if the graph contains a cycle. +pub fn topo_order(graph: &[GraphNode]) -> Vec { + // Build dependency graph + let mut dep_graph = DiGraphMap::::new(); + for node in graph.iter() { + let idx = node.id.get(); + dep_graph.add_node(idx); + } + for node in graph.iter() { + let dst = node.id.get(); + match &node.op { + Op::BinaryOp { l_id, r_id, .. } => { + dep_graph.add_edge(l_id.get(), dst, ()); + dep_graph.add_edge(r_id.get(), dst, ()); + } + Op::UnaryOp { v_id, .. } => { + dep_graph.add_edge(v_id.get(), dst, ()); + } + Op::FusedMulAdd { a_id, b_id, c_id } => { + dep_graph.add_edge(a_id.get(), dst, ()); + dep_graph.add_edge(b_id.get(), dst, ()); + dep_graph.add_edge(c_id.get(), dst, ()); + } + Op::MatMul { + l_id, r_id, o_id, .. + } => { + dep_graph.add_edge(l_id.get(), dst, ()); + dep_graph.add_edge(r_id.get(), dst, ()); + if let Some(o) = o_id { + dep_graph.add_edge(o.get(), dst, ()); + } + } + Op::Permute { v_id } => { + dep_graph.add_edge(v_id.get(), dst, ()); + } + // No incoming edges for other ops + _ => {} + } + } + // Compute topological order + toposort(&dep_graph, None).expect("Cycle detected in graph!") +} diff --git a/constensor-core/src/backends/wgpu_backend/mod.rs b/constensor-core/src/backends/wgpu_backend/mod.rs index 98af1d1..fa39f5c 100644 --- a/constensor-core/src/backends/wgpu_backend/mod.rs +++ b/constensor-core/src/backends/wgpu_backend/mod.rs @@ -1,5 +1,7 @@ use std::{borrow::Cow, marker::PhantomData}; +use super::scheduler::topo_order; +use crate::Op; use cubecl::{cube, prelude::*, wgpu::WgpuRuntime}; use crate::{ @@ -62,69 +64,41 @@ impl BackendDevice for WgpuDevice { &self, graph: Vec>, ) -> Result> { - #[cfg(any(feature = "cuda", feature = "hip"))] - let device = cubecl::wgpu::WgpuDevice::DiscreteGpu(0); - #[cfg(feature = "metal")] - let device = cubecl::wgpu::WgpuDevice::IntegratedGpu(0); - #[cfg(not(any(feature = "cuda", feature = "hip", feature = "metal")))] - let device = cubecl::wgpu::WgpuDevice::DefaultDevice; - - let client = RT::client(&device); - - let a = &[1., 2., 3., 4., 5., 6., 7., 8.]; - let b = &[1., 2., 3., 4., 5., 6., 7., 8.]; - let vectorization = 4; - let output_handle = client.empty(a.len() * core::mem::size_of::()); - let a_handle = client.create(f32::as_bytes(a)); - let b_handle = client.create(f32::as_bytes(b)); - - unsafe { - let mut a_seq = SequenceArg::new(); - a_seq.push(ArrayArg::from_raw_parts::( - &a_handle, - a.len(), - vectorization as u8, - )); - - let mut b_seq = SequenceArg::new(); - b_seq.push(ArrayArg::from_raw_parts::( - &b_handle, - b.len(), - vectorization as u8, - )); - - let mut out_seq = SequenceArg::new(); - out_seq.push(ArrayArg::from_raw_parts::( - &output_handle, - a.len(), - vectorization as u8, - )); - - let mut ops = Sequence::new(); - ops.push(BinaryOpType::Add); - binary::launch_unchecked::( - &client, - CubeCount::Static(vectorization, 1, 1), - CubeDim::new((a.len() as u32).div_ceil(vectorization), 1, 1), - a_seq, - b_seq, - out_seq, - a.len() as u32, - ops, - ) - }; - - let bytes = client.read_one(output_handle.binding()); - let output = f32::from_bytes(&bytes); - - println!("Executed runtime {:?} => {output:?}", RT::name(&client)); - todo!() + // Compute topological order using shared scheduler + let order = topo_order(&graph); + Ok(CompiledGraph::Wgpu { + order, + graph, + ghost: PhantomData, + }) } fn run_graph( &self, - graph: &CompiledGraph, + comp: &CompiledGraph, ) -> Result> { - todo!() + // Sequential execution of nodes in topological order. + #[allow(irrefutable_let_patterns)] + let CompiledGraph::Wgpu { + order, + graph, + ghost, + } = comp + else { + unreachable!("Expected Wgpu compiled graph"); + }; + for &idx in order.iter() { + let node = &graph[idx]; + if let Op::BinaryOp { + l_id, + r_id, + operator, + } = &node.op + { + // TODO: dispatch binary operation via cubecl kernel + // e.g., binary::(...) + } + } + Ok(WgpuStorage { ghost: PhantomData }) } } diff --git a/constensor-core/src/dtype/cubecl.rs b/constensor-core/src/dtype/cubecl.rs new file mode 100644 index 0000000..0c0c45f --- /dev/null +++ b/constensor-core/src/dtype/cubecl.rs @@ -0,0 +1,51 @@ +//! Instantiate cubecl kernels for binary operations on all supported dtypes. + +use crate::graph::BinaryOpType; +use cubecl::{cube, prelude::*}; +#[cfg(feature = "bfloat")] +use half::bf16; +#[cfg(feature = "half")] +use half::f16; + +macro_rules! define_binary_kernel { + ($name:ident, $ty:ty) => { + #[cube(launch_unchecked)] + fn $name( + a: &Sequence>, + b: &Sequence>, + out: &mut Sequence>, + #[comptime] numel: u32, + #[comptime] ops: Sequence, + ) { + if ABSOLUTE_POS < numel { + #[unroll] + for index in 0..ops.len() { + let op = comptime! { ops.index(index.clone()) }; + let av = a.index(index); + let bv = b.index(index); + let ov = out.index_mut(index); + match op { + BinaryOpType::Add => ov[ABSOLUTE_POS] = av[ABSOLUTE_POS] + bv[ABSOLUTE_POS], + BinaryOpType::Sub => ov[ABSOLUTE_POS] = av[ABSOLUTE_POS] - bv[ABSOLUTE_POS], + BinaryOpType::Mul => ov[ABSOLUTE_POS] = av[ABSOLUTE_POS] * bv[ABSOLUTE_POS], + BinaryOpType::Div => ov[ABSOLUTE_POS] = av[ABSOLUTE_POS] / bv[ABSOLUTE_POS], + } + } + } + } + }; +} + +// Integer types +define_binary_kernel!(binary_u8, u8); +define_binary_kernel!(binary_u32, u32); +define_binary_kernel!(binary_i32, i32); +define_binary_kernel!(binary_i64, i64); + +// Floating-point types +define_binary_kernel!(binary_f32, f32); +define_binary_kernel!(binary_f64, f64); +#[cfg(feature = "half")] +define_binary_kernel!(binary_f16, f16); +#[cfg(feature = "bfloat")] +define_binary_kernel!(binary_bf16, bf16); diff --git a/constensor-core/src/dtype/mod.rs b/constensor-core/src/dtype/mod.rs index bee9dc5..07482bf 100644 --- a/constensor-core/src/dtype/mod.rs +++ b/constensor-core/src/dtype/mod.rs @@ -15,6 +15,7 @@ use gemm::GemmDispatch; use rand::RandDispatch; use simd_ops::SimdSupported; +mod cubecl; mod gemm; mod rand; mod simd_ops; diff --git a/constensor-core/src/graph.rs b/constensor-core/src/graph.rs index 9f7187f..2f5e1b2 100644 --- a/constensor-core/src/graph.rs +++ b/constensor-core/src/graph.rs @@ -664,6 +664,12 @@ pub enum CompiledGraph { graph: Vec>, ghost: PhantomData<(S, T, D)>, }, + /// Compiled graph for WGPU backend. + Wgpu { + order: Vec, + graph: Vec>, + ghost: PhantomData<(S, T, D)>, + }, #[cfg(feature = "cuda")] Cuda { kernels: Vec>, From efff5c5c3f22fe9157783cbcedba8bf11d6ec5f7 Mon Sep 17 00:00:00 2001 From: Eric Buehler Date: Sat, 26 Apr 2025 18:04:35 -0400 Subject: [PATCH 07/17] Dummy launch --- constensor-core/src/backends/scheduler.rs | 1 - .../src/backends/wgpu_backend/mod.rs | 82 +++++++++++++------ 2 files changed, 56 insertions(+), 27 deletions(-) diff --git a/constensor-core/src/backends/scheduler.rs b/constensor-core/src/backends/scheduler.rs index 28b4b01..03c5b89 100644 --- a/constensor-core/src/backends/scheduler.rs +++ b/constensor-core/src/backends/scheduler.rs @@ -1,6 +1,5 @@ use petgraph::algo::toposort; use petgraph::graphmap::DiGraphMap; -use std::marker::PhantomData; use crate::{DType, GraphNode, Op}; diff --git a/constensor-core/src/backends/wgpu_backend/mod.rs b/constensor-core/src/backends/wgpu_backend/mod.rs index fa39f5c..8a3acb0 100644 --- a/constensor-core/src/backends/wgpu_backend/mod.rs +++ b/constensor-core/src/backends/wgpu_backend/mod.rs @@ -31,32 +31,6 @@ impl BackendStorage for WgpuStorage { } } -#[cube(launch_unchecked)] -fn binary( - a: &Sequence>, - b: &Sequence>, - out: &mut Sequence>, - #[comptime] numel: u32, - #[comptime] ops: Sequence, -) { - if ABSOLUTE_POS < numel { - #[unroll] - for index in 0..ops.len() { - let op = comptime! { ops.index(index.clone()) }; - let a = a.index(index); - let b = b.index(index); - let o = out.index_mut(index); - - match op { - BinaryOpType::Add => o[ABSOLUTE_POS] = a[ABSOLUTE_POS] + b[ABSOLUTE_POS], - BinaryOpType::Sub => o[ABSOLUTE_POS] = a[ABSOLUTE_POS] - b[ABSOLUTE_POS], - BinaryOpType::Mul => o[ABSOLUTE_POS] = a[ABSOLUTE_POS] * b[ABSOLUTE_POS], - BinaryOpType::Div => o[ABSOLUTE_POS] = a[ABSOLUTE_POS] / b[ABSOLUTE_POS], - } - } - } -} - impl BackendDevice for WgpuDevice { type Storage = WgpuStorage; @@ -95,6 +69,62 @@ impl BackendDevice for WgpuDevice { operator, } = &node.op { + #[cfg(any(feature = "cuda", feature = "hip"))] + let device = cubecl::wgpu::WgpuDevice::DiscreteGpu(0); + #[cfg(feature = "metal")] + let device = cubecl::wgpu::WgpuDevice::IntegratedGpu(0); + #[cfg(not(any(feature = "cuda", feature = "hip", feature = "metal")))] + let device = cubecl::wgpu::WgpuDevice::DefaultDevice; + + let client = RT::client(&device); + + let a = &[1., 2., 3., 4., 5., 6., 7., 8.]; + let b = &[1., 2., 3., 4., 5., 6., 7., 8.]; + let vectorization = 4; + let output_handle = client.empty(a.len() * core::mem::size_of::()); + let a_handle = client.create(f32::as_bytes(a)); + let b_handle = client.create(f32::as_bytes(b)); + + unsafe { + // let mut a_seq = SequenceArg::new(); + // a_seq.push(ArrayArg::from_raw_parts::( + // &a_handle, + // a.len(), + // vectorization as u8, + // )); + + // let mut b_seq = SequenceArg::new(); + // b_seq.push(ArrayArg::from_raw_parts::( + // &b_handle, + // b.len(), + // vectorization as u8, + // )); + + // let mut out_seq = SequenceArg::new(); + // out_seq.push(ArrayArg::from_raw_parts::( + // &output_handle, + // a.len(), + // vectorization as u8, + // )); + + // let mut ops = Sequence::new(); + // ops.push(BinaryOpType::Add); + // binary::launch_unchecked::( + // &client, + // CubeCount::Static(vectorization, 1, 1), + // CubeDim::new((a.len() as u32).div_ceil(vectorization), 1, 1), + // a_seq, + // b_seq, + // out_seq, + // a.len() as u32, + // ops, + // ) + }; + + let bytes = client.read_one(output_handle.binding()); + let output = f32::from_bytes(&bytes); + + println!("Executed runtime {:?} => {output:?}", RT::name(&client)); // TODO: dispatch binary operation via cubecl kernel // e.g., binary::(...) } From ace391cc1f3352eae269960a09082d58028a1901 Mon Sep 17 00:00:00 2001 From: Eric Buehler Date: Sun, 27 Apr 2025 06:28:22 -0400 Subject: [PATCH 08/17] Add kernels --- .../src/backends/wgpu_backend/kernels.rs | 29 ++++++++ .../src/backends/wgpu_backend/mod.rs | 73 ++++++++++--------- constensor-core/src/dtype/cubecl.rs | 51 ------------- constensor-core/src/dtype/mod.rs | 4 +- 4 files changed, 70 insertions(+), 87 deletions(-) create mode 100644 constensor-core/src/backends/wgpu_backend/kernels.rs delete mode 100644 constensor-core/src/dtype/cubecl.rs diff --git a/constensor-core/src/backends/wgpu_backend/kernels.rs b/constensor-core/src/backends/wgpu_backend/kernels.rs new file mode 100644 index 0000000..aef4f1a --- /dev/null +++ b/constensor-core/src/backends/wgpu_backend/kernels.rs @@ -0,0 +1,29 @@ +//! Instantiate cubecl kernels for binary operations on all supported dtypes. + +use crate::{dtype::DTypeOps, graph::BinaryOpType}; +use cubecl::{cube, prelude::*}; + +#[cube(launch_unchecked)] +pub(super) fn binary( + a: &Sequence>, + b: &Sequence>, + out: &mut Sequence>, + #[comptime] numel: u32, + #[comptime] ops: Sequence, +) { + if ABSOLUTE_POS < numel { + #[unroll] + for index in 0..ops.len() { + let op = comptime! { ops.index(index.clone()) }; + let av = a.index(index); + let bv = b.index(index); + let ov = out.index_mut(index); + match op { + BinaryOpType::Add => ov[ABSOLUTE_POS] = av[ABSOLUTE_POS] + bv[ABSOLUTE_POS], + BinaryOpType::Sub => ov[ABSOLUTE_POS] = av[ABSOLUTE_POS] - bv[ABSOLUTE_POS], + BinaryOpType::Mul => ov[ABSOLUTE_POS] = av[ABSOLUTE_POS] * bv[ABSOLUTE_POS], + BinaryOpType::Div => ov[ABSOLUTE_POS] = av[ABSOLUTE_POS] / bv[ABSOLUTE_POS], + } + } + } +} diff --git a/constensor-core/src/backends/wgpu_backend/mod.rs b/constensor-core/src/backends/wgpu_backend/mod.rs index 8a3acb0..4d29873 100644 --- a/constensor-core/src/backends/wgpu_backend/mod.rs +++ b/constensor-core/src/backends/wgpu_backend/mod.rs @@ -1,4 +1,4 @@ -use std::{borrow::Cow, marker::PhantomData}; +use std::{borrow::Cow, collections::HashMap, marker::PhantomData}; use super::scheduler::topo_order; use crate::Op; @@ -11,6 +11,8 @@ use crate::{ CompiledGraph, DType, GraphNode, Result, Shape, }; +mod kernels; + use super::cpu_backend::CpuStorage; type RT = WgpuRuntime; @@ -61,6 +63,8 @@ impl BackendDevice for WgpuDevice { else { unreachable!("Expected Wgpu compiled graph"); }; + + let mut handles = HashMap::new(); for &idx in order.iter() { let node = &graph[idx]; if let Op::BinaryOp { @@ -86,41 +90,42 @@ impl BackendDevice for WgpuDevice { let b_handle = client.create(f32::as_bytes(b)); unsafe { - // let mut a_seq = SequenceArg::new(); - // a_seq.push(ArrayArg::from_raw_parts::( - // &a_handle, - // a.len(), - // vectorization as u8, - // )); - - // let mut b_seq = SequenceArg::new(); - // b_seq.push(ArrayArg::from_raw_parts::( - // &b_handle, - // b.len(), - // vectorization as u8, - // )); - - // let mut out_seq = SequenceArg::new(); - // out_seq.push(ArrayArg::from_raw_parts::( - // &output_handle, - // a.len(), - // vectorization as u8, - // )); - - // let mut ops = Sequence::new(); - // ops.push(BinaryOpType::Add); - // binary::launch_unchecked::( - // &client, - // CubeCount::Static(vectorization, 1, 1), - // CubeDim::new((a.len() as u32).div_ceil(vectorization), 1, 1), - // a_seq, - // b_seq, - // out_seq, - // a.len() as u32, - // ops, - // ) + let mut a_seq: SequenceArg<'_, RT, Array> = SequenceArg::new(); + a_seq.push(ArrayArg::from_raw_parts::( + &a_handle, + a.len(), + vectorization as u8, + )); + + let mut b_seq: SequenceArg<'_, RT, Array> = SequenceArg::new(); + b_seq.push(ArrayArg::from_raw_parts::( + &b_handle, + b.len(), + vectorization as u8, + )); + + let mut out_seq: SequenceArg<'_, RT, Array> = SequenceArg::new(); + out_seq.push(ArrayArg::from_raw_parts::( + &output_handle, + a.len(), + vectorization as u8, + )); + + let mut ops = Sequence::new(); + ops.push(BinaryOpType::Add); + kernels::binary::launch_unchecked( + &client, + CubeCount::Static(vectorization, 1, 1), + CubeDim::new((a.len() as u32).div_ceil(vectorization), 1, 1), + a_seq, + b_seq, + out_seq, + a.len() as u32, + ops, + ); }; + handles.insert(idx, output_handle.clone()); let bytes = client.read_one(output_handle.binding()); let output = f32::from_bytes(&bytes); diff --git a/constensor-core/src/dtype/cubecl.rs b/constensor-core/src/dtype/cubecl.rs deleted file mode 100644 index 0c0c45f..0000000 --- a/constensor-core/src/dtype/cubecl.rs +++ /dev/null @@ -1,51 +0,0 @@ -//! Instantiate cubecl kernels for binary operations on all supported dtypes. - -use crate::graph::BinaryOpType; -use cubecl::{cube, prelude::*}; -#[cfg(feature = "bfloat")] -use half::bf16; -#[cfg(feature = "half")] -use half::f16; - -macro_rules! define_binary_kernel { - ($name:ident, $ty:ty) => { - #[cube(launch_unchecked)] - fn $name( - a: &Sequence>, - b: &Sequence>, - out: &mut Sequence>, - #[comptime] numel: u32, - #[comptime] ops: Sequence, - ) { - if ABSOLUTE_POS < numel { - #[unroll] - for index in 0..ops.len() { - let op = comptime! { ops.index(index.clone()) }; - let av = a.index(index); - let bv = b.index(index); - let ov = out.index_mut(index); - match op { - BinaryOpType::Add => ov[ABSOLUTE_POS] = av[ABSOLUTE_POS] + bv[ABSOLUTE_POS], - BinaryOpType::Sub => ov[ABSOLUTE_POS] = av[ABSOLUTE_POS] - bv[ABSOLUTE_POS], - BinaryOpType::Mul => ov[ABSOLUTE_POS] = av[ABSOLUTE_POS] * bv[ABSOLUTE_POS], - BinaryOpType::Div => ov[ABSOLUTE_POS] = av[ABSOLUTE_POS] / bv[ABSOLUTE_POS], - } - } - } - } - }; -} - -// Integer types -define_binary_kernel!(binary_u8, u8); -define_binary_kernel!(binary_u32, u32); -define_binary_kernel!(binary_i32, i32); -define_binary_kernel!(binary_i64, i64); - -// Floating-point types -define_binary_kernel!(binary_f32, f32); -define_binary_kernel!(binary_f64, f64); -#[cfg(feature = "half")] -define_binary_kernel!(binary_f16, f16); -#[cfg(feature = "bfloat")] -define_binary_kernel!(binary_bf16, bf16); diff --git a/constensor-core/src/dtype/mod.rs b/constensor-core/src/dtype/mod.rs index 07482bf..ec88c0a 100644 --- a/constensor-core/src/dtype/mod.rs +++ b/constensor-core/src/dtype/mod.rs @@ -3,6 +3,7 @@ use std::{ ops::{Add, Div, Mul, Sub}, }; +use cubecl::prelude::CubePrimitive; #[cfg(feature = "bfloat")] use half::bf16; #[cfg(feature = "half")] @@ -15,7 +16,6 @@ use gemm::GemmDispatch; use rand::RandDispatch; use simd_ops::SimdSupported; -mod cubecl; mod gemm; mod rand; mod simd_ops; @@ -150,7 +150,7 @@ maybe_neg!(f64); /// Marker trait for tensor datatypes. pub trait DType: - Debug + Clone + DTypeOps + Send + Sync + MaybeNeg + DeviceReprLike + 'static + Debug + Clone + DTypeOps + Send + Sync + MaybeNeg + DeviceReprLike + CubePrimitive + 'static { const ZERO: Self; const ONE: Self; From 679b3346622747f281d32c47e8139a9d21d2f888 Mon Sep 17 00:00:00 2001 From: Eric Buehler Date: Sun, 27 Apr 2025 06:50:16 -0400 Subject: [PATCH 09/17] Add some things --- .../src/backends/cpu_backend/mod.rs | 12 +- .../src/backends/wgpu_backend/kernels.rs | 13 ++ .../src/backends/wgpu_backend/mod.rs | 180 +++++++++++------- constensor-core/src/dtype/mod.rs | 25 ++- constensor-core/src/tensor/graphtensor.rs | 2 +- 5 files changed, 152 insertions(+), 80 deletions(-) diff --git a/constensor-core/src/backends/cpu_backend/mod.rs b/constensor-core/src/backends/cpu_backend/mod.rs index 1b21a8c..63f86ad 100644 --- a/constensor-core/src/backends/cpu_backend/mod.rs +++ b/constensor-core/src/backends/cpu_backend/mod.rs @@ -30,7 +30,7 @@ impl BackendStorage for CpuStorage { Ok(Cow::Borrowed(self)) } fn cast(&self) -> Result> { - let new = self.0.iter().map(|x| U::from_f64(x.to_f64())); + let new = self.0.iter().map(|x| U::from_f64(x.cast_f64())); Ok(Storage::Cpu(CpuStorage(new.collect()))) } } @@ -200,10 +200,10 @@ fn eval_node( } Op::Arange { start, step, stop } => { let mut buf = pool.lock().unwrap().get_empty_buffer(out_elem_count); - let mut x = start.to_f64(); - while x < stop.to_f64() { + let mut x = start.cast_f64(); + while x < stop.cast_f64() { buf.push(T::from_f64(x)); - x += step.to_f64(); + x += step.cast_f64(); } PooledBuffer::new(buf, pool.clone()) } @@ -215,8 +215,8 @@ fn eval_node( PooledBuffer::new(buf, pool.clone()) } Op::Randn { mean, std } => { - let mean_f = mean.to_f64(); - let std_f = std.to_f64(); + let mean_f = mean.cast_f64(); + let std_f = std.cast_f64(); let normal = Normal::new(mean_f, std_f).unwrap(); let mut buf = pool.lock().unwrap().get_buffer(out_elem_count); for elt in &mut buf { diff --git a/constensor-core/src/backends/wgpu_backend/kernels.rs b/constensor-core/src/backends/wgpu_backend/kernels.rs index aef4f1a..e26289f 100644 --- a/constensor-core/src/backends/wgpu_backend/kernels.rs +++ b/constensor-core/src/backends/wgpu_backend/kernels.rs @@ -27,3 +27,16 @@ pub(super) fn binary( } } } + +#[cube(launch_unchecked)] +pub(super) fn fill< + T: CubeType + CubePrimitive + Send + Sync + LaunchArgExpand + Numeric + DTypeOps, +>( + out: &mut Array, + value: T, + #[comptime] numel: u32, +) { + if ABSOLUTE_POS < numel { + out[ABSOLUTE_POS] = value; + } +} diff --git a/constensor-core/src/backends/wgpu_backend/mod.rs b/constensor-core/src/backends/wgpu_backend/mod.rs index 4d29873..05287b1 100644 --- a/constensor-core/src/backends/wgpu_backend/mod.rs +++ b/constensor-core/src/backends/wgpu_backend/mod.rs @@ -2,7 +2,13 @@ use std::{borrow::Cow, collections::HashMap, marker::PhantomData}; use super::scheduler::topo_order; use crate::Op; -use cubecl::{cube, prelude::*, wgpu::WgpuRuntime}; +use cubecl::{ + channel::MutexComputeChannel, + cube, + prelude::*, + server::Handle, + wgpu::{WgpuRuntime, WgpuServer}, +}; use crate::{ device::Dev, @@ -17,14 +23,36 @@ use super::cpu_backend::CpuStorage; type RT = WgpuRuntime; +#[cfg(any(feature = "cuda", feature = "hip"))] +const DEVICE: cubecl::wgpu::WgpuDevice = cubecl::wgpu::WgpuDevice::DiscreteGpu(0); +#[cfg(feature = "metal")] +const DEVICE: cubecl::wgpu::WgpuDevice = cubecl::wgpu::WgpuDevice::IntegratedGpu(0); +#[cfg(not(any(feature = "cuda", feature = "hip", feature = "metal")))] +const DEVICE: cubecl::wgpu::WgpuDevice = cubecl::wgpu::WgpuDevice::DefaultDevice; + +fn client() -> ComputeClient> { + RT::client(&DEVICE) +} + +const VECTORIZATION: u32 = 4; + pub struct WgpuDevice; pub struct WgpuStorage { + handle: Handle, ghost: PhantomData, } impl BackendStorage for WgpuStorage { fn to_cpu_storage(&self) -> Result>> { + let client = client(); + + let bytes = client.read_one(self.handle.clone().binding()); + let output = X::from_bytes(&bytes); + + println!("Executed runtime {:?} => {output:?}", RT::name(&client)); + // TODO: dispatch binary operation via cubecl kernel + // e.g., binary::(...) todo!() } @@ -64,76 +92,94 @@ impl BackendDevice for WgpuDevice { unreachable!("Expected Wgpu compiled graph"); }; + let client = client(); + let mut handles = HashMap::new(); for &idx in order.iter() { let node = &graph[idx]; - if let Op::BinaryOp { - l_id, - r_id, - operator, - } = &node.op - { - #[cfg(any(feature = "cuda", feature = "hip"))] - let device = cubecl::wgpu::WgpuDevice::DiscreteGpu(0); - #[cfg(feature = "metal")] - let device = cubecl::wgpu::WgpuDevice::IntegratedGpu(0); - #[cfg(not(any(feature = "cuda", feature = "hip", feature = "metal")))] - let device = cubecl::wgpu::WgpuDevice::DefaultDevice; - - let client = RT::client(&device); - - let a = &[1., 2., 3., 4., 5., 6., 7., 8.]; - let b = &[1., 2., 3., 4., 5., 6., 7., 8.]; - let vectorization = 4; - let output_handle = client.empty(a.len() * core::mem::size_of::()); - let a_handle = client.create(f32::as_bytes(a)); - let b_handle = client.create(f32::as_bytes(b)); - - unsafe { - let mut a_seq: SequenceArg<'_, RT, Array> = SequenceArg::new(); - a_seq.push(ArrayArg::from_raw_parts::( - &a_handle, - a.len(), - vectorization as u8, - )); - - let mut b_seq: SequenceArg<'_, RT, Array> = SequenceArg::new(); - b_seq.push(ArrayArg::from_raw_parts::( - &b_handle, - b.len(), - vectorization as u8, - )); - - let mut out_seq: SequenceArg<'_, RT, Array> = SequenceArg::new(); - out_seq.push(ArrayArg::from_raw_parts::( - &output_handle, - a.len(), - vectorization as u8, - )); - - let mut ops = Sequence::new(); - ops.push(BinaryOpType::Add); - kernels::binary::launch_unchecked( - &client, - CubeCount::Static(vectorization, 1, 1), - CubeDim::new((a.len() as u32).div_ceil(vectorization), 1, 1), - a_seq, - b_seq, - out_seq, - a.len() as u32, - ops, - ); - }; - - handles.insert(idx, output_handle.clone()); - let bytes = client.read_one(output_handle.binding()); - let output = f32::from_bytes(&bytes); - - println!("Executed runtime {:?} => {output:?}", RT::name(&client)); - // TODO: dispatch binary operation via cubecl kernel - // e.g., binary::(...) + let out_elem_count: usize = node.shape.iter().product(); + + match &node.op { + Op::Fill { v } => { + let output_handle = client.empty(out_elem_count * core::mem::size_of::()); + + let out: ArrayArg<'_, RT> = unsafe { + ArrayArg::from_raw_parts::( + &output_handle, + out_elem_count, + VECTORIZATION as u8, + ) + }; + unsafe { + kernels::fill::launch_unchecked::( + &client, + CubeCount::Static(VECTORIZATION, 1, 1), + CubeDim::new((out_elem_count as u32).div_ceil(VECTORIZATION), 1, 1), + out, + ScalarArg::new(*v), + out_elem_count as u32, + ); + }; + + handles.insert(idx, output_handle.clone()); + } + Op::BinaryOp { + l_id, + r_id, + operator, + } => { + let a_handle = &handles[&l_id.get()]; + let b_handle = &handles[&r_id.get()]; + let output_handle = client.empty(out_elem_count * core::mem::size_of::()); + + unsafe { + let mut a_seq: SequenceArg<'_, RT, Array> = SequenceArg::new(); + a_seq.push(ArrayArg::from_raw_parts::( + &a_handle, + out_elem_count, + VECTORIZATION as u8, + )); + + let mut b_seq: SequenceArg<'_, RT, Array> = SequenceArg::new(); + b_seq.push(ArrayArg::from_raw_parts::( + &b_handle, + out_elem_count, + VECTORIZATION as u8, + )); + + let mut out_seq: SequenceArg<'_, RT, Array> = SequenceArg::new(); + out_seq.push(ArrayArg::from_raw_parts::( + &output_handle, + out_elem_count, + VECTORIZATION as u8, + )); + + let mut ops = Sequence::new(); + ops.push(BinaryOpType::Add); + kernels::binary::launch_unchecked( + &client, + CubeCount::Static(VECTORIZATION, 1, 1), + CubeDim::new((out_elem_count as u32).div_ceil(VECTORIZATION), 1, 1), + a_seq, + b_seq, + out_seq, + out_elem_count as u32, + ops, + ); + }; + + handles.insert(idx, output_handle.clone()); + } + + _ => todo!(), } } - Ok(WgpuStorage { ghost: PhantomData }) + + let key = *handles.keys().max().unwrap(); + let final_handle = handles.remove(&key).expect("No output"); + Ok(WgpuStorage { + handle: final_handle, + ghost: PhantomData, + }) } } diff --git a/constensor-core/src/dtype/mod.rs b/constensor-core/src/dtype/mod.rs index ec88c0a..5d0deb7 100644 --- a/constensor-core/src/dtype/mod.rs +++ b/constensor-core/src/dtype/mod.rs @@ -3,7 +3,10 @@ use std::{ ops::{Add, Div, Mul, Sub}, }; -use cubecl::prelude::CubePrimitive; +use cubecl::{ + prelude::{CubePrimitive, Numeric}, + CubeElement, +}; #[cfg(feature = "bfloat")] use half::bf16; #[cfg(feature = "half")] @@ -150,7 +153,17 @@ maybe_neg!(f64); /// Marker trait for tensor datatypes. pub trait DType: - Debug + Clone + DTypeOps + Send + Sync + MaybeNeg + DeviceReprLike + CubePrimitive + 'static + Debug + + Clone + + DTypeOps + + Send + + Sync + + MaybeNeg + + DeviceReprLike + + CubePrimitive + + CubeElement + + Numeric + + 'static { const ZERO: Self; const ONE: Self; @@ -158,7 +171,7 @@ pub trait DType: const C_DEP: Option<&'static str>; const INTEGRAL: bool; - fn to_f64(&self) -> f64; + fn cast_f64(&self) -> f64; fn from_f64(x: f64) -> Self; } @@ -172,7 +185,7 @@ macro_rules! dtype { const C_DEP: Option<&'static str> = None; const INTEGRAL: bool = $integral; - fn to_f64(&self) -> f64 { + fn cast_f64(&self) -> f64 { *self as f64 } fn from_f64(x: f64) -> Self { @@ -203,7 +216,7 @@ impl DType for f16 { const C_DEP: Option<&'static str> = Some("#include \"cuda_fp16.h\""); const INTEGRAL: bool = false; - fn to_f64(&self) -> f64 { + fn cast_f64(&self) -> f64 { self.to_f64_const() } fn from_f64(x: f64) -> Self { @@ -224,7 +237,7 @@ impl DType for bf16 { const C_DEP: Option<&'static str> = Some("#include \"cuda_bf16.h\""); const INTEGRAL: bool = false; - fn to_f64(&self) -> f64 { + fn cast_f64(&self) -> f64 { self.to_f64_const() } fn from_f64(x: f64) -> Self { diff --git a/constensor-core/src/tensor/graphtensor.rs b/constensor-core/src/tensor/graphtensor.rs index 006ac18..2eb880d 100644 --- a/constensor-core/src/tensor/graphtensor.rs +++ b/constensor-core/src/tensor/graphtensor.rs @@ -177,7 +177,7 @@ impl GraphTensor, T, D> { /// A GraphTensor representing a vector ranging from `start` to `stop` with `step` computed using A. pub fn arange(graph: &mut Graph, start: T, stop: T) -> Self { let id = graph.next_id(); - let step = (stop.to_f64() - start.to_f64()) / (A as f64); + let step = (stop.cast_f64() - start.cast_f64()) / (A as f64); let strides = contiguous_strides(&[A]); graph.add_op::>( Op::Arange { From e9baf36eedd6446eefa2629642fb7385ce0f6f6d Mon Sep 17 00:00:00 2001 From: Eric Buehler Date: Sun, 27 Apr 2025 06:52:18 -0400 Subject: [PATCH 10/17] Wire the binary op up --- constensor-core/examples/test/main.rs | 5 +++-- .../src/backends/wgpu_backend/mod.rs | 7 ++----- graph.png | Bin 15130 -> 27206 bytes 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/constensor-core/examples/test/main.rs b/constensor-core/examples/test/main.rs index 6b57b8f..2655433 100644 --- a/constensor-core/examples/test/main.rs +++ b/constensor-core/examples/test/main.rs @@ -4,7 +4,8 @@ fn main() { let mut graph: Graph = Graph::empty(); let a = GraphTensor::, f32, Wgpu>::fill(&mut graph, 1.0); let b = GraphTensor::, f32, Wgpu>::fill(&mut graph, 2.0); - let res = a + b; + let c = GraphTensor::, f32, Wgpu>::fill(&mut graph, 3.0); + let res = a * b + c; graph.visualize("graph.png").unwrap(); @@ -13,5 +14,5 @@ fn main() { let tensor: Tensor, f32, Wgpu> = res; - assert_eq!(tensor.data().unwrap().to_vec(), vec![vec![9.0; 4]; 3],); + assert_eq!(tensor.data().unwrap().to_vec(), vec![vec![5.0; 4]; 3],); } diff --git a/constensor-core/src/backends/wgpu_backend/mod.rs b/constensor-core/src/backends/wgpu_backend/mod.rs index 05287b1..e98b2a2 100644 --- a/constensor-core/src/backends/wgpu_backend/mod.rs +++ b/constensor-core/src/backends/wgpu_backend/mod.rs @@ -50,10 +50,7 @@ impl BackendStorage for WgpuStorage { let bytes = client.read_one(self.handle.clone().binding()); let output = X::from_bytes(&bytes); - println!("Executed runtime {:?} => {output:?}", RT::name(&client)); - // TODO: dispatch binary operation via cubecl kernel - // e.g., binary::(...) - todo!() + Ok(Cow::Owned(CpuStorage(output.to_vec()))) } fn cast(&self) -> Result> { @@ -155,7 +152,7 @@ impl BackendDevice for WgpuDevice { )); let mut ops = Sequence::new(); - ops.push(BinaryOpType::Add); + ops.push(*operator); kernels::binary::launch_unchecked( &client, CubeCount::Static(VECTORIZATION, 1, 1), diff --git a/graph.png b/graph.png index 85ca980c3bb9a18f5639096a50b12e3886e682a1..e75153d2b7fdac08587f7a68b6e43b341d33f460 100644 GIT binary patch literal 27206 zcmZs@2RxQ<`#*k@Bq7Squ2d=^MRu7LWfh4+DkDN9AtZYzQBp=$(UcW3vXX2{W@KiB zj0(T^`T0E0|NH&_Ucc9~J>2(oo!5CD$9o-Dp!P9!MtW{~3WdUWSVKjZLZR};zqZlQ z;wQ&>R@(6&wB|?ERVXXu9|=WiQ51>*<*>>@{c8_?bh#SqpZ~gMs^{YidWNIaH@Cl{ zySa-#J$Acj!p*yG`GwKpMJGk?-93NYYVL08;+=d8RXfv^9pCPT$NHr163G)xFs**_ zW4kPa*vDVXedVq^RH~+f4ECOij~2IF{Pf_@{pNi`GD8Zry9I=(%f%R)c~j^X!@SHm z{rcALq-H1=>*DPiAGs;@7bxW@odWKYyP5y@?6$hYueFVruhU zj~qF|s<4?oSa3sN-n-C{kc7lUs+Wcgn>TO9A6r^F)jkqy_Abz*mA^Jc85tR=uB%hE zvlFGKr`I+#4Cw4MW@TebeDQ)gB_)M?u&r&DuDO_`~{Sf92^vrkoY!T_xUrY`Abe(TH1!THoxNH;<8UAQEtoO z{9M)F9m&_KXKmfOwX^Y3e~+rFsXKLM)eTZOtxTTyy&LX)J>Mfs5zPCFgW`x3;!cfBdLlom**OZ7nk2 z^<#XzZ*-KNgM;ImtE<`P7iPm>UT^E^+xsDnPgO}cN^t*v_Lx0qD=UIIST=16e(-=N zKt)LUz=6_=3g4sJ92_yC`MJh;B&T!j1b6HRbn7Z~-B?jkfu*bLE%oUeA7{LL`Em&N zK6-38cmK|j6l?(*8E*2CDMw@M|IFD+N=lY|_z<+SEc8G-+x*@05?s1^dXabTyk{%A zb&KY4N{V+tZ*Q-cS-D^7*RPv4tA%*Q#hs4%(^U9-IAr zgrKsR`Fpx+*RPuoR7a9Wx>@c`B%XC_ET@XPdVj|oo4o74M67LW%&=1x6%`j27tP9U zt)=fW(466uyXJLe=##^Nn8-*f9E`mD{2c=iLPHsQ5~rUO7V@lJyVkhdr0^QcdVYml z85uj&!ujgHKX~(IUr(aMcv^uFPBF{IjXvf6>-&crlgWc;U}O}(@R4)j&!6)8`t?jq zOi$=i^qxL>a!6mFb<2jeLPA0jv9Wacs)gUb%?Cf;i`{R_t*NE8$fRd$VPVlb)mJX< zz99DOOcA}el2@X#zvF?S@$tH2O^QCoqE&TqQi72h0+AZbyu6GV`a#~S)|mgE9f>_~ zMYyxGbAc(!YSVWP@8Mxf9=U7GW@ctMCqA!V@4fIb);n+4ipY^>>MOGd1mt3+iO@6(*6 z`FL_FlaiAiMq75=^7ZY*akjU&FZp9#?RR(oG<}JC zAy}!p`}=~iSE}E?zjelKe4mWNH=5SAwrSooF?a9WQ6ZnVy!2-yWqNk@F}~Fri)Yc}ftgLMG%3^I-o@b3!7{x;LSHaHuNbz`M{OIh@71L zffeRPh1bNf0e|vxH@^zoLEYHc$jZ#DM8Sdw-}Lc$+gX{q_pR%U@a|KuZq3fl;sl25 zzw}uN9l+hcRWv_8e|GuLRp^nLpI2}FGMK(`LVaDwRNkyy87>#dc{Sik$VG<2gbYJ z#(b=+t7&PubM?pPgSr`aCSSy!w6?TNQ1)kvEGm*mw`%lb+$7V*q`~p)uWBz&hlREE z5ft5Qm&tBX&$5?0YpbghteR6|(4J%Y|IS$nA8ym^!DzUa7tjwai_?~usu-6t-XhR|by?(f>Y zyQZ?z55;nUNxif&S-s}-=h)7!t{n;r4=3$onQli%tM*rgZ`-|lcSnKq&ad|dj*kT? z9Xv?IxM_a~0|UcwZqE(9tJ)0f>DR5x=l}jGUUremA9tQ*^d{)*2=>LG?3PrYeUg$5 z1={^V%uQwwwRL%5@h>NHPU9a$o3ze}2x(Y$*7 zIvGX$1Z_pScjzXm+ZtPvP#&Ul-rBT_ZlL#d`teyjI;VDOvNCkj7WUu~pY;m8vUeu7 zoSI!6dgR&NxoT^UT~`p!*8@4%4abfhJL2i3dhMF@u}2D_DA0o2wyha!%kuH}UvqtS z*gHLayNar+prBx41qUTIE-v};W15E#A2zhNS0xpB>iObvlOu{d8(>&hBbQ z6iWZkDt=GXhJ=Ik&Gios_}sa3XXyIpE?nx1b5%^8MQ$8nVPOqTP1KaXI1$@-?Yb2m z-FhW!{fVicKTCg&wf|h2?>V>pcd?{0f>X;i7vwb8I(JC3r54o!x!R&DhY8bGJ#s9bic*EuqMUg`fRv zDIUv9k4deKk2^^EhX=&;wPo8|7k#U$(U0$AiqJCiHqpOtL+@)7JITz+x($!|V z4(>E~wgvYdJy%zEZ?8XK;ZYTpHS77dP@Lv|obq)A;;X5v%c{8u#Aj6z#I{@XEsA^1 z`sevpD&c%Dd;9yFEx3So??guaG*<1!w_2E*Dxt1*X8-K3YEDuOW*v-E^t=(ZDaYuw zf_~qh$#>gKOiV;XMNeAGIygAcc;P^$wOoDp_>=2AN?|j=LBMch*q5izi;qx-`@h~C-w&ePLZrvzOs zxa+M;KiXJl_MUftH%u1Jgq2BGW}V`HGF7?hGN5E{b>!wFwimHE>>VA;V#Uwh>G%}Y zvTyRp(W8C+{pHSFzb3jio%LMWhqG;d?%bo_@iK1RzebwZL~T1>i4Jw5!_KAkF&%IX zIW1IFR3eP;ySn0!7U2IY@8~G_qENdH0P3J{11npJ4?=ID-N78Z{`Tw&7gDcma|QI>7h%sTiwC#SN+d)1>a2HkAaqGDpq;^N{F zH5MY5-@diVp`&=ce*OB$s-M%-H{ZQmNlv(R$oKuFKuzKDn6ozmv8WWKBS+`~U3}Wl zdh-7K`Lmw495{Qux_TA6zxAcRRJj)(w0`?$&dSOv$<>l__T3E%U+A@8c^qfmS%678 zdu&hffB5#}M@UPHeoBtR{DeugvDeh&PZ2JmD2o8XVt{AtLn9)tLjwc1o;(p5`2PK8 z(+!uVfzR0m1>6*U$9>iPDDP%gRy9jS*a#PfjvhVg^ZfZ9O7GnG*X^RBgWa3NOettg z-sh^qYI+YK1ZLwV#+icKrBy3Twd=&hM6vTTIQ(sVVe_ zpB?-0{Si9TX+b=ge&P-1WL?2!1G>FoWDu4L=P7x8^-(u0i!ot>ZS12p424~NPqY(*g*)ou!o@BChGAJmBBDwp5 zW|fUDjj@S|ATyo)uhA0<-&{q}1q=a3Zk4Uv2(4OKk^?)W^xpFy$ZGu@ZltT5Bv#~5 zKUm{nh?=sNl6Ph3n16Op3GMn2huFtjz7GmFLJ_KmRB{} zHP?CUBmHi~TVc>W{^j)=+yzjNK>%7)%9q8rhq~kC-Pm!^s^Tse*SKu& zI`mq#7Y#`L-HoLeKpH7qS7v}#(}C-)tVZ;Po@Qp!BwRa`@urz}J>TCJA#DSL_JO^j zD02S5jJwC{KGa8s9%lB#+;Ssc92rcGe)UcC%k`Sl7j6KRiblsz+f3(bLrhbN@S zZQlIP+&F>h?H9XSr=PugwIP^&ciH2^QTIJEX_IMocd1>)ZfR<2N&V@+>Z?V+`Y;A$35@N8l3e|y?_6H*JxxhYO2$>_klyp#vUH>T}5uW z74yx_%^8V_iC^X+M3DY*=kDFP&T$cCu;)&9Qa&Z|ahBvv^r~2tv=n zo*RE`b2-<|qQPX}Sh^ma?c7^J^?M-dxPDXHYxIlAw6tx%Mq7jJx(X_6c7^)(ro@~{ zLXV!}4u0ZPRTy<7Wp?827a~E_{)6%Yl zuReLVx>`fV<9D|1SIaCEyQsuOK}J5gSO75K+JFFab93y=Ab^?tG^?x5T{|Wxx{Hni zy#W=TSkANdYc3FKfL0NdnVFe>{E_kUc-&F-vNz=ZDPcI6L_>HJudVhca$eZ>vn-jG)pe&`4UjT#YF>`hUHgg(o5X9|mrp$f|-qQsB zZPVymE7;{{Hpd&;Kv`gQ>puDt{YI%8i>esIdXh^PHnninogw!rw+652@(TJI!e|@9| z4b(Z!STQQxnT>=RRdYsq4xL;EBxDvAmI#e4(r$CRCc56P!7pCCc=4K>+qs3QzNy*S zTTxMZQGL?|1!ZN2-d9y^{#PU<9}WSH0s#lVTHe#cf8)lDsp;v_h*GVBb&#i^z=0VP z#ZEzi58A?T*IOxw0)A+R=b%N|U%BD~<+rEriN3S*K9o7%ygX^qNt7hO8!^)o3Y5B1 zh-l|3L(F8YP1d|q?L9m{*`toHCGu)*%$|O9_4JGk@6=Rb9$wzQZ{Mh;q@+MpN*Wtk zmVW>8!4~Z?%RQ>2!-C!y0gbU}!^VyIL$zN&e=dQzSnkK@gVjgN?i_jdS|%ei^L^`6 zgZu}NovK73e1LqqBM zbs1n+Xn*++Y(BoNd|7o*RJ|yuBnqmF12oUh0_Sysfq}+l` zNmU0te8~48K0co@3=%Qazt5ac(+vIqDl~CDI60Mev6oj@(-Bdt2kG#cMex<{7nAgWnzVV_+TwD9gFR=Yu7G389LM)PA;w;182hd9D6)Oc*bnA^7uaj)21zPuhR zb?dWzsT+qMsP>+MPZJRnQ#H=Q#>xt{jTwJJziIsT?G1NgVv^z2p!KIef4*jFm}ahD z(HiGqcDT{!&*@$4%3C%-!+-QgM{2nN8{f>*G60Ka4i>op6!{Yc8Kz9;*ZB6~V~=%r zfX}mVa|Z!#n}7ch4JN0AmB@{>`|E-$2M-}=(qg!#l82Svm%i(mjEoHA4qt5U_vYfc z8=GU@{QUenBLnW;&;jzN!*0Yz%14Nl%{8zkK;Jlj5`rx+R>2 z3UJvMmwOb7%gtijG|ZOP9}!}uCvXz9Ec5I;h-92>*M{OTxRVffB z;RWCdeudBAnQ@;ik6K_DpL`G$6f<;9xH^NfJbqUD+ISyZsN8~x0pw0J_#Z099QuZa zbns|MX(ftGPoLPw8Pl$VAB36dUO z_Tts6GN^LFz_9&gzU$om%bgOy-Z#t1@c=KLcWIP}-!R6cn~27}83jk`a<|2%5K#f> zKX!I@*Zz#>zJzv0Za2p?Cg6$gxt^c1Bh5ddHB9p|H8Y`EpnO%=*IyjE9{VXIexvqQ zY{9>?&By8J=?_8l!sU*?@c46;Xj~BO*eI!b=hUg?V)tHT!r6?BjjiVU*t)hW|H>Ml zHR~&%oOt@;>bMd4x;fmVyJg{PNJNbb-($`$F!{SON!(*lP*6yJ`t+R3&Ald6V@%&j zErd253S7tt@QT}$^MC$snHvoIbM9sSofOTOk}#TUMCjy^y~=>f0|#zMmS>R5uDdW0 z0OK3Pld?7Z?jl@@H@`)qr{`oGT7MI{Drpo zV}-G~^~HpMb3$^mkH?=mK{WBM!AbB+i6q=#Rasw!_dRf;+Ot0?MKvjs{@h=d~Jq02bwSb&Ni=jAi&` zti0JBU*|O$8*wO6-`7job(WwFFEXW_K3(j70L}ypE9-lg?F`BKnRMw~Xo7?g1E?G{ zHy7&Zb2{qz_vf0qiLPJ*xx3z1Il1C~8Ed{fhVaRAqlyKDhLUp$b$Sy>u~%^LI$_ON zuijo$>;um8=eiD#YHVi}l9uL#2ws8aMZWkH*gNbyBGluMaYmb}Druo=o1;4|j6QwN z%)wFRBHdMnhY0o^Wb*cEDU@t-$6LuAZx*7Vp&@%5Wc8v?%yu2ra|i_Dko(+Mmge!y z#h#Q+aGrF5DnU-Y;_}b+?l=qX{>|sL9?Egm)YRm`sF?lr3y;)H-xvGd1%$Ga95iT| zz5g7v=B?wM`5QqAKfiU^gN>M59;CEwD%|t$Di}!rd?X5a_U|j?H;O%Ez0}oN$rB^s zptmv93MFZ3W~Lk};pt!Zo^9W|cO$6cd&qF&)-Bs9wrAIL*>;JW(1^z$0*tzM?_T5S z%SF%QQwRT>1!x_u5BYy-uqu!r4}w<#;y(9WdV1o~h&Aw_C5{=foB)A4XLFwUOk_{Q zif+EgyVhw#YHBJDa~Wtk46KZk9+hy$(HWWPebF3OQD%R9*+^+Xv-ub=JD;!6+#Rq+PeeqmhnXiVX=B>cM8oePGN5?}j zz$qnNUAz?dm?cHe6B}neS6(YLp8#a&1uLq6Oh>zC&mQ0Tjsm8F+JbWh>{qum)cl<) zUylnh%(?Vs?~t>L%g}x`V0wReji6k4^KovH)A$g!Y72>vj}-Z##V;)MhsPDJWI9LO zz3T^kfP@F2tGrLuiqk%Owr6)6Z}YDgo4t=!q7g&3g3pfH}fxssac~%*gmMu#}cX&Zb%GlM|?m{;+k38?Z^ruB56e z?R2JW>bh@#GHIIkuj3GX{7~La_sp3yaWOh4Pqq!bw>NO;*jgg_2|a$9aIT8 z2tqL*BYI#d|D`6k8&)N=xUNteShlsL&k!LZ~4y zlvA7<9$&fJWRX97&Ej$$n;21(GN~SSAG8th0n+ExZ1Jl#wx3^6K}a=xVM-MsPy6H) zoby;kPX(e8`>x|a*(pzFJ4qvR0BxR>4$#kqK7am)W3Pb%C|;nt=WnI%SNJ`=nU$U0 z9QBL0DO&IB+4Ynkk<6&vK}sCGeSO~Hip!kjXe^_8o%9givu8aFlvNZS87I1`aDIae z0fh%#CU|Zv^sRwhxys}BtkwK9oC|wM1C$bS6Sr>Ny1FOIY~elM)-F|S1_piw9ta@; zK|$8-f%VUlGBX*_?WU%tN&zuQ^ z%wuU~^^^DZ2R06l^43~pKqE6q@}#R< zhqHN$Rs#iNOT7m%VBWN;)Hcyt+-`EFFyDuOZ|DZV@H6smb&lEgnqjoT#l(9Fm` ze&g^52tc8e#ebN<{3;e_h8CIBGj1#|UVNb7!HpJ&#`E!tsXBERVQ&uO9s9OGRV*p- znzWZOMvwzo`-Mf#hD)Dccw_IR?ltQ=*mV#T16B9ubnTvg^v*%q2TWUIYQ^n3rLh^l z0shdhs@%&kqnRn2E-SWgpQ^StGnUxbbLCGD3UMON{7u+$c(yO7OT=ukS)|0tQH_`7 zz}b2a;>p(i7B|hl+vvTXMZKCzti0QP@MzZ5w8xK4!Qc(wUfo?-Sa>Zc%uGhAjkhHf zidPx>9w5eN2zT(9ihqypbpKhwfp&atVJgw+;a+mm;bdW(tiSX%hw*apqA-3BA{^B{ zO+Sz;DOppwaM*R5>2NEYUI?v=p$AD z?MDX`2}edqQj!&q2Z*5r73CC_Y3x<9sthcp?d=>WtDqP#9Vir7u)B=1=^zsJ9VknM z6X_38Nm(>z)wS98`nZ-;uP^j9Br40PSBb@Q%Zhi#TU@wc-jc2p5Ew|s;;@=R0j{@4 zAb~I0<_zGC=$XQ5HuRm(Ya!~wL9WD8)G;>xcTOv!hpc`^<^CG zC;1ado!^EZiXy6_n~`1PBt*^92Pc*jHo~!oay80M49$z=Pr+q-yIgvF^+mo{R@sJW zZ^sXA(QkIk2?z*CQ($^qT+D~0$ooRq8T+A6yliTfVkWD0Qg;!tfKb+Ak3WZ$lfT)zBrvc@74u+G*je^Vzn!IJWS)pg_>FKIvJ4B$$;(GZM0j!Sy^t*(4lV;A z+Vb7V8yo~i#{NGKwHLNKbL=@20_yzv%a;rZNxu;Z{GA%8!HtzAmx+mv!7zFfbHQH! z$!wwrqF=|p1^E-OwBCW5sLwgJr*nDaL(OF9aiI+mK*Eq)LSZaEEC}wVA2&mdKhgTO zF;In5#%ViY>4&3)f3k9wJMp4o=%Eoo*Qf1v9pa^UDG5+0a7ufD$=v-a4>*sqx(J;H z723{fj!l|p-$#v;S9b-gx$i|>1t$_DX3%odv$9kn-6F7{ZDd4`^4e_<=kpd)H~$&*PC(?OVEr-PyD#inIuYW9U$VEugi_*Nsc77`-BN)LWVhuj_r%kb!^;|v%V zvOEG35*u)G4a%g>%g74-6R7*}K^xgAXnMl=2C!zPxbR1j8t<17{h`58BHWpa>)Z%LA&&`Q-;UJcFJ{}5{XVi z?%@wPLTIR#mX=DuQ6S(5So-4ICMKt+9mxWKhQlu<$_1oOlEVCsHBW-e_#!7~@C<=W zQBfOz&rjA*@M`OUWx;Tex_$e0#O>QRA=^wu$HmolaU?x`T5Ic7_wi#M;|tI(~dfE+i#okVw*i5OeOj z9gvYJtEgCwGZAlfNA@I5!A%I-TefcPLp&qT_~1z;<11Hs5#LCwGRv81@9G(2&r6fi%RfC(C2}Q^tT&^ zIG)hg7l1ekN=~Q>Ii^StwY0V#+_Iq*9ro(gt1sTX@x__S6@T;cr8m~Tr%&oB{8aQ) z`iiGNqwnAU;34Ja=9UMQ4<8LI$GQJfY{A2JbJc{`fVyzkgqS z{P=OA9%A_>U%Vya0}1U-f3MAR8reW{q_}CrQ>RK>Tcf{jzgkxG{UkV^hL)D9uI>hd z*$o>u0R7Sm0Sm4IfS@JeLm)mNpw6WOa^|W^UEpS0wr$%iEKG|sM)X~6V`By!-`U9? zek>mmH^al%(=u{DwqK}~6GBJ@S84qAs#f3D=p&6!4fcfa9N0ioi1;mHUiA5Y*&&BG zF5+Oy=ynLIK`~3n$XE;Ik$g2ilM@*&mv`zeC$L%I2)%#*UI2XsMVd5nGi&Q0v|JJn zBq}oCJ%8Uzp@c9}qEV9m0&{^hBR=O1=%W_K6Nq0vhh~S6!{l892`5(Qs$fsOe-~#0 zLPFjHP@&s0!T<*iBn}ZQdqsqCN?p>ew?Z)_{SR`h1MELkKn7kJRw5G5%s}^a$(0_x zv_9WBEKch3A)!bmp-B647hu~mH>Vx1LeK0Q987&;Z^)v}0ojpm-MVscUT;Xc2!ujU z5&{&1FzXHRajgDi688D~ii$qy6=l#69FR}7w6Qq{40~-PB}NQBpq-nl?$xXNnRuj2 zY!^@?#NhmCf`1Y}?d$TNPdNOvzy#9h83ga5mPkuWo05Pe^c93`?|gBm59rf^a_4*N z78FYUwKUWc;^K(_2Ra%WbkTS3A{v-`QeR&M0VGMAwp|;7E6`%WTfz?=I+Tvw-{jPt zW5Q99DnjIMBC)OR#l|++JTFq)cI-g{Hgj?LSQ83`<#@-N7Q{oSmGCGbMW}$*f3Ot2 z$4146X5WC;cKGz^IAR-qdT?NN(sLyaBJUzosFfNN#-+Khmx*%$8cO0+by7W#;cyfy zkJ%$ToPGV5^{W>z1mM=dga9y9!%1-WZyQIykOAr^Nu2CAZw~KbZ~j&hykqz7tj%#| zs!AuI5R$wd!fR_Op*#m}15h7@66x+=Dd+$9uY1R<^Xu~S@|Y>yvR8L4VgJBMDsizK zza1BMbn@Mb0@P9z5-yTIgQ&X56ucOYbVf&^>t3h~x3LaI%fH9&Jb2K_5_~~b=?lKi zw!82i%8@Fu))ZG}6rF&95k%}b43FpD-izCjjf88AV%G%E20xtP?YM%=O<~)4G6FpL zl#x*rIGs9t8+ZST?1bXthj9PYQ6&(pv_g&rW>V4IuG?%>=dF+!M0h3|QiwVUT!B#O z9y`X=V2bzxY6#2ZE|Nn+o=6ev#-fMT+{QlqiruoRRn zE?*{ERyNXUIS-#=^|-$E_t%t{`#@oViJVXreSc&Nss?c>DGhMQTjo~WkiP~`yh(lq zfh>d}t6elSHyuvVHOjT){RK%E=E4Z%iu$*7Ab__ug^3k+8#>S8!h$vth6Kgsr11a` zNJ~fH)RIl$?w|Zm=JC=c&54N#lj-+YUC`B3|0i)cHsf^kl~98Yw09B_!*fR#QY(I0 zMX*@`3zzLQqKj1HECe83C}+=qVviWTv1Y@rH_EdYoct-q`y7;xq#}hP11@}e@TFry z9v4>zL9_;XJ+iHzyA_j(sWn@+Zcjr1%jV%ypEZT=A?I4v+}=vkVK^v-7zFqP22@jB z9Uy${fd=?s{eAg+UE-a`AUnoNI|uy`Z^^*VFf((1gAla zAb8ffN(NGs5eipRQxma~g(94G{81$WD-lsqKF(uprO*(O>rrxYk|bJfL_|pu!36j} z5ypr`YQ?q55~T%T8}l3C2YqPY>_|obC-gH?j6r+WVUR;1*bmoHCVNRuC_ z`J3_NNm=A(^%pLaXXM-$n2|}!x?%kToAG~zC5;6Q+6%{8pK?GpdSTrvLTWxx(Pg_F z^Q*@aLZRRsfq_uixv&rHw5g#X0TrMFq8(KNWa|x$$;&-xL)dyGt_v~@d-oWLBf>wv zot3qlSH?-h%oWIUpub-k&E>{~p~%jiR*Tc$2m1OBZV0@Epwcj)dA4~4J(8kpp01(J?aOhKL!816vIt6Djm#ST$B&-Z~E?CgjP<6|TMBA)}Yl(4bAN z1S@#Nc5!hr(%;|z$fI8~nVoVlDX9dYBgA5t4mA0p?s4Z0;K_|ny*hL-(0Ncqo<=w0L-FayH*fj1dvkq+F6qs zySX1MQLw;40$Z@%=MA2 zL4kpZHx7>dUvPVtkb#G5H;X2 z=3AjGr6Db`<73*h{}s9H?EIQZm#u1PxgBMC`xajj58Q*KRnwM#H9U%zohERt!E054 z-Md364R?0vb@0Nnm!(rWTBC|BVu9iZYFqz{B7yY9+}zwOycQA7=g*&~VT+L%0SINNe9%pA~*U-Mpx7z)^J}O4@1_ER7^X5P9w4|n{M*daV%&31!2?)AV()rU{Fh`QK zjdR<1Pd=fT5#t!-3`d{+-dGLA7Nwfvg`W0LFmt(j5bdAhRTg*{nCt9~#nqS^>q|w# zhy#-he#WBe7^rD`el{GBA_+Y%ZV162f9>t>u{-YD)|Qk|P|%gp9D|DS+oo8VYxGD1xB z!otE39kh)$L5QTIqtot~KHb#zi-&__O};UctgI}hZph+_$9$cZ%?{6pg@;lx+J!ar z92ARzQ{eC8<5Pyz(QjxEOgOc1sh?aqI5`6V@2V|crCxqgAmqF{Sv}m&aeMj^T;D9B zo(`U9+iz>+>Uh^8rQLu4#bI@If45>SpnI)&nP^G_eo0G31x3Tc&JHnY2q=;szn$(& zT5X_8fIw|8PDy|`sJO3f<$ zE64fPwzRi5L+i*-t{8N@9RSjtS#!qoO0&_2M^z^Ij=oUL=RV5XAiP9JK`dXz>^GdP zwUn$gMMh=?1%q7x*=xPZB5OTgOeEwG6@&!l-2H#*gR}`@@{8Od8CQY7rHSNi%j=8v z!cy7$>>Bu2N9({^Ut4eKcb3TNOM&n4%S z-Dg3%$N5e*lDZ0_Z3)^S35R7A1e{8HSmKiCw$pkv52F=yhCYEU-giy5e?lgi&BCxTBVy97x@|#wvw35&G-IwD`@7F&WJq81X+!Ochc>_5g|O zG3~MMj-MGd*3(Ge4dDJiYv$HT|;oQM)-asIsgKEKMOp9|6%$LIQ)5NV|lA~W&8 z*KpU3QAY?ROKI#um5e`7F&-B1C_r8kf`|&9am0qWlpyaq9n-P|St4DT8F9sy_xRg= zi<_D0dI6+RGYhB6)?wu>f@k*6^~Bt{vt9G~lPA<(!u=~xY`U?NSHsMzQ)u*J^WIN_ zLWuBv-9cX(1<^pvqM8}XoaBMT-*RL`?b?(n=AHz>F)9{yb#1O3s0RcQ9A&5KqM;^|NadJE8lH5qD1K%M@k%lnx&_&F~&^Nd4n?zkaU3f4O7t1MOL04E* z1lz%=SlKG7lD`^wwd2hL{u>Ka5OL%7+gi^7!%S}8wTl5!?h|?a^EHelZl-hkbTFLn zG89`V9ODyqwRLr?r_zTrqCn`}F;_&q0ettJwolMx5&j6q(&M`41h&d&4Z6;xB_vQV z^0~YCc}j{e@>W73B1g=wA38*hKuiASZ)&5lE@?9W= z&bB$gJ6<&3U5=k7{78F)k(+*q(-oMegJ77^lJn3XO$w|Op_)fBXRjRi(qUEi9U^i$ zt~Gz+Fonz;Fy`Q?3SD zx)C1)TGkF2_> zwi8CK$2>fi^d^lk%y8L3{>kZX+s?d58fx#d^WAl(stH9<0qPzch=P)I5T5dYEYF*E zLz;(Mu_57yeobX;4`SPS?H$(-2Rx5$5Hx_l`_C)?h4=N(7e16Hs>q3M1^$jGMcb5P zuDOWbfK+7Le$Ba9_aR709UE-+9dH0iDg#@c&s|J>Q-Oghfb$n8X7~;`-U75*F|Jj# z@1=}>-WdDeEC5OF+OEwiCy7yH6AlY7Z||Ia?_YUns_E#SjIL144^4OfTJ3bMkJ;)fOz=o}$?e--m`~cyHf?rcBSkK)jjX zNF9Oe1x-f8UcFsok&V<5#^kr4FX8EVJPdy=lt2o}EV5)#kgrK(0uwkZ#^JgbWFA2$ zH+p@65wedC%J>L&dY|lLcIcr0B&)f(b@9_3l9G4u;roL^LbkZLxJ)GZY~zpVqR*t; zuIJz&UbM3O=W$k+R!&Y19@zeWW@~q)uEJ$D!^$K>pBU92!J)h{I5X0!dgF#d(a*|F zNcs@O;WYS>3!!hL5&SBFe|YL-5U~J&>@-J3ke3Lokp>Kiks}NDWVn32qWkFiJ0ZW1 zYG@>^UdK)bZ13H_--IeAGrqPR<-YNs(Ktq4nLB_{%OBNu?fnbuFR`RV37-wx`7^Ut z@w5*c)d?`sWE?VW5|YNlL=V}%eR}{euhRyYa*t7ZxW&zBprDNQyi>IO@+zfv_GwjB zRWs25F%t{V;5dwE{k$C!ed-u<2GtFK8HB2Y;4zsn2oq_^q?-a_4nyY}L6A!rkE*G? zJq@@w9x0`-O=(rzb>Lc%zl3yg2+g!}@IYLA=BEZ^-`!L7u$mHb8`Qb2v{guj}+n|0oKlyUAv81 zN%AkAo}TznA%H`uyV-zeSLHt>y~YkoLgL~&Sle0(5j&w7kDyy$x~TfUa?@(q$249K z9z3vwEC1n#wI;IDBMvt{UP0`$^4-ePPT z92syBqDCVPL*lM@?S=W@g`Y(FWoBkhXrVHd@d*iGAOqv5!2!X+K8Cw05l88roLrAf zN6d4E*E|jBzyKg;NYMh&Z$SCUz#Y1t*p)#|J9qP~N1*0uK^p6WrbT8fSh%>8LCqT<*Bq?z8I*3<-tn}iR4FV*tMY0O%I>`ws zDHSex);nZnH>rp5lp;LH!p7!1I%*C5xdfSJzynUe@jX5nF0|+bL<_=<4s?GAi+FEG z@A!BqMg&YD>S$GN*Oq_=7YtEWMNQ37d3x)%ZSb+l3onS(fO$Gp2E+s!pPzll#Fdcz z`svfBL)H` zJsmAFQg3}yB$^%MoRHL;Dk3D35U^p*_M;ZtH?Ng-z2Rx-sKv5IWZRaW`LSq@2LlhH z!iTojOVuCPR*!K5A7rP>5uJv^0$1_7jGYgLt}6hl?4c182LKiH+EgzUae*uGm4$ZiW;~S0oO`)PjaXDr1YsuK)1=+Faih~{%UbMlj2RzH zI~K1ndh5_+0E!n&po+%f!;y;C+~v?wYKCYBXBH8uuxx$8hWBqEw{Cjw+$y*+lC~YJ z_<|SN+4@0xf>7&WL{!08iNpvpwg;$!Alw<$=;=rMHuemUjDR&CJoA^lO=NhO4!FpA z>Hv_C=w%+88}z=17PG@JI!~tF5EuH-TRvcNFyQQ3wqn+hLRAKyu3;G zwF?#&y*TXfiIwc^p1J3|;D!JOla=;Sy?dNR5G7I3d@j2Lj77FHk^hy%4`t2E}S?n~PpA z!`LPbT2iyA>3(JQ;5^!}(cW-0gR;s>wPnwGoFjJyerP|vGmBz)Z44H;jAjO|_w={3 zFu@`NaIsv$w1j=EdVY4Ao2wk68x5maHNUj*|o&9-D4j!clYu5coFs3k6QK~Ly z&A*zBYWfhG-$MnDAd}_Pb2;}>O8!B@2dV%Ts7=@D1McIH{#%7sCVU9=)}*=ttVj-0u7V5 zg&?IzDiZbx!YaEiKeoBSfiFfX9-AjCV;f!}LZJX_(x=5EtBKJS3XL^s^@mp4#bk&eTanS0^K?cx%G5nRs~?Pel5T=9 zI;J5Ke4Scl2T9D<)~>^LLtf*i#3Q|}{H2)AZXhus{5_yMz-#VP~vltWBIHSLoLd$O^qYG+b5%qx<#Hpkd@p7R)cyMxB6dtj)9 z^qj9{f0mrSdUP#wif~j78H2#I!#8Nip+6_qO*4`Bm`<{qAYS@{e713w&YG5u0w1*W z^fm%&slhD52C8!AnP$N{nIWgJ0HT&okrWtR8=>Z{Aoyhe=uZbfJC?1X%1TNk%oB(y z&^}xuboy^H8y3AzW*PayP=$FOMz6Fg`60zha`+ibtBQ-aLlF71qjgLDh$fX{;;;f= z^Kkf&z=Py<<&Pak+OjrXsJrhAT+#W?^U;{}fIJ}cMoc-;;XMGeVY<@7CHn;5W`PafeD=-rrceM*DFLkco2d;Cc?0y$<1TJDICgij)A0} z(OwWu*9bkl`6mFIIbyG*6DlkYY<$+AE-S;U!o3H{lS@6tyl_u2vD7QWr!b8;kQJ{>#(5XS$=u76$l#_!1!!)WDJDQ-l*dWrFfz&oSfBI zZAB-u*mPo|J56+Q;VG$L4y@gw`fN~eaATtK8bBRBWRCGALBmeWI=uKu5)1zc7EInw zgh#T6Xs~!Aiq?kY$5=i6AVfLfGLUf>SQE_wcX(u-hMCDw5;itQnD7`M@5febP0Gj! zgt#T=I!zoPY&#oB$ixss>i5WMr9hqIUg?kV3jipzFp!mS+xcP1SCOE@9`r$?26Iw+ z;hL_Ek0n(05xtf~bAfp*|;>%Kkj;jGLvMciNyhtSd4C|lL#sghp7J!vTRaKp=t zz5yxWTQ?z}P1rvSrvOY6+w=Mt@^uLv(LY_cMe^O!+$ONGT4$bU13(63AK=n`11ZA$ zGO)E>EB1t?bi;1}e9k(VnaELSPV zhoAE~cPrf+d@;8btHR% zH(C6nBvKMEWR3X6fUIruhvMbmHt2O`L{0hcM^R(#S}LKaI3QU|5)-&`jI7)k{|#{& z9fOKOwiF3&3wWBGemnQ5J}yT`6%f0egeV}B0+sA2JlWHq7*-Xz6Fb?u|NCJ}*sqI~ zzrcj|T=we7v0VrB0)6$6ndf9d=##@*S`{Qr4}Iku(%)IXe#eCo=YU<*m=3-Id~Naq z65v{E7oL0-Y=V=-xIFzZ-J`zYI=^jE! zWsW5aK?6o@!_y!y%ee8}L&v9HUW8i3*7f>zA;gZc=_<#+=H`brTWZGbX9RA*Ec%*# z-$cBEC^|McmBTF=8%ab4jQ3gp_M*+|VPtM(0edMJ+OZ?AzdJ=AJEs|1GHj^`ysPAs zO9B#J6fYH_Mb5@M#}G(?A1Dq*6}guwpC7MLttplobWA!d0@lwVdtpYO7cYehg%S0x zbw7*{L%3EH2$-v+A&pdpR3E71#3vwD=ZBr=5#hmyAU9<2RTQY=jfD+7iHVQbF4p4J z7v3;m$lKu1S8=RGFj7(q4G}L7TZMCr&bi*-@3!#cN(5-=k%xtE=*X*@K_YhR*mspc z70RHbqopxV-^QwtwA<(WE8EmUxzUn7A^Jt883B#ZKqS?@MiU=W1ow>LoFeB+BLDD; zBgUj3K%JV50Aq}LpDso|N(v|m_|_W=!t&pl<8N&LBGKXlQ%DfOT_jXcR{7WbqM=(P zpzcT4TMoPpjTqPM4$|b6fzOM7(?9ZDKEOs{Q&;thFtBWY#(Q_q*)SN1NQ#AxNi*8P zvNR1b17tN;7v$%PXA*kM6e13=(+>fXB(i~(^1@mK@K&8LuzC1qw2;LHWSAE7P|gLN z8xdr={BFq)3>Z@j!T3Z1*vVLJ=Bd}|l=QXaE@;sJNM@adk1q^fkpu5gat2xwHUdUr z!r;^$g4{EvG0>0JE_hi2!Z)dM$X=D8dgX`BH10AfV8w9eXN+&NQT82@r#jS)D2O){ zL5#XFb8%%jDsS&WZr-H8sT86F!c3BQUVh5t7iq(*%E56Qpx|EW$j)dRBQ+U=z_kZo zx$MOV4siYsxow!fWoGu`lfO>hrpB{%vvH!3QuS##B4uFY;>ap3ZjpqV$SG;bjLqbL zcRGb_EIxxl9&9~Eyr~awV^doGOG2yQ;pfowMcT^+FRP{b(7CSJb5_m-FE}HqX{35c zU=?H6Zxqh&jKWfyV$_aAti3RYhhbVlcr;{yiM%w-@j-;Wk_Mieixc2R-rKjy*!%G0 zr`6afAX{*<4`>?XvTRoCMNSVro|b|@7w8%p$i<|>kTh=}oykUl0TZ%E$sn!1$m(;N ziIm~tVc0FV%blE~YAi67aGQfdneI9MK-wEP376-{7H%I9geT!G?ni*|l5%aFR}v)`2=|b3 z#dRC_tZw5Ye6aF}hK?IXve>S~;+Bm*4V~*c5Dv0Ai1C4hC~mOOQT}|Ps5USJ5Leq` z0mhFDTqeWN0QxaZV^d}a5#x9wxBUIDL}c3G{+|CRA}gw{ zzJ}BS#Hzp?%MZ$w?GvHCRhSH!@*A46A%r|cgA%i{vZk)1TH(c1^*RL|fU&1+u1Z4K zM&`Q@&mKb@NqYh_#PPQ+J`e?P@X0$ONf+vyjlf*wll*o zMGY(&gl>*FKefcHzZXYE21q96m@s2MB+aHHj1^o|@>|az2B`))GB(&60byZ-G8-UY zQ-Q)<>dZqhlLbV1KxTuEM91YbzRc4(h?kMPJT9Ui`wMwRy=z67Y(d5uR0<6Xn;OI! zz6KUD@kM+_Mn+l1Xt236M7wBcVq zD~1=5csdTvEP~hqyxoMHR@SA892-4{tCYIcno#emKC$d4k@O3xjor7f@m^tDyvs{nh2s;lksf@Gl{+zJS|m@AEm@8*3Ry}|bdows?&~wxoyUJu=X8F*-}ke; zx8oZS;Ps}_u+YP*b06$_Vz|C?WtU@b$FQAZW{4O0>ThyYR)ax;GapFtlzpnyIoci|l#s**g&V4#*th$d&Y zYgnpUEy1u3D_<)B-;BhU2v^KE56V2<#^hfA&f73XZlM`I6Rj}b7{FgYuov(@{RxI8 zyOoy~V>Xv)FN`h+9nt(vnLfQ0lOhb80GMoLQD}OPcm(y57wos+LqxJ-@Bktev9>^1 zm6h9(q?|x7Q<>4gqMBW=Ex+H^2m>gK9L>4@Jc<9aP@_X{C03If$ zrW+aZ00f@_kEWeJzeN}?`= zgDmT#+OUDmY%9ZX&Z?KgbhjyUF-HFF5ys>agJ=S9%4*<`%S>Jf_2iAvQMy>}j6kX^ zOH{u+9(svOV+U%%&Cong>B7i&VMxAM+y>3f6lbg}9)3I+o3XqWXI0wt#oa0nF~(dx zi;}J?LO|PDzm+*0axf|OK~jd=FZ$rY?hJ#*&>0f9K&LDaud9t>bCQXqp(6PwaZyM` zxtroCp`{d-nkIb+f5E7a6Z(3*;&Mq81N+Dp4yO?FqI+{HnwCgRG2+OrJOzFTCdA=N zif;Pa+66_%JL0x5jDEFqh+rd>RK)wa@X}N?r(^fgyI{o?CmFwD$gdWpPpP2IQ99{FauMmIML#R}`PZ znkdeXfBIzaioxQz#@k1>P zbJxdR)|cfP^>PerIGp|#~5)ikJ#E5i_5CJC2C8XJ0FDXg*hn3ac#?rOK z@vxBDO{eL-$$($@!0_jA{x6=UMjpaW^`rv(%z=-Z2eo zXjZE2+|2lj%F44`oz+j?BL0g{Oc^pPJSs@Wz0D4?RN1Fzb8I`YMO~%!B8ESB@elCNZC{M&?;@I3^r0gQpJ7?fa zin^G9F=)1A?=7vSYLd#GPfk~9W$Cnom@qGr84ll?)pVi$+f|l%l9s?5KBIyp1}v<4 zUHx@&8EvDWVzmKf+&ZU=J8d~>_xdBU$Y9<^BNmTjOc{$W4yvzBr%nZuNqK&mPu0VW zcA_)_x#v))!I@&LFGEO2g79Uwz?$-_U8Gd=B&1q6m(b&t<&a0C!kc?>DkBk}3wVI` zR^-970eh0nWNuX?cLHXi!KwOOW$~$Y9W5L-Bi%GQ{W-mHW>DKb7#ZpM_BV=WGGKR@ z)OR;0DELD&qPB5BEale#$HHH%V+;_V589rZcTdNO;+jt)y%#3IK%t+@unw#F;!zCP zfJ7XSf^o4&9ts^rVI6VdfkoJ~^W;e@Wh{yTpTik1>kfUIldbAW!Svk;W@pTfEebpN z19_TGFic9h0=4Vff(=X~4K=w#^hab@-qpCF_{e;cb zzvOH{kJ0Pwl#4P%o*V9Z^VY4&&51Z&_!&jvSSS&2McM_v;T;n1pLFk{57Vo%n^ZZt ztHM53^jCeXNVca8a8+1AHAPd5Br)T5if4bl4xj^*QzO>Hbw`Heheoig|Fn~r5Mk;l3F1`(OBYF22zVwF@PP&ls*{?wRqEa#{5uWHs1c(C`? z=?OO`E@e~pEbL38H$n}zF5EG)D9Z2BM$7{JaBmTiQRYxMa`9h(42L`+#}Yqu#=5tN zom(K5<-lUaIukOQi5JEaBq#Z1DAt&ynf?YCUDw=e}!H-9$Z7ODX7Kd@!RB`7UQ0K*?}DdHK9B@+eyuC1QG?iK)^ zIS9og&iG+)@Y=e%0OqtkskMar4jVl>tJBa#XPSNq^nB@hN6S6aYy!qbxA&7jj#0$B zU{Te=2W>7{Buvcb9rF3~CA{nJv3VD$ie_E9ynPeuhSi z9ZPgq^@Sl|6$BacI_cD0XNJG(|K_n0xGJAFoirn8%L6RsZ}{pbCt$vQ{ombTj7mBnWRCJ0=35u@(TROJ9b#ZGYbAr6BN3H1Lan^^9?yN? zPjWmpDB9_d^wsw1Rc-N%Xrl;gn z5OgHio&Bn+9;h24IhyqOGU=c6^(h7o<7K8j!HE@)jmJ!w zAEuTCpcG>C*7`1)iw~Zh+D4+9Wmz?mCl}H-M@~##M+ia!c{eAtCzGE zPLOY4;1-B7L(CK21^q0?Lem05?-1Pk%C0^VFph^w!a!I;ms?D>EgEi6yb!SI8LeA} zKEBGL9rKE0Wk|~7)qHbP)+os>p!kW=Nb%*nBY3lQ&m&fqM|PRYS~C?=3kyhOTH0W& zdY9N3fkXwOn=^j8^VF2COy;?g$wM$8bg@7i^8XPSbx;Z{nA(EBeCdPmm&{g4OeZ1& zsjrv)52KOKx9VZ~2H=8d>_4KhVfeZP&P zv{+z>*Nm?0ThR;|_-_-chw1^lJeKQq0qB;E!<+T}p%GhAk#X8tuqQ~@4%}P3*MaW- z3%7JOVjUN}Q1HfVR0Jt+!!Vo#RrLBndOsHKcZCVLSKg%OESSxx{QtZ?ur6nt{g?3j zQai_?u`1%GbsDN2qF=~uubV>X14NxZ`L5VgS#>4(jpd$?%9CcmTK3yw?W8eh`mPOj zYNosJ7dxv|qr|}*AHS6vAi4x1M5wB6+?CtxL8p#HL}vO~Y37U+r~=6lw?iTb zSw^%q?wYUl?m5W71CZcv(xpK3pT@n+^qoyE)dmi3F zf03)GlO>2_ak6MR@{6y(z(-+W*_ozi$fq{M^(38+*dn?;|3H?I;k1k^h$IjP6>hg5 zcQ-YzATxBo+> jxy+$OpUozd=6`my%TrIt8lg5*_|M&Sl8eUKchi3X<44Qf literal 15130 zcmZv@2RN5|{6GA)XBo-NthA8Ouw_R^nHhx;W$$d2RT@%AsK`n(v&qOxWtEwoGO|iY zspoZ{-|u;z>wjJUbI#Q{m*e}r@B4GVKkxTzeWLYsHMY=m)00S~EhjWpPm@SwqWJGZ zT59|mD?Vp~e^J|LYp9a`5r03f$%-S91V|@Tj~n_W&!=2AWiwe_d!(dH4n@013pa1^wwX}YKTk<@xG1}vR76WT zKs8`{$*1ynb;z*~ml*a6cpY|A?C4<{HY%G9p7WUfP-9f~o`snutlmnzl4jqB+f6jP1hmAG*n)zsFXXxzg#f38c zz1K8%O$1+Za8NpTj(>1)kU3fv|AQ^<)wXJ=~`*%l6I{8<`Tv9+}oP*mjq_3PJO z5fRl>r&zxK_;Jd@A|@?O=w3pC3cj?qwpLq;lbhRP>)W?)88#CXsL7p0u^Idy<_^%fu(Abo@Bk^&2;06A~gPCT#GX z=|x4XMn*=KX~#7*B0X({g@wfr9}a);fGg(aO=}mItp)}Lrf1I7zkk1#nwlEFH9%Y0 z)6@9ojn=1{01i4jI!$eD6?Jv$wVdqiu{S?wdLlhJ+1c5}j~t1>eQm7FP~fkH^9~NQ zg{F*QN}&ot8*W321I7MDUyX2YgM(%tK77Dp5$lzfcE$7h)YMe+q=<;f>#H~7c_PF_w3nIUS3|;+sjTKrbL(5=D>j06SDb}gLT`s*t>VrtUR%B4l+Sr zUS7u{yS$!A;7LqHzI@Bmd!#_TBysW3_;~h}pFg*Yoz+`?&bN5EZHjnVZs}L>f?P^E zSE|a(6Frl4IKH){WE2(_UU?@iBV*HgN6^i~<4S31DXUqrf##_*S@$14ym6`Tg`lu7 zb#!#Jrk0knu`xS|s0a#bM*Vdfsi5c2pFhsXXv$wV-YX`iZf`FvAt|{%Mp0g#S6W(H zKxleyu5n<1OH546&E35|ie=~6!orK9mJe}vG+0%qO4!o& zJywZeoQbVriB`Sx^y#i$!onVpWPKMNKYdDRW@g5D#EG-Ix_WGCD%^dtx&H0j7}vJp z)RGb@Rdw}=jh}PnKJ(lQBQ*>UPEu=YYcHgA*?JB?KiVz)^z=U5*qy}0R_U4bb-$-s zSy!XP-NuxNTRVI9ET^PBqlKlV(RlS_hoc?JUqpn_+S;0S%NDiEm!%oEY&jIT?iRx( zNzKQ{$0_S0^4jJQ|98WPKYs#M)zkzfBv{K_N1OA^%kI^N!g zg@lBdxw)?w6o~KIwM$7wg<`VhcFU&<&qD#LPMkbEKST}eTHn5X-4?gU{rl$=pDMi> z$iqlG)nmq--nX~kkY~G0?NPDAcj41<2Zz1NDk>~oT+yg`Mm{-C&o6K5@kQgav(H4N zEf-6KcJ6fl)_;hSntG!CDvg_uPsF1~{AUA~)nYggrI{!c&7Y*Fr)OqokLbzLo5TmR z^Yg#%=+Disz0PsgFU&Ps3!8}M)t~PSU$bLkV#F`?io8vh-WDgUb9&&n-f%cJB=J7+ zL0U#eRXj5F)*Vq5o?mT7sxPBPG);^jk6w`k?~|of7&yds#cHaea#r3P$k)e%VL5B3y!V~Xxraxbxo>h-(A(QvLP8=f zoPneJf#ikk+S*jt%koLDU#psxx$th`6b~iciHxL~A1IF8#d^ZTFyT{#@An zR?Va%&fH`I*iiJewA1c;G-@WS?=JD4GcGDF-ie3ypS*c=ue5Z9q|auc?g25eTf#a?sw9VR z{e^6-tleB`oR&oI-<&)5PY6pGw6XFezjou<(3X@p?d>y-Op3dlot-6)9?e>;{e8<) zz$me~SzRV@T^y|?HYthw-6O?E9-f|$E+p`Jnq_2Ulm!0rIBjCG=itGE8$UV}A8Amk zwB3u3@4oOTNL@xo=JWKl=GTr#U(LHEW80^^md1{wZ|=rs7<_q&7nz@cl7r@ooY`oaCF=^dY6;;>5CV)YX5xK?*903w}ysB<>ept z0lr;6Sw`p1=^Ze6u`5hT=?2Ha%}eJr9xmnaSyMt>{PClZzZH{O zT>G`Lz86#j))uDBx1ui1YWxrT#y-!<88>H5kBe=yU71QacNcey(l7Nc|L2rtq_3}p zkt7@t5TJG9#O2@&#`|i*aa~xW69UYgsD{fuf(h+2HESa~(XM5@rkP!3INywSDE`}3 zXnOKs?Yhv%-d=^=^_~=CYil%h=`IxL45~FH>+X|or-7nyla`mZZLHlNK0F-_{(UoH zgX_&#)H)D>Djw*v@$FpitlbIijT!l23ecr1}9+g_^qh{+A`^1G!FNy*Jk z9P2BNW%@idHQs*k)wQZBSubyI78aJUs3_XkEiGjmWF(-0d-w0xH#SmiY;17Kcff+$KA{yYcZ6qapuzRU)?FGPA4j(2XB^*4bjyEA9DmwI$?@is%5YM4Qht~i8UL0RsWa8yb5zCjv$Sl6l(HzMi z9JMwQkl#K>VK};geN-5*=7KJm_V_Ug@5PvwZC$GJ;V@(+P5|N5u20-};hrNZe)+pCu_qp!RL6+hMe;&V74rWuSLL@XL?(a%fG&!! z7LakDpmZK8y@@ucnQM|SER+W5^L=$SGj5L_Ygc;9l&sG;0n*CPzH?=j-`vS5C>(!% zpW1a_tHSQdlP6MkZ`jC{P;!?a-k!tjmU14HLhDS!!o;GlZRg-P@O-qy(V%Kj#L@(b5w6X?Xa)%(&70d9i)_qE}W{TD!aNN=Zr4k}9gI%4Sk(7o2?a zU;ULC=JR3CL6kt-@K#l4V zeJQ_2oirV%r*L`pkhgDDds~~JsHiB~sDq=oM=_g?>a}yhzo>4oi=Mt1HJZseLmKbz9%BtZrXUw=Dp%(gw<0+?IhpP7RBXG+b5tOq-P+ZaY3x*Y^iGXiV-4X9 z77yZ;Uze7ATbb!;ZEtV<^XrHC^W3~O)}*NDXvbBi#nD%a+}zv%5N||_OG>VVg^?-z z{wC>iMq?3&Si;DFA{d>#&Z-?aM<;Umderw)KN>WxSjWFEb1i&U3*G3}k z+qW-;0$vudKHBq+o>P! z>bLoBVt6k2Y7D050)ePw#wYZ;FOJA($SqyIyI|$3(>mpWF<^sUl@~v=&*$ss&(}bo zA&eoYdLiEXkzLE^I{8_Pwxde0m~ou*O*%IE$ntIq66r{>Sequipygepy)_3_EcNiHn{A zdnqg}E#He(UI(7Zkmm|YUaoeQ=aU28CI%K3pQfxI-i(Pc?EOOeJO84vkmcbK=Y}ULk*R|{i~D>`EG!rwSNxzd zb29_$*1be#oBYzBI^ZJ_YauZ3|D%)d3gn1tVp%-MI*RY zQE~ak2{W^AjyHj~_vj|;UoZb^_h)^1zV`SC>(;GXaT9*)OV)d`{k@p#TU)aS66LUv zByqnbtMDlub-Rlf2lxH-Wd|6Fx_NW{`@_$~D0`nOf1|RpQYvd9;?dF_NhT z<^8DQ6|RfRIQ;Z8ua>LcTpc>*^NrKP!=uvBRO7Lj79ZLQ2{RP!PQG}d)S>gkhu7E- zm@-caBZ26mK#fJ8OfFmg*x=d@&8qLhi9EzD`^Ywj7qO_;5Z%0s*rdCF@DsNF^H$E1)%+1_p7ZF2jRuKP-njFiB1T2<|5n05aka`Qv`0eilpXFw>o3 zF<3M+(jaGZS=uXn-u`#78pZPREMwiWJ zM!B>kH4TjeP8__vIv8o0qUNQYcpn6E7Ui4FK9xR=X6~PAKWm7=)!N(~VgD{EqeNzI zcDD81yPM51T!8T@!da4Jtm!E_eu3` zh!RGE%7Du1*o%&9J~waP^jaLLG48uyYkTLU@Fn-*;VRz~+S<9r2BO}a(W=Um%{O;4 zFx(Ie3Ho}1m6cT!1N_hCrnz%|Ds6FJ$Io@G_|z+NE^qKK21+o!5pnrb}il0Gc^BV zU&PQYUfEQmm4R68hdUC$!h(L!m(>WxHo3mqR7aWUYgrQXY4i8!KVvP+4k3B@6gfG$ z>~pVzQGu^> z8i3oi_LndnB{}y|mmQYnZegw`VvczHUUg2Ays#BNPLlE7pOKk~Z{hVdQ@yPl`Y3oK z8U;u^+(_ULo!%d(PoZBpj@AYdRj>Ty4!ek{5}_B37RkJRd8x0wvkx5nF_`k&bsE*- z*zD}RNeV$S_-YSd-;iFfh|;A%-CR0g#FpEH__XJ=fx%9GetwzYKgSMBNqIFdaCIC9 z-f#v6hv1`$>#~bk^q&_Kad&qwaT{00@23NTcA}Z&;9a<$sY%5^B0m0WeVv)aE$=Ut zBJG)3EGCjNS(0?8&6bUgO%=DZ=kVdfvwiveLEqc<+)GUCfACQ}^*EZ==Ehw9=earU zix)52T3a6n#>)8g$)YyH8$ui%LGxH|G(FA9`L9V((a;p%$Ydob9mF30wPD}3y1LMk zmoFXbuTZhEuyhR=dB5oKPyY7J$*jsp^o*GqD+$dc4eN5+!otGDXdpM|`SYjg8n^F4 zVVIenCFsyhPgYNkL6-kJ%6FLOKyi<|(lxe`FhGSuLPBH$R-d1aD_LO&TMUnk?76=E z0NeW?=wd!oCE2!Qr}}DHL=)Oswr>wdtC=Vp4Q>P~A0HbFb?nKk@9X2(CVVn9EbO?6 z)^s7Ed`U^Mafn&efiR*0|EIhB`enWPZNMV5Y(x{18qoHCYD&2~1!Po6KH>TK2LpqG ztk0isEPL&80>nb>Y;{y;r#>YOO*(i2bS?rzLfoj5Ti_18C+2H*TSc`zcvA)d10saj z7i>G_Cn84bvVMQjv6&JqK zrVqy^C-3-!xkF1wcU)DK5^sqRx}X->VBm5dzQ}g4ji-_VQUFvqbw9sjgwn&oLGwhP z7S#M?ip+Kp9T)cW+#9FxV(vooy%rw+-zpqDOGq)GdC4g$;}GCZrK=Mh4A1kfp=H(U z`t|F~92}9;(+(D|f_b~Tx-iiN(fXm)QJp(?j?lzpp(mZc~`(TA_u#MGb&Y=#dwE9UUD36*#3Fn6WZrlap7` zNWTvhj`capx8W_a@bcb7IgSHY0@NONb`}L!8JnJt5>$yGz7fh|7)am7+G0n>#=;A< zp`9Rh5Klkt5flu&k#BqH5(5_D{}ly~IGL6y+8d=|K0{ZUa1xV|Nn!}Y-o6RZ%k#+B zZ{HXJA|Qi*f4=dHkU-HrPoF-0=gyr_s7D~HJ9qA+fTR`?5t01v@PMVk0_}FCm&v_x z=wcHK!&QW2tE^1kSEuvj{KpR;tgNlcARhJ)4AethhJG%7=ujBQz{!&*t!!+%$Jd8D zw6XmNG2YZPs-Zy{0A;77gh27ZhxYin3y0pG3jGJZ!1|DkKOgY zH|cNY)2D971po|>1r$TZy?JxX|4R0ro}P(GNu^0+FLNE8oCwv2PtGs&$`x`pc6K3I z*@wktknhnINPhQPdPYW4iz_m9!=M7Yem%*f@0|6K8xBLc?_z30+vv(B#vHwSbGWh~ zq20p|zyOI-72@RNRNBRQT!g)`u8str(b?5?N?Z#6Z-L*$z<{tjTr3T`Nztn0(0##f zHFvQ>6Neg44lTa5qr;Vb{KMC21Tjq}W?U|JBqfNsW2m8uX?%~-dja2uDR*B(3DF-{a|iIQomw$G-S7AGhQ5J;ijoouT6uwWBX#YR_`SNjyR|_k3xj_9 z)MnszduElt&CeI&|2p*-JQ><0WakwFiDHLh@Q&ZB^D2+P>*IUUts5J~BBI10HGKa3 zc?=VKc)5AcBo`jAzNv{SI5@b!CgA9(hrf)IU;jri-ETu>OfZJ(pawQWLE|WJ=n|Tq znHdN9nK0MN50w_(ZRO|~4fMlqUgpvSjk1bBCPYotogxUHwb?(Qh9} z(qQnFdw$(2xx}aB<8uVU{_;ZAq79H|VEc?{!9J!pPiID2lBGSDfP|JrBbRr8DWX3; ze*8FeIM-0@TWY)n6rlRsihrZz16Bo&9ZPhTj`?Z@d=eEERR2Ppx zuyU1IqTu2C9%d@ijBT?FWo!~}B(UAOj6T2}dt5BrWznd`bnZecPCL-}G%r2&D$H3?S|7&B*`41uj?)n8y;0Da)s zkFNv&B0}(rws-dSHp3EO;p2O6|0b97QQ&tvdIpBVkbl8n3JfW~lo(R_`uhI)@#x=c z#lOo;5W|5y$;P1m5Sm=!r9Lruf%ws~BEBsAj#kFf_wQFSGlgM)^lv%G2fT1CH}^oq zR^IEF#>DUavu;!|Qns|@jb!3icXAQ|m3dQ^eD)m(_9^rpyy7L%@MX%kjyas@ou*Lz zdtX@Ifb)fIp%wgZ(--x{toUb%;^9i}m4fjn^g;>khpZbY(W9Wy6vuRHea7Pu=2fO{ z%JtAt6823}V?^XLO#|UaPlRHC_BEjV9dm_RM{8dZ=J9Nz;M~vCkMg#HO9ge|2l|)xT5}Wv{j5gi%U$ zwHz$TvAnX82H=_YvShw?Yy$v+7|4n+1RwqPn`&s)NAf~Tqw5WauxR0w@($@>eBg!6#xCwlM7sDu1k-89u^k1 zw7h&ubMkMAv%e6;ssHU*geONf?Z>lHS~|JK#)|=vp!N0ZYa^GxrW|pV{J)8vLT{kS z^UC@#lQw`Vhe~wxRG?>HquWLpIu|cps+i(+zB=;3NAB)hf3smq_yG|0X|SeE(8LEz zZvKz$`~(dm@F5S$YOF5olfU=RFaQ2taPBeC zJaM8v>rwFa4u#*0si~>-b25X}H5v2k5kE^RxvG2@BcLl8KQ~=|TLeeoQ=6n!9jU*+ zzYe6yeDrkoeSF0>qGs3DBBB2&{Q4YcKlkV(HKYww0AOHNLe>BQehfd!q$Fj39iE#J zLK}QOD{yMRUsDOou(C?bdV6}dVZhX-pILnrv^pTUHX7?($pcMB-NIsr%W&mK8}*c| zm-VR3LbM}H@4EVWvY|3p4eS62jR9&yIh28;M(4d^nr?6yVC7~|Uc4h3YHDi5Jj?Pa z5XuSeL;z<$evq1dWRFqWLP9Ve1S>2?&aiUnMGi+Tz6b=UXTH(o_cg_LJ-*u+R$t3Q z*V<@Yx6ILRm|MqH`0c}^OeDat*0iMkEMH10_vt(c1jda74_g+^GB3c*>b9W@*;tM*a8i-$W@#R`!Eu-pz2$5CEI#e~nKx-PpxUJN@-h z$iltN6uHZHckuGUX!`!dZ^(Wmh%WWTKP>`0E)@o~RWTU@XO(-_@1OP+S z?c0&iphrDqQl38-DX*-YfUDIA*@jp-v;L)mqx#S>IXF0~SGu*=XR;olf97D0L#5+F z36}@`E>*rluYUU)9-0CLhp&vpa$R+mPI*)6JY@DZyK%Y#kE~7Xs6%)Hk+ao=%0WMe zZd>3rb2{~&JdaQV(Q5&p!>6#Grp5MKV21UHq;F0CiaA>og_sTQtO1S@q31)4atkf31bE5|%O(*jor5c;^u3yXR*^vlKr$uRc;93G?6VM;dRb&pL=IfDXf?V^eA>B70hY`8_3+S-Ly_2k&<`%iIQJqS2T1!_BiJzG#(8d2##*Kro011`ohRlANaz(-gu2k#mr zrhp0XX3>VSRqYsSt#oq}#>U6P5C&QQ_3NdX`u_Vu$BvoWorYF=9~Jqe$@}8P+b4Z7 zwh8J9$a{a^&H9~qKp~i!!J9u#Tvq+GYhD0S2*Dbo@qS0gz;LN63}${E7|w-`RN{U6 z+j;elFyhhf93Ana0H$GWw<`?rbpaT3&zh@y1& z@K8pdju$yYPO@M%cyKb}&K+ij40qx^1h@&X(PQLA9KO^`m;^9J#?J6wJ=p*GG&R%+ zn3lIer@%3l&CPj$^E->=FiR=y}6#-^izegKbXL=+Fh{i>` zb*nmt$mNfX`n>%NJ&apHfry>1pr9aXZ}%cQTM69tS#IvBGiT^>4C$j)yPv>Pm=1_!x;whLaq42N1ly@i|7)APp* zr?b={D2;81@Ti_V$v81NiL}`j-0C0rzS((sR3uQzy7%vI$yOkg)dj=C)ZDxge*xw@$B}6$O79b~VqP- z@KDRk%hygl6TybWFla$uph6GD2(=gKGjaR3oPGfT=il7hHx1`<)PpOpF**Jjsw;kMHDWECiotU55Dte|&P10(ven$G$}r z(i3?@Y}F|b=< zMV=qcXfQM}nf#Dr0Lyr80>OR&M!~WgX!rw3ii(Op<1_xJm14xwtE-a<0iO_j zb91#9tAA{Tu1G|6qOoz~&{XKcm5T}r9s}X)2L;KG7WqrGVMbAX`1sKpHfn!~8i&^=A?q?(_-=DXPF9j7~=c>VaIQ@i`8)ZGEQ$8mcP9*lhTO7U#)FT0b;$5`N4 zG-6O=>3_vH>kDD49djEaVMAHl+Fm1SWq$Cb-n+f89ryuw=qzgdx98WaawB9CyfL%q z&%A5Mq&ce!m4MKYu4Pp zzUQS4S0-YFgq&z@Tox&v_jL@JlDY%=)mOmWcwTL;nwLe#*XfI`uCA7QOwl8MK`_l^ z#gJ6j6T{p`0!T4pS?Q+;B@*ipytz?7 zFi>%(qyq~~+%ulE8Nhx(P^Yb}4F-{hjg1h(dt);*bg16x>1nSOaWWDVTEhE(^(y3H zSw-M4350jdlQaI7lp3(ZXaj3Jj_j|?r}BC#1KSd=BU4XmfY}HO;DsJvXW$A-DZhGgMkf%l$QbY3b+bAVH7meKw7&%qG4r}H~dAAvTbPS zp>C?&fQu-?#3hg};GkqdW!^6)mI-{l0lf$J9t{{E6POGVOPCdiqrU4sk#TyzsHn5| z5LgzGqDvADF&o+!2YVHX*>r5v12qeLN)n!C^Pdq-AdHd9px;Lh{_H?uKF`aGj+(|6 zq6~%5pRM2A+8Tu}_RzTsH$aZ4^YN54JG=nG{lN#{^#%bU68;Dz^7)ZPJY(z7P}Rqu zSo{vT@y*T68Ek0!_Q7tSHxt;ZsONp6qDW#@9db%F4j3qND z#JJ;_{*#lJ==ceTkEEoeROdNI2y6t&0!S!1#%NLDu^aK7rL|Z8QrcOE32)@jpFagw z^;f=jmtlvAoGGG21N&4|T9Q&kWHHdV-4Vh&JG=NELuV^~pJ;4Id;G|(hvzCgPR+Y5Wv3_s|&-qV}T0){&=FJStH{p z+G}TJ6^bw1DJVz=ubxCgDEB--Dk&6E$H0x5tYco&ca+~0!igeE^~cu^*PcT!BQUiY z85x%bE7}gr~mDYj=JZOrE8M@8)w5C}b~qeDl6Ef#^;jf$lG zTLDsu&%Bu~1qtoU?^*A^KVKCoL;o({4k1!^Cm^w)RS}F(7>uO5t4ojsalXY_5;8IhxM)gf_YQ7NI&Hnobz>M_nxK4dn%WIB zkpzq#Od@3y6Att&6*Dt#IA`wYY>ZqIlq7M7P5~myhD|?F8UP69zP4bA3?G&1H3VZw zw@>%*%la^$3tXl`Ya=7UkJbA)SW@d4f@w>n?xJXymzVtzDM2MLb8<#uZq~v7`rmj9 z+p6bS9og&GuiaklW~Bo~563bdxMyB$e^Yr$6%jn>?dc^YY=BqJF0yt{va%v!#xb*C zm*$&QZhPza@@l5Wq)wxVQ8P?gPi8JIE&@6NuO5Ob4DFcAwR)MFRKnSj+qMtU_!RRa zwyn${ofM?C#M+{w?>XGlA;peKgL(TxYJ_WCC)FV}^ zCmMLx3VA8`IWgRa>CpyW;|+-0yrxCCBD<**Td)U6&u~8#$>p@IkI$;X;ZLtZ_(|od z!;_UsoC<-*NCHQXCRB04>Ij$68}WsVN@(G!($dBIQUfkte5NIn2WJ;3Xh}4EsNUsu zSsW^E92FCBQh3r_tE%wj%ciCAM&zN~5IESn*a9@!7_mhKs2Sg7E}C%nF8SfZFSBI) zz~>ws9qR!S|NrDnd|#x@@b)hygKaS02?+C_;F6&t zC4CJsSh>DnpT&_aI1T}|sigcb1ZbqKUw@x!Kjby7?{eAFGDW!pO+T%+mJbXCo+ueu z&?!U16^p?nI5IR#t!Hw%AztDv3PF?Km)&!rL5H-3TxMDuD6Q~kNd+|RyJ!}1bO89= zrSuQ3K}dyvH|7pPBk&YFjaUdt$y$NP;*uVhmd2Y4-`DEu?~ek3A%(&K?ae=T8>ya2 zBv=izb8$)ptTzHbM^NT*!3(%UP>4P)0xn_7HFR_^k;GA17i2^McftiPT*4+)5n(@% z09S`d?#UNtYpw%{SRt1uqM@WQ_H*3sXa?fmh?42R#WXx~rmbwpEkv*G0J)F}fY>0c zn1Ky=H!EvT%~Asck>4FGzQAOnuxC%?|0fGwxq5X5Nb3U-H@Omj;1U@Q5OgBVp@p*= zpH{-r8(sJjF%<^7HY;&png3J~2X{vevAm~{eea$WHs~ya-GR59jub|;&-|bofut@6 z1Q@_yHu*B$l}-f0G15AQ`O?*IrrlQ9n66msc>s=b7iVPNB}>Nv4nj`Z`R#D(t-A>c z8t{IkE`Hd%f8V}w^R0_GU}Iot$YyupLYbL8GH$jmF1ql&*m!ufu`5O6kq+ZVtNT*1 zg5xv^ux$6r%Wqb!_B=o~^G;r#XhlWEDX-~e#}8yA;b2;(AXV)eN-7DsOYhw|Zq z+N?}>frJO!pFxE(^2(Ha4A6lTP!F|*&HhbU0&s^7+89od%q=O%#KDM);gw|vA%J1v zyn5|kmrr7k|5nr(jDf8%@Br_3B3tcxVn4p`WSr>us?67IsUs_eRl9gD06j~aHsV3 zLm@WVj5mZQBKPue`|!=Z2M#pCg}6n;OQDE@tv1OnR$uiUxlDtz0^{aqUE$i<86e02 z=0v2v%iYGwQC#9UjZ%$JK4jW%&kF_CUJ4+@2rJYh#KFSKN{*NPHt8?}1G`8ac9nU| z_4;WE@TNGN6x)G$2BED95cf%X`c)icIgjvHBGNKE_Dh-`9uj}oCz&9TYz0Xpk$_4? zObbZG_U~+J&i@77F+$fO9xx2lf&zE}EIb0LknqXGP#6~kL$uJBRsV`)5kU&UU?x#) zu-5As4ptNC5NqOMi2LyT&7p=_B!mV*V07>>1Y>UzR*P_4eOntH0?$o}htA_P)mBhs zK>pe(A6aC8Vi62LneGJA2FC?R2}9H=>@_sDOXogbmCjpGf@I+9p$1+G z-4c>|B-jpOzQk98S739oU8UY~1wS0UYZL)Sxogj!okxzaLt!F<4?yPJJUmbG^O=Y= z12!3P5C~ZE2|SQ8+vXk&LD_C}S)8x>R2%eAmSu8s5>6Ne6bR%=2~LlvAT9v;fjH^q z;xe-H!gOC>-%dy(h#P3?>Z+r5ux$v)A$F}8*7ny=4;3&rZI5@s?A)y)-t3XOt=g(6R;1Or~AW;18`9Vx*1{gf|FF!F6j(bvG&Iv9_$U4Mf z!Ry!W#KkoR;XujqSUrUc4}R~MtZZXf7t5_%w?3_5t`t+U%0pG~r$8vI_05~p#|Dav zitgZu9FB|G$Lju19-#{g5OM)-(YpYKA7^FJ1O){lKnVld;nJl8Cwm~3o#sq6HZt0` z>$3BP=zT3gh)Ku=0NH>~6Q>jjvtM&pa{9#+2`^tBhJIuP(lQo`qc9KcK3lIGOYE_c zo6Wd?@7~@OhooR_q!{oygaiPt3H2wK;&y}7Q7I|6-_LN&Ls^32w(?HJ;78dr|8u7E z3Z$eR);+GcEgaZt{P5wPXR1!f8P}Y?#{W4YTCkUikvct~{{K8MO43~ty@Dg9gPJ4% aC>9@BnzO9Epu}NU(g`(P)fdWESN Date: Sun, 27 Apr 2025 09:58:46 -0400 Subject: [PATCH 11/17] Prep for fusion of binary --- .../src/backends/wgpu_backend/kernels.rs | 25 ++++++++++------ .../src/backends/wgpu_backend/mod.rs | 30 ++++++++++--------- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/constensor-core/src/backends/wgpu_backend/kernels.rs b/constensor-core/src/backends/wgpu_backend/kernels.rs index e26289f..e2ab96d 100644 --- a/constensor-core/src/backends/wgpu_backend/kernels.rs +++ b/constensor-core/src/backends/wgpu_backend/kernels.rs @@ -5,24 +5,31 @@ use cubecl::{cube, prelude::*}; #[cube(launch_unchecked)] pub(super) fn binary( - a: &Sequence>, + a: &Array, b: &Sequence>, - out: &mut Sequence>, + out: &mut Array, #[comptime] numel: u32, #[comptime] ops: Sequence, ) { if ABSOLUTE_POS < numel { + let op = comptime! { ops.index(0) }; + let bv = b.index(0); + match op { + BinaryOpType::Add => out[ABSOLUTE_POS] = a[ABSOLUTE_POS] + bv[ABSOLUTE_POS], + BinaryOpType::Sub => out[ABSOLUTE_POS] = a[ABSOLUTE_POS] - bv[ABSOLUTE_POS], + BinaryOpType::Mul => out[ABSOLUTE_POS] = a[ABSOLUTE_POS] * bv[ABSOLUTE_POS], + BinaryOpType::Div => out[ABSOLUTE_POS] = a[ABSOLUTE_POS] / bv[ABSOLUTE_POS], + } + #[unroll] - for index in 0..ops.len() { + for index in 1..ops.len() { let op = comptime! { ops.index(index.clone()) }; - let av = a.index(index); let bv = b.index(index); - let ov = out.index_mut(index); match op { - BinaryOpType::Add => ov[ABSOLUTE_POS] = av[ABSOLUTE_POS] + bv[ABSOLUTE_POS], - BinaryOpType::Sub => ov[ABSOLUTE_POS] = av[ABSOLUTE_POS] - bv[ABSOLUTE_POS], - BinaryOpType::Mul => ov[ABSOLUTE_POS] = av[ABSOLUTE_POS] * bv[ABSOLUTE_POS], - BinaryOpType::Div => ov[ABSOLUTE_POS] = av[ABSOLUTE_POS] / bv[ABSOLUTE_POS], + BinaryOpType::Add => out[ABSOLUTE_POS] = out[ABSOLUTE_POS] + bv[ABSOLUTE_POS], + BinaryOpType::Sub => out[ABSOLUTE_POS] = out[ABSOLUTE_POS] - bv[ABSOLUTE_POS], + BinaryOpType::Mul => out[ABSOLUTE_POS] = out[ABSOLUTE_POS] * bv[ABSOLUTE_POS], + BinaryOpType::Div => out[ABSOLUTE_POS] = out[ABSOLUTE_POS] / bv[ABSOLUTE_POS], } } } diff --git a/constensor-core/src/backends/wgpu_backend/mod.rs b/constensor-core/src/backends/wgpu_backend/mod.rs index e98b2a2..64ebaa8 100644 --- a/constensor-core/src/backends/wgpu_backend/mod.rs +++ b/constensor-core/src/backends/wgpu_backend/mod.rs @@ -130,12 +130,13 @@ impl BackendDevice for WgpuDevice { let output_handle = client.empty(out_elem_count * core::mem::size_of::()); unsafe { - let mut a_seq: SequenceArg<'_, RT, Array> = SequenceArg::new(); - a_seq.push(ArrayArg::from_raw_parts::( - &a_handle, - out_elem_count, - VECTORIZATION as u8, - )); + let a: ArrayArg<'_, RT> = unsafe { + ArrayArg::from_raw_parts::( + &a_handle, + out_elem_count, + VECTORIZATION as u8, + ) + }; let mut b_seq: SequenceArg<'_, RT, Array> = SequenceArg::new(); b_seq.push(ArrayArg::from_raw_parts::( @@ -144,12 +145,13 @@ impl BackendDevice for WgpuDevice { VECTORIZATION as u8, )); - let mut out_seq: SequenceArg<'_, RT, Array> = SequenceArg::new(); - out_seq.push(ArrayArg::from_raw_parts::( - &output_handle, - out_elem_count, - VECTORIZATION as u8, - )); + let out: ArrayArg<'_, RT> = unsafe { + ArrayArg::from_raw_parts::( + &output_handle, + out_elem_count, + VECTORIZATION as u8, + ) + }; let mut ops = Sequence::new(); ops.push(*operator); @@ -157,9 +159,9 @@ impl BackendDevice for WgpuDevice { &client, CubeCount::Static(VECTORIZATION, 1, 1), CubeDim::new((out_elem_count as u32).div_ceil(VECTORIZATION), 1, 1), - a_seq, + a, b_seq, - out_seq, + out, out_elem_count as u32, ops, ); From 959a381676cd4fa0e105ea9e512bd9b283333075 Mon Sep 17 00:00:00 2001 From: Eric Buehler Date: Sun, 27 Apr 2025 18:00:09 -0400 Subject: [PATCH 12/17] Wire some things up for unary ops --- constensor-core/examples/test/main.rs | 2 +- .../src/backends/wgpu_backend/kernels.rs | 210 +++++++++++++++++- .../src/backends/wgpu_backend/mod.rs | 68 ++++-- constensor-core/src/dtype/mod.rs | 58 ++--- constensor-core/src/graph.rs | 2 +- graph.png | Bin 27206 -> 16777 bytes 6 files changed, 284 insertions(+), 56 deletions(-) diff --git a/constensor-core/examples/test/main.rs b/constensor-core/examples/test/main.rs index 2655433..0eab77c 100644 --- a/constensor-core/examples/test/main.rs +++ b/constensor-core/examples/test/main.rs @@ -5,7 +5,7 @@ fn main() { let a = GraphTensor::, f32, Wgpu>::fill(&mut graph, 1.0); let b = GraphTensor::, f32, Wgpu>::fill(&mut graph, 2.0); let c = GraphTensor::, f32, Wgpu>::fill(&mut graph, 3.0); - let res = a * b + c; + let res = -c; graph.visualize("graph.png").unwrap(); diff --git a/constensor-core/src/backends/wgpu_backend/kernels.rs b/constensor-core/src/backends/wgpu_backend/kernels.rs index e2ab96d..8de84e4 100644 --- a/constensor-core/src/backends/wgpu_backend/kernels.rs +++ b/constensor-core/src/backends/wgpu_backend/kernels.rs @@ -1,13 +1,148 @@ -//! Instantiate cubecl kernels for binary operations on all supported dtypes. +//! Instantiate cubecl kernels for operations on all supported dtypes. -use crate::{dtype::DTypeOps, graph::BinaryOpType}; -use cubecl::{cube, prelude::*}; +use crate::{ + dtype::DTypeOps, + graph::{BinaryOpType, UnaryOpType}, +}; +use cubecl::{channel::MutexComputeChannel, cube, prelude::*, wgpu::WgpuServer}; + +use super::RT; + +pub trait UnaryKernelLaunch: CubeType + CubePrimitive + Send + Sync + Cast { + fn launch( + client: &ComputeClient>, + count: CubeCount, + dim: CubeDim, + a: ArrayArg<'_, RT>, + out: ArrayArg<'_, RT>, + numel: u32, + ops: Sequence, + ); +} + +// Integer impl → forwards to unary_int +macro_rules! impl_launch_int { + ($($t:ty),* $(,)?) => { + $( + impl UnaryKernelLaunch for $t { + #[inline(always)] + fn launch( + client: &ComputeClient>, + count: CubeCount, + dim: CubeDim, + a: ArrayArg<'_, RT>, + out: ArrayArg<'_, RT>, + numel: u32, + ops: Sequence, + ) { + unsafe { unary_int::launch_unchecked::(client, count, dim, a, out, numel, ops) }; + } + } + )* + }; +} + +impl_launch_int!(i32, i64, u8, u32); + +impl UnaryKernelLaunch for f32 { + #[inline(always)] + fn launch( + client: &ComputeClient>, + count: CubeCount, + dim: CubeDim, + a: ArrayArg<'_, RT>, + out: ArrayArg<'_, RT>, + numel: u32, + ops: Sequence, + ) { + unsafe { + unary_float::launch_unchecked::(client, count, dim, a, out, numel, ops) + }; + } +} + +impl UnaryKernelLaunch for f64 { + #[inline(always)] + fn launch( + client: &ComputeClient>, + count: CubeCount, + dim: CubeDim, + a: ArrayArg<'_, RT>, + out: ArrayArg<'_, RT>, + numel: u32, + ops: Sequence, + ) { + unsafe { + unary_float::launch_unchecked::(client, count, dim, a, out, numel, ops) + }; + } +} + +#[cfg(feature = "half")] +impl UnaryKernelLaunch for half::f16 { + #[inline(always)] + fn launch( + client: &ComputeClient>, + count: CubeCount, + dim: CubeDim, + a: ArrayArg<'_, RT>, + out: ArrayArg<'_, RT>, + numel: u32, + ops: Sequence, + ) { + unsafe { + unary_float::launch_unchecked::(client, count, dim, a, out, numel, ops) + }; + } +} + +#[cfg(feature = "bfloat")] +impl UnaryKernelLaunch for half::bf16 { + #[inline(always)] + fn launch( + client: &ComputeClient>, + count: CubeCount, + dim: CubeDim, + a: ArrayArg<'_, RT>, + out: ArrayArg<'_, RT>, + numel: u32, + ops: Sequence, + ) { + unsafe { + unary_float::launch_unchecked::(client, count, dim, a, out, numel, ops) + }; + } +} + +/// Convenience wrapper that launches the *right* kernel for `T`. +/// +/// `numel` and `ops` are compile‑time parameters exactly like the inner +/// kernels, so you call this with the same `cube!` launch macro you +/// already use. +/// +/// ```ignore +/// cube!( +/// ctx, +/// unary_auto::( &a, &mut out, comptime numel, comptime ops_seq ) +/// ); +/// ``` +pub(super) fn unary_auto( + client: &ComputeClient>, + count: CubeCount, + dim: CubeDim, + a: ArrayArg<'_, RT>, + out: ArrayArg<'_, RT>, + numel: u32, + ops: Sequence, +) { + T::launch(client, count, dim, a, out, numel, ops); +} #[cube(launch_unchecked)] pub(super) fn binary( - a: &Array, - b: &Sequence>, - out: &mut Array, + a: &Array>, + b: &Sequence>>, + out: &mut Array>, #[comptime] numel: u32, #[comptime] ops: Sequence, ) { @@ -35,6 +170,69 @@ pub(super) fn binary( } } +#[cube(launch_unchecked)] +pub(super) fn unary_float( + a: &Array, + out: &mut Array, + #[comptime] numel: u32, + #[comptime] ops: Sequence, +) { + if ABSOLUTE_POS < numel { + let op = comptime! { ops.index(0) }; + match op { + UnaryOpType::Neg => out[ABSOLUTE_POS] = -a[ABSOLUTE_POS], + UnaryOpType::Sqrt => out[ABSOLUTE_POS] = F::sqrt(a[ABSOLUTE_POS]), + } + + #[unroll] + for index in 1..ops.len() { + let op = comptime! { ops.index(index.clone()) }; + match op { + UnaryOpType::Neg => out[ABSOLUTE_POS] = -a[ABSOLUTE_POS], + UnaryOpType::Sqrt => out[ABSOLUTE_POS] = F::sqrt(a[ABSOLUTE_POS]), + } + } + } +} + +#[cube(launch_unchecked)] +pub(super) fn unary_int( + a: &Array, + out: &mut Array, + #[comptime] numel: u32, + #[comptime] ops: Sequence, +) { + if ABSOLUTE_POS < numel { + // ---- first op ----------------------------------------------------- + let op = comptime! { ops.index(0) }; + let mut tmp: f32 = f32::cast_from(a[ABSOLUTE_POS]); + + match op { + UnaryOpType::Neg => tmp = -tmp, + UnaryOpType::Sqrt => tmp = f32::sqrt(tmp), + // For any unsupported op, fail at compile‑time + // _ => comptime_error!("unary_int only supports Neg | Sqrt"), + } + + out[ABSOLUTE_POS] = I::cast_from(tmp); + + // ---- remaining ops (if any) -------------------------------------- + #[unroll] + for index in 1..ops.len() { + let op = comptime! { ops.index(index.clone()) }; + let mut tmp: f32 = f32::cast_from(out[ABSOLUTE_POS]); + + match op { + UnaryOpType::Neg => tmp = -tmp, + UnaryOpType::Sqrt => tmp = f32::sqrt(tmp), + // _ => comptime_error!("unary_int only supports Neg | Sqrt"), + } + + out[ABSOLUTE_POS] = I::cast_from(tmp); + } + } +} + #[cube(launch_unchecked)] pub(super) fn fill< T: CubeType + CubePrimitive + Send + Sync + LaunchArgExpand + Numeric + DTypeOps, diff --git a/constensor-core/src/backends/wgpu_backend/mod.rs b/constensor-core/src/backends/wgpu_backend/mod.rs index 64ebaa8..c9aca5e 100644 --- a/constensor-core/src/backends/wgpu_backend/mod.rs +++ b/constensor-core/src/backends/wgpu_backend/mod.rs @@ -4,7 +4,6 @@ use super::scheduler::topo_order; use crate::Op; use cubecl::{ channel::MutexComputeChannel, - cube, prelude::*, server::Handle, wgpu::{WgpuRuntime, WgpuServer}, @@ -12,12 +11,11 @@ use cubecl::{ use crate::{ device::Dev, - graph::BinaryOpType, storage::{BackendDevice, BackendStorage, Storage}, CompiledGraph, DType, GraphNode, Result, Shape, }; -mod kernels; +pub(crate) mod kernels; use super::cpu_backend::CpuStorage; @@ -83,7 +81,7 @@ impl BackendDevice for WgpuDevice { let CompiledGraph::Wgpu { order, graph, - ghost, + ghost: _, } = comp else { unreachable!("Expected Wgpu compiled graph"); @@ -120,6 +118,7 @@ impl BackendDevice for WgpuDevice { handles.insert(idx, output_handle.clone()); } + Op::BinaryOp { l_id, r_id, @@ -130,28 +129,24 @@ impl BackendDevice for WgpuDevice { let output_handle = client.empty(out_elem_count * core::mem::size_of::()); unsafe { - let a: ArrayArg<'_, RT> = unsafe { - ArrayArg::from_raw_parts::( - &a_handle, - out_elem_count, - VECTORIZATION as u8, - ) - }; - - let mut b_seq: SequenceArg<'_, RT, Array> = SequenceArg::new(); + let a: ArrayArg<'_, RT> = ArrayArg::from_raw_parts::( + &a_handle, + out_elem_count, + VECTORIZATION as u8, + ); + + let mut b_seq: SequenceArg<'_, RT, Array>> = SequenceArg::new(); b_seq.push(ArrayArg::from_raw_parts::( &b_handle, out_elem_count, VECTORIZATION as u8, )); - let out: ArrayArg<'_, RT> = unsafe { - ArrayArg::from_raw_parts::( - &output_handle, - out_elem_count, - VECTORIZATION as u8, - ) - }; + let out: ArrayArg<'_, RT> = ArrayArg::from_raw_parts::( + &output_handle, + out_elem_count, + VECTORIZATION as u8, + ); let mut ops = Sequence::new(); ops.push(*operator); @@ -170,6 +165,39 @@ impl BackendDevice for WgpuDevice { handles.insert(idx, output_handle.clone()); } + Op::UnaryOp { v_id, operator } => { + let v_handle = &handles[&v_id.get()]; + let output_handle = client.empty(out_elem_count * core::mem::size_of::()); + + unsafe { + let a: ArrayArg<'_, RT> = ArrayArg::from_raw_parts::( + &v_handle, + out_elem_count, + VECTORIZATION as u8, + ); + + let out: ArrayArg<'_, RT> = ArrayArg::from_raw_parts::( + &output_handle, + out_elem_count, + VECTORIZATION as u8, + ); + + let mut ops = Sequence::new(); + ops.push(*operator); + kernels::unary_auto::( + &client, + CubeCount::Static(VECTORIZATION, 1, 1), + CubeDim::new((out_elem_count as u32).div_ceil(VECTORIZATION), 1, 1), + a, + out, + out_elem_count as u32, + ops, + ); + }; + + handles.insert(idx, output_handle.clone()); + } + _ => todo!(), } } diff --git a/constensor-core/src/dtype/mod.rs b/constensor-core/src/dtype/mod.rs index 5d0deb7..5f3bb1c 100644 --- a/constensor-core/src/dtype/mod.rs +++ b/constensor-core/src/dtype/mod.rs @@ -4,7 +4,7 @@ use std::{ }; use cubecl::{ - prelude::{CubePrimitive, Numeric}, + prelude::{CubePrimitive, CubeType, Numeric}, CubeElement, }; #[cfg(feature = "bfloat")] @@ -87,32 +87,6 @@ sqrt_integral!(u32); sqrt_integral!(i32); sqrt_integral!(i64); -pub trait DTypeOps: - Copy - + Add - + Div - + Sub - + Mul - + Sqrtable - + SimdSupported - + GemmDispatch - + RandDispatch -{ -} - -#[cfg(feature = "cuda")] -pub trait DeviceReprLike: DeviceRepr {} - -#[cfg(not(feature = "cuda"))] -pub trait DeviceReprLike {} - -impl DeviceReprLike for u8 {} -impl DeviceReprLike for i32 {} -impl DeviceReprLike for u32 {} -impl DeviceReprLike for i64 {} -impl DeviceReprLike for f32 {} -impl DeviceReprLike for f64 {} - pub trait MaybeNeg { const NAME: &'static str; @@ -151,6 +125,35 @@ maybe_neg!(i64); maybe_neg!(f32); maybe_neg!(f64); +pub trait DTypeOps: + Copy + + Add + + Div + + Sub + + Mul + + MaybeNeg + + Sqrtable + + SimdSupported + + GemmDispatch + + RandDispatch + + CubeType + + crate::wgpu_backend::kernels::UnaryKernelLaunch +{ +} + +#[cfg(feature = "cuda")] +pub trait DeviceReprLike: DeviceRepr {} + +#[cfg(not(feature = "cuda"))] +pub trait DeviceReprLike {} + +impl DeviceReprLike for u8 {} +impl DeviceReprLike for i32 {} +impl DeviceReprLike for u32 {} +impl DeviceReprLike for i64 {} +impl DeviceReprLike for f32 {} +impl DeviceReprLike for f64 {} + /// Marker trait for tensor datatypes. pub trait DType: Debug @@ -158,7 +161,6 @@ pub trait DType: + DTypeOps + Send + Sync - + MaybeNeg + DeviceReprLike + CubePrimitive + CubeElement diff --git a/constensor-core/src/graph.rs b/constensor-core/src/graph.rs index 2f5e1b2..1a90e8d 100644 --- a/constensor-core/src/graph.rs +++ b/constensor-core/src/graph.rs @@ -714,7 +714,7 @@ impl BinaryOpType { } } -#[derive(PartialEq, Debug, Clone)] +#[derive(CubeType, Clone, Copy, Debug, Hash, PartialEq, Eq)] pub enum UnaryOpType { Neg, Sqrt, diff --git a/graph.png b/graph.png index e75153d2b7fdac08587f7a68b6e43b341d33f460..042d922de9c24c47e0bcca740c902cc875fc2b6a 100644 GIT binary patch literal 16777 zcmd74cRZHuA2)uLk`huvW@u12}#H*qpa)|GO{CCWn?7@Wrk#jXvoM8myrs| z%1p-dKD)nv{9eC*pMReF?seB)oY!$4Lk*JKKweL ziW2{|uH0USpD2yaE6S7BiGLqgq(_lRdr4>IPix#tm>|3AY0R(5Om}VnD^kn-_dq<= z%Z$B)k<@pnub7*s{fH#@8uf@azMSm%6@NvuxHZeVtY<>!hjlbH-yv(>$d~5adpA;i z%FCrV{^{>x;MJz)tr}`K0=}=_KKWyVLsQ(WYxDGv^5&6*<0CxW+#8MW3D!HFJ?*SV`D~6 zPJluxeRO74R=ZoMJolZ5h=}RF&Q8Yq`g%Mkuc*i$znO2Byux&YT|-wl1RvJk-u}3< zk|Q)URPFG?nw8~cH(^mx>SjF6(9n?UWN~4kUtC;VUSdIh{++b6G=l*-n^wy;-G+vS zi1>KwIAzK><$%^!_4nWH-bg2QcJuM^+1lIt z#K*HHBqR*}{29Eq`nR^Bf%?FK0}^-djD@LZ${QGPjLgi_`ud3~MtOO8JGSLTMeRyROdR_8bDN@~;_H%<>MvjRBqb$H&(Hhg zRaA_wEvJ4qQ`6QCvh2!9A-=+>VcnUwGMT)x9>f2gnDdj?0r*{g0*$dH0GNz zx}cz7S8!rknIs!K`_Q*?+h*r*C9v+Gq{;QoI#p9F@+-x~|^r)XvSLuwMvvWXpc6PXpv$L}-fAyzN+dDfu zbtDbECO-G~4R@Fc6N9~9uc4{AP0?H@`3S?dl+ohJoBevD?Ck7C_Iv^YJGi*GsJ3nM zDJd!8kiO7)@ywYs?>~K#S5Tm|x3~98D07~8l*cY8B(&4{WnW+4;OHoomX=m`Z?7Dd znB6`U121M48$|KYp+ z(0p`sw2t`5)Ku7MUrH`vVFp@STDPTrWxLN5AA7xd^JX!RWnuM8m#~TNeEOuUSLRGZ zx@Kh+>g`R+mN6NxZEM?gjy-$sj*gmP?=-gz zExK70vFzEiN6g{Jc0Pl0UWPB7iN{P$M|7>nv01qoU8)*vH4g7K5xNq88waTN;Snvr z#UD5e;o&>5*4!sgvIzmtu+jV}ts^*fZrH5YmE&A)%uL~SMUK`FOw`12X-`H5@dmK_MIVcRY zCGQ%Vn(FWFI%rEgPuhd!%9Sf-=H_<-0|WDF+a5VoZ)X-Deg6D8=_YpkwjEtS*Yg2RCSn0E8&&oEXJ$?E(Cx=!>+3?8{H@6O#_t}hXJQi^`KE_)JV{Oc zJLklXwe?#2livA?4^;pam5Po|QBRNUfNIh`dL9MG1_O^RW4VJC!Ym0VU3d1rvZ`%L zl)R{>M#|I(wi~YJl9uMcU@KN_Wn(gOa~0LqckbM^%WkxV>uGv=Erx1bf3LdGyiLIA zr-qn>1dGHcYaV`nI^sF6ta`&EBR57U`C%7xjTjjjEzbqy@CdWeV3P&CeJj83z=5xC z@9shYXBIZw!QuLiUO8Tb2Jap~ThN=4on6;o;ht~RE5asbOPZgbN0B^p`}WDN!^48s zePTGltoX3pj0~l2ap~Jj0_V=1YstMTb8>0WH&(=o4P6xqau# zlq~UFmGE4dQA~N?)7xwO@xh_kldeKW#>Qn6e>#(%7;iH)GGaQq!(EtL*3C_V zmQBo?UDA24xcDCROa?lsxgF0`lka2qFZk&^A&s57eto~kpYJgYEG*|pykcUpnmKxB z4;?yGYIc7$8OOb+z?7~rNh*Hd&aG&FpXbKf4hafI2(yHy)V3We(z$T=?pZgfP%X;kW`s8kzI(bLnfXNHL1GcOM#|PNiw26I zVb_UK=Vd6o{m0(k$!UMSwsO>g3r{e?mmC}lQsl-Ru#ecq?fq_iiakv{HbTj)+GjJ_ zgQwg4@WbG>+2nOK4Go43kMB@yX{}nD=Z|doj-jf4Y-mXJ!k>Qo@k7BVKej%MQ}yrS zl&GWj#MmPr_pfiaJ`y(Hx#98q_ewpl?TmAK^A=5*_zj*i8Qi*)m6i4W!-u>59*g?V zb8K4tb^Op` zNRE2;?7;H!c1M(#J%#4GT;1FvGcylK z&3-ywk<-=Rn;_|O|Cw@}EO$guO-&82py1~gE_$+6b#_L^V~iv+KVJ|_)iPDJ-rsRY zko`G+TeSa{FZwyJfIv95@3qnJNu|`!EdeyF@4tMB1H2e3XqM(lQ#T*Y)80~4SlH4m zv#z|hwq|nu`kAT9zt&;hU(R2+kQTwC7>TVVwb1gabTquDqrF}JeDqOzKEHDpE}U>w`o;09w>XS) zJM$?t`MD1VlbN4eJCzTLoH+6MJb#5eCS`4HvFg(0%MbY|jNJ;o);;YV9C*-kCEORX zQffcHbQn>2Sx}HVJ#AZRJLHp6YpB0ZMkblqW`G%m*5p#_qT!^N7{!YhFPaz|*CqxV zJk89!ySC`H9z$#iyhgz2HOH($e7w4&qwtsR?iR20zgP9Q)NMb-7emeZCbw#>r)4Ny zAvNWCL-bb3TQoE@0kn6W@&Q=f{!SEsc>g}=*@gJdV=DA0uaSv~?Ee1#pT9T6(6H0C z-7$W5e=GYvKnD*Ij}aeTb#+-^N*Zn%85xx~203YI{+e0Zr}2)RxmRuS#BjhWrfaB8 zjErRCMXZFh*_35G*H>L$78RwT9;q7}r&fPmnkXK4-`AILqbcEoprBMUG)$@VF``_+eq=2imBvb&SXi8sbp9ov^jCRZU0wZs ze}59@$|gK%66I6@=X$RFlIq3t=R^8?O_zb&6!8Z80i8W|lXaQ&LKWn89Z=qj@gF{X zxK&Xgi}gyu4H_Vs&luob&!p(UZ8;or=ZT)QF6X$R)tPsks1u_oYn_&5fByV2v9vtz zFw*qhsP|oaS6AEU=Tu^A{QUXTk(|&s+K6v2sEC)&(JiLE>iK8?!X!Ba=r(b{#D1XC zqr1vW1|9aIhK3)u6{}nzt$$kD{>aG4@COh43JVK$rM+2=6*VfV%xsbV`|G$~ zh1>2scka~C6o+w0Z7b2ErK1Z|@!(h%uxRIf`s~@)HdUFra@W~wnA}+r2YY*>6CX7w zXXY=T_G#*+DD~?9J~A>q+br{LVWRh3Yo(i9PEm=c8jh}McQ~U|LgU&zaP50V|8?N%vP7@H+jFh`9)S< z)U7v=j`INN+r-2LNtel|iIOg@o!PqP6Fo)3*WQqbc0*5(1KN^tG1Ux!Lf&yWxK>sA zj~}UJ3+HXy26NHgN)O3ndauLVEs6w=2?}0(?fW6#rt-_s;yl@9;Pcb7QKLOYmZ%;F zf`fwg`pc%ASjX>Aw2!3l+m|HZ-AC(~uC||d2fh(@9966ElqrxL(Z6xy01#$GVBnUx z*jThA2}iTQ$%}navy*oZChfJcv8ni0jTJnN^XxeD^-N-M$8*UMJv%!=8fF3SojZ3b z9e%izq*G{iF|c1twW$QndwUbp=lJ`EhLE5jVlqgh zfS5WB+S=M6f71%3s9vJG0S8yB_2})H6wJ5j`CWh)Zu2J3i>C-+| zR@TT5CN+gK!|&R##<@j(*b6;w9!obdG&~J(EnzkC>r25V@B3`xPuiT< zFk~`1*C+sjyrc2KC7g_dq`4l;T=Ig{{9Jh%01ts5>O)zbdhRQva&dAdizJ8SIE^Ie zme}vdqU`t_TCIi#@xHb7_x)m@i7|0Sd12>Y+HQk)wm5$AD{oY)l=tKGTA`mEX%38w zyVB+I+=h#nmntPCh2)LuNHTNi?(FL;CwG;Wmi8~~FmbOHDH6?2OWPbKz07-~@lnT( zq>>WxJ~G*6m9i(_aZG*FrcG};IxfdDva*JtI52`~)f8TSp*d7Ck|gKhQ6Z2`yz-|+ zNhar$XPOh)airEg5K7Rb@*r^cj6CV}j`{W6?@+0B1rlGG6o;<0ylt-~~uYA#)(S>OPXrokYbiinZViWu`kA}@b#**pX=zCirgveSUFMR+f8ColkVU6%T3HFA z9Atkluc#0-Yk4f0)ccFZH6}dVJL;gSY_{fjRwxQ;bz9q|so_QEt^|sfXHjWtywk1c z`N{TM9$&V3U$S-Afm0|6U@;Pwsbn8&QgwB;gk_-==xtC~SQY&P?X3ANCA&fLu@)&< zKa%aA-`79I3R2(&s(~0J*lW|1J%iu8Ii<8O(s`u#fVCpuUfv@|e-5JG4@@zVz zep@#uNl|UxYTh1w>BLUNGHeqhU{5@#rQ!^A|O!U*18vdz*jW0b*eKi1WS z;kB&3%DGSt4C1e}Pxjoo;3k%d!zhf}dDk|){kpy`V!c*z{M`BT8wT$O&PzO>XOq8v zJ*{)DQ);0qN%Ut|E+=UaVDMMuLRl+h4L^sYr8^lJ1JIJk3PW>@g38^;(qi)Lzn|Mg zN!jmQ(jVzE(v;{MA0OXxzSTc3Pk;~(4nI6+YUQ}wV*N2$WWER*Vk>T#1?df7D8IZe>G-f`LJ*}~G_zjFI_rj(bLQ$H!STdTp; zG>T`etlTEOd3@Tp@LBPJ^nwD1n8Ja}pfuC(IK4(!e4YVe7)|w8fV@grrN0_%YXFr}0kOiUD^)yRY)?9UX@2YpcXpG8^e0e0Waq#wTV7mIL;2{SO?V zA3kbP06=~xqyUAhaYNJc}o3gO6>j7a!bpic(5t%_?Febi|+>3*IW`;KTDoZ{Y>by zP(X|IA>+_`FVr!zCyJQ75AQu-IBsCz&ScLfZoldHjZde}obea1`ttI++KUcZ?#_;m zp%=N8HN7@fdnQn+xlPudqNnG)dcT8Vmr}u zBJowno`6qf{FO@^2X4>OfCGzI_XW|gid+NeKxKbvd%vLgyS~0Ydy?0h>&fXi6chac zc`qvFJ9WjJelk=p|2%?jelg&qII3A=yl84(o?7g}->It9l#~tFr{x0K#oyUTTSukz8eu4DT_@%2T5VNon8C&kg)leUqogx^l0k zk0g5~k<_5>fyRcJ2*q_4>X$heNrncQ&HiXKxpAX&@l-=^@!9t(Rw z;0)LBSzAf_4<59v;1d^*o2***bX#9tBp75N(5>^pFyMpvhsgblzXhE#KYl#-dtu?( zp-az3voEBnpB)++61M-Ql-+B=63jJgT^m5N=`lGlEG$fb*UhG(?9c4Tr7F+W!b$C? zmg`IJWYE$h`SeSl7K_-_g^O$H=$u~Bg*YL9#$!6|H!^DaqaC);vh9sf zdK#ABZzFSZKys>F4v5ghBDU~s)Pc`{j}ogA56_*^$~CC?z5?-8_r84O!-r=8>mNi# zwT(GDL2r*%2;;agIXNj&l{S18q#?@Re>3JjS2eV+#?{5x?{I{Ye3{D>06U~khTXf* z0HtcEsXgv`MUz>P&}Jt5ATjX*Cb#PQxZ5Q{)e8sbk^1v3YT0aZVY3#9Xb69$5*&%; zj5Tr%B-TSBBAU6Ze<7K*0l2>?Ua(Hj$+-`Vps1pv@*q0;MmRHA>?uY@#<{n#H`t_1rNHhlGV6pji2x=5Z^ljCGI~RX7`Y zF6}{h_~Yi&Zj*YpcdZ%7CoDU|LDi+^TI4*8C}QK|k8F2OI|5{oj#D~b7<#|wl@;sR zG<6ms6DlweRtYw4ZVA`l`v6YEi;G>7zmOKv%FEp551)~j50~fu{MYtovAU+F0z??@ zUAv;B+p0&;i@VIy_B%97rFYTKWTb8`z6^1Y`2O^C(`1R<-uQ>n(XBwE#Mpf{Q)lGo zLrO5e$jg9{p29wvYxY`;`ls~lEK{^7dIZ*~s-F2EfR??-%6VFc+*i#EHDvYANHRfi z?*Or=-@5gh%VkG+?P%f3FLPX3vfD;Sckt4sOK!k*^W@sm2ba=SE^2Et6UcWwx2k`# zm`Qt4OHWT3NQMjB5cROQV(|PsiL17{P`*Bcv3+Q7e?Sxy0+83dj?KyFuvi`n5?(@8%ea8ik06DCRSD#P*hWKg38Jg zf=i2idY?l-@Lc<23bM})wGO0e4uk}1or6|Z64}?sXQK|tIUzUQy}K0}Wm(rR2L0Ez zcWz)ALDDyF+-QcuYUt=#jOHzugXAn@d%II-z-GXCI4u z^r)36>3ni(Ar(fNIZofYq`~dj|($gVn z>7>e~uhh~#Pj~=~=b+JCf&g__o|_PSFbgcd6r>XOGSQDYJ8leB@WpX70x2hCfo;^( zLvX`T^9i>Ago5xg`Q%26gpMDNfJ5TDh3+f%J^DMds5Kk$2Fi&NmzhUP_;GA%0oVvx zJVDBx5f(8H!m0m6{-KEpE6C#e;H%aK((woi(qr!B&Ys;07ioGlHOvU=r39yFiR6p_ zIRzenogv=QLAqxax8H}d5QYVXA>;UA+Rfv_rAxb1l1|=*?LgQGgt&rvnrV@vGsJV? z#aJFcB`ccN;#ys?hkoc&-0|)rOD0;5lR+U&{59R(%!CmH!v+#;Ahcn=H^sK!Kkfzx zq*T1mcG}2@8|UKJuU|XBO|i#Lu1we3qRl(TlAF+R;LFewsy}=uGxz}B8z^>ga)tqY zUR#Op~Ng_Ov8h>hdda}yO+YczmdS_m0-*^^Lf&a1q>96)oJs>m3 zjo{?f!XF{#7qB002oDTcmW@c^$btIq-ExUx@@VhVq$KW6Ews&%L zQhwdt+KIHO={(EFUVH72X&_7;;^P4&u=%_Rzs}c}9&Q}=DG>`eefqT2Ol`$Mc%j&o zr|j*8ni9l)zkM@#5ED}i*At_ zEfqk=c^{6DpTB=KXiLk`&&yzB*eZ4lzpijiK|SRHG_@T(^|r2#i;s_XxIT0^@vF+m z_wP?(o?$*kKwtxB0x#i#t4n+;EI!oKzkLx**qD8NdQYD|rRpsKmIcJF1x>@2+OYL^?1UjL7f9|X)v8`S^6;EL9G;=D zwo<$I&COd#L3Kv2%gSouR9042PS4ETLFXg_5R;R%sIFq-;-`-?vhG_0_eOjIpy^3r zA%jrdc5riaC^>6uKFH$!)hXFoSv%O+j^6A8>4pPK3FX%JZGQ{|N$Qf$0s)z)`hYwrvHnxZcpc0)n(#q5bZ}^T7gaS@K*knX~oxWrr@OIt;bb9FD+%ktJgHCj*H|v?Qla6T3TA75+A`wKt$%6($m+cDdovf z~&LF5E^6*P^TH*a1uGTKN@O`RS36xc*W0!$gvnVml_ zYie$MZ5JVLWpxZDr5XYiKYv;fi5SEUFptn%b?hX?#jAUJbuEp_%kChLt~~$D9Ih_w zref@qZRI|6JNmR3@P~7tjW1(oZWjg{$M z4+`ODxBp+t%C`DC(pGcc)O<)|J=>yZy9Yl@4H8egH@1OE@BguHwf**`~2MMOD zA3*7D1Q`-_-c@@1opfcd*z<3^ON&+Oq5IEALr%N~9fyz>Y|#K!vWLNu%14~$Zv1=8 z6l@b)2M1s6qXAJ-Q3Bb=QFn-k#O0?@T)QWD;}$yDzp8;kMnQsA*OKY9Y4hfkXU~2l zb$*VwBPK3@sEOT63TLUPsJx4dMNghQNg`FPEx%AxI>Sx>OeJyK@#Dt{^8~DZ6Nyj) zkctRY&%Eive(&Fm_l-ulxw>v3!7}Z?HB4h@fIJ0}n1Gr+jMM<4DioycdxSOtD87NN zj%|cQ&DOi}t&QFeE$u(a{~}Fa!fUVX0w0G5*WlNtg`P_$!?|C#$2wLL#Q`*0%`7 z?QEButBG`;b|4VT7Qm-u&*ekl?tZB4v5y|HNMv#N@;@@VC)mk_RvRnn!dJPvpbL^x z{^v)00Bxq?5C`^GKCSzZKT}J0chK+O_CwEabPE1k2q$ecL*x_kuT^i93v3;p=DLnu zm-rjL*fhnaZ!TMz?^dgXjY;)8>Qh0K57)n>R zE837-NH+ftdip&QR2W8SS6|eQRR=!UW1JO+o?Qp$=i1HN ze|_VP@|httwOrEh*g_0?Tn=_6LW>@~*Mq23PtN!yasQMgj-fWE5Og_oH?n;t{4XSO zh9)O--eoLP5^K;Ih^}}N$^vkLxl1Jij=+C&Y?lY#FkQeBa7IN&`al3Il5Bqeo(ftS z5MYagMg_-J;W}nxj|(3Fuou_39MR&x0Xnl0U+M3p1b^|1SiZdNAW{6c7522)G3uVYOlv?m%r1s_Wa*#uPiQa-GR8@)W8R<=Dr{-(>J+JLvC_>pId54D5fIShmilyba zZa+t4(fMkMmgDu>q8b-3Zul>#iEXK&q0#r-6gH2V$^4f<p58BC%%!R>slG-Q3r}@&azZ@GAK6dzZ?MOU zT3TH1preWX{_j9^f-%R`=%HN^wZn5=IHergZ*% zExdX7<-V9tWKy0W&NO;g0&fES6}sv4+??Nw7cVpgk%6(g#O%((?IW9V>g?IOd6x+1 z85R@p7r@>Vq>hjcBU#TSR6rTp_-kY&kVq%)+qcQUz<|T&OT24xh6m)=LZm z6%0(DRK>8bP}~4IaK=t)Xgru^7P-lcSO^Wfb+W_ivJ*lqw2;=sV6zdqJAy4-xx$5b z8VC|$M$@BM8you?9Q;I7PiqBvrL$+Nt9`fFElnE|5kssok>Ofh90)_4f$+wWR|%kf zSd@x&gFi$(m}u!ZE{OOcGI4Eg^fU2bLBkq95uW}3b!ba-^J-`=`A*}Si1|_L2d2qtr=B}Gom{u>oY@zD5q$cLzd#>;bK;B$V`*2_eh z?-WIPW-o}a%*r%{=jyx`il5>8aIRZRLxFpvP#H5qsW%jLh7{!&Q*9+6a@gU za-7g+XjrF}mG9*|`T-0vuA5^}5gf`YYSi}Z0v5Lh7Q28|k1#=8`$8ssTie?5-50HI zetEeWbO$R}e@Ru!|4V^s?UyelN8-N{k5~<&LPtG92pxnS1;9@9mmiJsf&YlbyVAQ* zq)-oO2y^-3)vIAJ?RVss|30TR@8)_-x?X!KP4mSJTK`_;M~R>t@>O{*Qw9LG#`8Zr z^lrvQASCAiu4RJHP(8p)$T!d%41jI~QFJvmgb9j1k|3}1;2 zQM0%B`5u(P*=s+QO;J>#7D;946m7#o)gqA67SZeS`|`(o`ANqP9opKKt{Mor+xb_w zVUQ$MzVuJDEO?ldkeLF>WiA6_lg76n3K?gONVv`M60eNJ=OFM3CfA^@zMeQlIPEp~ z7O3VPz28d$v9J+qE3>Ndii*aqPtRg!ac;N&J_8_U3j_72P9YWY)!N0@?9s<`DG8sg zze1P!H9JY3n;ueDzy=CR(rY_ydjJ$F-Q87wAQ&^h~Hkb^^%BAz?sk5(EC6QV% z_Mjjn^Y0SaZsRGW1~xWR%n8*NE>5F4ws&EIEj zX_$^k*&%&!(Zb@VepN2KoHi&nXR|dgfvOOaNjNkI4Weo@CJ@=DLl;$QJbCXr$U<1)i&JLZYJS@VJ0o#F`f(8xP45 zheMKb?D?Uw$#&=g^!|S!odKoZd`UBQ`Fg$U~?VH(jLv%@S-Re{@WwJ-S*_ zP(bv$2e%*}f=n>xbFr`qoy7=>d5f7aEiOcLNeT(7tp_#pr1T=ulEU!yTYuelAqQ@=R|BPa9E@7Jp-kQEvjt+k)64AZSa`;Hyj2+`UE$gL0RQFcyJ_o0yujZ;M5OH3YKpOf7QxP zXu1jr(~ur#Wo<`dn@Gf1=2j5aeRY372f8J*lv`nEslq+xJ2)WJ&1K6DQ|AzRgkS#J zZQdf@m8hCh=itR6kXcr@8cy!(6F`w2`h%J@W$u9o?RJ}#_ro_E!)7OhW8|bZl7N^f zNVnYF`knQ&XA54w%(ESmCHNao^9FjZ|FZwg(znHkq&LRs6GUicfMaCm3M87B7J6+k zh&#xYYLB1$6;I?FV`aRgi7JB5aeKOY3nISNiku#sNZJ){CvZc9pbAjUh{{JoCg-2+ zp4|ZB8DtLlgw!&h?5<@&zG6P*F!XQzKvyAPAXCjE7x-is0)QlMoJ~U=C8Z$Sss7H{ z3KzHa?7-`~%{-8O0UfADOkqjSz>?ocg1QNix;;(Z^&OHn;4J?p70w_*J9GlQH$295 z8)Uc%P1nF{)v0`Kd3*^wn<%Eh+$2%AH`v{H=9^HtcUS~mJdlrHz=%kFquDMl*o?O(h3T02syr^WvDM2fh*GEl9E5s4Y5C{TIstypu&wS`%prdMsdgGxM13H``YbOm_a@0z z-2Nl<)+paLr$7qsJSe*_D(TAVDqdR6z@R@x4Bx!=cVL|xu`~qAUfJ4dj=ODu7F^Ug zu0vOZOA5?H(?uGAIO~REt!ad|j@p_nl51WeTT)?Q?9$ra?nfL4ER|V-0|lYl%%5tA z;eUsGG+#p3uN)$*Vb-!YLB^|+{6cfXAjEZ9)q+&%^%VzjH{w7}H1Quj`j&7(u$8xS zoYaoZ(KuWbEr}&YYGcRl-J4M{0L)MG#kh#k-j(-3t9#Sbr1D~rl9I9pR1=+|{U;eW zqgEiWY_?4;p7`-2fH>X=OCbch5t|D1s_hv&$P@`$?wIY(faxa5smHhqgY2{U|4Qm{ z#qPRJYF}%<7D$V@LIt~pa^3teiUlEkpY->zas=K=n3}$F$ zyb;SX1EgoE|MrHdY2ZkbJFRI`0ujf`bDh0$v+p&1!Hve9l^+RLY8aLi;ZMLtgoYXM z@Zo+oi7MPF^M&9FR@`lUlT1MQd{mk8*(S;2wcyCg$v4%^#`>B0`4Q;MP^d%x-A!n+ zA{;enh`ij~J_t3)qsBi)qxp=EiFQuifFx=NAe<^`)N5_Ti&s=M#&db>;P06kF4Ek` zBe`nk=9zgbKqZGxo=o`BoUA(Dm7AV+K86TxSKy=DTU(#(u0=ij@bP0XhubLE@~?vC z(Kyqj6p)IG1_nu*3E-Gu$p@8*0DRTzgv};zs6Hw<;S-hbaHFCXvXS2i)L>yy`i-YZ zO571Gz>ofa2q1}|KlQ9GC-|M?^x)}iCpu2)gy^GJT1)8k!;nLM-_!F5(t<}>-M5ps zHW+8ULo!|-)*4Q0-t`8??A%~SQLLXV0)O zTayQ!3pyZiZ3svW7DpZ|IyNachBu+rdx&!iXyD8*FI0%b!qF_X$cFUnG_Hx%ynXvk zGvxQ+W}w_`Bt(c>eJsJ)50J`?4Bq>$`$U;oPWZJ^uDqs2*w-PP%8B?D2}6 z8=p6MC#Yw&-@=73M2PZ1U^#H=8BEYHXu#wz3xxd*;lL z$enxSvrt;3mj)?r_7rY~Bcc7$WGe>y)X6kYh0w0Y$=D||@b!(!rA|bK3aQE&B>WAL z7bTZ!)oqtcQy=QO>Qx7f97x>5A|ho3t#Y0HfxDr<3^W`6@nD!|g9ur@BfgA)9Pp_K zEQ7Ggq}XrT@l|548P-yQ&C(G`Q&^H~3&mAK2wRLjkH{ocvgM@nF?@!o??mhpo)SG9 z+dXjg;rw?TyC}C@tGJ37_$zC2aE1FK!IGfEyoJO%g9H5Yt4Jg<4<0d~d?0dpxblHS z1V8jNLcV=)=u$ZHwIC_I z0oV@z{6U!NT{-#$0K=sg;?^j>f)4!<(bGCuoW1PQVNR+DLMf@J01pS+L3W2Bm|9Y@ z2O#XMpECn1$ZhN(9-ixqnHkU$bBrG0N`59rVD?me+WhXERt!$M)X{hE6k8)=A8a&WLL zVKUROQf6vc3}*D!5fV=+uqQo`wSNt9cNR;(br|)rr>41C^%mlvwav|RAPz97t9n@v z&zYg()>J&eWeS4yf<;k<#mkg+CdPEK$8k6d-yx*5u}Fo~YLw{0;P;JT@mm<6M0vk> zaRg2SSnM_=NB(D+jUwgxpGrvrUDk$tKdnzTfNltx=c1-&z<=5%0i&UJgIkAxFu@OA z_|sNa+0#%0C9~Nh<6V>yVWwHSUnfGz(s8Br7LP` zdr$HoJ4Pg4*Ge-*U@!;g!s#N6NGmHE_ztiJA%)>ADZkEW!1Ya3K%H@1#O++}0a=#E zrJhcqj*boxC;Y(OYXz8RP$6(^{l7zq{ETgZ5ySDvNj1-|MTEf8v<5Rz__Z!BOwLC! z0oQD71|D)+xzzrfc}Z?rBs|qxbi8ab%h;?!7m|W z5IV4-;RcAXPzB6ZCkF@j&ecpzOnk<*PBAet$1aEG%k{)F@s|;7_VSJY|NgB6|MSb< mFu&21+k$3)Z{GgII-RbNtu={1iTEoPNN3Ne$mhsj^Z7q^u>?#2 literal 27206 zcmZs@2RxQ<`#*k@Bq7Squ2d=^MRu7LWfh4+DkDN9AtZYzQBp=$(UcW3vXX2{W@KiB zj0(T^`T0E0|NH&_Ucc9~J>2(oo!5CD$9o-Dp!P9!MtW{~3WdUWSVKjZLZR};zqZlQ z;wQ&>R@(6&wB|?ERVXXu9|=WiQ51>*<*>>@{c8_?bh#SqpZ~gMs^{YidWNIaH@Cl{ zySa-#J$Acj!p*yG`GwKpMJGk?-93NYYVL08;+=d8RXfv^9pCPT$NHr163G)xFs**_ zW4kPa*vDVXedVq^RH~+f4ECOij~2IF{Pf_@{pNi`GD8Zry9I=(%f%R)c~j^X!@SHm z{rcALq-H1=>*DPiAGs;@7bxW@odWKYyP5y@?6$hYueFVruhU zj~qF|s<4?oSa3sN-n-C{kc7lUs+Wcgn>TO9A6r^F)jkqy_Abz*mA^Jc85tR=uB%hE zvlFGKr`I+#4Cw4MW@TebeDQ)gB_)M?u&r&DuDO_`~{Sf92^vrkoY!T_xUrY`Abe(TH1!THoxNH;<8UAQEtoO z{9M)F9m&_KXKmfOwX^Y3e~+rFsXKLM)eTZOtxTTyy&LX)J>Mfs5zPCFgW`x3;!cfBdLlom**OZ7nk2 z^<#XzZ*-KNgM;ImtE<`P7iPm>UT^E^+xsDnPgO}cN^t*v_Lx0qD=UIIST=16e(-=N zKt)LUz=6_=3g4sJ92_yC`MJh;B&T!j1b6HRbn7Z~-B?jkfu*bLE%oUeA7{LL`Em&N zK6-38cmK|j6l?(*8E*2CDMw@M|IFD+N=lY|_z<+SEc8G-+x*@05?s1^dXabTyk{%A zb&KY4N{V+tZ*Q-cS-D^7*RPv4tA%*Q#hs4%(^U9-IAr zgrKsR`Fpx+*RPuoR7a9Wx>@c`B%XC_ET@XPdVj|oo4o74M67LW%&=1x6%`j27tP9U zt)=fW(466uyXJLe=##^Nn8-*f9E`mD{2c=iLPHsQ5~rUO7V@lJyVkhdr0^QcdVYml z85uj&!ujgHKX~(IUr(aMcv^uFPBF{IjXvf6>-&crlgWc;U}O}(@R4)j&!6)8`t?jq zOi$=i^qxL>a!6mFb<2jeLPA0jv9Wacs)gUb%?Cf;i`{R_t*NE8$fRd$VPVlb)mJX< zz99DOOcA}el2@X#zvF?S@$tH2O^QCoqE&TqQi72h0+AZbyu6GV`a#~S)|mgE9f>_~ zMYyxGbAc(!YSVWP@8Mxf9=U7GW@ctMCqA!V@4fIb);n+4ipY^>>MOGd1mt3+iO@6(*6 z`FL_FlaiAiMq75=^7ZY*akjU&FZp9#?RR(oG<}JC zAy}!p`}=~iSE}E?zjelKe4mWNH=5SAwrSooF?a9WQ6ZnVy!2-yWqNk@F}~Fri)Yc}ftgLMG%3^I-o@b3!7{x;LSHaHuNbz`M{OIh@71L zffeRPh1bNf0e|vxH@^zoLEYHc$jZ#DM8Sdw-}Lc$+gX{q_pR%U@a|KuZq3fl;sl25 zzw}uN9l+hcRWv_8e|GuLRp^nLpI2}FGMK(`LVaDwRNkyy87>#dc{Sik$VG<2gbYJ z#(b=+t7&PubM?pPgSr`aCSSy!w6?TNQ1)kvEGm*mw`%lb+$7V*q`~p)uWBz&hlREE z5ft5Qm&tBX&$5?0YpbghteR6|(4J%Y|IS$nA8ym^!DzUa7tjwai_?~usu-6t-XhR|by?(f>Y zyQZ?z55;nUNxif&S-s}-=h)7!t{n;r4=3$onQli%tM*rgZ`-|lcSnKq&ad|dj*kT? z9Xv?IxM_a~0|UcwZqE(9tJ)0f>DR5x=l}jGUUremA9tQ*^d{)*2=>LG?3PrYeUg$5 z1={^V%uQwwwRL%5@h>NHPU9a$o3ze}2x(Y$*7 zIvGX$1Z_pScjzXm+ZtPvP#&Ul-rBT_ZlL#d`teyjI;VDOvNCkj7WUu~pY;m8vUeu7 zoSI!6dgR&NxoT^UT~`p!*8@4%4abfhJL2i3dhMF@u}2D_DA0o2wyha!%kuH}UvqtS z*gHLayNar+prBx41qUTIE-v};W15E#A2zhNS0xpB>iObvlOu{d8(>&hBbQ z6iWZkDt=GXhJ=Ik&Gios_}sa3XXyIpE?nx1b5%^8MQ$8nVPOqTP1KaXI1$@-?Yb2m z-FhW!{fVicKTCg&wf|h2?>V>pcd?{0f>X;i7vwb8I(JC3r54o!x!R&DhY8bGJ#s9bic*EuqMUg`fRv zDIUv9k4deKk2^^EhX=&;wPo8|7k#U$(U0$AiqJCiHqpOtL+@)7JITz+x($!|V z4(>E~wgvYdJy%zEZ?8XK;ZYTpHS77dP@Lv|obq)A;;X5v%c{8u#Aj6z#I{@XEsA^1 z`sevpD&c%Dd;9yFEx3So??guaG*<1!w_2E*Dxt1*X8-K3YEDuOW*v-E^t=(ZDaYuw zf_~qh$#>gKOiV;XMNeAGIygAcc;P^$wOoDp_>=2AN?|j=LBMch*q5izi;qx-`@h~C-w&ePLZrvzOs zxa+M;KiXJl_MUftH%u1Jgq2BGW}V`HGF7?hGN5E{b>!wFwimHE>>VA;V#Uwh>G%}Y zvTyRp(W8C+{pHSFzb3jio%LMWhqG;d?%bo_@iK1RzebwZL~T1>i4Jw5!_KAkF&%IX zIW1IFR3eP;ySn0!7U2IY@8~G_qENdH0P3J{11npJ4?=ID-N78Z{`Tw&7gDcma|QI>7h%sTiwC#SN+d)1>a2HkAaqGDpq;^N{F zH5MY5-@diVp`&=ce*OB$s-M%-H{ZQmNlv(R$oKuFKuzKDn6ozmv8WWKBS+`~U3}Wl zdh-7K`Lmw495{Qux_TA6zxAcRRJj)(w0`?$&dSOv$<>l__T3E%U+A@8c^qfmS%678 zdu&hffB5#}M@UPHeoBtR{DeugvDeh&PZ2JmD2o8XVt{AtLn9)tLjwc1o;(p5`2PK8 z(+!uVfzR0m1>6*U$9>iPDDP%gRy9jS*a#PfjvhVg^ZfZ9O7GnG*X^RBgWa3NOettg z-sh^qYI+YK1ZLwV#+icKrBy3Twd=&hM6vTTIQ(sVVe_ zpB?-0{Si9TX+b=ge&P-1WL?2!1G>FoWDu4L=P7x8^-(u0i!ot>ZS12p424~NPqY(*g*)ou!o@BChGAJmBBDwp5 zW|fUDjj@S|ATyo)uhA0<-&{q}1q=a3Zk4Uv2(4OKk^?)W^xpFy$ZGu@ZltT5Bv#~5 zKUm{nh?=sNl6Ph3n16Op3GMn2huFtjz7GmFLJ_KmRB{} zHP?CUBmHi~TVc>W{^j)=+yzjNK>%7)%9q8rhq~kC-Pm!^s^Tse*SKu& zI`mq#7Y#`L-HoLeKpH7qS7v}#(}C-)tVZ;Po@Qp!BwRa`@urz}J>TCJA#DSL_JO^j zD02S5jJwC{KGa8s9%lB#+;Ssc92rcGe)UcC%k`Sl7j6KRiblsz+f3(bLrhbN@S zZQlIP+&F>h?H9XSr=PugwIP^&ciH2^QTIJEX_IMocd1>)ZfR<2N&V@+>Z?V+`Y;A$35@N8l3e|y?_6H*JxxhYO2$>_klyp#vUH>T}5uW z74yx_%^8V_iC^X+M3DY*=kDFP&T$cCu;)&9Qa&Z|ahBvv^r~2tv=n zo*RE`b2-<|qQPX}Sh^ma?c7^J^?M-dxPDXHYxIlAw6tx%Mq7jJx(X_6c7^)(ro@~{ zLXV!}4u0ZPRTy<7Wp?827a~E_{)6%Yl zuReLVx>`fV<9D|1SIaCEyQsuOK}J5gSO75K+JFFab93y=Ab^?tG^?x5T{|Wxx{Hni zy#W=TSkANdYc3FKfL0NdnVFe>{E_kUc-&F-vNz=ZDPcI6L_>HJudVhca$eZ>vn-jG)pe&`4UjT#YF>`hUHgg(o5X9|mrp$f|-qQsB zZPVymE7;{{Hpd&;Kv`gQ>puDt{YI%8i>esIdXh^PHnninogw!rw+652@(TJI!e|@9| z4b(Z!STQQxnT>=RRdYsq4xL;EBxDvAmI#e4(r$CRCc56P!7pCCc=4K>+qs3QzNy*S zTTxMZQGL?|1!ZN2-d9y^{#PU<9}WSH0s#lVTHe#cf8)lDsp;v_h*GVBb&#i^z=0VP z#ZEzi58A?T*IOxw0)A+R=b%N|U%BD~<+rEriN3S*K9o7%ygX^qNt7hO8!^)o3Y5B1 zh-l|3L(F8YP1d|q?L9m{*`toHCGu)*%$|O9_4JGk@6=Rb9$wzQZ{Mh;q@+MpN*Wtk zmVW>8!4~Z?%RQ>2!-C!y0gbU}!^VyIL$zN&e=dQzSnkK@gVjgN?i_jdS|%ei^L^`6 zgZu}NovK73e1LqqBM zbs1n+Xn*++Y(BoNd|7o*RJ|yuBnqmF12oUh0_Sysfq}+l` zNmU0te8~48K0co@3=%Qazt5ac(+vIqDl~CDI60Mev6oj@(-Bdt2kG#cMex<{7nAgWnzVV_+TwD9gFR=Yu7G389LM)PA;w;182hd9D6)Oc*bnA^7uaj)21zPuhR zb?dWzsT+qMsP>+MPZJRnQ#H=Q#>xt{jTwJJziIsT?G1NgVv^z2p!KIef4*jFm}ahD z(HiGqcDT{!&*@$4%3C%-!+-QgM{2nN8{f>*G60Ka4i>op6!{Yc8Kz9;*ZB6~V~=%r zfX}mVa|Z!#n}7ch4JN0AmB@{>`|E-$2M-}=(qg!#l82Svm%i(mjEoHA4qt5U_vYfc z8=GU@{QUenBLnW;&;jzN!*0Yz%14Nl%{8zkK;Jlj5`rx+R>2 z3UJvMmwOb7%gtijG|ZOP9}!}uCvXz9Ec5I;h-92>*M{OTxRVffB z;RWCdeudBAnQ@;ik6K_DpL`G$6f<;9xH^NfJbqUD+ISyZsN8~x0pw0J_#Z099QuZa zbns|MX(ftGPoLPw8Pl$VAB36dUO z_Tts6GN^LFz_9&gzU$om%bgOy-Z#t1@c=KLcWIP}-!R6cn~27}83jk`a<|2%5K#f> zKX!I@*Zz#>zJzv0Za2p?Cg6$gxt^c1Bh5ddHB9p|H8Y`EpnO%=*IyjE9{VXIexvqQ zY{9>?&By8J=?_8l!sU*?@c46;Xj~BO*eI!b=hUg?V)tHT!r6?BjjiVU*t)hW|H>Ml zHR~&%oOt@;>bMd4x;fmVyJg{PNJNbb-($`$F!{SON!(*lP*6yJ`t+R3&Ald6V@%&j zErd253S7tt@QT}$^MC$snHvoIbM9sSofOTOk}#TUMCjy^y~=>f0|#zMmS>R5uDdW0 z0OK3Pld?7Z?jl@@H@`)qr{`oGT7MI{Drpo zV}-G~^~HpMb3$^mkH?=mK{WBM!AbB+i6q=#Rasw!_dRf;+Ot0?MKvjs{@h=d~Jq02bwSb&Ni=jAi&` zti0JBU*|O$8*wO6-`7job(WwFFEXW_K3(j70L}ypE9-lg?F`BKnRMw~Xo7?g1E?G{ zHy7&Zb2{qz_vf0qiLPJ*xx3z1Il1C~8Ed{fhVaRAqlyKDhLUp$b$Sy>u~%^LI$_ON zuijo$>;um8=eiD#YHVi}l9uL#2ws8aMZWkH*gNbyBGluMaYmb}Druo=o1;4|j6QwN z%)wFRBHdMnhY0o^Wb*cEDU@t-$6LuAZx*7Vp&@%5Wc8v?%yu2ra|i_Dko(+Mmge!y z#h#Q+aGrF5DnU-Y;_}b+?l=qX{>|sL9?Egm)YRm`sF?lr3y;)H-xvGd1%$Ga95iT| zz5g7v=B?wM`5QqAKfiU^gN>M59;CEwD%|t$Di}!rd?X5a_U|j?H;O%Ez0}oN$rB^s zptmv93MFZ3W~Lk};pt!Zo^9W|cO$6cd&qF&)-Bs9wrAIL*>;JW(1^z$0*tzM?_T5S z%SF%QQwRT>1!x_u5BYy-uqu!r4}w<#;y(9WdV1o~h&Aw_C5{=foB)A4XLFwUOk_{Q zif+EgyVhw#YHBJDa~Wtk46KZk9+hy$(HWWPebF3OQD%R9*+^+Xv-ub=JD;!6+#Rq+PeeqmhnXiVX=B>cM8oePGN5?}j zz$qnNUAz?dm?cHe6B}neS6(YLp8#a&1uLq6Oh>zC&mQ0Tjsm8F+JbWh>{qum)cl<) zUylnh%(?Vs?~t>L%g}x`V0wReji6k4^KovH)A$g!Y72>vj}-Z##V;)MhsPDJWI9LO zz3T^kfP@F2tGrLuiqk%Owr6)6Z}YDgo4t=!q7g&3g3pfH}fxssac~%*gmMu#}cX&Zb%GlM|?m{;+k38?Z^ruB56e z?R2JW>bh@#GHIIkuj3GX{7~La_sp3yaWOh4Pqq!bw>NO;*jgg_2|a$9aIT8 z2tqL*BYI#d|D`6k8&)N=xUNteShlsL&k!LZ~4y zlvA7<9$&fJWRX97&Ej$$n;21(GN~SSAG8th0n+ExZ1Jl#wx3^6K}a=xVM-MsPy6H) zoby;kPX(e8`>x|a*(pzFJ4qvR0BxR>4$#kqK7am)W3Pb%C|;nt=WnI%SNJ`=nU$U0 z9QBL0DO&IB+4Ynkk<6&vK}sCGeSO~Hip!kjXe^_8o%9givu8aFlvNZS87I1`aDIae z0fh%#CU|Zv^sRwhxys}BtkwK9oC|wM1C$bS6Sr>Ny1FOIY~elM)-F|S1_piw9ta@; zK|$8-f%VUlGBX*_?WU%tN&zuQ^ z%wuU~^^^DZ2R06l^43~pKqE6q@}#R< zhqHN$Rs#iNOT7m%VBWN;)Hcyt+-`EFFyDuOZ|DZV@H6smb&lEgnqjoT#l(9Fm` ze&g^52tc8e#ebN<{3;e_h8CIBGj1#|UVNb7!HpJ&#`E!tsXBERVQ&uO9s9OGRV*p- znzWZOMvwzo`-Mf#hD)Dccw_IR?ltQ=*mV#T16B9ubnTvg^v*%q2TWUIYQ^n3rLh^l z0shdhs@%&kqnRn2E-SWgpQ^StGnUxbbLCGD3UMON{7u+$c(yO7OT=ukS)|0tQH_`7 zz}b2a;>p(i7B|hl+vvTXMZKCzti0QP@MzZ5w8xK4!Qc(wUfo?-Sa>Zc%uGhAjkhHf zidPx>9w5eN2zT(9ihqypbpKhwfp&atVJgw+;a+mm;bdW(tiSX%hw*apqA-3BA{^B{ zO+Sz;DOppwaM*R5>2NEYUI?v=p$AD z?MDX`2}edqQj!&q2Z*5r73CC_Y3x<9sthcp?d=>WtDqP#9Vir7u)B=1=^zsJ9VknM z6X_38Nm(>z)wS98`nZ-;uP^j9Br40PSBb@Q%Zhi#TU@wc-jc2p5Ew|s;;@=R0j{@4 zAb~I0<_zGC=$XQ5HuRm(Ya!~wL9WD8)G;>xcTOv!hpc`^<^CG zC;1ado!^EZiXy6_n~`1PBt*^92Pc*jHo~!oay80M49$z=Pr+q-yIgvF^+mo{R@sJW zZ^sXA(QkIk2?z*CQ($^qT+D~0$ooRq8T+A6yliTfVkWD0Qg;!tfKb+Ak3WZ$lfT)zBrvc@74u+G*je^Vzn!IJWS)pg_>FKIvJ4B$$;(GZM0j!Sy^t*(4lV;A z+Vb7V8yo~i#{NGKwHLNKbL=@20_yzv%a;rZNxu;Z{GA%8!HtzAmx+mv!7zFfbHQH! z$!wwrqF=|p1^E-OwBCW5sLwgJr*nDaL(OF9aiI+mK*Eq)LSZaEEC}wVA2&mdKhgTO zF;In5#%ViY>4&3)f3k9wJMp4o=%Eoo*Qf1v9pa^UDG5+0a7ufD$=v-a4>*sqx(J;H z723{fj!l|p-$#v;S9b-gx$i|>1t$_DX3%odv$9kn-6F7{ZDd4`^4e_<=kpd)H~$&*PC(?OVEr-PyD#inIuYW9U$VEugi_*Nsc77`-BN)LWVhuj_r%kb!^;|v%V zvOEG35*u)G4a%g>%g74-6R7*}K^xgAXnMl=2C!zPxbR1j8t<17{h`58BHWpa>)Z%LA&&`Q-;UJcFJ{}5{XVi z?%@wPLTIR#mX=DuQ6S(5So-4ICMKt+9mxWKhQlu<$_1oOlEVCsHBW-e_#!7~@C<=W zQBfOz&rjA*@M`OUWx;Tex_$e0#O>QRA=^wu$HmolaU?x`T5Ic7_wi#M;|tI(~dfE+i#okVw*i5OeOj z9gvYJtEgCwGZAlfNA@I5!A%I-TefcPLp&qT_~1z;<11Hs5#LCwGRv81@9G(2&r6fi%RfC(C2}Q^tT&^ zIG)hg7l1ekN=~Q>Ii^StwY0V#+_Iq*9ro(gt1sTX@x__S6@T;cr8m~Tr%&oB{8aQ) z`iiGNqwnAU;34Ja=9UMQ4<8LI$GQJfY{A2JbJc{`fVyzkgqS z{P=OA9%A_>U%Vya0}1U-f3MAR8reW{q_}CrQ>RK>Tcf{jzgkxG{UkV^hL)D9uI>hd z*$o>u0R7Sm0Sm4IfS@JeLm)mNpw6WOa^|W^UEpS0wr$%iEKG|sM)X~6V`By!-`U9? zek>mmH^al%(=u{DwqK}~6GBJ@S84qAs#f3D=p&6!4fcfa9N0ioi1;mHUiA5Y*&&BG zF5+Oy=ynLIK`~3n$XE;Ik$g2ilM@*&mv`zeC$L%I2)%#*UI2XsMVd5nGi&Q0v|JJn zBq}oCJ%8Uzp@c9}qEV9m0&{^hBR=O1=%W_K6Nq0vhh~S6!{l892`5(Qs$fsOe-~#0 zLPFjHP@&s0!T<*iBn}ZQdqsqCN?p>ew?Z)_{SR`h1MELkKn7kJRw5G5%s}^a$(0_x zv_9WBEKch3A)!bmp-B647hu~mH>Vx1LeK0Q987&;Z^)v}0ojpm-MVscUT;Xc2!ujU z5&{&1FzXHRajgDi688D~ii$qy6=l#69FR}7w6Qq{40~-PB}NQBpq-nl?$xXNnRuj2 zY!^@?#NhmCf`1Y}?d$TNPdNOvzy#9h83ga5mPkuWo05Pe^c93`?|gBm59rf^a_4*N z78FYUwKUWc;^K(_2Ra%WbkTS3A{v-`QeR&M0VGMAwp|;7E6`%WTfz?=I+Tvw-{jPt zW5Q99DnjIMBC)OR#l|++JTFq)cI-g{Hgj?LSQ83`<#@-N7Q{oSmGCGbMW}$*f3Ot2 z$4146X5WC;cKGz^IAR-qdT?NN(sLyaBJUzosFfNN#-+Khmx*%$8cO0+by7W#;cyfy zkJ%$ToPGV5^{W>z1mM=dga9y9!%1-WZyQIykOAr^Nu2CAZw~KbZ~j&hykqz7tj%#| zs!AuI5R$wd!fR_Op*#m}15h7@66x+=Dd+$9uY1R<^Xu~S@|Y>yvR8L4VgJBMDsizK zza1BMbn@Mb0@P9z5-yTIgQ&X56ucOYbVf&^>t3h~x3LaI%fH9&Jb2K_5_~~b=?lKi zw!82i%8@Fu))ZG}6rF&95k%}b43FpD-izCjjf88AV%G%E20xtP?YM%=O<~)4G6FpL zl#x*rIGs9t8+ZST?1bXthj9PYQ6&(pv_g&rW>V4IuG?%>=dF+!M0h3|QiwVUT!B#O z9y`X=V2bzxY6#2ZE|Nn+o=6ev#-fMT+{QlqiruoRRn zE?*{ERyNXUIS-#=^|-$E_t%t{`#@oViJVXreSc&Nss?c>DGhMQTjo~WkiP~`yh(lq zfh>d}t6elSHyuvVHOjT){RK%E=E4Z%iu$*7Ab__ug^3k+8#>S8!h$vth6Kgsr11a` zNJ~fH)RIl$?w|Zm=JC=c&54N#lj-+YUC`B3|0i)cHsf^kl~98Yw09B_!*fR#QY(I0 zMX*@`3zzLQqKj1HECe83C}+=qVviWTv1Y@rH_EdYoct-q`y7;xq#}hP11@}e@TFry z9v4>zL9_;XJ+iHzyA_j(sWn@+Zcjr1%jV%ypEZT=A?I4v+}=vkVK^v-7zFqP22@jB z9Uy${fd=?s{eAg+UE-a`AUnoNI|uy`Z^^*VFf((1gAla zAb8ffN(NGs5eipRQxma~g(94G{81$WD-lsqKF(uprO*(O>rrxYk|bJfL_|pu!36j} z5ypr`YQ?q55~T%T8}l3C2YqPY>_|obC-gH?j6r+WVUR;1*bmoHCVNRuC_ z`J3_NNm=A(^%pLaXXM-$n2|}!x?%kToAG~zC5;6Q+6%{8pK?GpdSTrvLTWxx(Pg_F z^Q*@aLZRRsfq_uixv&rHw5g#X0TrMFq8(KNWa|x$$;&-xL)dyGt_v~@d-oWLBf>wv zot3qlSH?-h%oWIUpub-k&E>{~p~%jiR*Tc$2m1OBZV0@Epwcj)dA4~4J(8kpp01(J?aOhKL!816vIt6Djm#ST$B&-Z~E?CgjP<6|TMBA)}Yl(4bAN z1S@#Nc5!hr(%;|z$fI8~nVoVlDX9dYBgA5t4mA0p?s4Z0;K_|ny*hL-(0Ncqo<=w0L-FayH*fj1dvkq+F6qs zySX1MQLw;40$Z@%=MA2 zL4kpZHx7>dUvPVtkb#G5H;X2 z=3AjGr6Db`<73*h{}s9H?EIQZm#u1PxgBMC`xajj58Q*KRnwM#H9U%zohERt!E054 z-Md364R?0vb@0Nnm!(rWTBC|BVu9iZYFqz{B7yY9+}zwOycQA7=g*&~VT+L%0SINNe9%pA~*U-Mpx7z)^J}O4@1_ER7^X5P9w4|n{M*daV%&31!2?)AV()rU{Fh`QK zjdR<1Pd=fT5#t!-3`d{+-dGLA7Nwfvg`W0LFmt(j5bdAhRTg*{nCt9~#nqS^>q|w# zhy#-he#WBe7^rD`el{GBA_+Y%ZV162f9>t>u{-YD)|Qk|P|%gp9D|DS+oo8VYxGD1xB z!otE39kh)$L5QTIqtot~KHb#zi-&__O};UctgI}hZph+_$9$cZ%?{6pg@;lx+J!ar z92ARzQ{eC8<5Pyz(QjxEOgOc1sh?aqI5`6V@2V|crCxqgAmqF{Sv}m&aeMj^T;D9B zo(`U9+iz>+>Uh^8rQLu4#bI@If45>SpnI)&nP^G_eo0G31x3Tc&JHnY2q=;szn$(& zT5X_8fIw|8PDy|`sJO3f<$ zE64fPwzRi5L+i*-t{8N@9RSjtS#!qoO0&_2M^z^Ij=oUL=RV5XAiP9JK`dXz>^GdP zwUn$gMMh=?1%q7x*=xPZB5OTgOeEwG6@&!l-2H#*gR}`@@{8Od8CQY7rHSNi%j=8v z!cy7$>>Bu2N9({^Ut4eKcb3TNOM&n4%S z-Dg3%$N5e*lDZ0_Z3)^S35R7A1e{8HSmKiCw$pkv52F=yhCYEU-giy5e?lgi&BCxTBVy97x@|#wvw35&G-IwD`@7F&WJq81X+!Ochc>_5g|O zG3~MMj-MGd*3(Ge4dDJiYv$HT|;oQM)-asIsgKEKMOp9|6%$LIQ)5NV|lA~W&8 z*KpU3QAY?ROKI#um5e`7F&-B1C_r8kf`|&9am0qWlpyaq9n-P|St4DT8F9sy_xRg= zi<_D0dI6+RGYhB6)?wu>f@k*6^~Bt{vt9G~lPA<(!u=~xY`U?NSHsMzQ)u*J^WIN_ zLWuBv-9cX(1<^pvqM8}XoaBMT-*RL`?b?(n=AHz>F)9{yb#1O3s0RcQ9A&5KqM;^|NadJE8lH5qD1K%M@k%lnx&_&F~&^Nd4n?zkaU3f4O7t1MOL04E* z1lz%=SlKG7lD`^wwd2hL{u>Ka5OL%7+gi^7!%S}8wTl5!?h|?a^EHelZl-hkbTFLn zG89`V9ODyqwRLr?r_zTrqCn`}F;_&q0ettJwolMx5&j6q(&M`41h&d&4Z6;xB_vQV z^0~YCc}j{e@>W73B1g=wA38*hKuiASZ)&5lE@?9W= z&bB$gJ6<&3U5=k7{78F)k(+*q(-oMegJ77^lJn3XO$w|Op_)fBXRjRi(qUEi9U^i$ zt~Gz+Fonz;Fy`Q?3SD zx)C1)TGkF2_> zwi8CK$2>fi^d^lk%y8L3{>kZX+s?d58fx#d^WAl(stH9<0qPzch=P)I5T5dYEYF*E zLz;(Mu_57yeobX;4`SPS?H$(-2Rx5$5Hx_l`_C)?h4=N(7e16Hs>q3M1^$jGMcb5P zuDOWbfK+7Le$Ba9_aR709UE-+9dH0iDg#@c&s|J>Q-Oghfb$n8X7~;`-U75*F|Jj# z@1=}>-WdDeEC5OF+OEwiCy7yH6AlY7Z||Ia?_YUns_E#SjIL144^4OfTJ3bMkJ;)fOz=o}$?e--m`~cyHf?rcBSkK)jjX zNF9Oe1x-f8UcFsok&V<5#^kr4FX8EVJPdy=lt2o}EV5)#kgrK(0uwkZ#^JgbWFA2$ zH+p@65wedC%J>L&dY|lLcIcr0B&)f(b@9_3l9G4u;roL^LbkZLxJ)GZY~zpVqR*t; zuIJz&UbM3O=W$k+R!&Y19@zeWW@~q)uEJ$D!^$K>pBU92!J)h{I5X0!dgF#d(a*|F zNcs@O;WYS>3!!hL5&SBFe|YL-5U~J&>@-J3ke3Lokp>Kiks}NDWVn32qWkFiJ0ZW1 zYG@>^UdK)bZ13H_--IeAGrqPR<-YNs(Ktq4nLB_{%OBNu?fnbuFR`RV37-wx`7^Ut z@w5*c)d?`sWE?VW5|YNlL=V}%eR}{euhRyYa*t7ZxW&zBprDNQyi>IO@+zfv_GwjB zRWs25F%t{V;5dwE{k$C!ed-u<2GtFK8HB2Y;4zsn2oq_^q?-a_4nyY}L6A!rkE*G? zJq@@w9x0`-O=(rzb>Lc%zl3yg2+g!}@IYLA=BEZ^-`!L7u$mHb8`Qb2v{guj}+n|0oKlyUAv81 zN%AkAo}TznA%H`uyV-zeSLHt>y~YkoLgL~&Sle0(5j&w7kDyy$x~TfUa?@(q$249K z9z3vwEC1n#wI;IDBMvt{UP0`$^4-ePPT z92syBqDCVPL*lM@?S=W@g`Y(FWoBkhXrVHd@d*iGAOqv5!2!X+K8Cw05l88roLrAf zN6d4E*E|jBzyKg;NYMh&Z$SCUz#Y1t*p)#|J9qP~N1*0uK^p6WrbT8fSh%>8LCqT<*Bq?z8I*3<-tn}iR4FV*tMY0O%I>`ws zDHSex);nZnH>rp5lp;LH!p7!1I%*C5xdfSJzynUe@jX5nF0|+bL<_=<4s?GAi+FEG z@A!BqMg&YD>S$GN*Oq_=7YtEWMNQ37d3x)%ZSb+l3onS(fO$Gp2E+s!pPzll#Fdcz z`svfBL)H` zJsmAFQg3}yB$^%MoRHL;Dk3D35U^p*_M;ZtH?Ng-z2Rx-sKv5IWZRaW`LSq@2LlhH z!iTojOVuCPR*!K5A7rP>5uJv^0$1_7jGYgLt}6hl?4c182LKiH+EgzUae*uGm4$ZiW;~S0oO`)PjaXDr1YsuK)1=+Faih~{%UbMlj2RzH zI~K1ndh5_+0E!n&po+%f!;y;C+~v?wYKCYBXBH8uuxx$8hWBqEw{Cjw+$y*+lC~YJ z_<|SN+4@0xf>7&WL{!08iNpvpwg;$!Alw<$=;=rMHuemUjDR&CJoA^lO=NhO4!FpA z>Hv_C=w%+88}z=17PG@JI!~tF5EuH-TRvcNFyQQ3wqn+hLRAKyu3;G zwF?#&y*TXfiIwc^p1J3|;D!JOla=;Sy?dNR5G7I3d@j2Lj77FHk^hy%4`t2E}S?n~PpA z!`LPbT2iyA>3(JQ;5^!}(cW-0gR;s>wPnwGoFjJyerP|vGmBz)Z44H;jAjO|_w={3 zFu@`NaIsv$w1j=EdVY4Ao2wk68x5maHNUj*|o&9-D4j!clYu5coFs3k6QK~Ly z&A*zBYWfhG-$MnDAd}_Pb2;}>O8!B@2dV%Ts7=@D1McIH{#%7sCVU9=)}*=ttVj-0u7V5 zg&?IzDiZbx!YaEiKeoBSfiFfX9-AjCV;f!}LZJX_(x=5EtBKJS3XL^s^@mp4#bk&eTanS0^K?cx%G5nRs~?Pel5T=9 zI;J5Ke4Scl2T9D<)~>^LLtf*i#3Q|}{H2)AZXhus{5_yMz-#VP~vltWBIHSLoLd$O^qYG+b5%qx<#Hpkd@p7R)cyMxB6dtj)9 z^qj9{f0mrSdUP#wif~j78H2#I!#8Nip+6_qO*4`Bm`<{qAYS@{e713w&YG5u0w1*W z^fm%&slhD52C8!AnP$N{nIWgJ0HT&okrWtR8=>Z{Aoyhe=uZbfJC?1X%1TNk%oB(y z&^}xuboy^H8y3AzW*PayP=$FOMz6Fg`60zha`+ibtBQ-aLlF71qjgLDh$fX{;;;f= z^Kkf&z=Py<<&Pak+OjrXsJrhAT+#W?^U;{}fIJ}cMoc-;;XMGeVY<@7CHn;5W`PafeD=-rrceM*DFLkco2d;Cc?0y$<1TJDICgij)A0} z(OwWu*9bkl`6mFIIbyG*6DlkYY<$+AE-S;U!o3H{lS@6tyl_u2vD7QWr!b8;kQJ{>#(5XS$=u76$l#_!1!!)WDJDQ-l*dWrFfz&oSfBI zZAB-u*mPo|J56+Q;VG$L4y@gw`fN~eaATtK8bBRBWRCGALBmeWI=uKu5)1zc7EInw zgh#T6Xs~!Aiq?kY$5=i6AVfLfGLUf>SQE_wcX(u-hMCDw5;itQnD7`M@5febP0Gj! zgt#T=I!zoPY&#oB$ixss>i5WMr9hqIUg?kV3jipzFp!mS+xcP1SCOE@9`r$?26Iw+ z;hL_Ek0n(05xtf~bAfp*|;>%Kkj;jGLvMciNyhtSd4C|lL#sghp7J!vTRaKp=t zz5yxWTQ?z}P1rvSrvOY6+w=Mt@^uLv(LY_cMe^O!+$ONGT4$bU13(63AK=n`11ZA$ zGO)E>EB1t?bi;1}e9k(VnaELSPV zhoAE~cPrf+d@;8btHR% zH(C6nBvKMEWR3X6fUIruhvMbmHt2O`L{0hcM^R(#S}LKaI3QU|5)-&`jI7)k{|#{& z9fOKOwiF3&3wWBGemnQ5J}yT`6%f0egeV}B0+sA2JlWHq7*-Xz6Fb?u|NCJ}*sqI~ zzrcj|T=we7v0VrB0)6$6ndf9d=##@*S`{Qr4}Iku(%)IXe#eCo=YU<*m=3-Id~Naq z65v{E7oL0-Y=V=-xIFzZ-J`zYI=^jE! zWsW5aK?6o@!_y!y%ee8}L&v9HUW8i3*7f>zA;gZc=_<#+=H`brTWZGbX9RA*Ec%*# z-$cBEC^|McmBTF=8%ab4jQ3gp_M*+|VPtM(0edMJ+OZ?AzdJ=AJEs|1GHj^`ysPAs zO9B#J6fYH_Mb5@M#}G(?A1Dq*6}guwpC7MLttplobWA!d0@lwVdtpYO7cYehg%S0x zbw7*{L%3EH2$-v+A&pdpR3E71#3vwD=ZBr=5#hmyAU9<2RTQY=jfD+7iHVQbF4p4J z7v3;m$lKu1S8=RGFj7(q4G}L7TZMCr&bi*-@3!#cN(5-=k%xtE=*X*@K_YhR*mspc z70RHbqopxV-^QwtwA<(WE8EmUxzUn7A^Jt883B#ZKqS?@MiU=W1ow>LoFeB+BLDD; zBgUj3K%JV50Aq}LpDso|N(v|m_|_W=!t&pl<8N&LBGKXlQ%DfOT_jXcR{7WbqM=(P zpzcT4TMoPpjTqPM4$|b6fzOM7(?9ZDKEOs{Q&;thFtBWY#(Q_q*)SN1NQ#AxNi*8P zvNR1b17tN;7v$%PXA*kM6e13=(+>fXB(i~(^1@mK@K&8LuzC1qw2;LHWSAE7P|gLN z8xdr={BFq)3>Z@j!T3Z1*vVLJ=Bd}|l=QXaE@;sJNM@adk1q^fkpu5gat2xwHUdUr z!r;^$g4{EvG0>0JE_hi2!Z)dM$X=D8dgX`BH10AfV8w9eXN+&NQT82@r#jS)D2O){ zL5#XFb8%%jDsS&WZr-H8sT86F!c3BQUVh5t7iq(*%E56Qpx|EW$j)dRBQ+U=z_kZo zx$MOV4siYsxow!fWoGu`lfO>hrpB{%vvH!3QuS##B4uFY;>ap3ZjpqV$SG;bjLqbL zcRGb_EIxxl9&9~Eyr~awV^doGOG2yQ;pfowMcT^+FRP{b(7CSJb5_m-FE}HqX{35c zU=?H6Zxqh&jKWfyV$_aAti3RYhhbVlcr;{yiM%w-@j-;Wk_Mieixc2R-rKjy*!%G0 zr`6afAX{*<4`>?XvTRoCMNSVro|b|@7w8%p$i<|>kTh=}oykUl0TZ%E$sn!1$m(;N ziIm~tVc0FV%blE~YAi67aGQfdneI9MK-wEP376-{7H%I9geT!G?ni*|l5%aFR}v)`2=|b3 z#dRC_tZw5Ye6aF}hK?IXve>S~;+Bm*4V~*c5Dv0Ai1C4hC~mOOQT}|Ps5USJ5Leq` z0mhFDTqeWN0QxaZV^d}a5#x9wxBUIDL}c3G{+|CRA}gw{ zzJ}BS#Hzp?%MZ$w?GvHCRhSH!@*A46A%r|cgA%i{vZk)1TH(c1^*RL|fU&1+u1Z4K zM&`Q@&mKb@NqYh_#PPQ+J`e?P@X0$ONf+vyjlf*wll*o zMGY(&gl>*FKefcHzZXYE21q96m@s2MB+aHHj1^o|@>|az2B`))GB(&60byZ-G8-UY zQ-Q)<>dZqhlLbV1KxTuEM91YbzRc4(h?kMPJT9Ui`wMwRy=z67Y(d5uR0<6Xn;OI! zz6KUD@kM+_Mn+l1Xt236M7wBcVq zD~1=5csdTvEP~hqyxoMHR@SA892-4{tCYIcno#emKC$d4k@O3xjor7f@m^tDyvs{nh2s;lksf@Gl{+zJS|m@AEm@8*3Ry}|bdows?&~wxoyUJu=X8F*-}ke; zx8oZS;Ps}_u+YP*b06$_Vz|C?WtU@b$FQAZW{4O0>ThyYR)ax;GapFtlzpnyIoci|l#s**g&V4#*th$d&Y zYgnpUEy1u3D_<)B-;BhU2v^KE56V2<#^hfA&f73XZlM`I6Rj}b7{FgYuov(@{RxI8 zyOoy~V>Xv)FN`h+9nt(vnLfQ0lOhb80GMoLQD}OPcm(y57wos+LqxJ-@Bktev9>^1 zm6h9(q?|x7Q<>4gqMBW=Ex+H^2m>gK9L>4@Jc<9aP@_X{C03If$ zrW+aZ00f@_kEWeJzeN}?`= zgDmT#+OUDmY%9ZX&Z?KgbhjyUF-HFF5ys>agJ=S9%4*<`%S>Jf_2iAvQMy>}j6kX^ zOH{u+9(svOV+U%%&Cong>B7i&VMxAM+y>3f6lbg}9)3I+o3XqWXI0wt#oa0nF~(dx zi;}J?LO|PDzm+*0axf|OK~jd=FZ$rY?hJ#*&>0f9K&LDaud9t>bCQXqp(6PwaZyM` zxtroCp`{d-nkIb+f5E7a6Z(3*;&Mq81N+Dp4yO?FqI+{HnwCgRG2+OrJOzFTCdA=N zif;Pa+66_%JL0x5jDEFqh+rd>RK)wa@X}N?r(^fgyI{o?CmFwD$gdWpPpP2IQ99{FauMmIML#R}`PZ znkdeXfBIzaioxQz#@k1>P zbJxdR)|cfP^>PerIGp|#~5)ikJ#E5i_5CJC2C8XJ0FDXg*hn3ac#?rOK z@vxBDO{eL-$$($@!0_jA{x6=UMjpaW^`rv(%z=-Z2eo zXjZE2+|2lj%F44`oz+j?BL0g{Oc^pPJSs@Wz0D4?RN1Fzb8I`YMO~%!B8ESB@elCNZC{M&?;@I3^r0gQpJ7?fa zin^G9F=)1A?=7vSYLd#GPfk~9W$Cnom@qGr84ll?)pVi$+f|l%l9s?5KBIyp1}v<4 zUHx@&8EvDWVzmKf+&ZU=J8d~>_xdBU$Y9<^BNmTjOc{$W4yvzBr%nZuNqK&mPu0VW zcA_)_x#v))!I@&LFGEO2g79Uwz?$-_U8Gd=B&1q6m(b&t<&a0C!kc?>DkBk}3wVI` zR^-970eh0nWNuX?cLHXi!KwOOW$~$Y9W5L-Bi%GQ{W-mHW>DKb7#ZpM_BV=WGGKR@ z)OR;0DELD&qPB5BEale#$HHH%V+;_V589rZcTdNO;+jt)y%#3IK%t+@unw#F;!zCP zfJ7XSf^o4&9ts^rVI6VdfkoJ~^W;e@Wh{yTpTik1>kfUIldbAW!Svk;W@pTfEebpN z19_TGFic9h0=4Vff(=X~4K=w#^hab@-qpCF_{e;cb zzvOH{kJ0Pwl#4P%o*V9Z^VY4&&51Z&_!&jvSSS&2McM_v;T;n1pLFk{57Vo%n^ZZt ztHM53^jCeXNVca8a8+1AHAPd5Br)T5if4bl4xj^*QzO>Hbw`Heheoig|Fn~r5Mk;l3F1`(OBYF22zVwF@PP&ls*{?wRqEa#{5uWHs1c(C`? z=?OO`E@e~pEbL38H$n}zF5EG)D9Z2BM$7{JaBmTiQRYxMa`9h(42L`+#}Yqu#=5tN zom(K5<-lUaIukOQi5JEaBq#Z1DAt&ynf?YCUDw=e}!H-9$Z7ODX7Kd@!RB`7UQ0K*?}DdHK9B@+eyuC1QG?iK)^ zIS9og&iG+)@Y=e%0OqtkskMar4jVl>tJBa#XPSNq^nB@hN6S6aYy!qbxA&7jj#0$B zU{Te=2W>7{Buvcb9rF3~CA{nJv3VD$ie_E9ynPeuhSi z9ZPgq^@Sl|6$BacI_cD0XNJG(|K_n0xGJAFoirn8%L6RsZ}{pbCt$vQ{ombTj7mBnWRCJ0=35u@(TROJ9b#ZGYbAr6BN3H1Lan^^9?yN? zPjWmpDB9_d^wsw1Rc-N%Xrl;gn z5OgHio&Bn+9;h24IhyqOGU=c6^(h7o<7K8j!HE@)jmJ!w zAEuTCpcG>C*7`1)iw~Zh+D4+9Wmz?mCl}H-M@~##M+ia!c{eAtCzGE zPLOY4;1-B7L(CK21^q0?Lem05?-1Pk%C0^VFph^w!a!I;ms?D>EgEi6yb!SI8LeA} zKEBGL9rKE0Wk|~7)qHbP)+os>p!kW=Nb%*nBY3lQ&m&fqM|PRYS~C?=3kyhOTH0W& zdY9N3fkXwOn=^j8^VF2COy;?g$wM$8bg@7i^8XPSbx;Z{nA(EBeCdPmm&{g4OeZ1& zsjrv)52KOKx9VZ~2H=8d>_4KhVfeZP&P zv{+z>*Nm?0ThR;|_-_-chw1^lJeKQq0qB;E!<+T}p%GhAk#X8tuqQ~@4%}P3*MaW- z3%7JOVjUN}Q1HfVR0Jt+!!Vo#RrLBndOsHKcZCVLSKg%OESSxx{QtZ?ur6nt{g?3j zQai_?u`1%GbsDN2qF=~uubV>X14NxZ`L5VgS#>4(jpd$?%9CcmTK3yw?W8eh`mPOj zYNosJ7dxv|qr|}*AHS6vAi4x1M5wB6+?CtxL8p#HL}vO~Y37U+r~=6lw?iTb zSw^%q?wYUl?m5W71CZcv(xpK3pT@n+^qoyE)dmi3F zf03)GlO>2_ak6MR@{6y(z(-+W*_ozi$fq{M^(38+*dn?;|3H?I;k1k^h$IjP6>hg5 zcQ-YzATxBo+> jxy+$OpUozd=6`my%TrIt8lg5*_|M&Sl8eUKchi3X<44Qf From c8bfe670c1861ece8c896f09bfc846a410c229ae Mon Sep 17 00:00:00 2001 From: Eric Buehler Date: Sun, 27 Apr 2025 18:03:08 -0400 Subject: [PATCH 13/17] Complete merge --- .../src/backends/wgpu_backend/kernels.rs | 15 ++++++---- constensor-core/src/dtype/mod.rs | 28 +------------------ 2 files changed, 11 insertions(+), 32 deletions(-) diff --git a/constensor-core/src/backends/wgpu_backend/kernels.rs b/constensor-core/src/backends/wgpu_backend/kernels.rs index 8de84e4..c316710 100644 --- a/constensor-core/src/backends/wgpu_backend/kernels.rs +++ b/constensor-core/src/backends/wgpu_backend/kernels.rs @@ -182,14 +182,18 @@ pub(super) fn unary_float( match op { UnaryOpType::Neg => out[ABSOLUTE_POS] = -a[ABSOLUTE_POS], UnaryOpType::Sqrt => out[ABSOLUTE_POS] = F::sqrt(a[ABSOLUTE_POS]), + UnaryOpType::Exp => out[ABSOLUTE_POS] = F::exp(a[ABSOLUTE_POS]), + UnaryOpType::Exp2 => todo!(), } #[unroll] for index in 1..ops.len() { let op = comptime! { ops.index(index.clone()) }; match op { - UnaryOpType::Neg => out[ABSOLUTE_POS] = -a[ABSOLUTE_POS], - UnaryOpType::Sqrt => out[ABSOLUTE_POS] = F::sqrt(a[ABSOLUTE_POS]), + UnaryOpType::Neg => out[ABSOLUTE_POS] = -out[ABSOLUTE_POS], + UnaryOpType::Sqrt => out[ABSOLUTE_POS] = F::sqrt(out[ABSOLUTE_POS]), + UnaryOpType::Exp => out[ABSOLUTE_POS] = F::exp(out[ABSOLUTE_POS]), + UnaryOpType::Exp2 => todo!(), } } } @@ -210,8 +214,8 @@ pub(super) fn unary_int tmp = -tmp, UnaryOpType::Sqrt => tmp = f32::sqrt(tmp), - // For any unsupported op, fail at compile‑time - // _ => comptime_error!("unary_int only supports Neg | Sqrt"), + UnaryOpType::Exp => tmp = f32::exp(tmp), + UnaryOpType::Exp2 => todo!(), } out[ABSOLUTE_POS] = I::cast_from(tmp); @@ -225,7 +229,8 @@ pub(super) fn unary_int tmp = -tmp, UnaryOpType::Sqrt => tmp = f32::sqrt(tmp), - // _ => comptime_error!("unary_int only supports Neg | Sqrt"), + UnaryOpType::Exp => tmp = f32::exp(tmp), + UnaryOpType::Exp2 => todo!(), } out[ABSOLUTE_POS] = I::cast_from(tmp); diff --git a/constensor-core/src/dtype/mod.rs b/constensor-core/src/dtype/mod.rs index 3aaafba..429e778 100644 --- a/constensor-core/src/dtype/mod.rs +++ b/constensor-core/src/dtype/mod.rs @@ -182,33 +182,6 @@ impl Expable for f16 { } } -pub trait DTypeOps: - Copy - + Add - + Div - + Sub - + Mul - + Sqrtable - + Expable - + SimdSupported - + GemmDispatch - + RandDispatch -{ -} - -#[cfg(feature = "cuda")] -pub trait DeviceReprLike: DeviceRepr {} - -#[cfg(not(feature = "cuda"))] -pub trait DeviceReprLike {} - -impl DeviceReprLike for u8 {} -impl DeviceReprLike for i32 {} -impl DeviceReprLike for u32 {} -impl DeviceReprLike for i64 {} -impl DeviceReprLike for f32 {} -impl DeviceReprLike for f64 {} - pub trait MaybeNeg { const NAME: &'static str; @@ -255,6 +228,7 @@ pub trait DTypeOps: + Mul + MaybeNeg + Sqrtable + + Expable + SimdSupported + GemmDispatch + RandDispatch From e58d2a2dceaa8adc288d50e4fc61c5b60fd1624b Mon Sep 17 00:00:00 2001 From: Eric Buehler Date: Mon, 18 Aug 2025 08:19:16 -0300 Subject: [PATCH 14/17] Rename --- constensor-core/src/backends/wgpu_backend/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/constensor-core/src/backends/wgpu_backend/mod.rs b/constensor-core/src/backends/wgpu_backend/mod.rs index c9aca5e..d5020fd 100644 --- a/constensor-core/src/backends/wgpu_backend/mod.rs +++ b/constensor-core/src/backends/wgpu_backend/mod.rs @@ -42,7 +42,7 @@ pub struct WgpuStorage { } impl BackendStorage for WgpuStorage { - fn to_cpu_storage(&self) -> Result>> { + fn to_cpu_storage(&self) -> Result>> { let client = client(); let bytes = client.read_one(self.handle.clone().binding()); From ecb29aa7b705fc2d81fe49f83104fb615f51399f Mon Sep 17 00:00:00 2001 From: Eric Buehler Date: Mon, 18 Aug 2025 08:42:10 -0300 Subject: [PATCH 15/17] Use cubecl 0.6.0 --- Cargo.lock | 468 ++++++++++++++++++++++++++++++++++++----------------- Cargo.toml | 2 +- 2 files changed, 322 insertions(+), 148 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a3ce9a6..2f0dadb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,18 +2,6 @@ # It is not intended for manual editing. version = 4 -[[package]] -name = "ahash" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" -dependencies = [ - "cfg-if", - "once_cell", - "version_check", - "zerocopy 0.7.35", -] - [[package]] name = "aho-corasick" version = "1.1.3" @@ -247,8 +235,8 @@ version = "0.1.1" dependencies = [ "criterion", "cubecl", - "cudarc 0.16.1", - "dirs", + "cudarc", + "dirs 5.0.1", "gemm", "half", "num_cpus", @@ -322,6 +310,12 @@ dependencies = [ "itertools", ] +[[package]] +name = "critical-section" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b" + [[package]] name = "crossbeam-deque" version = "0.8.5" @@ -355,14 +349,15 @@ checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" [[package]] name = "cubecl" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1e438056cf7c25b3adde38240b89842e1c924b8e914731c82ad81161d23e6ff" +checksum = "b418aabe526efb2d2714b9282a583bccbc555fe2e42546101259b2042bc133fa" dependencies = [ + "cubecl-convolution", "cubecl-core", "cubecl-cuda", "cubecl-hip", - "cubecl-linalg", + "cubecl-matmul", "cubecl-runtime", "cubecl-std", "cubecl-wgpu", @@ -371,18 +366,20 @@ dependencies = [ [[package]] name = "cubecl-common" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79251bfc7f067ac9038232fe38a317adc2f31cb2fc3800e69fd409ccac7abc1f" +checksum = "ff2bed7d9aa1a2fe640696067cab6b0b1b40dd30cdb8d12f91ae43ca6392498e" dependencies = [ "bytemuck", + "cfg-if", "derive-new", "derive_more", - "dirs", + "dirs 6.0.0", "embassy-futures", + "embassy-time", "futures-lite", "half", - "hashbrown 0.14.5", + "hashbrown", "log", "num-traits", "portable-atomic", @@ -391,13 +388,33 @@ dependencies = [ "serde", "serde_json", "spin", + "wasm-bindgen-futures", + "web-time", +] + +[[package]] +name = "cubecl-convolution" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a324c7e684c627a2746fb4700a29b5881970c7f93cc06c66bcf3c2c5184af97b" +dependencies = [ + "bytemuck", + "cubecl-common", + "cubecl-core", + "cubecl-matmul", + "cubecl-random", + "cubecl-reduce", + "cubecl-runtime", + "cubecl-std", + "half", + "serde", ] [[package]] name = "cubecl-core" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b03bf4211cdbd68bb0fb8291e0ed825c13da0d1ac01b7c02dce3cee44a6138be" +checksum = "bc4c683bb3ed715eed12cfeca6599dc554efb9371e85f783eb7c3ae0d3781274" dependencies = [ "bitflags 2.9.0", "bytemuck", @@ -408,7 +425,7 @@ dependencies = [ "derive-new", "derive_more", "half", - "hashbrown 0.14.5", + "hashbrown", "log", "num-traits", "paste", @@ -419,9 +436,9 @@ dependencies = [ [[package]] name = "cubecl-cpp" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5eef85cbcc34be7e25fc9d39edf99ed68559862dbf25c1877ebdf4a9595d31b" +checksum = "0d06ed3e34f0a9f0a1ca225fc3351865a2ddf66966c73a7953dd27f1bbaed9b9" dependencies = [ "bytemuck", "cubecl-common", @@ -434,16 +451,16 @@ dependencies = [ [[package]] name = "cubecl-cuda" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71e091e4e3a3900faff440aec4053805ec4456f94f4acc4afe8e6b27519c6d16" +checksum = "bf8898be518bd3df9f7d0df8c25e95f07cfc087c6485dda42e99add432e3463b" dependencies = [ "bytemuck", "cubecl-common", "cubecl-core", "cubecl-cpp", "cubecl-runtime", - "cudarc 0.13.9", + "cudarc", "derive-new", "half", "log", @@ -452,9 +469,9 @@ dependencies = [ [[package]] name = "cubecl-hip" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2f8c00207517de61cccdc4ca2724bc1db9dab94840beaf4329e43cead3bc4a" +checksum = "d9afb1c085f8d715fd8686ad77ada69e364c9cc16a51cf10a7d65ad88d760b6b" dependencies = [ "bytemuck", "cubecl-common", @@ -471,18 +488,19 @@ dependencies = [ [[package]] name = "cubecl-hip-sys" -version = "6.4.0" +version = "6.4.4348201" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f7557762176858fa0357504025f09ae6e979c3547776ff8b6a1025ef0702450" +checksum = "678a20e5e38ce9c772bdd53596f2801ef210ae735ec2d7d46b5d5b675c09d929" dependencies = [ "libc", + "regex", ] [[package]] name = "cubecl-ir" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e096d77646590f0180ed4ce1aa7df4ecc7219f3c4616e9fe72d93ab63a352855" +checksum = "bbb72454fb17da66e60f98a011cce199ff0bb78c4e465236459af619039b6cd5" dependencies = [ "cubecl-common", "cubecl-macros-internal", @@ -490,34 +508,18 @@ dependencies = [ "float-ord", "fnv", "half", - "hashbrown 0.14.5", + "hashbrown", "num-traits", "portable-atomic", "serde", "variadics_please", ] -[[package]] -name = "cubecl-linalg" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75aacf86f6004c274e63589aed55c5edcbcdf1b292eaf4ce2c1688c04c41a194" -dependencies = [ - "bytemuck", - "cubecl-common", - "cubecl-core", - "cubecl-reduce", - "cubecl-runtime", - "cubecl-std", - "half", - "serde", -] - [[package]] name = "cubecl-macros" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd74622b5c8cb161e3f7fa0b2b751784ef89ab45acfa355f511eb2219dde337e" +checksum = "72d61316b8a2ec26bb554745591174022bb048dbcc508091b15dd4f6fcb4793a" dependencies = [ "cubecl-common", "darling", @@ -531,9 +533,9 @@ dependencies = [ [[package]] name = "cubecl-macros-internal" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a89898212c1eaba0e2f0dffcadc9790b20b75d2ec8836da084370b043be2623" +checksum = "b7c27cf02393d78a41ad125ef6d6274cd3930385f6945779f4f21a2c66e718db" dependencies = [ "darling", "proc-macro2", @@ -541,24 +543,58 @@ dependencies = [ "syn", ] +[[package]] +name = "cubecl-matmul" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66af2716f458eed3bd14d810d9a90bc43dd35c5464b2fe4630a711a074892fb8" +dependencies = [ + "bytemuck", + "cubecl-common", + "cubecl-core", + "cubecl-random", + "cubecl-reduce", + "cubecl-runtime", + "cubecl-std", + "half", + "serde", +] + +[[package]] +name = "cubecl-random" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "648731639463df215c01ee209477a2f233fb5168fb50e8df755e658ddaaf29f4" +dependencies = [ + "cubecl-common", + "cubecl-core", + "cubecl-runtime", + "cubecl-std", + "half", + "num-traits", + "rand", + "serde", +] + [[package]] name = "cubecl-reduce" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7afbdfe03e7e3ca71f61890ebebc6b4390494204b545e6f6bf51a43755449073" +checksum = "729e3de15783aed5c617c8c4de633765c6d0cf4b9564bb51d21dd67b4fdeeebf" dependencies = [ "cubecl-core", "cubecl-runtime", "cubecl-std", + "half", "num-traits", "serde", ] [[package]] name = "cubecl-runtime" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "385234520c9e392382737f32ad372b05f345656eb798ba00b72d2722c68b698c" +checksum = "68ab7c56cbb83e43fd1b52ee3d521bcb816068e0bb6f4aeb56c9bd63218b1147" dependencies = [ "async-channel", "bytemuck", @@ -567,21 +603,24 @@ dependencies = [ "cubecl-common", "cubecl-ir", "derive-new", - "hashbrown 0.14.5", + "dirs 6.0.0", + "foldhash", + "hashbrown", "log", "md5", "serde", "serde_json", "spin", + "toml", "variadics_please", "wasm-bindgen-futures", ] [[package]] name = "cubecl-std" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38868eea6fdc183feb3c46bcf5e666c78e6cf0ddca2c4f3a877785cc0eabd71e" +checksum = "3536c4c1c878bd77ceda959a3f157bbb69f957269e3f138c7edd72d9deb533fd" dependencies = [ "cubecl-core", "cubecl-runtime", @@ -591,9 +630,9 @@ dependencies = [ [[package]] name = "cubecl-wgpu" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77fa2dcfaa6d75cfbc5ff05cafe99ec4a7fb7c0fa7197917e0fd20f5b90979fe" +checksum = "1d3f068c9a81201f43cdd3f6385201b330f2499f6c24d688d82a70d705261a32" dependencies = [ "async-channel", "bytemuck", @@ -605,26 +644,17 @@ dependencies = [ "cubecl-runtime", "derive-new", "derive_more", - "hashbrown 0.14.5", + "half", + "hashbrown", "log", - "web-time", "wgpu", ] [[package]] name = "cudarc" -version = "0.13.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "486c221362668c63a1636cfa51463b09574433b39029326cff40864b3ba12b6e" -dependencies = [ - "libloading", -] - -[[package]] -name = "cudarc" -version = "0.16.1" +version = "0.16.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13f3807b96899cd88fb9feecc41316cd0879df6f2726616ab88a4bc919af7663" +checksum = "17200eb07e7d85a243aa1bf4569a7aa998385ba98d14833973a817a63cc86e92" dependencies = [ "half", "libloading", @@ -632,9 +662,9 @@ dependencies = [ [[package]] name = "darling" -version = "0.20.11" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +checksum = "08440b3dd222c3d0433e63e097463969485f112baff337dfdaca043a0d760570" dependencies = [ "darling_core", "darling_macro", @@ -642,9 +672,9 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.20.11" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +checksum = "d25b7912bc28a04ab1b7715a68ea03aaa15662b43a1a4b2c480531fd19f8bf7e" dependencies = [ "fnv", "ident_case", @@ -656,9 +686,9 @@ dependencies = [ [[package]] name = "darling_macro" -version = "0.20.11" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +checksum = "ce154b9bea7fb0c8e8326e62d00354000c36e79770ff21b8c84e3aa267d9d531" dependencies = [ "darling_core", "quote", @@ -667,9 +697,9 @@ dependencies = [ [[package]] name = "derive-new" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d150dea618e920167e5973d70ae6ece4385b7164e0d799fe7c122dd0a5d912ad" +checksum = "2cdc8d50f426189eef89dac62fabfa0abb27d5cc008f25bf4156a0203325becc" dependencies = [ "proc-macro2", "quote", @@ -678,18 +708,18 @@ dependencies = [ [[package]] name = "derive_more" -version = "1.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05" +checksum = "093242cf7570c207c83073cf82f79706fe7b8317e98620a47d5be7c3d8497678" dependencies = [ "derive_more-impl", ] [[package]] name = "derive_more-impl" -version = "1.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" +checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" dependencies = [ "proc-macro2", "quote", @@ -703,7 +733,16 @@ version = "5.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" dependencies = [ - "dirs-sys", + "dirs-sys 0.4.1", +] + +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys 0.5.0", ] [[package]] @@ -714,10 +753,22 @@ checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" dependencies = [ "libc", "option-ext", - "redox_users", + "redox_users 0.4.5", "windows-sys 0.48.0", ] +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users 0.5.2", + "windows-sys 0.59.0", +] + [[package]] name = "document-features" version = "0.2.11" @@ -748,6 +799,56 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f878075b9794c1e4ac788c95b728f26aa6366d32eeb10c7051389f898f7d067" +[[package]] +name = "embassy-time" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f820157f198ada183ad62e0a66f554c610cdcd1a9f27d4b316358103ced7a1f8" +dependencies = [ + "cfg-if", + "critical-section", + "document-features", + "embassy-time-driver", + "embedded-hal 0.2.7", + "embedded-hal 1.0.0", + "embedded-hal-async", + "futures-util", +] + +[[package]] +name = "embassy-time-driver" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d45f5d833b6d98bd2aab0c2de70b18bfaa10faf661a1578fd8e5dfb15eb7eba" +dependencies = [ + "document-features", +] + +[[package]] +name = "embedded-hal" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35949884794ad573cf46071e41c9b60efb0cb311e3ca01f7af807af1debc66ff" +dependencies = [ + "nb 0.1.3", + "void", +] + +[[package]] +name = "embedded-hal" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "361a90feb7004eca4019fb28352a9465666b24f840f5c3cddf0ff13920590b89" + +[[package]] +name = "embedded-hal-async" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4c685bbef7fe13c3c6dd4da26841ed3980ef33e841cddfa15ce8a8fb3f1884" +dependencies = [ + "embedded-hal 1.0.0", +] + [[package]] name = "enum-as-inner" version = "0.6.1" @@ -869,6 +970,24 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "pin-utils", +] + [[package]] name = "gemm" version = "0.18.2" @@ -1082,7 +1201,7 @@ checksum = "dcf29e94d6d243368b7a56caa16bc213e4f9f8ed38c4d9557069527b5d5281ca" dependencies = [ "bitflags 2.9.0", "gpu-descriptor-types", - "hashbrown 0.15.2", + "hashbrown", ] [[package]] @@ -1111,24 +1230,14 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.14.5" +version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash", - "allocator-api2", - "serde", -] - -[[package]] -name = "hashbrown" -version = "0.15.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ "allocator-api2", "equivalent", "foldhash", + "serde", ] [[package]] @@ -1168,7 +1277,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", - "hashbrown 0.15.2", + "hashbrown", ] [[package]] @@ -1230,12 +1339,6 @@ version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2db585e1d738fc771bf08a151420d3ed193d9d895a36df7f6f8a9456b911ddc" -[[package]] -name = "lazy_static" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" - [[package]] name = "libc" version = "0.2.172" @@ -1301,9 +1404,9 @@ dependencies = [ [[package]] name = "md5" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" +checksum = "ae960838283323069879657ca3de837e9f7bbb4c7bf6ea7f1b290d5e9476d2e0" [[package]] name = "memchr" @@ -1338,7 +1441,7 @@ dependencies = [ "cfg_aliases", "codespan-reporting", "half", - "hashbrown 0.15.2", + "hashbrown", "hexf-parse", "indexmap", "log", @@ -1351,6 +1454,21 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "nb" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "801d31da0513b6ec5214e9bf433a77966320625a37860f910be265be6e18d06f" +dependencies = [ + "nb 1.1.0", +] + +[[package]] +name = "nb" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d5439c4ad607c3c23abf66de8c8bf57ba8adcd1f129e699851a6e43935d339d" + [[package]] name = "ndk-sys" version = "0.5.0+25.2.9519653" @@ -1468,7 +1586,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a98c6720655620a521dcc722d0ad66cd8afd5d86e34a89ef691c50b7b24de06" dependencies = [ "fixedbitset", - "hashbrown 0.15.2", + "hashbrown", "indexmap", "serde", ] @@ -1479,6 +1597,12 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "pkg-config" version = "0.3.32" @@ -1528,7 +1652,7 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ - "zerocopy 0.8.24", + "zerocopy", ] [[package]] @@ -1697,6 +1821,17 @@ dependencies = [ "thiserror 1.0.61", ] +[[package]] +name = "redox_users" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" +dependencies = [ + "getrandom 0.2.15", + "libredox", + "thiserror 2.0.12", +] + [[package]] name = "regex" version = "1.11.1" @@ -1761,11 +1896,10 @@ dependencies = [ [[package]] name = "sanitize-filename" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ed72fbaf78e6f2d41744923916966c4fbe3d7c74e3037a8ee482f1115572603" +checksum = "bc984f4f9ceb736a7bb755c3e3bd17dc56370af2600c9780dcc48c66453da34d" dependencies = [ - "lazy_static", "regex", ] @@ -1813,6 +1947,15 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_spanned" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40734c41988f7306bb04f0ecf60ec0f3f1caa34290e4e8ea471dcd3346483b83" +dependencies = [ + "serde", +] + [[package]] name = "slotmap" version = "1.0.7" @@ -1830,9 +1973,9 @@ checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" [[package]] name = "spin" -version = "0.9.8" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591" dependencies = [ "lock_api", "portable-atomic", @@ -1965,6 +2108,45 @@ dependencies = [ "serde_json", ] +[[package]] +name = "toml" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75129e1dc5000bfbaa9fee9d1b21f974f9fbad9daec557a521ee6e080825f6e8" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "toml_parser", + "toml_writer", + "winnow", +] + +[[package]] +name = "toml_datetime" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bade1c3e902f58d73d3f294cd7f20391c1cb2fbcb643b73566bc773971df91e3" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_parser" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b551886f449aa90d4fe2bdaa9f4a2577ad2dde302c61ecf262d80b116db95c10" +dependencies = [ + "winnow", +] + +[[package]] +name = "toml_writer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc842091f2def52017664b53082ecbbeb5c7731092bad69d2c63050401dfd64" + [[package]] name = "unicode-ident" version = "1.0.12" @@ -2000,6 +2182,12 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "void" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" + [[package]] name = "walkdir" version = "2.5.0" @@ -2126,7 +2314,7 @@ dependencies = [ "bitflags 2.9.0", "cfg_aliases", "document-features", - "hashbrown 0.15.2", + "hashbrown", "js-sys", "log", "naga", @@ -2156,7 +2344,7 @@ dependencies = [ "bitflags 2.9.0", "cfg_aliases", "document-features", - "hashbrown 0.15.2", + "hashbrown", "indexmap", "log", "naga", @@ -2223,7 +2411,7 @@ dependencies = [ "gpu-alloc", "gpu-allocator", "gpu-descriptor", - "hashbrown 0.15.2", + "hashbrown", "js-sys", "khronos-egl", "libc", @@ -2475,6 +2663,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "winnow" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95" + [[package]] name = "wit-bindgen-rt" version = "0.39.0" @@ -2490,33 +2684,13 @@ version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a62ce76d9b56901b19a74f19431b0d8b3bc7ca4ad685a746dfd78ca8f4fc6bda" -[[package]] -name = "zerocopy" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" -dependencies = [ - "zerocopy-derive 0.7.35", -] - [[package]] name = "zerocopy" version = "0.8.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879" dependencies = [ - "zerocopy-derive 0.8.24", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" -dependencies = [ - "proc-macro2", - "quote", - "syn", + "zerocopy-derive", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index dd3fc28..521cff8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,4 +24,4 @@ gemm = "0.18" num_cpus = "1.16.0" rand = "0.9.1" rand_distr = "0.5.1" -cubecl = "0.5.0" +cubecl = "0.6.0" From b4a00d9e9382205eab07df86c3895aa7fcac48f9 Mon Sep 17 00:00:00 2001 From: Eric Buehler Date: Mon, 18 Aug 2025 08:47:46 -0300 Subject: [PATCH 16/17] Fix wgpu neg --- constensor-core/examples/test/main.rs | 4 ++-- .../src/backends/wgpu_backend/kernels.rs | 4 ++-- graph.png | Bin 16777 -> 27242 bytes 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/constensor-core/examples/test/main.rs b/constensor-core/examples/test/main.rs index 0eab77c..a49910d 100644 --- a/constensor-core/examples/test/main.rs +++ b/constensor-core/examples/test/main.rs @@ -5,7 +5,7 @@ fn main() { let a = GraphTensor::, f32, Wgpu>::fill(&mut graph, 1.0); let b = GraphTensor::, f32, Wgpu>::fill(&mut graph, 2.0); let c = GraphTensor::, f32, Wgpu>::fill(&mut graph, 3.0); - let res = -c; + let _out = a + b + c; graph.visualize("graph.png").unwrap(); @@ -14,5 +14,5 @@ fn main() { let tensor: Tensor, f32, Wgpu> = res; - assert_eq!(tensor.data().unwrap().to_vec(), vec![vec![5.0; 4]; 3],); + assert_eq!(tensor.data().unwrap().to_vec(), vec![vec![6.0; 4]; 3],); } diff --git a/constensor-core/src/backends/wgpu_backend/kernels.rs b/constensor-core/src/backends/wgpu_backend/kernels.rs index e450d9d..b159274 100644 --- a/constensor-core/src/backends/wgpu_backend/kernels.rs +++ b/constensor-core/src/backends/wgpu_backend/kernels.rs @@ -180,7 +180,7 @@ pub(super) fn unary_float( if ABSOLUTE_POS < numel { let op = comptime! { ops.index(0) }; match op { - UnaryOpType::Neg => out[ABSOLUTE_POS] = -a[ABSOLUTE_POS], + UnaryOpType::Neg => out[ABSOLUTE_POS] = F::from_int(0) - a[ABSOLUTE_POS], UnaryOpType::Sqrt => out[ABSOLUTE_POS] = F::sqrt(a[ABSOLUTE_POS]), UnaryOpType::Exp => out[ABSOLUTE_POS] = F::exp(a[ABSOLUTE_POS]), UnaryOpType::Exp2 => todo!(), @@ -192,7 +192,7 @@ pub(super) fn unary_float( for index in 1..ops.len() { let op = comptime! { ops.index(index.clone()) }; match op { - UnaryOpType::Neg => out[ABSOLUTE_POS] = -out[ABSOLUTE_POS], + UnaryOpType::Neg => out[ABSOLUTE_POS] = F::from_int(0) - out[ABSOLUTE_POS], UnaryOpType::Sqrt => out[ABSOLUTE_POS] = F::sqrt(out[ABSOLUTE_POS]), UnaryOpType::Exp => out[ABSOLUTE_POS] = F::exp(out[ABSOLUTE_POS]), UnaryOpType::Exp2 => todo!(), diff --git a/graph.png b/graph.png index 042d922de9c24c47e0bcca740c902cc875fc2b6a..90ad13d819e2fe8aa0e31bd612b5f3c4ad5bff25 100644 GIT binary patch literal 27242 zcmZ_0cUaGDA3yvpQKCgE?Ij~2656{^Ns^?ZGD@5FP)UQdH56?T4N=-#Lqk*2UZ~I# zMd*2**Zun)&mYh69Jl+p?%P$Ld7kh0YrQ|Wb+pwP=sD>r6bi#p4HaDqh3Y2$yPa+w z{w7^ir5pcPXLej&g|bTilU)8dibCP1992=&bA9l2(8YlD^Y24*vo983u&zy~>r~NE zpxSbLH{GtQn?gjkU)?>=E!)A7cDphzwPW)^&M1zR+j+i6CD>FLShh$UJE@bNwnO)H z$>Q4=ODlnaZpKt=FS)R}%Dpc*bMdlslE?O?ud?4Y-Dp%)E^q{xmWXYuSVR8DaveQY z?LN&z4gdSc;}#c+=aXm8l-=Bp?Ao8~W9BFM5- zh`-D)C`if4VT{nA!5?E|^wG9>{Q;h==8GNU4Pg<9iJ=c3a8FE3@XO2d ztzEme@y#1WQ&RzYdU_p0!#g;0_q%s*gxBGeIXmLw;*!tgn(W)Z|7K?9&bxQ-UNAGe zDd+XOs-}h-x3K@VZ$wgGpW(%e7x@ny*h~)A+Nx?|!b`tl!~A525Gxy7YJNWJ$B!SM zWM$nux{;HU)5w$@EH6)7Q&aPTg$3QIQ>Qq%xYAr^hn~NB6%ZO4s%dsXN?!3w$<-%i zW!!~!!)j(`g1VAgVsi=@!`gbLak|?Y6Bs*JGON-&@vu7typ7h1d;mJ7oXXNMK6IJ{5>le9PMMbLV zFJGuAIQT&QhsA65_C8Tjo3paA_?hV@o=e_;#T;B$gv|; zyp#NEY+%s1B`xD&^5YHRDJdy>Z$I?+n^*Zz+2ho>3+^6~v?r{um}Ok&DDQL6t7~d* zB8mGKYZj!^_w^Al>;{VK7018=Emck^wd=9L$(7|tqF%B;^KlPCN2&XU#I=? z;|DXnKY0xyAq-jB*`}8+)qQ*r9UWcc%%FefjL?Atr*;j;n`CBY4!FE5DDcNqTDOsd z;^N}6kw=PE*5l`dw5OUjYbtY4otcWNs=)sJo8IRbE9>d8^2)l?r0CGm($a3^*ynS+ zlWNU6$2aAl@H&LIZx`CR^R`D_Lqpn=Cv;b@UgeOmV3v`Q85te*J(;Ff{rdH-*>LId z?ryzXfq_Egkh$j@zV#P&^!CzSzkWT3`OrtBGXqv^!F3v1TB^Fbo6an6+O#P=CT5+W z(r|^BxZ|XbtE=mU*Fh}y&dxK}`gy+7*!lCz%JK*Z2+*xxuV`<-@7VF!7YT+JyCyzHQ$AO%;~!?uGZ{*#jy*d5h8 z7bAP)4@d0Y^PjHm5fWO1oq?0ZG#wEY6|GSIcpCd1*Hlzi>imuKPH?cPg+-9KdASy_9b-FdfLotdXk zpBC7?dt-HVHMw@|3_U%)eeR1EEpg&&x-zu}=S;fYD(oDYM^;Q*nnK1j+ICczdY2xw+v&s_!4TVm~u*1dql+ zb@ZXjkYX6G>@wrYnHMi#IxJ3}oM{-Ys-m2FtX+3+=c$pYsm$kpe?27I5+`@S?=7#a zjN|D~_80EG+!E)DqnTkhcm_7+v%gU|dUXA_!P3|V4<67_jz~#$_7zy|l#()JYZYHT zWo)c+*iI8?mT=r4n=Wz^vykk-5Dbz7sy{8J$v^4#*G^--aa_u z`19*o)Pr&9vOQv0dObPD$DN&>X?z9-1}tWW%Joj3tah=`8px+>ZEdYCS)G6K(xq)YhwXi^Qt2o^zV*kxvgudD+wlzSNx@#U{4xFRqM4b} zv17+3#>bWa{rjhP{(PH@#bBt4U`liB-maGW2aalKg`i}Moqchuv$J!VF~rT6aVO2_ z=qULOO5@<*Lr*WS(g7yzD{OpxC-`nG3s_oOVw1Dj^yS}1v07&Q5{)X}zCFpa#_RDx zs~(-pmqmt6@mTcqRL&LJZnLto($&|`D8Bx=^P#lMGGnl1o#>XEMMZ}f78ivni+xtP z9lySRmfp2TZ5@jLWbob!{s;{fl+(pQmtm(8+o;Y?UBf)HwVBVJMWOotUFw|>T2J6_wHHrWSw0csa{i2YnC{#>z`ay6c@9{_!zElVsupTOpejiWD&>K zU3>Oqa7)9mbz(z`Q4IJvl{ zmgmPWqKX}?u;%dWu~E_LCqA{%Ei@{q8wQA=V)WjZXqGR46Ot;$|T?F zGHvv|dviRx!Ta~`x4>=|W~H~B>U^YUWYprK(Q+Kef5gBg8J>|Lf|}cf`uB_Rc8W9W z&5{x+!wSzM>uIPBi|hrWV`7^2?@ab>o$SxO{;7Ts|m zpxAC$uA)}pwu;g-{p>Mk87xFx!PJG2kdS=S+I5@g{UyxnH}<@1NMmQaSym=9JzOdO zp|@A(*s(h@W@fIPy}cS%Jy`<23JTILGZSKrI1pJGnNOaCCnfROIqX|-_}H|iv#TrZ z_U7%n*kZ$@j>p@?Xwd5>Mn)98R(?JTWa4WQ6$!Xm1E$vNJl}~*bUt8!#*Y>NVGD(=$(7edYOHmKweRDuWjmmnc>^_i&e_&v9!QkJO zrKrfrO%LU~40FEi{}UV%vJI;!>BjG!!zqr^HBMi8vaO~%Huo3WH4K$|uyAmYHgI2l zT5!h>y3cJ%4OjpKDp>bEr^?C6Y47e$H=|o;NP*jV2z_kE#I$MFf%vL!>Ssn z^yd5d^QX?~(`zYz{w@vg*s;T>xY(_u)G|{Ua#+aWy3{^ihj1Cs)`EWlf<e@%%ZzU@CeR3#G(yDzbY&(B8z? zuLEDp(L?ji>h9>vmp*y&U{En}Ord<)O5-tXybZ%SD&Q zUWvFu{~-*`^ClGY|khi4=b^?G&DCC$~ch}`r+l3LkW+8#t$5L*;A>o zK|jZ6y|lD+X2)w8Sy_jXDk?xVpM+O8{<@Rxq&qHHc*(1=w#@B2Af)|t_fA~zL?g@L z!-p???akY7^ZD$Zc$Y_y9^nZHzxTR$`EtN)#R?;}F9jI2?XUJj*;)6cnUI~QGDaFB zgs#~XN?YmEp+(GLn=CIcHy&hWXJ_9t4-k04((;2D-=~+h14UG~Rj9eRxF{#q#jO5# zf8Jq!Ox5;&Ou&09%Rcl|@~bWJ2cOlw>gzkyFDxRG@j`}_wR+!E)y=>+A{L+ z@Ob|H@qQpb)|ih+>)bi6G_?>V3yUY3{p%SRqCMVH-AQn9cXyAIw2AM&hDCrqu)1O8 zZK=|bR=mV|78Vxl%xiX~(pVT2pR}|zqcsjLE`f!GOHs-9?xjb*JZP53@#TH4I-V(o zf7`aT2d=z*4cN)T&i;Oe!}(j^S`UvE{Xj44zM%g7wpJ%kvMhZ6p1`oN3ztbbq4w6j zZ*H`9bauAw@8A7jl=ZoJcp~ECcXdh53|H=r4c?UaZtFtMZ6IwmwRIwzaqZLljknEm zb8}d!|yC+4`xj$NhtHETdRjbBl}COCIMUUX6a}EwSt`Kv{$eMN<=d?k~)I{qT}w-vx!2LjI4PqRYBEJE|+m-Os zK9T2r0gx8O2hYS=V})nxQ)_}zWnR`{Y3XtN&;2%OX>$=S05%PPXQp;`s{N+`b--Yy+k!nA2 z%g%K68z#8f&D*u}r&jj!q}2_^dH?&KblCYT2e9Zzp0*7Kd44-fV5#@^_9iDMkNDdZ zZd(W3hRw0T)YMeBYxAZ}pryTEFM4WgGg7LWnx2-YGXi1m z%or_BbP~r1-QB!Cij~Ts6p}a!?^vI=-&qp zy<5n6@nTaj``-GAH&N$2X=sInvIi~MczCoRQRF*)(Z4_Z?x!G-U5d+)TOcJ|fj{|# z{NMaKDm^@6QnhJ(usL_`*b#K?+O_ly0E3b`OBEIMP;LPr^ru-_);~Xh7p84H9>Yv| zQBd$`u+-(0vhtck-XEKyX?#*sQ`3jow`@@zuMbHRJNK$R>+DO5{#Q0)HyUui9YDJa zyPjn#1LlQ4dbCaJq10(xo1UJY$l%}&AspgL)APKZqX%Xgd8AY*hMs57o^@DL>{w8C z?)e3(A74M5n$EFM`P1*HwNUXw&=GT=W!K-mEM)_eoakHp;7ma zm0@W`#m94^SQnHUpGEsh1}TlWfB$|9pV)lX>(?rzBjLRlmQ<7o#<-kFbQNW$n*_S| z4D9?SQ^>3#yvgvD_15+NHMR%#pG6~bb9Zn0p>6?e;fTIbT(BgAw<+QL)c}As_0}yK z6SE@wQEE#Ii+1!{QK#;A(N0TcR-9|lS9x$uv2y!cFEY%Mv$8bh|1Fyra-3W2&ODXx zFs@dd?*yv#d!c#n$IhPM^hb{(5)!ybgO7}gatnEs$x-(2_m{=xd27!@P#2OhJZ@Ju%CT(>_fZzRv>D&7U0#VCC_~a8AEQXH~Zh{hi z^u&oUoJ0$7$}_Ng7w!q~aXS<(V3cGOhdG{2-y?BDZ|ds&QU9hueJ+e|77cP{zbS$$ zAYnhEpnLlCqq<=BQ)+5zv;lREcR(eFZ>)r1WfR_3@n@l#ZEx&p6<|RLyCE6efyCAB z9VM>wg1BBA$1go%?q9MCHTap^p%6^Y43>HZzLxp;XaiM86kqiD8^8RaFKMW%`lEO@ zgfg@085{F_8?H>Uwy`<7gS8W@aSCABGw_FQ&iTr>3|vw`mQtt#g+)Or2k@+uk4A~$ z%>kL6akhMN)BTZBdwO18L@1xUHX|crdV0DF^a;m5%MMR-b7zCXu9$#cGg*B9)Y=Wn zNcZH)6i{Zz`(`}s?CRG)wFschWACPc4xPGjLvGmA$pbr;{QAPef*?i0bJFTsO=}lu*F=L=LGO}M#sn3*xA`ZCRqn=_hMs8^D%90qND6F zDGRDx{l^86gRNSJGB0k^Ct=$jF|z}T5Cnk3fbRsSMQC6}h*b6Wa}5m*J;#2LeyyFE zoJ@(`W2}VZ+`M@+JT?}0N2{46$8+FvOZcy%EqARO%a~CcS(2jz}+fVd9XIHgZYcjiu_7_5;o)DxQP{E99o2;9x(U9gyH? z0j_+&jt+8-`%HOG7}6!4*3t0^4`-&Kr5*2lbli1rgbLgc`vgjcFDiC*YbyiRWNX0C zmubihz^;K$p6pOjQxn{^iyn-R6lBIN;MlM0 z>QeIZST0|_%pv8tBY=S`KtMU5>cfYvz@wglqGxai8rs@mSci(rj%#W8EAT5+-uR0L zvlfTE%Of50Ez3!~jP2$XSbj2KUY}&_qX`Lbw|fr5L6wVoZLcxzte+|Dl5ygbLUPCO-&W(l|-#fOazgq zMt$1zIesC>a`*KNE8Qnbb6Eia>jo;kxZb>ZgCCxVd}Y}yi>(@W|2|P5(e$3bcyZI; ze=X7FFJE3KBqYSKb!+*e>x*1mIXzfcV_Ta#HZ=Nh`2WP=^v92J_J>?gRE{6t0I_`_ zEEHhy!NZ3i;<9BLo0`V1WlvS&+6Jmr)~#D-RB`s)x!TUoO#pHK*8&I$fh+O$_*W2H z&}ao)+dZfe&>q6FvUcwoMiV2emqK*pyu8tCIhN|p_4NUTHUXo7O6)g#09{6bJ9Oor zXszB@o=fR2DpX2ghWJAcpOdo(dYGf5Bme)b3!pT-8U#>@o0^-6u#cU9L#M!Nz+Jbh?LUCSp=Wo8X>?VtEKb!*%B}5bZ?F2;*Yzs~ z{s)ThWuZ6}A;`-##5d^Z@J&oi)SI!qdR1u$>oEqy+|0~3A^ak2!8m$#D=LT*ESl-* z=^{_jojXSta$&&`J*V-KR(^hd{70iMZyOsMX!!!Q1@(?>4_`2sX+ZM7cKv#>Y}%hc zo+xspWlsl|rl-?@&s$hqt6;f+AyZ_IWd26N9~aZthKZ-vAAP>k`Gol3uR4<5xP!IrXzVgM?>N%Tz4taGeNJxBgb-kBp)7c?Q-t>L@5s(6&M$BPpLhpEPUF zkzs7`KKw2Vi>Ie&_``=GP%`tPt{dw>u7C!-4<#MlF_8S+pn*WRAJi;zJ*&UpSI$A& z`Od@GRh^-E|0Wu~kepoNphcbIu`DYMM(7mt<6-h=Dt?Yp-0n@@HjuwH_kLGH%s`Qjl)&3ql+|z3nyQe}=SHfrRjS`TJM-a*y=H7pNq{SW=Gyb;&(o}1_u}#CT}6=l z1jNJ`Dk>_-SsA4Tbtqtkfw{SRQRgn0npWdb2T*#&A!vOaD(525lPn@BZc!Tg<6>FWHnQSw__4bVdoJ%I$|9&xMeS5i z%--{%xW=zADQ0yz)IJ#q=0Z;?FWZ_|KBa9 zW{WLGEhGpviCh)_4S6jYf9)#>#uih{!Ohsi%(+Sa-h0*S&+>Q} zn8GEk`}^0r%nT@cddk43CNFT}juXSq!*FHEkMu&n%rPl%aye|=36{*v%6bbNGf%L} z@pu-qBlT?Ms$7Ql!#hjE-h42!ZEl>`(P4I;8CXB^CgQ(0))28qM^{bpXx#O=8ir=60V?1L_7(UGcZbEB=uVI0-> zK1>8l&{`Uw*HU_P|2GQ|$|J3&qOx`C)_n5@Mrs9Y8Lxz)u6GiWlAU-HjU62k@F0t( z_Lef1bd{KwusN%=ABE9u24|?Z(2fr*$uOBv3+M!bsiOqO+ z>MCkp=-izB@~>Zt#>Nj5{=N8;ES)W#VRiD8G$rF3xpkLeFLuZPDzNZ?NE1uT{QTCS zmr+r$>nJ{WX4k$)M<=Y#dUtq~+_`s;)$_-@Gt0lf^V4l>Aj-MSr^UswV`xU z_n}G?ldL&L%wzNgetm!`^5H|C-C|;KA@&R3Zxi15>YAlXt0^4fc#^YAci*%E#Edr~1IBcI&e)6et;_U>hR_3Bk` zg_kt840e5^Z603Po!KbXmgcjtT0qL9&?e9SK7gGS|8V!rG=A>r>C@4#YzKdLCe4Ed zvQQR&{MaU#ipLPCAK4x{;e1HXPh$kPllt_c@ujIZ;@7BULO0B74}~+q^0HGzbiVE<-e*YO7xPl-EHuk z@|u_HT`FopOOiYv@Pq;Gk8Ix>Q=A$>hgx(@7EaDPcw_lr%8BCRgT{LHR>nTLm9GbY z5nyku;pxooYHz1dzRt5~##YlSvX4wp-yzj4$?NA<5J@b*iBF%*Y6BQ3K2ZC|agQ%H zMjV0^UY{uI@f>!?;>_Smw0s+!3K%wPDV~4k8#FXDTtBv5sd9Tc!Cm&5e`JrKx`KW7G; z(bd!1@dT6r_Va89rLv4mU-i43{qyT{5_HE}Tyu*oCDb%7CNC#tkD6cv5x;lr-K&m3 z07&;Q#-lSxn~Y3M+=5j#1{(-)U>|)L4~wHXHr8RsNoGj9Z4Woj*v9NWOP!(<1bVi> zLtpDij~b!~^xKP7hlhU)rL&?o;+c?o=m-?6-b!zIL^1wrDrhv<{Fh$G^O#(Y%kQ`-HJBIhJJ@E2wEr49zJ^Z;nH@+yvA?Lde9q11!04 z6Ab^i1mWhe?e=2j>Od1yMl9japFcn^aj~!I6%WnBYBEF9Aqx*dgGZn0UcbHoMNUTb zr2{sc)qU>%<0{@4X}2<}4f`JQVMD1Q2r>Hg>)i+;^)aAL$ob1Z`|aji z4-G5uGsD<55NHzsq%dtyR)kIW8kKae&HLYGtR^9~yFLcH)er*;_G>?|u}iYaleVfc z?Ekyepre=Pt7*NI-zK9-CJGVF^PC!KsMrj&1C`yrF%4eV4m#)2ZBg zd8IA&eB}+gjT3 zIq<9XkU~FlY^`z$2|45+ggKx{Y64Tu<}} z7!Pg%_4|zJMQH|uG=yvTy#G`W`uBHsRFkmSg9k0jj^rKX8{C6jiWTiwRi%K9oOUj> zrhXsKZ9E%`nStV<+qZqQvu$3Q!JbRq_jx_;IlgW?E4?^`D`b4k7bf1wsNyUeaF%=M zbtv-l)wFyR3dB1~GPpgg;Lisn-Xg3}6x9z-h?_MicRvUeK@U)om}VZA<2R^NPyAQN z7V6N;K%SCwazYVOAQ>pOQ%FC6*MUzRW4WzDFb@O`x8U2RCA*wesxBb7f->fxmL^02 z%#2ye3*Bo{*7&y{$_V%_!0O$*&y}hu6lgpwTekS4_ABx;&)wK+2~7sna=w;Ho*#}i zO83M;-hip5Ugggmp%4$y>KP$20R0ty_yZb>%9aA~4339&3i@jCs-??i4}w64JW?~U zA#lHoi;E+}G+1}?vu7J9?eE{mUOjW}+_~wzc08RYIXR#H*@9dnauq}e1pv9jiO~HhY><-u z@p@RdY)OH}EUKyHeTc_~j)ocu$0{fsY;0`LpFj7Bj^6V1+qaI~^YU@=@lDr$?9ZKq zQw(iM4N48b1C*N3#YIP0w%0u7uu5@V`y?d;U?vs{UAS<80xmr{J$=n%UTZJjorR^P z!#A*PjS!=yuPjQqCP31YhK5Z%#u7ZHB*_WgkAxe98K<=K44Hx{AH#9ajDj*C<6r;$4``0(Mw?l8-)X3)k`KcfiXME>pD14T`=auGEfD0K;2u@w;!fyqe$M;{p* zLoS195dntJljrK`6}Lx$$2T@=paG8nJ)u@8Iyi_!B_x9C$Ow_!di(pYuikLZEf;NW zYimGU$$n|Zc%a-PcEtsTE~N@++t$_=jLsjf21GZkjmGQU#_9mRz%WD?1w0dylH$a| zCTb3-+fr@s)}{FVr&AW&#l<65?C=J!T3O+BGLb+af=3*Z)*Jy$d^yaYg$d2UgF)U+ z9SwrS(pG-j(D^IjvT9pfLp&HbB{ZhP<0AA;OhRGO43szpdpt@{C)6KG9NJ$M8m9P- zKdz7sp-o6Lpy)0yyWYBWi(gon2BxkmtffNjgoW!k@1VQz5&V0+zia*T05$eF*A7w9 z+Pb>6Q0<8ehpT$qe3BWII0IFHeXohig$w)$P5B@ZWO4PXS?_aG*KY;yN%C*w*8M61 zKLw>qQ3q92RQMs+!ofgtO$}#6SZ!o{+z&~D3yYH-x?U|UQKC-3`VI>dM~#Z?3Ed@e zPm(c&_zM#XcJv%iMvcefBnv7yQD2Y8?jaZ+b?^0;+{)0c2QEYGIBv_X@?0sI^5Vq{ zX2ccI5x|;>LJL?~3k*hdWTNDLDc|Xq3GM9SMPWj`(QgIz>|w+vp8NTexQpUW)7`cn z^2$z52dSy4?QvBEO<@rNJR@hoiL3h1b^bgHi-5Q|3;Q0Us?1Z5nYV0Ni)~gmU(ZD- ziMYeq>(6pZ|4!#ARgu8j16dC?f;8a1(jda=>gHAramKD}ew}n?CWQd2e}CMdR*b>R zLba&x%`+Fj@|L^oN6$IkB{|2_V70zT%lw=j4#Ux~QbwSS)WeJ0U~X<+QTycusvw=W zw>LQi$mryUm)oIa1*!(KVFQ+oa|x^2lJFYBZmI9jdp|@wIC|<-)aADij>4P&F9hh? za$QefzY_+e8Z>!9IBg{Mg2tzd$26ScR0P%cHzFLCjo|_`J_sJD%m0~MvxSv)j>oX@ zUPOfAd9NQ^?#IW&Xx>D8FND;FQ*53q@za7!=OZLD4KW`&nnV;h3pQN=l|fp?rC{HTBt<+(;F{QE7>QMAk`Qh%TgY z)>9aGW#Y7xZgT%U=s0({pmP_4V}^A$Obvj~zBG*FlAZ5quNn z#S3Aa{~1qaeZWJ$L!RHYs1@L;K#R?a{mrdUC)JB^%oBvq(=#)Vp7)$z`(Ju+lMsUS zXBYpi%;F}}eb>--{udO?R7Hnbe6u|%9DmCqq(;s}+)3bqpx-~g9gmNXho}(h?82U> z)UF5sV*xr!k z{VcyKfQ1n+X=7OGtaMgjl7f8AMO$0#a`zvp0E!S;)A-Snzd@%#D{O?1Nyw6m1}idP zNN^%|r3qL=1lB63b#H(VaLaw3+Pd?JlihC+q9l?f@^%`>j@4jazje{LdlS%?a1<6k zJ|?g&+#rGAg|2X<=Ab;(HZ;&8eF~iUS&U81SuKRqA5l_x5vD+cIE?#z!bPJ8CHbfn z)TFU`-+k9?2zS*aO>gbZqSAp<&O> z2utGGzLhi+|-)j~>#x5A4VF9OfVrE7g+wYp4H!?4zGLVESG;C@G1QEu~Hqg@x zB7_Q@IPv8RqJ)Z$b0doJNQT2P>Ovg$kz6h;S+!8^qX?l}yX;0=&iDLv?V6L*BP6nS z?cSZ<+iO6=#YYYwF6-QO8wP2%bMfsuGbDv=f0W~0rbER%nDDR6Y5NK69ulS|u^G2> z@~eLipr`11dX}~HUtLJ$E_a;TM8$vn_;F%N`}k0#9L8vD`d@{REs2n=VCtvh`>2}I z&R_W{i?9d^rM_9VeL4x}ibO0)+5ip^jgQ>#$uvo?|DVvLr)p~{^6M-VU{fmxIZ>K# zi+HP~0%KtX$Ns#6`VI)GqC!%g$R+MEjx?-C#R8a2D0e}fqVYk4lM;?0zI}GK*y%H8 z4kavsS%;$%s;a6MtXv--AGd_ApZ&J2rNwhJY!dxx3WB|ESKo`7iI3M`R2?^*;dTV`_&xbm>|;I3;!WFtcT7#Nf3IoqoZS$sRHi-yeni1(8PE#DszoUa9;iAm1R=SiP#q2LvN9T5E|$9AAS|kzIN=~Y5J+w{Y!0TzurXr-S}amotzR0@8RWaJZm z?)s9Gw%%WfsIEjQILk!>m75qgx%}Ngz#VgPx1GW;*xt%Nn_zk_FIBEGP>`vvhS(^M5F1n=Dhg`dKDap614S&} ze}Ax(42V(X4W3{&u^PN!Dw(rP{{H@8k!~}I_mI@0@B{i`0;UGVj%dx?N3PKvkJ(*~ z?AR>?sKzliGCK4U@CBG#S$I={_eU9X)Sx`vif1u^&*IeF;TNEGZ} z`O{O@;*QB5gUTDS09{o14-&q0#K_zU4XuasR#axI^?y>NA-Mw*>6Trty;`gXXJN#f zLdwI^K9sP%RKTtz8QX#~;$(dR5-n)oHSfI!&y(vu0^E}N47@!C%No-~{>Y@*4wb$6 zCh+tBO@|gQ0UI!3PK|hk3MxiQ9`5dA0F}#-;)+D2L7)E)7$kf;*`E;sHCyUbe~k zK>>%!F$W|w0$^+9^jHL|l@h;&8df_ce43M*ReBbvqG&kpF^~+S*6sH0y3|JKP(^%$ zV83vJO;OV3=I5&+ypi=lv2eJTouPCHnH$?aAF4G3qmR5vTswEs^W*1~)xp%x{2?3= zzJAe-bJ)t(efjIn>+h1`E5t2)kmSq^Sx3Rl!Ct4c7m&NeR7AoLFL`P09E`j8Y*!b~ z{<~u?)AFw2Syn1S`PH4BUunvf3L&C5f=5H~!2-w9RKNPKIg}EckkD_&EBm*Yw7i>Z zH$1ogu;04>6$ALA>umXwp;5)ou%vSC5_sd3_z;mNjc#WQ40^MYsj{q|7`y-cy8ig_ zX10}HR;{kEUYHngVpWbGVlR-;LWBZ+m{P=*f~AtkRSU39 zrSLTD_Xoam_$F&yUz~ZTcOgmk0cIBhp*t-BtN1}F?v_wjZ@BE?s5QP}7|XQSVPTt~ zAT_RT|B5FxcxcXrUoOg?2Xi2~+{^)w4_w2B&1sJT>A|UlhzRArr~tL$S+nGi&$0LJ zX=vF=3HXpoy*KjGbJlc6ivZEt^EauxwjhB>kiJH-Ln%GiJ0pi#tG z1!~YZe%#8b7>YOQn8V*RF`|}0+`=orU}c5U>Vy7J>1chzs!wN&gY_W(pU}R2IvMP| znhy@Mptm3P{#Vx0H>m-bWb0fz-fVG#cWQdt45Sl2wVl&_GBS!0FysLdCw9K|duev> zLF1F(Nt71?2Q5Zr`mA};RY?pG(>BBSWV_B!ayZ;-m5(4a5C;}Uyo$LZ;T zkse)_b;w5_6LW!dOROKTB|`l1x;tDyV)3fm0IRG>W07=(Akt(u8~h zeC~ZIX|ndsa~Bh3b30gs&{S;@2lGBm7f2JA%7|AKMhe9^R#8V|)zUg@I4*0dn_6rL0>>P{0yb zP2&UA@y&-1_aXT!!Exw@2E&Q)6q)NgCnqhPz_!;!lR~^#aE(bwC}cE&S^-M5_@l>< zyYNqFh59r;|7KVJg@HmAw=H9+ii9`82FM^(I@*3G9Hc4GK%1ZUn%8b%!vrW>IWz(? zRc4rP!4krKBoabv(IpZSMD^1X?@AEm?(Zh__f|IIe5t2UPc-;yvjEa~!hmkb14kAn6JqFQo7 zrvhA`=V6?MkhxP(aIDxoV~@de9}F;Lg1$pb-zh6wUiT4+851|3r2l1>#KTLgZ9Tp4URKWLC|?~1(mzgd9d27bHTc=d0OG~8S; zrQ_=EPIex!yyd*x1>FffuvuvFNEZE5rS|nf7f}6AeGvB7J%9cLx{C;2j;^k5DwcU+ zX0RaM!C+VeBLl<7OEk}80Q@?hoJ=DjUrZpZ?s&b!89<-Rc9L*2d}Zfg-!JDy!upP% zT*!eV0sbL`81W*2u@2&22}2F^inLNLGu#N9pM-`!3Br+q7g(We=Bi_2qKzPv5G(}X z`3z903gt*b+>r1kTn*06V_e??&!#Raq$r6zpdxs(v_TpwvL7UlcH~_(;IU(bXcAau z&@4CD?b^W)aP%NC2f?41#724qxKmMk^&L{Vjo7>0-Q8qFbTVADfa6zlYwIHr^hiXI z?d)bhCr;Wjh|=tjOia|qYT zUA-ri@xA2wXGY-qIDiYNOC1d;6$8ziww%Rc^MdV7R2`(+$UN)LZrS}UtFcj0%4G9e zT3MZHOO&;QUWuCj<5VR{#K;S|7}5XRD|6>wW=kTgz$=mowK;*aczN{V9# z>3)r2ZhK6kA$_Za9t0HdpIS@iW*Gm}G@{hG%z#UK!)KOJctRuQwof1al^MX~=^KY+$TjZpI>uc?WTy z{@}irp*Jya+C+NMVQFb|jIMJWyvl0g@$KNNjyGh68J9*<5=1b;LnZhCg>9eP!bKuj zK-}^SeDe(Z5(;b$+(2j#WEcl})jsI&t{xr`?6Z}5g$4(K3Iu?xkX|OiF;q#QW}ibt zu4|wG`ozYbc}d&vB%x2oitQ!_pYaZfLqk5tY&_zbw?KKxHQnV9r-n|3PcvNL8aj?M zDI>B&J_P_icPk)3P+FRce6#?#iWC?m8Bjv}vWNAttjJ@e+qh8$C>IGAoSXtiiXvkM zT0^XobKYX)cu)~@j7tL$bH?#fOG-GwLJ36SvMZxHaiWclk6lcZq^4jg4lwCsIF_6p z+X%#@29^Q2I0W6ms)%i6QlEKR5589sHU{pPB%CpWgv1|S9u-{bxw*M2eBB@bYs?&0 zgwB~WpQR_Xv0>MfZwla(060E+dJ6nWeAfZ;cNHoknAa^ZxZ#whXxMh{?ox<3v%qoz zCWfVxTv5Rb+RuTEBJ}#vZ{I?I_U72Oi_$#Q!6L)>i373>GY#KX3tWazY7PG7s~TQ^ zN{JFJutPCmW3XWIn1WTo)h*v#W%>lOpM~~ZRpjeP>wKL3Z2EieQ?XY)`l{)nF(t_( zPXrl_d{X!8G2aZ@=&!J8>(Poz`Q5(`*7?#8gnOENo@i|4al6hmbfUt=vl6muc=N}R zc~|D!D!665ZFaCLlJLXmBr;I>(}j0?bkCjJfQ3wwq6one{}Eq8xqLapy;lipl2f&{ zYjJ65<(z%c#G8A^wh>u^7#NsbgjWp)BFSM6vEbJK%oAdJmMt`i;_DF*OO%5{1_wA< z>DR&;_k*}i{7v%R3N#AL4y`XXVnkM(gzxzIDcH)&$iS93eW`b{KDdiUN-0H$|Ii_l z)F9Ubrt>>_M}!7GZnJJ3#MeNQ*M`S7IT-|tBN_PqIX=ZvP;d}l&*3T7Zw|Or;>iL@ z!tR7M$Ar}YD|-42hJWjsZ!!p}x&8N6mH%GH6juMDZ$@^MhA`@W6$-rs<2?qyE|6f7 zDgcSFcX04UC5Mbfd^)(2#1QN3jC}bA>NIS`IuIBgBcr>BN}zfYqX1>%&b71mc`6-(=zV#~hg5*z4l*V~VELapeKL)u6&bh-=H_eQ5kp>uE1PdO z%!|1|{Q30h6H@NfVGVu&D)oFtw{PFB&)k`*_f*`~y*6JdMFHLsgz$^|x8e_C?_}N+ zd>#XHhd8UB3dCv%-vswk5sXkfdEMDrEs&AN&|+_7jE6I(AxDJD}XBBs_A`A|Dks3}fU=)S7;2)=+Cm zdxWpN_+ocBITif2(-DkFMiw1Up3}P)4G8b;{SHG0`O@OzOxj$#<4>5Gcd*jmi;cDN z{B9eE9UP!3q70t~w~(%z>CWlu*fBS|#=H6BwIB8|RcWT%R&NZIoA!Z0{m-Y?VUG>4 z1e_;vB((h6b9UvAY%}2;cNMzJrs`&rS5i3SzCkyWl}IdjNl5OlAs5Wl%W$3_kEx2I zYnd7!{@74#l#|Ft@4rt%f`&5p%hO3T4a{cxX6KxX1w3TDai`$dZ{KPg2RViayr-gD zf=J(yd1|}kbocEqJFWs=^_Dn^qGDr|uqagdaMkDUc{cr$gm!>0DFe3@g{HrJqNsjspy zXJ_bU5+flE@mc7yozv&$G`=sR56#b%&W@pMa!5Pxe($hIT|gJ4KmPp}@DME&mOR&a zGn|QZW)xK>xn+ujg98~Fe(#{ML1zc6E5tp42n~A-USIuN zAXwscFMmBuC!}HJ_c=}LVYoNi-RzyBsD74$p&=*vvV>}K^vPhK{H@Rv<>ch%a5F%# zc!=>GWMUiU8VUI=W3as_G|lA+bGYP3GKU1;_qp5m%R~*u4Y$IACSPd6w`i1L*$-}e zO;uu`*>?{#k!Tgb(If>-4qA=ayg+O-c{{tPxl`DvEUzQ@1p7i^U%fp(iSqEP;kolf z+L`XoB;h+yF`Ty$uda_9)ryP)AwmV3Iw8gArU-PjjPHSnWA+V4OT*D#qR8Pb24IXI z(S@WhhbN9;jc??Y;Q(|m!eCs;#s=|SH0iBocSj+7tDw>2GZpZvtDS^9RbVsM0eOUg z1Cfeh)X?GKcr_^htPC#`D1yJH2$`lQw=s!lV1U7|p4{^%R27cv-~qa=U%#H@_z@-{ zVO6}f$5;X?1T&vy7E|{3nY=|NU_F**>cA6GIUAk07iiG$h*tsyI4erXtKhUAU63|< z2J~4!6b!-=m(r|Kz;U;dL3L!}_F4Dt!?7z-Zxs&61aSlU z05AG~CY^-U%^dOXQ0jEQ02Kvsw%vi6tqnATqG+#Np1Q?yNgEU@e{pW zWIh^AzB@WE#sG{J&lBSm^hi?RtPRPhlvN&MgQ}>jkK;z6-;PLy4oM@b29GNc?^u_m z-K<-XUkV?`;l#|E8dd{5x;iIr@FR|~iwatca)0nrWwk3$}=+KILNC05=agaU)^+({K|U#BC$FNLW* zdQkA!ki7Chz;ebXL$b1VlLRaoAqFYOZO8j)Gq5})K{z0=_TN8g0gpTht_q&n>@^&leTOBLmdsD0zBG`DO_AIKc!!q}9YK>^!EJ1% zyn=O6&tZdLT4cG?##fErgBUd(cJx~{4&-Ikke?okt?PGv7~nGHoB zH-`dx7}94{PagU&z~Zu}9O5ig3WFyN(O3VAB^avXt0B>*$i9(KC!D5q@mUW1 zXh6&J4SZa!-W^`4VJnfMcRc5x*w|aN=0Or=dz7W>&#I3N=<*bhH%Ipwl8Gb`qTJ$E!?|iXk;d? z(kKm=D7`4?Tg~rYx^m?}^s#qF;zu{PX9}doq6&nepY#2^f)P}U1Smh(H?uAK#4WjNxAq&OEN;AGYGONilu&^V$|Z%tl*I6k{F3ob^koWv)Eh&=c>TWTrfZOj&4ac2gGYAn zGgJo3lNCC~lNfJq0|?`iw5|d+wEwd_FOq!nvivMSfN;58Jd5OBmVX6G`aW>!chfQMIz+Py3ZmIKfj!>?;1t zVM8G#PRZ}!%QPPN7<882}{ko)qv2CS0T-3B@C{1&!Vm*p7f2^1!vQUhu7k4d5>bM^75@{oWN#52Y&* zxoda#k|84|fiP?drFiJ%fI9pe7A@*>k86#zrAQS;VNxFMuBAYZnGc&U% zbdzcNeB4ptl3y_was`=9@;NQ>df?x!TtG%^sxG%BwmOtt;R3%KpLg9YA#v|rclW+} zj#B7q2P7pgqhPKc)>bc4dK^IK#z^l^jXZ%Gz#f^4LS9P=00m#pfwWMM{}zQVtKiE6 zfb=B53NVR*7RXM{BqoP^C(!-K_bFjtxm2u&>lpaC1+NM)rFwimSvRx$JBfRYj#itN zY(K84`5N+CcNZ43PNv?zg9i!F85tR=GL@>?wrlm4Odt}0=GZnmyP_}LwPd9Nh{2Py zkDHkq8y`2dw$4h-0e08Tyk)ef0m6j?Bu&OHmOa>b>FYU&FuVo>jd%+TJEEJ^ns-DW z!}wN^pQF}we76D8!a?^W@c=a1n*RQC&o*qeJOaEDE<$GdURi%wX!(e7B$61x7lZUQ z-Z_HJO`^=5inT1-<_z5>?!XmotJi{tHw|fSNmOHl?}K+nah*OugA#n$VNAuU?&=GO zUI=!|I>;rp(VA~Om7!UU@8=or<>;WtsAX(k9)uRW@Bq7*jD^GL2??%a$p0p(3mSnJAosImT~ZsF1;h}KmJ7p_ zH~`GT159=}Kp7?91p-63g@RK52V4)45Wh6X5FRHTAI5hBNDp#&s?o2_1QE@WRTSMGA)7G_u@jghtD;8s%znZR$Q#4>0)=-AolQxI z-XmQBIXPZD5Y#0K_8q?caT5SE8Oa!<1|h?nIV{YbnxqsWT95|e1WXHw>ybZ_oq&;q zTjLtA70JiF)-iCtwiHSALSfK2aDSgEHIZD9>`KMbMBy`1C+L|%Rw0_19!;Y;ka_s! zdX8E+oC0jYWV|KB4IlKnrMC+yFv2+Xc18alUWY1y<;?j7@7GeMl1yPk69$3BpSXmC zLx$@pWLljB0)&Or*WNglDoSqlCXoh)%8B}bQivcB340Spjp-Q7xx)U00r$vZ#O^4D zPLLQ1#E>`nASb@kgztI6^{hh1fy{p+ya})hJph5lzA0I0b?` zQ7I-S$k%I$l7t-QhY!CpPY7)uRP^^@rT`ehQm?~j3f|HF`})-yWO$9Mgwp-s;NW_y zm!nXf&-C{7NxCO0oI_Y94Wlv$PyMH3pufXW&@nKe8|kol4)P4{a}ysXAmS4^BJgs_ z|5MYM$K{x=ar~)4MlsnH71Agz7%FjSnli7Vg{&<|j!O0xGbY)#j`Od4-sO4j=e~c}Z+jn3Dgl~?r?Dnv3MKm~-fgnU_-B>K zlP5<2;yUpkh4CiLE$ViJjs`tZn%06%3?t|+t8ZP)lOG_Eb z6yIgO{vRjWaP-&k*{ad`q+kehWE_xJRuqvlc+e^NsQ(=?H)mDPG+kjI1^ zwLZ5=lu_#mjp;+~i%z>8m&eX-4@LuHzs(Tf98ABfiU`-rOP=u?nYzj{+ep~TRXZilNY=`~GTP6TUb`nMsUm!2T zlO%VA28A1jnoX`>9sZ2Sn|1Ib9Y9(P!~))XT{12uI7(@2`WM#odHTObcF+i1P81~y zc}6n9JP1OM_IKH~TB1X6?e&WuuqDguaAG~@0~0dQcCHgn8fLPN%p*AnKfhgAl_o#7gsLbz+(A30yE|M>6ZGpb)GgggfoN6VY z)#qmJjI#tzg)F5~NlGN1&bZ!LLa*3#+2S=bCatBewHiakD0qb<@q4Uzv@=7Y*9DG= zo4iw}VjNr<_0#p=APdW7y85ToJ)MCV&Iu?p45_^oRUa@cABy_^j_ce^Ir$^*?8=77;*XM-Tm@_pQG zK%1C}td1fK^3&LJST7+@Z*Qp|?Vyc^*@vT0*lQl42{3ZBt_AD==+9l)c9cD=I9^{H z^Mstc?Zz0Cc0&~4xuCGF7f=DVzGm2OE`yuyqe|Ty)~4(S01?8MGdmGs>(a?jw2VQd z>|Z=un7%q=jME_WDU&-Avt&A)oL0UI1wIoBu^W)M3&n(k=F*L+FmXKUSMQkTuV$3} zz@=v1EySc`kT@2eYw9trQb=8iES4tLv$~|gt6}K}*NTOisSHT?Xysr-hS#Dwaq|11 zq%-ZS3Q&WK5Nv;9LfE_P4NlX|jMWndBsL(mG2d{Nd-=v@Od7Cp{rUnP_X9aO)7ZTI z;WmPh`WoV_?y3p*24Mr!tDujlmbdWht{+6?Ut zlf9&>5}da20#!-hbzAH7io<6Kd|=n>%!uE=hiB3|*?Ms4^;;7BeguHp?f#un@_tCl z^^=V@o&O2H=S|M)q0Jv~Zual8hL8D#1_llFvX9Ma#b)}$Le%&e;;wbn*7j8h`o7=# z?Ri8S+9P{!IR7y25glH!)U_VqhW+q1Y`@phLTpgU)_mOL3!7Ed85NRv7#`7-UA$!c zO|Iat?`89`m^ppF&3uYo!HP zL*aTYd)(&_%h?iQi(0+9J#P4{vJctWgE@|6a%RUIe>clL<1`6zB&e6}%>q(|95UN7 zu4U@PH{P>ya&k^VF^NOX#rBs?EXB;HP=e?<5B3GCuoXAj?Sqk;w_w4{1^JfXe#Jst zs*I6D=JV(`E;&5sj)V*+4d{+nYz>5y53UtGt=EI{nlvM9`$9VXR~H(;XpV^c;?8B5Umv;x1SHd@rbrgZFDl z0%;0cK&F&rj5aXoG;a3iENl%$dMJ>QEY5p-P_Q#HD98l9O6%U4x86Sn8jt0J)ZY5* z`?Q~KnL!O+JvZHqJ9W{v^3*8}fLq;TfYXSuqtVftQn&;bl3*>D^-G+3pN(^hh-&DW zK6LHWHA-%m@S9K>u`22UH5IHylDAgNc7FD(q7dP<9Wtxv!^$T~wvVTD&{dQ#-^*_V zEY)MvRL2^i)PPcjuR%VOJF3&ywhvq#Wk2b~VT97xVyB;#u2x>e!UnnH{H6-`ydZFNBT~H=TgSsc49v+uirqEbjt?sChOp+#~E0^K^Rxszl0T0>u-rqI3W7;#A&)exo z0+xe72N$Eh6ovk78OKo^%GgWYQ}wW7^B0S38uK}A*rFY|zA~$EM|RGw%%co5qTxvU zge9*1bQw65MInQ1BK^a5ht*uvSKJPd{J4Fu9XT{bVP~q-d|!ImzSSmSkZ$`+c}tgs z#v29pdSPm09JTWDq$|9f7aMfcy2(tCcO^_R{of?4LJ_kK`^ci#?cIBw z-0O(y%#4gO4{7w2UlyQX-k@8DDgyf8%T0e@y9YC?>_{w%+KwAq%qa*XN8`&MF;i}n z_J;Y);rOSb>W$-=Aa@eUD0>g0fT-6N_F>!>GsioB*%ht$JHK*nD~gSh;B;r^4mV>7 zD`S#WP*{SMWKRu9vD917?wgiyp!3N?ei!uorcXZ&{yP%C zD~w%E8w+;r5>!+J7UMd*OT$W2NU^R@$T%#GrHf?j2DsW>~PnfS#viq&&6@e~eQ-70U zxqUn#<4k%4>pL3>pnyvXf(@WsbZruET5r8=@4kK0G5btouyFPAE&|lX zO`G1{cqjxq-JVVEi*cxI0Z!q2S(@?0A`Oz#5~;y&=FGp*w~xlBnAcv>tNIG*&ieXsEW>-G7@XNuQ`YT#(0cCSg1A|oUGF0Z^CvaV znL)3Ia-p4gd+TNDeW&n_NJtO0Q-avoB)y!=O}(tfv=7Wi3-aSP2rzm~O)nYmO*?K0 zs>2orB!n;%L&NCqJ$l#%pXBc~Prn&Xcs>q3B;L{mqxs~>96PgjcyC*91EE!Yv5F0(1(zuy+@35-l~Px;CZI3 ztTS*;uRfxo0K53w+^-Qc;SnCqm62f9nm&Y?u&7BRH2c)ca*-me=OiVu$Ay>1*0HbTRc zatxKCv9!~UVak>e+pC5Jgy*? zEy>H{kYUb?&d+c4Js#8F^cnYn8URW7it$uVD?qD|*Gzl?4k}aOt7GZax=(`#|La7! z$7ozd^aAQgJr#Ur)x(F53zH8@c4eFGQayOfLMEa4RfTO4sANFp;3{S|v+iBH-A11Y zCWp%T}f{S?C^&6z7HPX_$B2~>YhD@xGdylp$B$O z8o>Ll5J6LXGb$g41F=ctJZ13d36f>wy;3=u`9w<6mY3EP(KNvUCNdt>;nc+syEp2_ zJt%X0_t}Pli-4hd6kIem7xCl}d616`X<5t%IvA$g`{i_D|1okpdwOmy-a9S51R6~$ z6fu8^nItXE2p$`5WA`@+9-9#E#ASh<;q1kWPMFt)d7%+nn3tVy1%%i4pLp4UMRE^v z0>i(=>p?PJB}RkR2$3$FK2t`cU3S$DR%ZItJ?vZ15%WSq9GIJ!H`F*1XlX!l|57-g zoSZ1=wpID(TG_xH2#N{sGI{b!&T&b%|1VOGwc<4A7Q?%iGT7XFIsAx}EYgSwTnbfL zs8m%=hYd`lJ~7LQV-_m1n-6u1Pcz5D_Z64=-kT2wC-l?0DVm#7j@>)LY-(c z5Poa(=E%~TS+{MQOCNJMoyDvWLfg?Xt#Y_=cohBt6O@!8a9E=O6LIF3j~1tRF`?An zK57m>L?A$~`w%FtV>`0^nV!qXeQdsPysF|bEZhI>5Z_HV+Y){ZQU&l{S2a+>Vyh)g zq?_?1*WSH)r4K3)2()e;FeBX4xZ9sU_vaFM-gGy(wVbaOQ#&-ge5J1a>ptBz?c0|L qF=^mX`n6_LV!dvRuKkf$%R1%S&wt-N<^w+-rHpZOamcb)NB12}#H*qpa)|GO{CCWn?7@Wrk#jXvoM8myrs| z%1p-dKD)nv{9eC*pMReF?seB)oY!$4Lk*JKKweL ziW2{|uH0USpD2yaE6S7BiGLqgq(_lRdr4>IPix#tm>|3AY0R(5Om}VnD^kn-_dq<= z%Z$B)k<@pnub7*s{fH#@8uf@azMSm%6@NvuxHZeVtY<>!hjlbH-yv(>$d~5adpA;i z%FCrV{^{>x;MJz)tr}`K0=}=_KKWyVLsQ(WYxDGv^5&6*<0CxW+#8MW3D!HFJ?*SV`D~6 zPJluxeRO74R=ZoMJolZ5h=}RF&Q8Yq`g%Mkuc*i$znO2Byux&YT|-wl1RvJk-u}3< zk|Q)URPFG?nw8~cH(^mx>SjF6(9n?UWN~4kUtC;VUSdIh{++b6G=l*-n^wy;-G+vS zi1>KwIAzK><$%^!_4nWH-bg2QcJuM^+1lIt z#K*HHBqR*}{29Eq`nR^Bf%?FK0}^-djD@LZ${QGPjLgi_`ud3~MtOO8JGSLTMeRyROdR_8bDN@~;_H%<>MvjRBqb$H&(Hhg zRaA_wEvJ4qQ`6QCvh2!9A-=+>VcnUwGMT)x9>f2gnDdj?0r*{g0*$dH0GNz zx}cz7S8!rknIs!K`_Q*?+h*r*C9v+Gq{;QoI#p9F@+-x~|^r)XvSLuwMvvWXpc6PXpv$L}-fAyzN+dDfu zbtDbECO-G~4R@Fc6N9~9uc4{AP0?H@`3S?dl+ohJoBevD?Ck7C_Iv^YJGi*GsJ3nM zDJd!8kiO7)@ywYs?>~K#S5Tm|x3~98D07~8l*cY8B(&4{WnW+4;OHoomX=m`Z?7Dd znB6`U121M48$|KYp+ z(0p`sw2t`5)Ku7MUrH`vVFp@STDPTrWxLN5AA7xd^JX!RWnuM8m#~TNeEOuUSLRGZ zx@Kh+>g`R+mN6NxZEM?gjy-$sj*gmP?=-gz zExK70vFzEiN6g{Jc0Pl0UWPB7iN{P$M|7>nv01qoU8)*vH4g7K5xNq88waTN;Snvr z#UD5e;o&>5*4!sgvIzmtu+jV}ts^*fZrH5YmE&A)%uL~SMUK`FOw`12X-`H5@dmK_MIVcRY zCGQ%Vn(FWFI%rEgPuhd!%9Sf-=H_<-0|WDF+a5VoZ)X-Deg6D8=_YpkwjEtS*Yg2RCSn0E8&&oEXJ$?E(Cx=!>+3?8{H@6O#_t}hXJQi^`KE_)JV{Oc zJLklXwe?#2livA?4^;pam5Po|QBRNUfNIh`dL9MG1_O^RW4VJC!Ym0VU3d1rvZ`%L zl)R{>M#|I(wi~YJl9uMcU@KN_Wn(gOa~0LqckbM^%WkxV>uGv=Erx1bf3LdGyiLIA zr-qn>1dGHcYaV`nI^sF6ta`&EBR57U`C%7xjTjjjEzbqy@CdWeV3P&CeJj83z=5xC z@9shYXBIZw!QuLiUO8Tb2Jap~ThN=4on6;o;ht~RE5asbOPZgbN0B^p`}WDN!^48s zePTGltoX3pj0~l2ap~Jj0_V=1YstMTb8>0WH&(=o4P6xqau# zlq~UFmGE4dQA~N?)7xwO@xh_kldeKW#>Qn6e>#(%7;iH)GGaQq!(EtL*3C_V zmQBo?UDA24xcDCROa?lsxgF0`lka2qFZk&^A&s57eto~kpYJgYEG*|pykcUpnmKxB z4;?yGYIc7$8OOb+z?7~rNh*Hd&aG&FpXbKf4hafI2(yHy)V3We(z$T=?pZgfP%X;kW`s8kzI(bLnfXNHL1GcOM#|PNiw26I zVb_UK=Vd6o{m0(k$!UMSwsO>g3r{e?mmC}lQsl-Ru#ecq?fq_iiakv{HbTj)+GjJ_ zgQwg4@WbG>+2nOK4Go43kMB@yX{}nD=Z|doj-jf4Y-mXJ!k>Qo@k7BVKej%MQ}yrS zl&GWj#MmPr_pfiaJ`y(Hx#98q_ewpl?TmAK^A=5*_zj*i8Qi*)m6i4W!-u>59*g?V zb8K4tb^Op` zNRE2;?7;H!c1M(#J%#4GT;1FvGcylK z&3-ywk<-=Rn;_|O|Cw@}EO$guO-&82py1~gE_$+6b#_L^V~iv+KVJ|_)iPDJ-rsRY zko`G+TeSa{FZwyJfIv95@3qnJNu|`!EdeyF@4tMB1H2e3XqM(lQ#T*Y)80~4SlH4m zv#z|hwq|nu`kAT9zt&;hU(R2+kQTwC7>TVVwb1gabTquDqrF}JeDqOzKEHDpE}U>w`o;09w>XS) zJM$?t`MD1VlbN4eJCzTLoH+6MJb#5eCS`4HvFg(0%MbY|jNJ;o);;YV9C*-kCEORX zQffcHbQn>2Sx}HVJ#AZRJLHp6YpB0ZMkblqW`G%m*5p#_qT!^N7{!YhFPaz|*CqxV zJk89!ySC`H9z$#iyhgz2HOH($e7w4&qwtsR?iR20zgP9Q)NMb-7emeZCbw#>r)4Ny zAvNWCL-bb3TQoE@0kn6W@&Q=f{!SEsc>g}=*@gJdV=DA0uaSv~?Ee1#pT9T6(6H0C z-7$W5e=GYvKnD*Ij}aeTb#+-^N*Zn%85xx~203YI{+e0Zr}2)RxmRuS#BjhWrfaB8 zjErRCMXZFh*_35G*H>L$78RwT9;q7}r&fPmnkXK4-`AILqbcEoprBMUG)$@VF``_+eq=2imBvb&SXi8sbp9ov^jCRZU0wZs ze}59@$|gK%66I6@=X$RFlIq3t=R^8?O_zb&6!8Z80i8W|lXaQ&LKWn89Z=qj@gF{X zxK&Xgi}gyu4H_Vs&luob&!p(UZ8;or=ZT)QF6X$R)tPsks1u_oYn_&5fByV2v9vtz zFw*qhsP|oaS6AEU=Tu^A{QUXTk(|&s+K6v2sEC)&(JiLE>iK8?!X!Ba=r(b{#D1XC zqr1vW1|9aIhK3)u6{}nzt$$kD{>aG4@COh43JVK$rM+2=6*VfV%xsbV`|G$~ zh1>2scka~C6o+w0Z7b2ErK1Z|@!(h%uxRIf`s~@)HdUFra@W~wnA}+r2YY*>6CX7w zXXY=T_G#*+DD~?9J~A>q+br{LVWRh3Yo(i9PEm=c8jh}McQ~U|LgU&zaP50V|8?N%vP7@H+jFh`9)S< z)U7v=j`INN+r-2LNtel|iIOg@o!PqP6Fo)3*WQqbc0*5(1KN^tG1Ux!Lf&yWxK>sA zj~}UJ3+HXy26NHgN)O3ndauLVEs6w=2?}0(?fW6#rt-_s;yl@9;Pcb7QKLOYmZ%;F zf`fwg`pc%ASjX>Aw2!3l+m|HZ-AC(~uC||d2fh(@9966ElqrxL(Z6xy01#$GVBnUx z*jThA2}iTQ$%}navy*oZChfJcv8ni0jTJnN^XxeD^-N-M$8*UMJv%!=8fF3SojZ3b z9e%izq*G{iF|c1twW$QndwUbp=lJ`EhLE5jVlqgh zfS5WB+S=M6f71%3s9vJG0S8yB_2})H6wJ5j`CWh)Zu2J3i>C-+| zR@TT5CN+gK!|&R##<@j(*b6;w9!obdG&~J(EnzkC>r25V@B3`xPuiT< zFk~`1*C+sjyrc2KC7g_dq`4l;T=Ig{{9Jh%01ts5>O)zbdhRQva&dAdizJ8SIE^Ie zme}vdqU`t_TCIi#@xHb7_x)m@i7|0Sd12>Y+HQk)wm5$AD{oY)l=tKGTA`mEX%38w zyVB+I+=h#nmntPCh2)LuNHTNi?(FL;CwG;Wmi8~~FmbOHDH6?2OWPbKz07-~@lnT( zq>>WxJ~G*6m9i(_aZG*FrcG};IxfdDva*JtI52`~)f8TSp*d7Ck|gKhQ6Z2`yz-|+ zNhar$XPOh)airEg5K7Rb@*r^cj6CV}j`{W6?@+0B1rlGG6o;<0ylt-~~uYA#)(S>OPXrokYbiinZViWu`kA}@b#**pX=zCirgveSUFMR+f8ColkVU6%T3HFA z9Atkluc#0-Yk4f0)ccFZH6}dVJL;gSY_{fjRwxQ;bz9q|so_QEt^|sfXHjWtywk1c z`N{TM9$&V3U$S-Afm0|6U@;Pwsbn8&QgwB;gk_-==xtC~SQY&P?X3ANCA&fLu@)&< zKa%aA-`79I3R2(&s(~0J*lW|1J%iu8Ii<8O(s`u#fVCpuUfv@|e-5JG4@@zVz zep@#uNl|UxYTh1w>BLUNGHeqhU{5@#rQ!^A|O!U*18vdz*jW0b*eKi1WS z;kB&3%DGSt4C1e}Pxjoo;3k%d!zhf}dDk|){kpy`V!c*z{M`BT8wT$O&PzO>XOq8v zJ*{)DQ);0qN%Ut|E+=UaVDMMuLRl+h4L^sYr8^lJ1JIJk3PW>@g38^;(qi)Lzn|Mg zN!jmQ(jVzE(v;{MA0OXxzSTc3Pk;~(4nI6+YUQ}wV*N2$WWER*Vk>T#1?df7D8IZe>G-f`LJ*}~G_zjFI_rj(bLQ$H!STdTp; zG>T`etlTEOd3@Tp@LBPJ^nwD1n8Ja}pfuC(IK4(!e4YVe7)|w8fV@grrN0_%YXFr}0kOiUD^)yRY)?9UX@2YpcXpG8^e0e0Waq#wTV7mIL;2{SO?V zA3kbP06=~xqyUAhaYNJc}o3gO6>j7a!bpic(5t%_?Febi|+>3*IW`;KTDoZ{Y>by zP(X|IA>+_`FVr!zCyJQ75AQu-IBsCz&ScLfZoldHjZde}obea1`ttI++KUcZ?#_;m zp%=N8HN7@fdnQn+xlPudqNnG)dcT8Vmr}u zBJowno`6qf{FO@^2X4>OfCGzI_XW|gid+NeKxKbvd%vLgyS~0Ydy?0h>&fXi6chac zc`qvFJ9WjJelk=p|2%?jelg&qII3A=yl84(o?7g}->It9l#~tFr{x0K#oyUTTSukz8eu4DT_@%2T5VNon8C&kg)leUqogx^l0k zk0g5~k<_5>fyRcJ2*q_4>X$heNrncQ&HiXKxpAX&@l-=^@!9t(Rw z;0)LBSzAf_4<59v;1d^*o2***bX#9tBp75N(5>^pFyMpvhsgblzXhE#KYl#-dtu?( zp-az3voEBnpB)++61M-Ql-+B=63jJgT^m5N=`lGlEG$fb*UhG(?9c4Tr7F+W!b$C? zmg`IJWYE$h`SeSl7K_-_g^O$H=$u~Bg*YL9#$!6|H!^DaqaC);vh9sf zdK#ABZzFSZKys>F4v5ghBDU~s)Pc`{j}ogA56_*^$~CC?z5?-8_r84O!-r=8>mNi# zwT(GDL2r*%2;;agIXNj&l{S18q#?@Re>3JjS2eV+#?{5x?{I{Ye3{D>06U~khTXf* z0HtcEsXgv`MUz>P&}Jt5ATjX*Cb#PQxZ5Q{)e8sbk^1v3YT0aZVY3#9Xb69$5*&%; zj5Tr%B-TSBBAU6Ze<7K*0l2>?Ua(Hj$+-`Vps1pv@*q0;MmRHA>?uY@#<{n#H`t_1rNHhlGV6pji2x=5Z^ljCGI~RX7`Y zF6}{h_~Yi&Zj*YpcdZ%7CoDU|LDi+^TI4*8C}QK|k8F2OI|5{oj#D~b7<#|wl@;sR zG<6ms6DlweRtYw4ZVA`l`v6YEi;G>7zmOKv%FEp551)~j50~fu{MYtovAU+F0z??@ zUAv;B+p0&;i@VIy_B%97rFYTKWTb8`z6^1Y`2O^C(`1R<-uQ>n(XBwE#Mpf{Q)lGo zLrO5e$jg9{p29wvYxY`;`ls~lEK{^7dIZ*~s-F2EfR??-%6VFc+*i#EHDvYANHRfi z?*Or=-@5gh%VkG+?P%f3FLPX3vfD;Sckt4sOK!k*^W@sm2ba=SE^2Et6UcWwx2k`# zm`Qt4OHWT3NQMjB5cROQV(|PsiL17{P`*Bcv3+Q7e?Sxy0+83dj?KyFuvi`n5?(@8%ea8ik06DCRSD#P*hWKg38Jg zf=i2idY?l-@Lc<23bM})wGO0e4uk}1or6|Z64}?sXQK|tIUzUQy}K0}Wm(rR2L0Ez zcWz)ALDDyF+-QcuYUt=#jOHzugXAn@d%II-z-GXCI4u z^r)36>3ni(Ar(fNIZofYq`~dj|($gVn z>7>e~uhh~#Pj~=~=b+JCf&g__o|_PSFbgcd6r>XOGSQDYJ8leB@WpX70x2hCfo;^( zLvX`T^9i>Ago5xg`Q%26gpMDNfJ5TDh3+f%J^DMds5Kk$2Fi&NmzhUP_;GA%0oVvx zJVDBx5f(8H!m0m6{-KEpE6C#e;H%aK((woi(qr!B&Ys;07ioGlHOvU=r39yFiR6p_ zIRzenogv=QLAqxax8H}d5QYVXA>;UA+Rfv_rAxb1l1|=*?LgQGgt&rvnrV@vGsJV? z#aJFcB`ccN;#ys?hkoc&-0|)rOD0;5lR+U&{59R(%!CmH!v+#;Ahcn=H^sK!Kkfzx zq*T1mcG}2@8|UKJuU|XBO|i#Lu1we3qRl(TlAF+R;LFewsy}=uGxz}B8z^>ga)tqY zUR#Op~Ng_Ov8h>hdda}yO+YczmdS_m0-*^^Lf&a1q>96)oJs>m3 zjo{?f!XF{#7qB002oDTcmW@c^$btIq-ExUx@@VhVq$KW6Ews&%L zQhwdt+KIHO={(EFUVH72X&_7;;^P4&u=%_Rzs}c}9&Q}=DG>`eefqT2Ol`$Mc%j&o zr|j*8ni9l)zkM@#5ED}i*At_ zEfqk=c^{6DpTB=KXiLk`&&yzB*eZ4lzpijiK|SRHG_@T(^|r2#i;s_XxIT0^@vF+m z_wP?(o?$*kKwtxB0x#i#t4n+;EI!oKzkLx**qD8NdQYD|rRpsKmIcJF1x>@2+OYL^?1UjL7f9|X)v8`S^6;EL9G;=D zwo<$I&COd#L3Kv2%gSouR9042PS4ETLFXg_5R;R%sIFq-;-`-?vhG_0_eOjIpy^3r zA%jrdc5riaC^>6uKFH$!)hXFoSv%O+j^6A8>4pPK3FX%JZGQ{|N$Qf$0s)z)`hYwrvHnxZcpc0)n(#q5bZ}^T7gaS@K*knX~oxWrr@OIt;bb9FD+%ktJgHCj*H|v?Qla6T3TA75+A`wKt$%6($m+cDdovf z~&LF5E^6*P^TH*a1uGTKN@O`RS36xc*W0!$gvnVml_ zYie$MZ5JVLWpxZDr5XYiKYv;fi5SEUFptn%b?hX?#jAUJbuEp_%kChLt~~$D9Ih_w zref@qZRI|6JNmR3@P~7tjW1(oZWjg{$M z4+`ODxBp+t%C`DC(pGcc)O<)|J=>yZy9Yl@4H8egH@1OE@BguHwf**`~2MMOD zA3*7D1Q`-_-c@@1opfcd*z<3^ON&+Oq5IEALr%N~9fyz>Y|#K!vWLNu%14~$Zv1=8 z6l@b)2M1s6qXAJ-Q3Bb=QFn-k#O0?@T)QWD;}$yDzp8;kMnQsA*OKY9Y4hfkXU~2l zb$*VwBPK3@sEOT63TLUPsJx4dMNghQNg`FPEx%AxI>Sx>OeJyK@#Dt{^8~DZ6Nyj) zkctRY&%Eive(&Fm_l-ulxw>v3!7}Z?HB4h@fIJ0}n1Gr+jMM<4DioycdxSOtD87NN zj%|cQ&DOi}t&QFeE$u(a{~}Fa!fUVX0w0G5*WlNtg`P_$!?|C#$2wLL#Q`*0%`7 z?QEButBG`;b|4VT7Qm-u&*ekl?tZB4v5y|HNMv#N@;@@VC)mk_RvRnn!dJPvpbL^x z{^v)00Bxq?5C`^GKCSzZKT}J0chK+O_CwEabPE1k2q$ecL*x_kuT^i93v3;p=DLnu zm-rjL*fhnaZ!TMz?^dgXjY;)8>Qh0K57)n>R zE837-NH+ftdip&QR2W8SS6|eQRR=!UW1JO+o?Qp$=i1HN ze|_VP@|httwOrEh*g_0?Tn=_6LW>@~*Mq23PtN!yasQMgj-fWE5Og_oH?n;t{4XSO zh9)O--eoLP5^K;Ih^}}N$^vkLxl1Jij=+C&Y?lY#FkQeBa7IN&`al3Il5Bqeo(ftS z5MYagMg_-J;W}nxj|(3Fuou_39MR&x0Xnl0U+M3p1b^|1SiZdNAW{6c7522)G3uVYOlv?m%r1s_Wa*#uPiQa-GR8@)W8R<=Dr{-(>J+JLvC_>pId54D5fIShmilyba zZa+t4(fMkMmgDu>q8b-3Zul>#iEXK&q0#r-6gH2V$^4f<p58BC%%!R>slG-Q3r}@&azZ@GAK6dzZ?MOU zT3TH1preWX{_j9^f-%R`=%HN^wZn5=IHergZ*% zExdX7<-V9tWKy0W&NO;g0&fES6}sv4+??Nw7cVpgk%6(g#O%((?IW9V>g?IOd6x+1 z85R@p7r@>Vq>hjcBU#TSR6rTp_-kY&kVq%)+qcQUz<|T&OT24xh6m)=LZm z6%0(DRK>8bP}~4IaK=t)Xgru^7P-lcSO^Wfb+W_ivJ*lqw2;=sV6zdqJAy4-xx$5b z8VC|$M$@BM8you?9Q;I7PiqBvrL$+Nt9`fFElnE|5kssok>Ofh90)_4f$+wWR|%kf zSd@x&gFi$(m}u!ZE{OOcGI4Eg^fU2bLBkq95uW}3b!ba-^J-`=`A*}Si1|_L2d2qtr=B}Gom{u>oY@zD5q$cLzd#>;bK;B$V`*2_eh z?-WIPW-o}a%*r%{=jyx`il5>8aIRZRLxFpvP#H5qsW%jLh7{!&Q*9+6a@gU za-7g+XjrF}mG9*|`T-0vuA5^}5gf`YYSi}Z0v5Lh7Q28|k1#=8`$8ssTie?5-50HI zetEeWbO$R}e@Ru!|4V^s?UyelN8-N{k5~<&LPtG92pxnS1;9@9mmiJsf&YlbyVAQ* zq)-oO2y^-3)vIAJ?RVss|30TR@8)_-x?X!KP4mSJTK`_;M~R>t@>O{*Qw9LG#`8Zr z^lrvQASCAiu4RJHP(8p)$T!d%41jI~QFJvmgb9j1k|3}1;2 zQM0%B`5u(P*=s+QO;J>#7D;946m7#o)gqA67SZeS`|`(o`ANqP9opKKt{Mor+xb_w zVUQ$MzVuJDEO?ldkeLF>WiA6_lg76n3K?gONVv`M60eNJ=OFM3CfA^@zMeQlIPEp~ z7O3VPz28d$v9J+qE3>Ndii*aqPtRg!ac;N&J_8_U3j_72P9YWY)!N0@?9s<`DG8sg zze1P!H9JY3n;ueDzy=CR(rY_ydjJ$F-Q87wAQ&^h~Hkb^^%BAz?sk5(EC6QV% z_Mjjn^Y0SaZsRGW1~xWR%n8*NE>5F4ws&EIEj zX_$^k*&%&!(Zb@VepN2KoHi&nXR|dgfvOOaNjNkI4Weo@CJ@=DLl;$QJbCXr$U<1)i&JLZYJS@VJ0o#F`f(8xP45 zheMKb?D?Uw$#&=g^!|S!odKoZd`UBQ`Fg$U~?VH(jLv%@S-Re{@WwJ-S*_ zP(bv$2e%*}f=n>xbFr`qoy7=>d5f7aEiOcLNeT(7tp_#pr1T=ulEU!yTYuelAqQ@=R|BPa9E@7Jp-kQEvjt+k)64AZSa`;Hyj2+`UE$gL0RQFcyJ_o0yujZ;M5OH3YKpOf7QxP zXu1jr(~ur#Wo<`dn@Gf1=2j5aeRY372f8J*lv`nEslq+xJ2)WJ&1K6DQ|AzRgkS#J zZQdf@m8hCh=itR6kXcr@8cy!(6F`w2`h%J@W$u9o?RJ}#_ro_E!)7OhW8|bZl7N^f zNVnYF`knQ&XA54w%(ESmCHNao^9FjZ|FZwg(znHkq&LRs6GUicfMaCm3M87B7J6+k zh&#xYYLB1$6;I?FV`aRgi7JB5aeKOY3nISNiku#sNZJ){CvZc9pbAjUh{{JoCg-2+ zp4|ZB8DtLlgw!&h?5<@&zG6P*F!XQzKvyAPAXCjE7x-is0)QlMoJ~U=C8Z$Sss7H{ z3KzHa?7-`~%{-8O0UfADOkqjSz>?ocg1QNix;;(Z^&OHn;4J?p70w_*J9GlQH$295 z8)Uc%P1nF{)v0`Kd3*^wn<%Eh+$2%AH`v{H=9^HtcUS~mJdlrHz=%kFquDMl*o?O(h3T02syr^WvDM2fh*GEl9E5s4Y5C{TIstypu&wS`%prdMsdgGxM13H``YbOm_a@0z z-2Nl<)+paLr$7qsJSe*_D(TAVDqdR6z@R@x4Bx!=cVL|xu`~qAUfJ4dj=ODu7F^Ug zu0vOZOA5?H(?uGAIO~REt!ad|j@p_nl51WeTT)?Q?9$ra?nfL4ER|V-0|lYl%%5tA z;eUsGG+#p3uN)$*Vb-!YLB^|+{6cfXAjEZ9)q+&%^%VzjH{w7}H1Quj`j&7(u$8xS zoYaoZ(KuWbEr}&YYGcRl-J4M{0L)MG#kh#k-j(-3t9#Sbr1D~rl9I9pR1=+|{U;eW zqgEiWY_?4;p7`-2fH>X=OCbch5t|D1s_hv&$P@`$?wIY(faxa5smHhqgY2{U|4Qm{ z#qPRJYF}%<7D$V@LIt~pa^3teiUlEkpY->zas=K=n3}$F$ zyb;SX1EgoE|MrHdY2ZkbJFRI`0ujf`bDh0$v+p&1!Hve9l^+RLY8aLi;ZMLtgoYXM z@Zo+oi7MPF^M&9FR@`lUlT1MQd{mk8*(S;2wcyCg$v4%^#`>B0`4Q;MP^d%x-A!n+ zA{;enh`ij~J_t3)qsBi)qxp=EiFQuifFx=NAe<^`)N5_Ti&s=M#&db>;P06kF4Ek` zBe`nk=9zgbKqZGxo=o`BoUA(Dm7AV+K86TxSKy=DTU(#(u0=ij@bP0XhubLE@~?vC z(Kyqj6p)IG1_nu*3E-Gu$p@8*0DRTzgv};zs6Hw<;S-hbaHFCXvXS2i)L>yy`i-YZ zO571Gz>ofa2q1}|KlQ9GC-|M?^x)}iCpu2)gy^GJT1)8k!;nLM-_!F5(t<}>-M5ps zHW+8ULo!|-)*4Q0-t`8??A%~SQLLXV0)O zTayQ!3pyZiZ3svW7DpZ|IyNachBu+rdx&!iXyD8*FI0%b!qF_X$cFUnG_Hx%ynXvk zGvxQ+W}w_`Bt(c>eJsJ)50J`?4Bq>$`$U;oPWZJ^uDqs2*w-PP%8B?D2}6 z8=p6MC#Yw&-@=73M2PZ1U^#H=8BEYHXu#wz3xxd*;lL z$enxSvrt;3mj)?r_7rY~Bcc7$WGe>y)X6kYh0w0Y$=D||@b!(!rA|bK3aQE&B>WAL z7bTZ!)oqtcQy=QO>Qx7f97x>5A|ho3t#Y0HfxDr<3^W`6@nD!|g9ur@BfgA)9Pp_K zEQ7Ggq}XrT@l|548P-yQ&C(G`Q&^H~3&mAK2wRLjkH{ocvgM@nF?@!o??mhpo)SG9 z+dXjg;rw?TyC}C@tGJ37_$zC2aE1FK!IGfEyoJO%g9H5Yt4Jg<4<0d~d?0dpxblHS z1V8jNLcV=)=u$ZHwIC_I z0oV@z{6U!NT{-#$0K=sg;?^j>f)4!<(bGCuoW1PQVNR+DLMf@J01pS+L3W2Bm|9Y@ z2O#XMpECn1$ZhN(9-ixqnHkU$bBrG0N`59rVD?me+WhXERt!$M)X{hE6k8)=A8a&WLL zVKUROQf6vc3}*D!5fV=+uqQo`wSNt9cNR;(br|)rr>41C^%mlvwav|RAPz97t9n@v z&zYg()>J&eWeS4yf<;k<#mkg+CdPEK$8k6d-yx*5u}Fo~YLw{0;P;JT@mm<6M0vk> zaRg2SSnM_=NB(D+jUwgxpGrvrUDk$tKdnzTfNltx=c1-&z<=5%0i&UJgIkAxFu@OA z_|sNa+0#%0C9~Nh<6V>yVWwHSUnfGz(s8Br7LP` zdr$HoJ4Pg4*Ge-*U@!;g!s#N6NGmHE_ztiJA%)>ADZkEW!1Ya3K%H@1#O++}0a=#E zrJhcqj*boxC;Y(OYXz8RP$6(^{l7zq{ETgZ5ySDvNj1-|MTEf8v<5Rz__Z!BOwLC! z0oQD71|D)+xzzrfc}Z?rBs|qxbi8ab%h;?!7m|W z5IV4-;RcAXPzB6ZCkF@j&ecpzOnk<*PBAet$1aEG%k{)F@s|;7_VSJY|NgB6|MSb< mFu&21+k$3)Z{GgII-RbNtu={1iTEoPNN3Ne$mhsj^Z7q^u>?#2 From a9432a84cca2798edbe7cf276899fddbf2f520ea Mon Sep 17 00:00:00 2001 From: Eric Buehler Date: Mon, 18 Aug 2025 09:02:28 -0300 Subject: [PATCH 17/17] Complete unary --- .../src/backends/wgpu_backend/kernels.rs | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/constensor-core/src/backends/wgpu_backend/kernels.rs b/constensor-core/src/backends/wgpu_backend/kernels.rs index b159274..1c6e1b3 100644 --- a/constensor-core/src/backends/wgpu_backend/kernels.rs +++ b/constensor-core/src/backends/wgpu_backend/kernels.rs @@ -183,9 +183,9 @@ pub(super) fn unary_float( UnaryOpType::Neg => out[ABSOLUTE_POS] = F::from_int(0) - a[ABSOLUTE_POS], UnaryOpType::Sqrt => out[ABSOLUTE_POS] = F::sqrt(a[ABSOLUTE_POS]), UnaryOpType::Exp => out[ABSOLUTE_POS] = F::exp(a[ABSOLUTE_POS]), - UnaryOpType::Exp2 => todo!(), - UnaryOpType::Log => todo!(), - UnaryOpType::Log1p => todo!(), + UnaryOpType::Exp2 => out[ABSOLUTE_POS] = F::powf(F::from_int(2), a[ABSOLUTE_POS]), + UnaryOpType::Log => out[ABSOLUTE_POS] = F::log(a[ABSOLUTE_POS]), + UnaryOpType::Log1p => out[ABSOLUTE_POS] = F::log1p(a[ABSOLUTE_POS]), } #[unroll] @@ -195,9 +195,9 @@ pub(super) fn unary_float( UnaryOpType::Neg => out[ABSOLUTE_POS] = F::from_int(0) - out[ABSOLUTE_POS], UnaryOpType::Sqrt => out[ABSOLUTE_POS] = F::sqrt(out[ABSOLUTE_POS]), UnaryOpType::Exp => out[ABSOLUTE_POS] = F::exp(out[ABSOLUTE_POS]), - UnaryOpType::Exp2 => todo!(), - UnaryOpType::Log => todo!(), - UnaryOpType::Log1p => todo!(), + UnaryOpType::Exp2 => out[ABSOLUTE_POS] = F::powf(F::from_int(2), out[ABSOLUTE_POS]), + UnaryOpType::Log => out[ABSOLUTE_POS] = F::log(out[ABSOLUTE_POS]), + UnaryOpType::Log1p => out[ABSOLUTE_POS] = F::log1p(out[ABSOLUTE_POS]), } } } @@ -216,12 +216,12 @@ pub(super) fn unary_int tmp = -tmp, + UnaryOpType::Neg => tmp = 0. - tmp, UnaryOpType::Sqrt => tmp = f32::sqrt(tmp), UnaryOpType::Exp => tmp = f32::exp(tmp), - UnaryOpType::Exp2 => todo!(), - UnaryOpType::Log => todo!(), - UnaryOpType::Log1p => todo!(), + UnaryOpType::Exp2 => tmp = Powf::powf(2.0_f32, tmp), + UnaryOpType::Log => tmp = Log::log(tmp), + UnaryOpType::Log1p => tmp = Log1p::log1p(tmp), } out[ABSOLUTE_POS] = I::cast_from(tmp); @@ -233,12 +233,12 @@ pub(super) fn unary_int tmp = -tmp, + UnaryOpType::Neg => tmp = 0. - tmp, UnaryOpType::Sqrt => tmp = f32::sqrt(tmp), UnaryOpType::Exp => tmp = f32::exp(tmp), - UnaryOpType::Exp2 => todo!(), - UnaryOpType::Log => todo!(), - UnaryOpType::Log1p => todo!(), + UnaryOpType::Exp2 => tmp = Powf::powf(2.0_f32, tmp), + UnaryOpType::Log => tmp = Log::log(tmp), + UnaryOpType::Log1p => tmp = Log1p::log1p(tmp), } out[ABSOLUTE_POS] = I::cast_from(tmp);