From 16dc9e161b46bdb1cc03a6664033d2a6658df28e Mon Sep 17 00:00:00 2001 From: Oliver <111798851+oliver-keyspace@users.noreply.github.com> Date: Wed, 4 Jan 2023 11:44:18 -0500 Subject: [PATCH] Initial commit --- .gitignore | 8 + Package.resolved | 59 +++++ Package.swift | 29 +++ README.md | 3 + Sources/CLVMTools/IR/IRReader.swift | 268 ++++++++++++++++++++++ Sources/CLVMTools/IR/IRType.swift | 18 ++ Sources/CLVMTools/IR/IRUtils.swift | 118 ++++++++++ Sources/CLVMTools/IR/IRWriter.swift | 103 +++++++++ Sources/CLVMTools/Tools/BinUtils.swift | 95 ++++++++ Tests/CLVMToolsTests/CLVMToolsTests.swift | 6 + 10 files changed, 707 insertions(+) create mode 100644 .gitignore create mode 100644 Package.resolved create mode 100644 Package.swift create mode 100644 README.md create mode 100644 Sources/CLVMTools/IR/IRReader.swift create mode 100644 Sources/CLVMTools/IR/IRType.swift create mode 100644 Sources/CLVMTools/IR/IRUtils.swift create mode 100644 Sources/CLVMTools/IR/IRWriter.swift create mode 100644 Sources/CLVMTools/Tools/BinUtils.swift create mode 100644 Tests/CLVMToolsTests/CLVMToolsTests.swift diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..62b7e8f --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +.DS_Store +/.build +/Packages +/*.xcodeproj +xcuserdata/ +DerivedData/ +.swiftpm +.netrc diff --git a/Package.resolved b/Package.resolved new file mode 100644 index 0000000..f3013cd --- /dev/null +++ b/Package.resolved @@ -0,0 +1,59 @@ +{ + "pins" : [ + { + "identity" : "bigint", + "kind" : "remoteSourceControl", + "location" : "https://github.com/attaswift/BigInt", + "state" : { + "revision" : "0ed110f7555c34ff468e72e1686e59721f2b0da6", + "version" : "5.3.0" + } + }, + { + "identity" : "binarycodable", + "kind" : "remoteSourceControl", + "location" : "https://github.com/jverkoey/BinaryCodable", + "state" : { + "revision" : "aebaa30f1b73c5c45c374d251029b8bd68bde755", + "version" : "0.3.1" + } + }, + { + "identity" : "swift-bignum", + "kind" : 
"remoteSourceControl", + "location" : "https://github.com/dankogai/swift-bignum", + "state" : { + "revision" : "c7e3e4374d9b956e5e9e9014952a981fcf2aca2b", + "version" : "5.2.5" + } + }, + { + "identity" : "swift-bls-signatures", + "kind" : "remoteSourceControl", + "location" : "git@github.com:keyspacewallet/swift-bls-signatures.git", + "state" : { + "revision" : "e107b5caca1ee4c06591b9db512bd2c5357b94b6", + "version" : "0.0.3" + } + }, + { + "identity" : "swift-clvm", + "kind" : "remoteSourceControl", + "location" : "git@github.com:keyspaceapp/swift-clvm.git", + "state" : { + "revision" : "c1d09e8d986e253bc477b8a810ad6e9adcffcb4a", + "version" : "0.0.5" + } + }, + { + "identity" : "swift-numerics", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-numerics", + "state" : { + "revision" : "0a5bc04095a675662cf24757cc0640aa2204253b", + "version" : "1.0.2" + } + } + ], + "version" : 2 +} diff --git a/Package.swift b/Package.swift new file mode 100644 index 0000000..3678ebb --- /dev/null +++ b/Package.swift @@ -0,0 +1,29 @@ +// swift-tools-version: 5.7 + +import PackageDescription + +let package = Package( + name: "swift-clvm-tools", + platforms: [ + .iOS(.v15), + .macOS(.v10_15) + ], + products: [ + .library( + name: "CLVMTools", + targets: ["CLVMTools"]) + ], + dependencies: [ + .package(url: "git@github.com:keyspaceapp/swift-clvm.git", from: "0.0.5") + ], + targets: [ + .target( + name: "CLVMTools", + dependencies: [ + .product(name: "CLVM", package: "swift-clvm", condition: nil), + ]), + .testTarget( + name: "CLVMToolsTests", + dependencies: ["CLVMTools"]), + ] +) diff --git a/README.md b/README.md new file mode 100644 index 0000000..543a391 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# swift-clvm-tools + +Chialisp compiler and other CLVM related tools. 
diff --git a/Sources/CLVMTools/IR/IRReader.swift b/Sources/CLVMTools/IR/IRReader.swift
new file mode 100644
index 0000000..d122108
--- /dev/null
+++ b/Sources/CLVMTools/IR/IRReader.swift
@@ -0,0 +1,315 @@
+import Foundation
+import BigInt
+import CLVM
+
+/// A token's text paired with a character offset into the source string.
+typealias Token = (String, Int)
+/// The token sequence consumed by the reader.
+typealias Stream = IndexingIterator<[Token]>
+
+/// Errors raised while reading or writing the textual IR.
+enum SyntaxError: Error {
+    case unexpectedEndOfStream
+    case missingClosingParenthesis
+    case unterminatedString(Int, String)
+    case illegalDotExpression(Int)
+    case invalidHex(Int, String)
+    case badIRFormat(SExp)
+}
+
+/// Characters that end a `;` comment. `"\r\n"` is listed explicitly because
+/// Swift treats a CRLF pair as one `Character`, equal to neither `"\r"` nor `"\n"`.
+private let COMMENT_TERMINATORS: Set<Character> = ["\n", "\r", "\r\n"]
+
+/// Returns the first offset at or after `offset` that is neither whitespace
+/// nor inside a `;` comment. This also deals with comments.
+func consume_whitespace(s: String, offset: Int) -> Int {
+    return consume_whitespace(chars: Array(s), offset: offset)
+}
+
+/// Array-backed implementation of `consume_whitespace(s:offset:)`, so each
+/// character lookup is O(1) instead of walking the string from its start.
+private func consume_whitespace(chars: [Character], offset: Int) -> Int {
+    var offset = offset
+    while true {
+        while offset < chars.count && chars[offset].isWhitespace {
+            offset += 1
+        }
+        if offset >= chars.count || chars[offset] != ";" {
+            break
+        }
+        while offset < chars.count && !COMMENT_TERMINATORS.contains(chars[offset]) {
+            offset += 1
+        }
+    }
+    return offset
+}
+
+/// Reads characters from `offset` up to the next whitespace or `)`.
+/// Returns the text read and the offset just past it.
+func consume_until_whitespace(s: String, offset: Int) -> Token {
+    return consume_until_whitespace(chars: Array(s), offset: offset)
+}
+
+/// Array-backed implementation of `consume_until_whitespace(s:offset:)`.
+private func consume_until_whitespace(chars: [Character], offset: Int) -> Token {
+    let start = offset
+    var offset = offset
+    while offset < chars.count && !chars[offset].isWhitespace && chars[offset] != ")" {
+        offset += 1
+    }
+    return (String(chars[start..<offset]), offset)
+}
+
+/// Returns the next token, or throws `missingClosingParenthesis` when the
+/// stream ends while a list is still open.
+func next_cons_token(stream: inout Stream) throws -> Token {
+    if let token = stream.next() {
+        return token
+    }
+    throw SyntaxError.missingClosingParenthesis
+}
+
+/// Parses the remainder of a list whose current token is `token`.
+/// Handles the closing `)` (null) and the dotted-pair form `a . b)`.
+func tokenize_cons(token: String, offset: Int, stream: inout Stream) throws -> CLVMObject {
+    if token == ")" {
+        return CLVMObject(v: .sexp(try ir_new(type: .NULL, val: .int(0), offset: offset)))
+    }
+
+    let initial_offset = offset
+
+    let first_sexp = try tokenize_sexp(token: token, offset: offset, stream: &stream)
+
+    var (token, offset) = try next_cons_token(stream: &stream)
+    let rest_sexp: CLVMObject
+    if token == "." {
+        let dot_offset = offset
+        // grab the last item
+        (token, offset) = try next_cons_token(stream: &stream)
+        rest_sexp = try tokenize_sexp(token: token, offset: offset, stream: &stream)
+        (token, offset) = try next_cons_token(stream: &stream)
+        if token != ")" {
+            throw SyntaxError.illegalDotExpression(dot_offset)
+        }
+    }
+    else {
+        rest_sexp = try tokenize_cons(token: token, offset: offset, stream: &stream)
+    }
+    return CLVMObject(v: .sexp(
+        try ir_cons(
+            first: SExp(obj: first_sexp),
+            rest: SExp(obj: rest_sexp),
+            offset: initial_offset
+        )
+    ))
+}
+
+/// Returns an `INT` node when `token` is a decimal integer, otherwise `nil`.
+func tokenize_int(token: String, offset: Int) throws -> CLVMObject? {
+    // A bare sign ("-" or "+") has no digits and is not an integer; reject it
+    // before it reaches BigInt's parser.
+    let digits = (token.first == "-" || token.first == "+") ? token.dropFirst() : Substring(token)
+    guard !digits.isEmpty, let int = BigInt(token) else {
+        return nil
+    }
+    do {
+        return CLVMObject(v: .sexp(try ir_new(type: .INT, val: .int(int), offset: offset)))
+    }
+    catch is ValueError {
+        // Not usable as an INT node; let the next tokenizer try.
+        return nil
+    }
+}
+
+/// Returns a `HEX` node when `token` starts with `0x`/`0X`, otherwise `nil`.
+///
+/// - Throws: `SyntaxError.invalidHex` when the digits are not valid hexadecimal.
+func tokenize_hex(token: String, offset: Int) throws -> CLVMObject? {
+    guard token.prefix(2).uppercased() == "0X" else {
+        return nil
+    }
+    var hex_digits = String(token.dropFirst(2))
+    if hex_digits.count % 2 == 1 {
+        hex_digits = "0" + hex_digits
+    }
+    // `Data(hex:)` never throws: it yields empty data on a bad digit. Validate
+    // first so malformed input is reported instead of read as an empty atom.
+    guard hex_digits.allSatisfy({ $0.isASCII && $0.isHexDigit }) else {
+        throw SyntaxError.invalidHex(offset, token)
+    }
+    do {
+        return CLVMObject(v: .sexp(try ir_new(type: .HEX, val: .bytes(Data(hex: hex_digits)), offset: offset)))
+    }
+    catch {
+        throw SyntaxError.invalidHex(offset, token)
+    }
+}
+
+/// Returns a quoted-string node when `token` is wrapped in matching `'` or `"`
+/// quotes, otherwise `nil`. The stored bytes exclude both quote characters.
+func tokenize_quotes(token: String, offset: Int) throws -> CLVMObject? {
+    if token.count < 2 {
+        return nil
+    }
+    let c = token.first
+    // Only quote characters delimit strings; a backslash is an ordinary symbol character.
+    if c != "'" && c != "\"" {
+        return nil
+    }
+
+    if token.last != c {
+        throw SyntaxError.unterminatedString(offset, token)
+    }
+
+    let q_type: IRType = c == "'" ? .SINGLE_QUOTE : .DOUBLE_QUOTE
+    // Drop the opening AND the closing quote; only the contents form the atom.
+    let contents = Data(token.dropFirst().dropLast().utf8)
+
+    return CLVMObject(v: .tuple((.tuple((.int(BigInt(q_type.rawValue)), .int(BigInt(offset)))), .bytes(contents))))
+}
+
+/// Fallback tokenizer: wraps `token` as a `SYMBOL` node. Never returns `nil`.
+func tokenize_symbol(token: String, offset: Int) -> CLVMObject? {
+    return CLVMObject(v: .tuple((
+        .tuple((
+            .int(BigInt(IRType.SYMBOL.rawValue)),
+            .int(BigInt(offset))
+        )),
+        .bytes(token.data(using: .utf8)!)
+    )))
+}
+
+/// Parses one s-expression starting at `token`: a list when the token is `(`,
+/// otherwise the first of int, hex, quoted string or symbol that matches.
+func tokenize_sexp(token: String, offset: Int, stream: inout Stream) throws -> CLVMObject {
+    if token == "(" {
+        let (token, offset) = try next_cons_token(stream: &stream)
+        return try tokenize_cons(token: token, offset: offset, stream: &stream)
+    }
+
+    for f in [
+        tokenize_int,
+        tokenize_hex,
+        tokenize_quotes,
+        tokenize_symbol,
+    ] {
+        if let r = try f(token, offset) {
+            return r
+        }
+    }
+
+    throw ValueError("Invalid sexp")
+}
+
+/// Splits `s` into tokens, skipping whitespace and `;` comments.
+///
+/// - Throws: `SyntaxError.unterminatedString` when a quote is never closed.
+func token_stream(s: String) throws -> Stream {
+    // Python implements this with yield.. we can do something similar with AsyncThrowingStream
+    // but then everything becomes async.
+    // Index an array of characters: offsetting a String index costs O(n) per lookup.
+    let chars = Array(s)
+    var tokens: [Token] = []
+    var offset = 0
+    while offset < chars.count {
+        offset = consume_whitespace(chars: chars, offset: offset)
+        if offset >= chars.count {
+            break
+        }
+        let c = chars[offset]
+        if ["(", ".", ")"].contains(c) {
+            tokens.append((String(c), offset))
+            offset += 1
+            continue
+        }
+        // Only quote characters open a string; a backslash is an ordinary symbol character.
+        if ["\"", "'"].contains(c) {
+            let start = offset
+            offset += 1
+            while offset < chars.count && chars[offset] != c {
+                offset += 1
+            }
+            guard offset < chars.count else {
+                throw SyntaxError.unterminatedString(start, String(chars[start...]))
+            }
+            tokens.append((String(chars[start...offset]), start))
+            offset += 1
+            continue
+        }
+        let (token, end_offset) = consume_until_whitespace(chars: chars, offset: offset)
+        tokens.append((token, offset))
+        offset = end_offset
+    }
+    return tokens.makeIterator()
+}
+
+/// Parses the first s-expression in `s` into IR and converts it with `to_sexp`.
+///
+/// - Throws: `SyntaxError.unexpectedEndOfStream` when `s` contains no tokens.
+func read_ir(
+    s: String,
+    to_sexp: (CastableType) throws -> SExp = SExp.to
+) throws -> SExp {
+    var stream = try token_stream(s: s)
+    guard let (token, offset) = stream.next() else {
+        throw SyntaxError.unexpectedEndOfStream
+    }
+    return try to_sexp(.object(try tokenize_sexp(
+        token: token,
+        offset: offset,
+        stream: &stream
+    )))
+}
+
+
+#warning("hack")
+
+extension Data {
+    public init(hex: String) {
+        self.init(Array(hex: hex))
+    }
+}
+
+// Data(hex:)
+// From https://github.com/krzyzanowskim/CryptoSwift/ (MIT)
+extension Array where Element == UInt8 {
+    public init(hex: String) {
+        self.init()
+        reserveCapacity(hex.unicodeScalars.lazy.underestimatedCount)
+        var buffer: UInt8?
+        var skip = hex.hasPrefix("0x") ? 2 : 0
+        for char in hex.unicodeScalars.lazy {
+            guard skip == 0 else {
+                skip -= 1
+                continue
+            }
+            guard char.value >= 48 && char.value <= 102 else {
+                removeAll()
+                return
+            }
+            let v: UInt8
+            let c: UInt8 = UInt8(char.value)
+            switch c {
+            case let c where c <= 57:
+                v = c - 48
+            case let c where c >= 65 && c <= 70:
+                v = c - 55
+            case let c where c >= 97:
+                v = c - 87
+            default:
+                removeAll()
+                return
+            }
+            if let b = buffer {
+                append(b << 4 | v)
+                buffer = nil
+            } else {
+                buffer = v
+            }
+        }
+        if let b = buffer {
+            append(b)
+        }
+    }
+}
diff --git a/Sources/CLVMTools/IR/IRType.swift b/Sources/CLVMTools/IR/IRType.swift
new file mode 100644
index 0000000..e8b6e21
--- /dev/null
+++ b/Sources/CLVMTools/IR/IRType.swift
@@ -0,0 +1,18 @@
+import Foundation
+
+/// Raw values are `Int.from_bytes(b, .big)` where b is the utf8 encoding of the type's symbol.
+public enum IRType: Int {
+    case CONS = 1129270867
+    case NULL = 1314212940
+    case INT = 4804180
+    case HEX = 4736344
+    case QUOTES = 20820
+    case DOUBLE_QUOTE = 4477268
+    case SINGLE_QUOTE = 5460308
+    case SYMBOL = 5462349
+    case OPERATOR = 20304
+    case CODE = 1129268293
+    case NODE = 1313817669
+}
+
+let CONS_TYPES: Set<IRType> = Set([.CONS])
diff --git a/Sources/CLVMTools/IR/IRUtils.swift b/Sources/CLVMTools/IR/IRUtils.swift
new file mode 100644
index 0000000..57683ac
--- /dev/null
+++ b/Sources/CLVMTools/IR/IRUtils.swift
@@ -0,0 +1,152 @@
+import Foundation
+import BigInt
+import CLVM
+
+/// Builds an IR node `(type . val)`. When `offset` is given, the type slot
+/// becomes the pair `(type . offset)`.
+func ir_new(type: IRType, val: CastableType, offset: Int? = nil) throws -> SExp {
+    let sexpType: SExp
+    if let offset = offset {
+        sexpType = try SExp.to(v: .tuple((.int(BigInt(type.rawValue)), .int(BigInt(offset)))))
+    } else {
+        sexpType = SExp(obj: CLVMObject(v: .int(BigInt(type.rawValue))))
+    }
+    return try SExp.to(v: .tuple((.sexp(sexpType), val)))
+}
+
+/// Builds an IR node from an already-converted type and value.
+func ir_new(type: CastableType, val: CastableType) throws -> SExp {
+    return try SExp.to(v: .tuple((type, val)))
+}
+
+/// Builds a `CONS` node whose value is the pair `(first . rest)`.
+func ir_cons(first: SExp, rest: SExp, offset: Int? = nil) throws -> SExp {
+    return try ir_new(
+        type: .CONS,
+        val: .sexp(
+            ir_new(
+                type: .sexp(first),
+                val: .sexp(rest)
+            )
+        ),
+        offset: offset
+    )
+}
+
+/// Builds a `NULL` node without an offset.
+func ir_null() throws -> SExp {
+    return try ir_new(type: .int(BigInt(IRType.NULL.rawValue)), val: .int(0))
+}
+
+/// Returns the type tag of an IR node, looking inside `(type . offset)` if present.
+///
+/// - Throws: `SyntaxError.badIRFormat` when the tag is not an atom or does not
+///   name a known `IRType`.
+func ir_type(ir_sexp: SExp) throws -> IRType {
+    var the_type = try ir_sexp.first()
+    if the_type.listp() {
+        the_type = try the_type.first()
+    }
+
+    guard let blob = the_type.atom, let type = IRType(rawValue: int_from_bytes(blob: blob)) else {
+        throw SyntaxError.badIRFormat(ir_sexp)
+    }
+    return type
+}
+
+/// Returns the value slot of an IR node.
+func ir_val(ir_sexp: SExp) throws -> SExp {
+    try ir_sexp.rest()
+}
+
+/// Whether the node's type is `NULL`.
+func ir_nullp(ir_sexp: SExp) throws -> Bool {
+    try ir_type(ir_sexp: ir_sexp) == .NULL
+}
+
+/// Whether the node's type is one of `CONS_TYPES`.
+func ir_listp(ir_sexp: SExp) throws -> Bool {
+    try CONS_TYPES.contains(ir_type(ir_sexp: ir_sexp))
+}
+
+/// Strips the IR annotations, rebuilding the plain s-expression underneath.
+func ir_as_sexp(ir_sexp: SExp) throws -> SExp {
+    if try ir_nullp(ir_sexp: ir_sexp) {
+        return SExp(obj: CLVMObject(v: .list([])))
+    }
+    if try ir_type(ir_sexp: ir_sexp) == .CONS {
+        return try ir_as_sexp(ir_sexp: ir_first(ir_sexp: ir_sexp))
+            .cons(right: .sexp(ir_as_sexp(ir_sexp: ir_rest(ir_sexp: ir_sexp))))
+    }
+    return try ir_sexp.rest()
+}
+
+/// Returns the node's value as raw bytes.
+///
+/// - Throws: `SyntaxError.badIRFormat` when the value is not an atom.
+func ir_as_atom(ir_sexp: SExp) throws -> Data {
+    guard let atom = try ir_sexp.rest().atom else {
+        throw SyntaxError.badIRFormat(ir_sexp)
+    }
+    return atom
+}
+
+/// First element of a `CONS` node's pair.
+func ir_first(ir_sexp: SExp) throws -> SExp {
+    try ir_sexp.rest().first()
+}
+
+/// Second element of a `CONS` node's pair.
+func ir_rest(ir_sexp: SExp) throws -> SExp {
+    try ir_sexp.rest().rest()
+}
+
+/// Returns the `(type, bytes)` pair describing a symbol node.
+func ir_symbol(symbol: String) -> (IRType, Data) {
+    return (.SYMBOL, symbol.data(using: .utf8)!)
+}
+
+/// Returns the symbol's text when `ir_sexp` is a `SYMBOL` node, otherwise `nil`
+/// (also `nil` when its bytes are not valid UTF-8).
+func ir_as_symbol(ir_sexp: SExp) throws -> String? {
+    if try ir_sexp.listp() && ir_type(ir_sexp: ir_sexp) == .SYMBOL {
+        guard let atom = try ir_as_sexp(ir_sexp: ir_sexp).atom else {
+            throw SyntaxError.badIRFormat(ir_sexp)
+        }
+        return String(data: atom, encoding: .utf8)
+    }
+    return nil
+}
+
+/// Whether `sexp` already looks like an IR node.
+///
+/// NOTE(review): a type tag longer than one byte is rejected here, yet every
+/// `IRType` raw value needs two or more bytes — confirm this is intended.
+func is_ir(sexp: SExp) -> Bool {
+    if sexp.atom != nil {
+        return false
+    }
+
+    let (type_sexp, val_sexp) = sexp.pair!
+    let f = type_sexp?.atom
+    if f == nil || f!.count > 1 {
+        return false
+    }
+
+    let the_type: Int = int_from_bytes(blob: f!)
+    guard let t = IRType(rawValue: the_type) else {
+        return false
+    }
+
+    if t == .CONS {
+        if val_sexp!.atom == Data() {
+            return true
+        }
+        if val_sexp!.pair != nil {
+            return is_ir(sexp: SExp(obj: val_sexp?.pair?.0)) && is_ir(sexp: SExp(obj: val_sexp?.pair?.1))
+        }
+        return false
+    }
+
+    return val_sexp!.atom != nil
+}
diff --git a/Sources/CLVMTools/IR/IRWriter.swift b/Sources/CLVMTools/IR/IRWriter.swift
new file mode 100644
index 0000000..cbd1dd5
--- /dev/null
+++ b/Sources/CLVMTools/IR/IRWriter.swift
@@ -0,0 +1,118 @@
+import Foundation
+import BigInt
+import CLVM
+
+/// Lowercase hex rendering of `data`, two digits per byte.
+private func hex_string(_ data: Data) -> String {
+    return data.map { String(format: "%02hhx", $0) }.joined()
+}
+
+/// Decodes `atom` as UTF-8 text for a quoted node.
+///
+/// - Throws: `SyntaxError.badIRFormat` when the bytes are not valid UTF-8.
+private func quoted_text(_ atom: Data, in ir_sexp: SExp) throws -> String {
+    guard let text = String(data: atom, encoding: .utf8) else {
+        throw SyntaxError.badIRFormat(ir_sexp)
+    }
+    return text
+}
+
+/// Returns the text fragments that render the list node `ir_sexp`,
+/// including the surrounding parentheses and any dotted tail.
+func iter_sexp_format(ir_sexp: SExp) throws -> IndexingIterator<[String]> {
+    // Python implements this with yield.. we can do something similar with AsyncThrowingStream
+    // but then everything becomes async. So doing this hack for now
+    var chars: [String] = []
+
+    chars.append("(")
+    var is_first = true
+    var ir_sexp = ir_sexp
+    while !(try ir_nullp(ir_sexp: ir_sexp)) {
+        if try !ir_listp(ir_sexp: ir_sexp) {
+            chars.append(" . ")
+            chars.append(contentsOf: try iter_ir_format(ir_sexp: ir_sexp))
+            break
+        }
+        if !is_first {
+            chars.append(" ")
+        }
+        chars.append(contentsOf: try iter_ir_format(ir_sexp: ir_first(ir_sexp: ir_sexp)))
+        ir_sexp = try ir_rest(ir_sexp: ir_sexp)
+        is_first = false
+    }
+    chars.append(")")
+
+    return chars.makeIterator() // hack
+}
+
+/// Returns the text fragments that render any IR node.
+///
+/// - Throws: `SyntaxError.badIRFormat` for a node that has no textual form.
+func iter_ir_format(ir_sexp: SExp) throws -> IndexingIterator<[String]> {
+    // Python implements this with yield.. we can do something similar with AsyncThrowingStream
+    // but then everything becomes async. So doing this hack for now
+    var tokens: [String] = []
+
+    if try ir_listp(ir_sexp: ir_sexp) {
+        tokens.append(contentsOf: try iter_sexp_format(ir_sexp: ir_sexp))
+        return tokens.makeIterator()
+    }
+
+    let type = try ir_type(ir_sexp: ir_sexp)
+
+    if type == .CODE {
+        let sexp_stream = try sexp_to_stream(sexp: ir_val(ir_sexp: ir_sexp)) // hack, python uses bytesio
+        let code = sexp_stream.map { String(format: "%02hhx", $0) }.joined() // .hex()
+        tokens.append("CODE[\(code)]")
+        return tokens.makeIterator()
+    }
+
+    if type == .NULL {
+        tokens.append("()")
+        return tokens.makeIterator()
+    }
+
+    let atom = try ir_as_atom(ir_sexp: ir_sexp)
+
+    if type == .INT {
+        let int: BigInt = int_from_bytes(blob: atom)
+        tokens.append("\(int)")
+    } else if type == .NODE {
+        let int: BigInt = int_from_bytes(blob: atom)
+        tokens.append("NODE[\(int)]")
+    } else if type == .HEX {
+        tokens.append("0x\(hex_string(atom))") // hex()
+    } else if type == .QUOTES || type == .DOUBLE_QUOTE {
+        let text = try quoted_text(atom, in: ir_sexp)
+        tokens.append("\"\(text)\"")
+    } else if type == .SINGLE_QUOTE {
+        let text = try quoted_text(atom, in: ir_sexp)
+        tokens.append("'\(text)'")
+    } else if [.SYMBOL, .OPERATOR].contains(type) {
+        if let string = String(data: atom, encoding: .utf8) {
+            tokens.append(string)
+        } else {
+            // Symbol bytes that are not UTF-8 are shown as hex inside a parenthesized marker.
+            tokens.append("(indecipherable symbol: \(hex_string(atom)))") // hex()
+        }
+    }
+    else {
+        throw SyntaxError.badIRFormat(ir_sexp)
+    }
+
+    return tokens.makeIterator()
+}
+
+/// Appends the textual form of `ir_sexp` to `s`.
+func write_ir_to_stream(ir_sexp: SExp, s: inout String) throws {
+    for symbol in try iter_ir_format(ir_sexp: ir_sexp) {
+        s += symbol
+    }
+}
+
+/// Returns the textual form of `ir_sexp`.
+func write_ir(ir_sexp: SExp) throws -> String {
+    var s = ""
+    try write_ir_to_stream(ir_sexp: ir_sexp, s: &s)
+    return s
+}
diff --git 
a/Sources/CLVMTools/Tools/BinUtils.swift b/Sources/CLVMTools/Tools/BinUtils.swift
new file mode 100644
index 0000000..d0dd11b
--- /dev/null
+++ b/Sources/CLVMTools/Tools/BinUtils.swift
@@ -0,0 +1,114 @@
+import Foundation
+import BigInt
+import CLVM
+
+/// Whether `scalar` belongs to Python's `string.printable`: ASCII letters,
+/// digits, punctuation and space (0x20...0x7E) plus `\t \n \v \f \r` (0x09...0x0D).
+private func is_python_printable(_ scalar: Unicode.Scalar) -> Bool {
+    switch scalar.value {
+    case 0x09...0x0D, 0x20...0x7E:
+        return true
+    default:
+        return false
+    }
+}
+
+/// Picks the IR type used to display `atom`.
+///
+/// Atoms longer than two bytes are `QUOTES` when they decode as UTF-8 made only
+/// of printable ASCII, else `HEX`. Shorter atoms are `INT` when they round-trip
+/// unchanged through `int_from_bytes` / `int_to_bytes`, else `HEX`.
+public func type_for_atom(atom: Data) -> IRType {
+    if atom.count > 2 {
+        // Control bytes and non-ASCII text must not be emitted as a quoted string.
+        if let v = String(data: atom, encoding: .utf8), v.unicodeScalars.allSatisfy(is_python_printable) {
+            return .QUOTES
+        }
+        return .HEX
+    }
+    if int_to_bytes(v: int_from_bytes(blob: atom) as BigInt) == atom {
+        return .INT
+    }
+    return .HEX
+}
+
+/// Converts IR (as produced by `read_ir`) into the plain s-expression it denotes,
+/// replacing symbols found in `KEYWORD_TO_ATOM` with the mapped atom.
+func assemble_from_ir(ir_sexp: SExp) throws -> SExp {
+    if var keyword = try ir_as_symbol(ir_sexp: ir_sexp) {
+        if keyword.first == "#" {
+            keyword = String(keyword.dropFirst())
+        }
+        if let atom = KEYWORD_TO_ATOM[keyword] {
+            return try SExp.to(v: .bytes(atom))
+        }
+        return try ir_val(ir_sexp: ir_sexp)
+    }
+
+    if try !ir_listp(ir_sexp: ir_sexp) {
+        return try ir_val(ir_sexp: ir_sexp)
+    }
+
+    if try ir_nullp(ir_sexp: ir_sexp) {
+        return try SExp.to(v: .list([]))
+    }
+
+    // handle "q"
+    // TODO: note that any symbol is legal after a leading "q"
+    let first = try ir_first(ir_sexp: ir_sexp)
+
+    let sexp_1 = try assemble_from_ir(ir_sexp: first)
+    let sexp_2 = try assemble_from_ir(ir_sexp: try ir_rest(ir_sexp: ir_sexp))
+    return try sexp_1.cons(right: .sexp(sexp_2))
+}
+
+/// Converts a plain s-expression into IR suitable for `write_ir`.
+///
+/// - Parameter allow_keyword: `true` renders atoms found in `keyword_from_atom`
+///   as symbols and `false` never does. `nil` (the default) behaves like `true`
+///   for a list and like `false` for a bare atom.
+func disassemble_to_ir(sexp: SExp, keyword_from_atom: [Data: String], allow_keyword: Bool? = nil) throws -> SExp {
+    if is_ir(sexp: sexp) && allow_keyword != false {
+        let symbol = ir_symbol(symbol: "ir")
+        let symbol_sexp = try SExp.to(v: .tuple((.int(BigInt(symbol.0.rawValue)), .bytes(symbol.1))))
+        return try ir_cons(first: symbol_sexp, rest: sexp)
+    }
+
+    if sexp.listp() {
+        var allow_keyword = allow_keyword
+        // An explicit `false` (passed below for the tail of a list) must stay `false`;
+        // only an undecided `nil` or a list in head position enables keywords.
+        if try sexp.first().listp() || allow_keyword == nil {
+            allow_keyword = true
+        }
+        let v0 = try disassemble_to_ir(sexp: try sexp.first(), keyword_from_atom: keyword_from_atom, allow_keyword: allow_keyword)
+        let v1 = try disassemble_to_ir(sexp: sexp.rest(), keyword_from_atom: keyword_from_atom, allow_keyword: false)
+        return try ir_cons(first: v0, rest: v1)
+    }
+
+    let as_atom = sexp.atom!
+    if allow_keyword == true {
+        if let v = keyword_from_atom[as_atom], v != "." {
+            let symbol = ir_symbol(symbol: v)
+            return try SExp.to(v: .tuple((.int(BigInt(symbol.0.rawValue)), .bytes(symbol.1))))
+        }
+    }
+
+    if sexp.nullp() {
+        return try ir_null()
+    }
+
+    return try SExp.to(v: .tuple((.int(BigInt(type_for_atom(atom: as_atom).rawValue)), .bytes(as_atom))))
+}
+
+/// Renders `sexp` as text by converting it to IR and writing that IR out.
+public func disassemble(sexp: SExp, keyword_from_atom: [Data: String] = KEYWORD_FROM_ATOM) throws -> String {
+    let symbols = try disassemble_to_ir(sexp: sexp, keyword_from_atom: keyword_from_atom)
+    return try write_ir(ir_sexp: symbols)
+}
+
+/// Parses the text `s` into IR and assembles that IR into an s-expression.
+public func assemble(s: String) throws -> SExp {
+    let symbols = try read_ir(s: s)
+    return try assemble_from_ir(ir_sexp: symbols)
+}
diff --git a/Tests/CLVMToolsTests/CLVMToolsTests.swift b/Tests/CLVMToolsTests/CLVMToolsTests.swift
new file mode 100644
index 0000000..ff0bb93
--- /dev/null
+++ b/Tests/CLVMToolsTests/CLVMToolsTests.swift
@@ -0,0 +1,10 @@
+import XCTest
+@testable import CLVMTools
+
+final class CLVMToolsTests: XCTestCase {
+
+    func test_type_for_atom_rejects_control_bytes() {
+        XCTAssertEqual(type_for_atom(atom: Data([0, 0, 0])), .HEX)
+        XCTAssertEqual(type_for_atom(atom: Data("foo".utf8)), .QUOTES)
+    }
+}