From b7cd7ccf76a8c8f70869045a1d15060545ab5da4 Mon Sep 17 00:00:00 2001 From: Tom Briar Date: Fri, 20 Oct 2023 11:40:04 -0400 Subject: [PATCH 1/8] doc: Added Compressed Transaction Schema --- doc/compressed_transactions.md | 183 +++++++++++++++++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100644 doc/compressed_transactions.md diff --git a/doc/compressed_transactions.md b/doc/compressed_transactions.md new file mode 100644 index 0000000000000..c83651ce87eb7 --- /dev/null +++ b/doc/compressed_transactions.md @@ -0,0 +1,183 @@ +# Compressed Transaction Schema +By (Tom Briar) and (Andrew Poelstra) + +## 1. Abstract + +With this Transaction Compression Schema we use several methods to compress transactions, +including dropping data and recovering it on decompression by grinding until we obtain +valid signatures. + +The bulk of our size savings come from replacing the prevout of each input by a block +height and index. This requires the decompression to have access to the blockchain, and +also means that compression is ineffective for transactions that spend unconfirmed or +insufficiently confirmed outputs. + +Even without compression, Taproot keyspends are very small: as witness data they +include only a single 64/65-byte signature and do not repeat the public key or +any other metadata. By using pubkey recovery, we obtain Taproot-like compression +for legacy and Segwit transactions. + +The main applications for this schema are for steganography, satellite/radio broadcast, and +other low bandwidth channels with a high CPU availability on decompression. We +assume users have some ability to shape their transactions to improve their +compressibility, and therefore give special treatment to certain transaction forms. + +This schema is easily reversible except for compressing the Txid/Vout input pairs(Method 4). +Compressing the input Txid/Vout is optional, and without it still gleans 50% of the total compression. 
+This allows for the additional use case of P2P communication. + +## 2. Methods + +The four main methods to achieve a lower transaction size are: + +1. packing transaction metadata before the transaction and each of its inputs and +outputs to determine the structure of the following data. +2. replacing 32-bit numeric values with either variable-length integers (VarInts) or compact-integers (CompactSizes). +3. using compressed signatures and public key recovery upon decompression. +4. replacing the 36-byte txid/vout pair with a blockheight and output index. + +Method 4 will cause the compressed transaction to be undecompressable if a block +reorg occurs at or before the block it's included in. Therefore, we'll only compress +the Txid if the transaction input is at least one hundred blocks old. + + +## 3 Schema + +### 3.1 Primitives + +| Name | Width | Description | +|------------------|-----------|-------------| +| CompactSize | 1-5 Bytes | For 0-253, encode the value directly in one byte. For 254-65535, encode 254 followed by 2 little-endian bytes. For 65536-(2^32-1), encode 255 followed by 4 little-endian bytes. | +| CompactSize flag | 2 Bits | 1, 2 or 3 indicate literal values. 0 indicates that the value will be encoded in a later CompactSize. | +| VarInt | 1+ Bytes | 7-bit little-endian encoding, with each 7-bit word encoded in a byte. The highest bit of each byte is 1 if more bytes follow, and 0 for the last byte. | +| VLP-Bytestream | 2+ Bytes | A VarInt Length Prefixed Bytestream. Has a VarInt prefixed to determine the length. | + +### 3.2 General Schema + +| Name | Width | Description | +|--------------------------------|-----------------|-------------| +| Transaction Metadata | 1 Byte | Information on the structure of the transaction. See Section 3.3. | +| Version | 0-5 Bytes | An optional CompactSize containing the transaction's version. | +| Input Count | 0-5 Bytes | An optional CompactSize containing the transaction's input count. 
| +| Output Count | 0-5 Bytes | An optional CompactSize containing the transaction's output count. | +| LockTime | 0-5 Bytes | An optional CompactSize containing the transaction LockTime if it is non-zero. | +| Minimum Blockheight | 1-5 Bytes | A VarInt containing the Minimum Blockheight from which the transaction locktime and input blockheights are given as offsets. | +| Input Metadata+Output Metadata | 1+ Bytes | An encoding containing metadata on all the inputs and then all the outputs of the transaction. For each input see Section 3.4, for each output see Section 3.5. | +| Input Data | 66+ Bytes | See Section 3.6 for each input. | +| Output Data | 3+ Bytes | See Section 3.7 for each output. | + +For the four CompactSizes listed above we could use a more compact bit encoding, but they are already a fallback for the bit encoding of the Transaction Metadata. + +### 3.3 Transaction Metadata + +| Name | Width | Description | +|--------------|--------|-------------| +| Version | 2 Bits | A CompactSize flag for the transaction version. | +| Input Count | 2 Bits | A CompactSize flag for the transaction input count. | +| Output Count | 2 Bits | A CompactSize flag for the transaction output count. | +| LockTime | 1 Bit | A Boolean to indicate if the transaction has a LockTime. | + +### 3.4 Input Metadata + +| Name | Width | Description | +|----------------------|--------|-------------| +| Compressed Signature | 1 Bit | Signature compression flag. For P2TR: 1 for keyspend, 0 for scriptspend; For P2SH: 0 for p2sh, 1 for p2sh-wpkh. | +| Standard Hash | 1 Bit | A flag to determine if this Input's Signature Hash Type is standard (0x00 for Taproot, 0x01 for Legacy/Segwit). | +| Standard Sequence | 2 Bits | A CompactSize flag for the input's sequence. Encode literal values as follows: 1 = 0x00000000, 2 = 0xFFFFFFFE, 3 = 0xFFFFFFFF. 
| + +### 3.5.1 Output Metadata + +| Name | Width | Description | +|---------------------|--------|-------------| +| Encoded Script Type | 3 Bits | Encoded Script Type. | + +#### 3.5.2 Script Type encoding + +| Script Type | Value | +|----------------------------|-------| +| Uncompressed P2PK | 0b000 | +| Compressed P2PK | 0b001 | +| P2PKH | 0b010 | +| P2SH | 0b011 | +| P2WSH | 0b100 | +| P2WPKH | 0b101 | +| P2TR | 0b110 | +| Uncompressed Custom Script | 0b111 | + +### 3.6 Input Data + +| Name | Width | Description | +|-------------------------|-----------|-------------| +| Sequence | 0-5 Bytes | An Optional VarInt containing the sequence if it was non-standard. | +| Txid Blockheight | 1-5 Bytes | A VarInt containing either 0 if this is an uncompressed input, or the offset from the Minimum Blockheight for this Txid. | +| Txid/Signature Data | 65+ Bytes | Txid/Signatures are determined to be uncompressed either by the output script of the previous transaction, or if the Txid Blockheight is zero. For each Compressed Txid/Signature see Section 3.6.1. For each Uncompressed Txid/Signature see Section 3.6.2. | + +### 3.6.1 Compressed Txid/Signature Data + +| Name | Width | Description | +|-------------------|-----------|-------------| +| Txid Block Index | 1-5 Bytes | A VarInt containing the flattened index from the Txid Blockheight for the Vout. | +| Signature | 64 Bytes | Contains the 64-byte signature. | +| Hash Type | 0-1 Bytes | An Optional Byte containing the Hash Type if it was non-standard. | + +### 3.6.2 Uncompressed Txid/Signature Data + +| Name | Width | Description | +|-----------|-----------|-------------| +| Txid | 32 Bytes | Contains the 32-byte Txid. | +| Vout | 1-5 Bytes | A CompactSize containing the Vout of the Txid. | +| Signature | 2+ Bytes | A VLP-Bytestream containing the signature. 
| + +### 3.7 Output Data + +| Name | Width | Description | +|---------------|-----------|-------------| +| Output Script | 2+ Bytes | A VLP-Bytestream containing the output script. | +| Amount | 1-9 Bytes | A VarInt containing the output amount. | + +## 4 Ideal Transaction + +The target transaction for optimal compression was chosen +based on the most common transactions that are likely to be used +for purposes that require the best compression. + +| Field | Requirements | Possible Savings | +|-----------------|-----------------------------------|-----------------------------------| +| Version | Less than four | 30 Bits | +| Input Count | Less than four | 30 Bits | +| Output Count | Less than four | 30 Bits | +| LockTime | 0 | 30 Bits | +| Input Sequence | 0x00, 0xFFFFFFFE, or 0xFFFFFFFF | 62 Bits For Each Input | +| Input Txid | Compressed Outpoint | 23-31 Bytes For Each Input | +| Input Vout | Compressed Outpoint | (-1)-3 Bytes For Each Input | +| Input Signature | Non-custom Script Signing | 40-72 Bytes For Each Legacy Input | +| Input Hash Type | 0x00 for Taproot, 0x01 for Legacy | 7 Bits For Each Input | +| Output Script | Non-custom Scripts | 2-5 Bytes For Each Output | +| Output Amount | No Restrictions | (-1)-7 Bytes For Each Output | + +## 5 Test Vectors + +| Transaction | Before Compression | Possible Savings | After Compression | +|--------------------------|--------------------|--------------------------|-------------------| +| 2-(input/output) Taproot | 312 Bytes | 78-124 Bytes and 2 Bits | 190-226 Bytes | +| 2-(input/output) Legacy | 394 Bytes | 118-196 Bytes and 2 Bits | 176-244 Bytes | + +Taproot (Uncompressed) +``` 
+020000000001028899af77861ede1ee384c333974722c96eabba8889506725b00735fc35ba41680000000000000000008899af77861ede1ee384c333974722c96eabba8889506725b00735fc35ba41680000000000000000000288130000000000002251206b10142cffb29e9d83f63a77a428be41f96bd9b6ccc9889e4ec74927058b41dda00f000000000000225120dd00ac641dc0f399e62a6ed6300aba1ec5fa4b3aeedf1717901e0d49d980efd20140f3d9bcc844eab7055a168a62f65b8625e3853fad8f834d5c82fdf23100b7b871cf48c2c956e7d76cdd367bbfefe496c426e64dcfeaef800ab9893142050714b6014081c15fe5ed6b8a0c0509e871dfbb7784ddb22dd33b47f3ad1a3b271d29acfe76b5152b53ed29a7f6ea27cb4f5882064da07e8430aacafab89a334b32780fcb2700000000 +``` + +Taproot (Compressed) +``` +2a81de3177d8019c2ef3d9bcc844eab7055a168a62f65b8625e3853fad8f834d5c82fdf23100b7b871cf48c2c956e7d76cdd367bbfefe496c426e64dcfeaef800ab9893142050714b6019c2e81c15fe5ed6b8a0c0509e871dfbb7784ddb22dd33b47f3ad1a3b271d29acfe76b5152b53ed29a7f6ea27cb4f5882064da07e8430aacafab89a334b32780fcb276b10142cffb29e9d83f63a77a428be41f96bd9b6ccc9889e4ec74927058b41dd8827dd00ac641dc0f399e62a6ed6300aba1ec5fa4b3aeedf1717901e0d49d980efd2a01f +``` + +Legacy (Uncompressed) +``` +02000000000102c583fe4f934a0ed87e4d082cd52967cc774b943fbb2e21378ec18b926b8dc549000000000000000000c583fe4f934a0ed87e4d082cd52967cc774b943fbb2e21378ec18b926b8dc5490000000000000000000288130000000000002251206b10142cffb29e9d83f63a77a428be41f96bd9b6ccc9889e4ec74927058b41dda00f000000000000225120dd00ac641dc0f399e62a6ed6300aba1ec5fa4b3aeedf1717901e0d49d980efd202473044022000d1c81efcf6d20d87253749bcef8bf1be7ba51ccdf7a3b328174ea874226c3c02202d810c20f92d49c821eaa6e3a9ec7d764e0e71006e572d6ea96b631bd921767c0121037833d05665f3b21c479583ee12c6c573d1f25977dedfae12c70c18ec9dd4618702473044022000d1c81efcf6d20d87253749bcef8bf1be7ba51ccdf7a3b328174ea874226c3c02202d810c20f92d49c821eaa6e3a9ec7d764e0e71006e572d6ea96b631bd921767c0121037833d05665f3b21c479583ee12c6c573d1f25977dedfae12c70c18ec9dd4618700000000 +``` + +Legacy (Compressed) +``` 
+2ad1e53044d801ae276c0002473044022000d1c81efcf6d20d87253749bcef8bf1be7ba51ccdf7a3b328174ea874226c3c02202d810c20f92d49c821eaa6e3a9ec7d764e0e71006e572d6ea96b631bd921767c0121037833d05665f3b21c479583ee12c6c573d1f25977dedfae12c70c18ec9dd461870001ae276c0002473044022000d1c81efcf6d20d87253749bcef8bf1be7ba51ccdf7a3b328174ea874226c3c02202d810c20f92d49c821eaa6e3a9ec7d764e0e71006e572d6ea96b631bd921767c0121037833d05665f3b21c479583ee12c6c573d1f25977dedfae12c70c18ec9dd46187006b10142cffb29e9d83f63a77a428be41f96bd9b6ccc9889e4ec74927058b41dd8827dd00ac641dc0f399e62a6ed6300aba1ec5fa4b3aeedf1717901e0d49d980efd2a01f +``` From 521848daeeecaeaaf823f31462e38f17bbecaf6f Mon Sep 17 00:00:00 2001 From: Tom Briar Date: Fri, 20 Oct 2023 11:29:58 -0400 Subject: [PATCH 2/8] util: Added a variable length bitstream encoder --- src/util/strencodings.h | 42 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/util/strencodings.h b/src/util/strencodings.h index 439678c24a132..69bedddacdc7f 100644 --- a/src/util/strencodings.h +++ b/src/util/strencodings.h @@ -300,6 +300,48 @@ bool ConvertBits(O outfn, It it, It end, I infn = {}) { return true; } +/** Convert from a set of power-of-2 number base to another set. */ +template +bool ConvertBitsVariable(const Span& frombitsvec, const Span& tobitsvec, O outfn, It it, It end, I infn = {}) { + size_t fromindex = 0; + size_t toindex = 0; + size_t acc = 0; + size_t bits = 0; + auto frombits = fromindex >= frombitsvec.size() ? frombitsvec[frombitsvec.size()-1] : frombitsvec[fromindex]; + auto tobits = toindex >= tobitsvec.size() ? 
tobitsvec[tobitsvec.size()-1] : tobitsvec[toindex]; + auto maxv = (1 << tobits) - 1; + auto max_acc = (1 << (frombits + tobits - 1)) - 1; + + while (it != end) { + int v = infn(*it); + if (v < 0) return false; + acc = ((acc << frombits) | v) & max_acc; + bits += frombits; + while (bits >= (size_t)tobits) { + bits -= tobits; + outfn((acc >> bits) & maxv); + ++toindex; + tobits = toindex >= tobitsvec.size() ? tobitsvec[tobitsvec.size()-1] : tobitsvec[toindex]; + maxv = (1 << tobits) - 1; + max_acc = (1 << (frombits + tobits - 1)) - 1; + } + ++it; + ++fromindex; + frombits = fromindex >= frombitsvec.size() ? frombitsvec[frombitsvec.size()-1] : frombitsvec[fromindex]; + max_acc = (1 << (frombits + tobits - 1)) - 1; + } + frombits = fromindex >= frombitsvec.size() ? frombitsvec[frombitsvec.size()-1] : frombitsvec[fromindex]; + tobits = toindex >= tobitsvec.size() ? tobitsvec[tobitsvec.size()-1] : tobitsvec[toindex]; + maxv = (1 << tobits) - 1; + if (pad) { + if (bits) outfn((acc << (tobits - bits)) & maxv); + } else if (bits >= (size_t)frombits || ((acc << (tobits - bits)) & maxv)) { + return false; + } + if (fromindex < frombitsvec.size()-1 || toindex < tobitsvec.size()-1) return false; + return true; +} + /** * Converts the given character to its lowercase equivalent. * This function is locale independent. 
It only converts uppercase From a30c5104e4ce718ab6e15a0fa8bd5b337d149f64 Mon Sep 17 00:00:00 2001 From: Tom Briar Date: Fri, 20 Oct 2023 11:30:18 -0400 Subject: [PATCH 3/8] script: Added the rest of the IsPayTo functions --- src/script/script.cpp | 40 ++++++++++++++++++++++++++++++++++++++-- src/script/script.h | 4 ++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/script/script.cpp b/src/script/script.cpp index 80e8d26bcfb97..5c30964abf059 100644 --- a/src/script/script.cpp +++ b/src/script/script.cpp @@ -204,20 +204,56 @@ unsigned int CScript::GetSigOpCount(const CScript& scriptSig) const return subscript.GetSigOpCount(true); } +bool CScript::IsPayToPublicKey() const +{ + // Test for pay-to-public-key CScripts: + return ((this->size() == 0x43 && (*this)[0] == 0x41 && (*this)[66] == OP_CHECKSIG) || + (this->size() == 0x23 && (*this)[0] == 0x21 && (*this)[34] == OP_CHECKSIG)); +} + bool CScript::IsPayToScriptHash() const { // Extra-fast test for pay-to-script-hash CScripts: - return (this->size() == 23 && + return (this->size() == 0x17 && (*this)[0] == OP_HASH160 && (*this)[1] == 0x14 && (*this)[22] == OP_EQUAL); } +bool CScript::IsPayToPublicKeyHash() const +{ + // Test for pay-to-public-key-hash CScripts: + return (this->size() == 0x19 && + (*this)[0] == OP_DUP && + (*this)[1] == OP_HASH160 && + (*this)[2] == 0x14 && + (*this)[23] == OP_EQUALVERIFY && + (*this)[24] == OP_CHECKSIG); +} + + bool CScript::IsPayToWitnessScriptHash() const { // Extra-fast test for pay-to-witness-script-hash CScripts: - return (this->size() == 34 && + return (this->size() == 0x22 && + (*this)[0] == OP_0 && + (*this)[1] == 0x20); +} + +bool CScript::IsPayToWitnessKeyHash() const +{ + // Test for pay-to-witness-public-key-hash CScripts: + return (this->size() == 0x16 && (*this)[0] == OP_0 && + (*this)[1] == 0x14); +} + + +bool CScript::IsPayToTaproot() const +{ + // Test for pay-to-taproot CScripts: + return (this->size() == 0x22 && + (*this)[0] == OP_1 && (*this)[1] 
== 0x20); } diff --git a/src/script/script.h b/src/script/script.h index 66d63fae89e12..22e3dd0ebd017 100644 --- a/src/script/script.h +++ b/src/script/script.h @@ -533,8 +533,12 @@ class CScript : public CScriptBase */ unsigned int GetSigOpCount(const CScript& scriptSig) const; + bool IsPayToPublicKey() const; bool IsPayToScriptHash() const; + bool IsPayToPublicKeyHash() const; bool IsPayToWitnessScriptHash() const; + bool IsPayToWitnessKeyHash() const; + bool IsPayToTaproot() const; bool IsWitnessProgram(int& version, std::vector& program) const; /** Called by IsStandardTx and P2SH/BIP62 VerifyScript (which makes it consensus-critical). */ From 574d0773fa05ef3f6fc75346a44a3ffcb6d6d71d Mon Sep 17 00:00:00 2001 From: Tom Briar Date: Wed, 6 Dec 2023 14:02:27 -0500 Subject: [PATCH 4/8] addresstype: Split ExtractDestination into two functions --- src/addresstype.cpp | 24 ++++++++++++++---------- src/addresstype.h | 13 +++++++++++++ 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/src/addresstype.cpp b/src/addresstype.cpp index f199d1b479446..96c4541252984 100644 --- a/src/addresstype.cpp +++ b/src/addresstype.cpp @@ -8,7 +8,6 @@ #include #include #include