From 01d92e1dfffc4e95c668857d3d327b01c5b6f44b Mon Sep 17 00:00:00 2001 From: Kiyoaki Tsurutani Date: Wed, 11 Feb 2026 18:44:01 +0900 Subject: [PATCH 01/10] feat: disambiguate input format with explicit --format option Change --format from OutputFormat (default hex) to Option. When explicitly specified, decode_input uses that format directly without fallback, preventing misdetection of base64 data as hex. When omitted, auto-detection (PEM -> hex -> base64) is preserved for backward compatibility. Output defaults to hex when --format is not specified. Add unit tests for decode_input with explicit/auto-detect modes and integration tests for explicit format roundtrip, format mismatch error messages, and default hex output verification. --- PLANS.md | 2 +- kylix-cli/src/cli.rs | 30 ++--- kylix-cli/src/commands/decaps.rs | 5 +- kylix-cli/src/commands/encaps.rs | 8 +- kylix-cli/src/commands/keygen.rs | 7 +- kylix-cli/src/commands/sign.rs | 6 +- kylix-cli/src/commands/verify.rs | 2 +- kylix-cli/src/io.rs | 184 +++++++++++++++++++++++++---- kylix-cli/tests/cli_integration.rs | 111 +++++++++++++++++ 9 files changed, 305 insertions(+), 50 deletions(-) diff --git a/PLANS.md b/PLANS.md index 575b1dd..44c78c7 100644 --- a/PLANS.md +++ b/PLANS.md @@ -23,6 +23,7 @@ CLI tool for kylix-pqc post-quantum cryptography library. | Bench Release Build Fix | HIGH | `--release` missing from external-tool-compare CI job, causing ~100x slowdown vs liboqs | | [H-2] Shared Secret Output Control | HIGH | `--secret-file` option for encaps/decaps to write shared secret to file instead of console | | [M-2] Split main.rs into Modules | MEDIUM | Split 1256-line main.rs into cli.rs, io.rs, macros.rs, and commands/ directory | +| [M-3] Input Format Disambiguation | MEDIUM | `--format` now uses `Option`: explicit format prevents fallback, None preserves auto-detect | --- @@ -34,7 +35,6 @@ CLI tool for kylix-pqc post-quantum cryptography library. | OpenSSL Dedup | LOW | Extract common logic from KEM/SIG benchmark functions | | liboqs Parsing | LOW | Parse column headers instead of hardcoded indices | | wolfSSL Support | LOW | Add wolfSSL as external benchmark tool | -| [M-3] Input Format Disambiguation | MEDIUM | Improve hex vs base64 auto-detection for edge cases | | [M-4] Deep Zeroization in encode/decode | MEDIUM | Zeroize intermediate strings in `encode_output`/`decode_input` (PEM wrapping, base64) | | [L-5] Windows ACL for Secret Keys | LOW | Enforce restrictive ACLs on secret key files on Windows (e.g. `windows-acl` crate) | diff --git a/kylix-cli/src/cli.rs b/kylix-cli/src/cli.rs index 8a6d86c..bebf832 100644 --- a/kylix-cli/src/cli.rs +++ b/kylix-cli/src/cli.rs @@ -34,9 +34,9 @@ pub(crate) enum Commands { #[arg(short, long)] output: String, - /// Output format - #[arg(short, long, value_enum, default_value = "hex")] - format: OutputFormat, + /// Encoding format (default: hex for output, auto-detect for input) + #[arg(short, long, value_enum)] + format: Option, }, /// Encapsulate a shared secret using a public key @@ -53,9 +53,9 @@ pub(crate) enum Commands { #[arg(long = "secret-file")] secret_file: Option, - /// Output format - #[arg(short, long, value_enum, default_value = "hex")] - format: OutputFormat, + /// Encoding format (default: hex for output, auto-detect for input) + #[arg(short, long, value_enum)] + format: Option, }, /// Decapsulate a shared secret using a secret key @@ -72,9 +72,9 @@ pub(crate) enum Commands { #[arg(long = "secret-file")] secret_file: Option, - /// Output format for shared secret - #[arg(short, long, value_enum, default_value = "hex")] - format: OutputFormat, + /// Encoding format (default: hex for output, auto-detect for input) + #[arg(short, long, value_enum)] + format: Option, }, /// Sign a file using ML-DSA or SLH-DSA @@ -91,9 +91,9 @@ pub(crate) enum Commands { #[arg(short, long)] output: PathBuf, - /// Output format - #[arg(short, long, value_enum, default_value = "hex")] - format: OutputFormat, + /// Encoding format (default: hex for output, auto-detect for input) + #[arg(short, long, value_enum)] + format: Option, /// Algorithm (required for SLH-DSA to distinguish -s/-f variants) #[arg(long, value_enum)] @@ -114,9 +114,9 @@ pub(crate) enum Commands { #[arg(short, long)] signature: PathBuf, - /// Input format for key and signature files - #[arg(short, long, value_enum, default_value = "hex")] - format: OutputFormat, + /// Encoding format (default: hex for output, auto-detect for input) + #[arg(short, long, value_enum)] + format: Option, /// Algorithm (required for SLH-DSA to distinguish -s/-f variants) #[arg(long, value_enum)] diff --git a/kylix-cli/src/commands/decaps.rs b/kylix-cli/src/commands/decaps.rs index 2e342f7..8247ac4 100644 --- a/kylix-cli/src/commands/decaps.rs +++ b/kylix-cli/src/commands/decaps.rs @@ -13,7 +13,7 @@ pub(crate) fn cmd_decaps( key: &PathBuf, input: Option<&PathBuf>, secret_file: Option<&PathBuf>, - format: OutputFormat, + format: Option, verbose: bool, ) -> Result<()> { let sk_data = @@ -21,6 +21,7 @@ pub(crate) fn cmd_decaps( let sk_bytes = Zeroizing::new(decode_input(&sk_data, format)?); drop(sk_data); // zeroize raw key string immediately after decoding + let out_format = format.unwrap_or(OutputFormat::Hex); let algo = Algorithm::detect_kem_from_sec_key(sk_bytes.len())?; if verbose { @@ -60,7 +61,7 @@ pub(crate) fn cmd_decaps( drop(sk_bytes); // zeroize secret key bytes immediately after decapsulation let ss_len = ss_bytes.len(); - let ss_encoded = Zeroizing::new(encode_output(&ss_bytes, format, "SHARED SECRET")); + let ss_encoded = Zeroizing::new(encode_output(&ss_bytes, out_format, "SHARED SECRET")); drop(ss_bytes); // zeroize shared secret bytes immediately after encoding if let Some(sf_path) = secret_file { write_secret_file(sf_path, &ss_encoded)?; diff --git a/kylix-cli/src/commands/encaps.rs b/kylix-cli/src/commands/encaps.rs index 7bc238e..edff1bd 100644 --- a/kylix-cli/src/commands/encaps.rs +++ b/kylix-cli/src/commands/encaps.rs @@ -12,12 +12,14 @@ pub(crate) fn cmd_encaps( pubkey: &PathBuf, output: Option<&PathBuf>, secret_file: Option<&PathBuf>, - format: OutputFormat, + format: Option, verbose: bool, ) -> Result<()> { let pk_data = fs::read_to_string(pubkey).context("Failed to read public key file")?; let pk_bytes = decode_input(&pk_data, format)?; + let out_format = format.unwrap_or(OutputFormat::Hex); + let algo = Algorithm::detect_kem_from_pub_key(pk_bytes.len())?; if verbose { @@ -34,7 +36,7 @@ pub(crate) fn cmd_encaps( }; let ss_bytes = Zeroizing::new(ss_bytes_raw); - let ct_encoded = encode_output(&ct_bytes, format, "ML-KEM CIPHERTEXT"); + let ct_encoded = encode_output(&ct_bytes, out_format, "ML-KEM CIPHERTEXT"); if let Some(out_path) = output { fs::write(out_path, &ct_encoded).context("Failed to write ciphertext")?; @@ -47,7 +49,7 @@ pub(crate) fn cmd_encaps( } let ss_len = ss_bytes.len(); - let ss_encoded = Zeroizing::new(encode_output(&ss_bytes, format, "SHARED SECRET")); + let ss_encoded = Zeroizing::new(encode_output(&ss_bytes, out_format, "SHARED SECRET")); drop(ss_bytes); // zeroize shared secret bytes immediately after encoding if let Some(sf_path) = secret_file { write_secret_file(sf_path, &ss_encoded)?; diff --git a/kylix-cli/src/commands/keygen.rs b/kylix-cli/src/commands/keygen.rs index 3899822..f959c0f 100644 --- a/kylix-cli/src/commands/keygen.rs +++ b/kylix-cli/src/commands/keygen.rs @@ -12,7 +12,7 @@ use crate::io::{encode_output, write_secret_file}; pub(crate) fn cmd_keygen( algo: Algorithm, output: &str, - format: OutputFormat, + format: Option, verbose: bool, ) -> Result<()> { if verbose { @@ -41,8 +41,9 @@ pub(crate) fn cmd_keygen( let pk_size = pk_bytes.len(); let sk_size = sk_bytes.len(); - let pk_encoded = encode_output(&pk_bytes, format, pk_label); - let sk_encoded = Zeroizing::new(encode_output(&sk_bytes, format, sk_label)); + let out_format = format.unwrap_or(OutputFormat::Hex); + let pk_encoded = encode_output(&pk_bytes, out_format, pk_label); + let sk_encoded = Zeroizing::new(encode_output(&sk_bytes, out_format, sk_label)); // sk_bytes is Zeroizing>, automatically zeroized on drop let pub_path = format!("{}.pub", output); diff --git a/kylix-cli/src/commands/sign.rs b/kylix-cli/src/commands/sign.rs index 1bbafed..50b92af 100644 --- a/kylix-cli/src/commands/sign.rs +++ b/kylix-cli/src/commands/sign.rs @@ -13,7 +13,7 @@ pub(crate) fn cmd_sign( key: &PathBuf, input: &PathBuf, output: &PathBuf, - format: OutputFormat, + format: Option, explicit_algo: Option, verbose: bool, ) -> Result<()> { @@ -22,6 +22,8 @@ pub(crate) fn cmd_sign( let sk_bytes = Zeroizing::new(decode_input(&sk_data, format)?); drop(sk_data); // zeroize raw key string immediately after decoding + let out_format = format.unwrap_or(OutputFormat::Hex); + // Use explicit algorithm if provided, otherwise detect from key size let algo = if let Some(a) = explicit_algo { // Validate key size matches the explicit algorithm @@ -114,7 +116,7 @@ pub(crate) fn cmd_sign( } else { "ML-DSA SIGNATURE" }; - let sig_encoded = encode_output(&sig_bytes, format, sig_label); + let sig_encoded = encode_output(&sig_bytes, out_format, sig_label); fs::write(output, &sig_encoded).context("Failed to write signature")?; if verbose { diff --git a/kylix-cli/src/commands/verify.rs b/kylix-cli/src/commands/verify.rs index ef7da63..605eb6a 100644 --- a/kylix-cli/src/commands/verify.rs +++ b/kylix-cli/src/commands/verify.rs @@ -12,7 +12,7 @@ pub(crate) fn cmd_verify( pubkey: &PathBuf, input: &PathBuf, signature: &PathBuf, - format: OutputFormat, + format: Option, explicit_algo: Option, verbose: bool, ) -> Result<()> { diff --git a/kylix-cli/src/io.rs b/kylix-cli/src/io.rs index bf5ee07..c42dbd8 100644 --- a/kylix-cli/src/io.rs +++ b/kylix-cli/src/io.rs @@ -37,33 +37,47 @@ fn is_hex(s: &str) -> bool { !s.is_empty() && s.chars().all(|c| c.is_ascii_hexdigit()) } -/// Decode bytes with auto-detection of format. -/// Detection order: PEM (by header) -> Hex (if all hex chars) -> Base64. -/// The format parameter is ignored for input; it only affects output encoding. -pub(crate) fn decode_input(data: &str, _format: OutputFormat) -> Result> { +/// Decode a PEM-encoded string to raw bytes. +fn decode_pem(data: &str) -> Result> { + let lines: Vec<&str> = data.lines().collect(); + if lines.len() < 3 || !data.starts_with("-----BEGIN") { + bail!("Invalid PEM format: expected -----BEGIN header, body lines, and -----END footer"); + } + let b64: String = lines[1..lines.len() - 1].join(""); + BASE64 + .decode(&b64) + .context("Failed to decode PEM base64 content") +} + +/// Decode bytes from the given encoding format. +/// +/// When `format` is `Some(...)`, the specified format is used directly. +/// When `format` is `None`, auto-detection is applied: +/// PEM (by header) -> Hex (all hex chars + even length) -> Base64. +pub(crate) fn decode_input(data: &str, format: Option) -> Result> { let data = data.trim(); - // Auto-detect PEM format - if data.starts_with("-----BEGIN") { - let lines: Vec<&str> = data.lines().collect(); - if lines.len() < 3 { - bail!("Invalid PEM format"); + match format { + Some(OutputFormat::Pem) => decode_pem(data), + Some(OutputFormat::Hex) => hex::decode(data).context( + "Failed to decode as hex. If the input uses a different encoding, \ + remove --format or use --format base64.", + ), + Some(OutputFormat::Base64) => BASE64.decode(data).context( + "Failed to decode as base64. If the input uses a different encoding, \ + remove --format or use --format hex.", + ), + None => { + // Auto-detect: PEM -> hex -> base64 + if data.starts_with("-----BEGIN") { + return decode_pem(data); + } + if is_hex(data) && data.len() % 2 == 0 { + return hex::decode(data).context("Failed to decode hex"); + } + BASE64.decode(data).context("Failed to decode base64") } - let b64: String = lines[1..lines.len() - 1].join(""); - return BASE64 - .decode(&b64) - .context("Failed to decode PEM base64 content"); } - - // Auto-detect hex vs base64 - // Hex: only 0-9, a-f, A-F (and must have even length for valid bytes) - // Base64: may contain +, /, = which are not valid hex - if is_hex(data) && data.len() % 2 == 0 { - return hex::decode(data).context("Failed to decode hex"); - } - - // Try base64 - BASE64.decode(data).context("Failed to decode base64") } /// Write a file with restricted permissions (0o600) on Unix systems. @@ -155,3 +169,127 @@ pub(crate) fn write_secret_file(path: &Path, content: &str) -> Result<()> { Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + + const SAMPLE_BYTES: &[u8] = &[0xDE, 0xAD, 0xBE, 0xEF, 0x01, 0x23]; + const SAMPLE_HEX: &str = "deadbeef0123"; + // base64 of [0xDE, 0xAD, 0xBE, 0xEF, 0x01, 0x23] = "3q2+7wEj" + const SAMPLE_BASE64: &str = "3q2+7wEj"; + + fn sample_pem() -> String { + format!( + "-----BEGIN TEST KEY-----\n{}\n-----END TEST KEY-----", + SAMPLE_BASE64 + ) + } + + // --- Explicit format decoding --- + + #[test] + fn decode_explicit_hex() { + let result = decode_input(SAMPLE_HEX, Some(OutputFormat::Hex)).unwrap(); + assert_eq!(result, SAMPLE_BYTES); + } + + #[test] + fn decode_explicit_base64() { + let result = decode_input(SAMPLE_BASE64, Some(OutputFormat::Base64)).unwrap(); + assert_eq!(result, SAMPLE_BYTES); + } + + #[test] + fn decode_explicit_pem() { + let pem = sample_pem(); + let result = decode_input(&pem, Some(OutputFormat::Pem)).unwrap(); + assert_eq!(result, SAMPLE_BYTES); + } + + // --- Explicit format prevents fallback --- + + #[test] + fn decode_explicit_hex_rejects_base64() { + // "3q2+7wEj" is valid base64, but should fail when hex is explicitly requested + let err = decode_input(SAMPLE_BASE64, Some(OutputFormat::Hex)).unwrap_err(); + let msg = err.to_string(); + assert!(msg.contains("hex"), "Error should mention hex: {}", msg); + } + + #[test] + fn decode_explicit_base64_rejects_invalid() { + // "deadbeef0123" is valid hex but also valid base64 (decodes to different bytes). + // Use a string that is NOT valid base64 to test rejection. + let err = decode_input("not-valid-base64!!!", Some(OutputFormat::Base64)).unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("base64"), + "Error should mention base64: {}", + msg + ); + } + + // --- Hex-looking data decoded as base64 when explicitly requested --- + + #[test] + fn decode_hex_looking_data_as_explicit_base64() { + // "deadbeef0123" happens to be decodable as base64 (it's all valid base64 chars) + // When base64 is explicitly requested, it should decode as base64, not hex + let as_base64 = decode_input(SAMPLE_HEX, Some(OutputFormat::Base64)).unwrap(); + let as_hex = decode_input(SAMPLE_HEX, Some(OutputFormat::Hex)).unwrap(); + // The decoded bytes should differ (different encodings of same string) + assert_ne!(as_base64, as_hex); + } + + // --- Auto-detection (None) --- + + #[test] + fn decode_auto_detect_pem() { + let pem = sample_pem(); + let result = decode_input(&pem, None).unwrap(); + assert_eq!(result, SAMPLE_BYTES); + } + + #[test] + fn decode_auto_detect_hex() { + let result = decode_input(SAMPLE_HEX, None).unwrap(); + assert_eq!(result, SAMPLE_BYTES); + } + + #[test] + fn decode_auto_detect_base64() { + // Use a base64 string that is NOT valid hex to ensure base64 fallback + let data = &[0xFF, 0x00, 0xAB]; + let b64 = BASE64.encode(data); + let result = decode_input(&b64, None).unwrap(); + assert_eq!(result, data); + } + + // --- encode_output --- + + #[test] + fn encode_hex_roundtrip() { + let encoded = encode_output(SAMPLE_BYTES, OutputFormat::Hex, "UNUSED"); + assert_eq!(encoded, SAMPLE_HEX); + let decoded = decode_input(&encoded, Some(OutputFormat::Hex)).unwrap(); + assert_eq!(decoded, SAMPLE_BYTES); + } + + #[test] + fn encode_base64_roundtrip() { + let encoded = encode_output(SAMPLE_BYTES, OutputFormat::Base64, "UNUSED"); + assert_eq!(encoded, SAMPLE_BASE64); + let decoded = decode_input(&encoded, Some(OutputFormat::Base64)).unwrap(); + assert_eq!(decoded, SAMPLE_BYTES); + } + + #[test] + fn encode_pem_roundtrip() { + let encoded = encode_output(SAMPLE_BYTES, OutputFormat::Pem, "TEST KEY"); + assert!(encoded.starts_with("-----BEGIN TEST KEY-----")); + assert!(encoded.ends_with("-----END TEST KEY-----")); + let decoded = decode_input(&encoded, Some(OutputFormat::Pem)).unwrap(); + assert_eq!(decoded, SAMPLE_BYTES); + } +} diff --git a/kylix-cli/tests/cli_integration.rs b/kylix-cli/tests/cli_integration.rs index 408df1e..02ef860 100644 --- a/kylix-cli/tests/cli_integration.rs +++ b/kylix-cli/tests/cli_integration.rs @@ -716,6 +716,117 @@ mod format_auto_detection { .assert() .success(); } + + #[test] + fn test_explicit_base64_roundtrip() { + let tmp = TempDir::new().unwrap(); + let key_path = tmp.path().join("key"); + let ct_path = tmp.path().join("ct"); + let ss_enc_path = tmp.path().join("ss_enc"); + let ss_dec_path = tmp.path().join("ss_dec"); + + // Generate keys in base64 + kylix() + .args(["keygen", "-a", "ml-kem-768", "-o"]) + .arg(key_path.to_str().unwrap()) + .arg("-f") + .arg("base64") + .assert() + .success(); + + // Encaps with explicit base64 format + kylix() + .args(["encaps", "--pub"]) + .arg(tmp.path().join("key.pub").to_str().unwrap()) + .arg("-o") + .arg(ct_path.to_str().unwrap()) + .arg("-f") + .arg("base64") + .arg("--secret-file") + .arg(ss_enc_path.to_str().unwrap()) + .assert() + .success(); + + // Decaps with explicit base64 format + kylix() + .args(["decaps", "--key"]) + .arg(tmp.path().join("key.sec").to_str().unwrap()) + .arg("-i") + .arg(ct_path.to_str().unwrap()) + .arg("-f") + .arg("base64") + .arg("--secret-file") + .arg(ss_dec_path.to_str().unwrap()) + .assert() + .success(); + + let ss_enc = fs::read_to_string(&ss_enc_path).unwrap(); + let ss_dec = fs::read_to_string(&ss_dec_path).unwrap(); + assert_eq!( + ss_enc.trim(), + ss_dec.trim(), + "Shared secrets should match with explicit base64 format" + ); + } + + #[test] + fn test_format_mismatch_error() { + let tmp = TempDir::new().unwrap(); + let key_path = tmp.path().join("key"); + let ct_path = tmp.path().join("ct"); + + // Generate keys in PEM format + kylix() + .args(["keygen", "-a", "ml-kem-768", "-o"]) + .arg(key_path.to_str().unwrap()) + .arg("-f") + .arg("pem") + .assert() + .success(); + + // Encaps without --format (auto-detect PEM -> succeeds) + kylix() + .args(["encaps", "--pub"]) + .arg(tmp.path().join("key.pub").to_str().unwrap()) + .arg("-o") + .arg(ct_path.to_str().unwrap()) + .assert() + .success(); + + // Decaps with wrong explicit format (hex) should fail with helpful message + kylix() + .args(["decaps", "--key"]) + .arg(tmp.path().join("key.sec").to_str().unwrap()) + .arg("-i") + .arg(ct_path.to_str().unwrap()) + .arg("-f") + .arg("hex") + .assert() + .failure() + .stderr(predicate::str::contains("--format")); + } + + #[test] + fn test_default_output_is_hex() { + let tmp = TempDir::new().unwrap(); + let key_path = tmp.path().join("key"); + + // Generate keys without --format (should default to hex) + kylix() + .args(["keygen", "-a", "ml-kem-768", "-o"]) + .arg(key_path.to_str().unwrap()) + .assert() + .success(); + + let pub_content = fs::read_to_string(tmp.path().join("key.pub")).unwrap(); + let trimmed = pub_content.trim(); + // Hex output should contain only hex characters + assert!( + trimmed.chars().all(|c| c.is_ascii_hexdigit()), + "Default output should be hex, got: {}...", + &trimmed[..trimmed.len().min(40)] + ); + } } mod secret_file { From 74060def42a621e82e8e190fb5cd11550512fe88 Mon Sep 17 00:00:00 2001 From: Kiyoaki Tsurutani Date: Wed, 11 Feb 2026 18:57:57 +0900 Subject: [PATCH 02/10] fix: address PR review comments for format disambiguation - Validate -----END footer in decode_pem (Copilot + Gemini) - Use generic error message listing all formats: hex|base64|pem (Copilot) - Refactor None branch to if/else if/else expression (Gemini) - Use &Path instead of to_str().unwrap() in new integration tests (Copilot) - Use input-only help text for verify's --format arg (Copilot) --- kylix-cli/src/cli.rs | 2 +- kylix-cli/src/io.rs | 19 +++++++++++-------- kylix-cli/tests/cli_integration.rs | 26 +++++++++++++------------- 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/kylix-cli/src/cli.rs b/kylix-cli/src/cli.rs index bebf832..8a9e423 100644 --- a/kylix-cli/src/cli.rs +++ b/kylix-cli/src/cli.rs @@ -114,7 +114,7 @@ pub(crate) enum Commands { #[arg(short, long)] signature: PathBuf, - /// Encoding format (default: hex for output, auto-detect for input) + /// Input encoding (auto-detect if omitted) #[arg(short, long, value_enum)] format: Option, diff --git a/kylix-cli/src/io.rs b/kylix-cli/src/io.rs index c42dbd8..59bdcf5 100644 --- a/kylix-cli/src/io.rs +++ b/kylix-cli/src/io.rs @@ -40,7 +40,10 @@ fn is_hex(s: &str) -> bool { /// Decode a PEM-encoded string to raw bytes. fn decode_pem(data: &str) -> Result> { let lines: Vec<&str> = data.lines().collect(); - if lines.len() < 3 || !data.starts_with("-----BEGIN") { + if lines.len() < 3 + || !lines[0].starts_with("-----BEGIN") + || !lines.last().unwrap().starts_with("-----END") + { bail!("Invalid PEM format: expected -----BEGIN header, body lines, and -----END footer"); } let b64: String = lines[1..lines.len() - 1].join(""); @@ -61,21 +64,21 @@ pub(crate) fn decode_input(data: &str, format: Option) -> Result decode_pem(data), Some(OutputFormat::Hex) => hex::decode(data).context( "Failed to decode as hex. If the input uses a different encoding, \ - remove --format or use --format base64.", + remove --format or set it to the encoding used by the input: hex|base64|pem.", ), Some(OutputFormat::Base64) => BASE64.decode(data).context( "Failed to decode as base64. If the input uses a different encoding, \ - remove --format or use --format hex.", + remove --format or set it to the encoding used by the input: hex|base64|pem.", ), None => { // Auto-detect: PEM -> hex -> base64 if data.starts_with("-----BEGIN") { - return decode_pem(data); - } - if is_hex(data) && data.len() % 2 == 0 { - return hex::decode(data).context("Failed to decode hex"); + decode_pem(data) + } else if is_hex(data) && data.len() % 2 == 0 { + hex::decode(data).context("Failed to decode hex") + } else { + BASE64.decode(data).context("Failed to decode base64") } - BASE64.decode(data).context("Failed to decode base64") } } } diff --git a/kylix-cli/tests/cli_integration.rs b/kylix-cli/tests/cli_integration.rs index 02ef860..b0e86d1 100644 --- a/kylix-cli/tests/cli_integration.rs +++ b/kylix-cli/tests/cli_integration.rs @@ -728,7 +728,7 @@ mod format_auto_detection { // Generate keys in base64 kylix() .args(["keygen", "-a", "ml-kem-768", "-o"]) - .arg(key_path.to_str().unwrap()) + .arg(&key_path) .arg("-f") .arg("base64") .assert() @@ -737,26 +737,26 @@ mod format_auto_detection { // Encaps with explicit base64 format kylix() .args(["encaps", "--pub"]) - .arg(tmp.path().join("key.pub").to_str().unwrap()) + .arg(tmp.path().join("key.pub")) .arg("-o") - .arg(ct_path.to_str().unwrap()) + .arg(&ct_path) .arg("-f") .arg("base64") .arg("--secret-file") - .arg(ss_enc_path.to_str().unwrap()) + .arg(&ss_enc_path) .assert() .success(); // Decaps with explicit base64 format kylix() .args(["decaps", "--key"]) - .arg(tmp.path().join("key.sec").to_str().unwrap()) + .arg(tmp.path().join("key.sec")) .arg("-i") - .arg(ct_path.to_str().unwrap()) + .arg(&ct_path) .arg("-f") .arg("base64") .arg("--secret-file") - .arg(ss_dec_path.to_str().unwrap()) + .arg(&ss_dec_path) .assert() .success(); @@ -778,7 +778,7 @@ mod format_auto_detection { // Generate keys in PEM format kylix() .args(["keygen", "-a", "ml-kem-768", "-o"]) - .arg(key_path.to_str().unwrap()) + .arg(&key_path) .arg("-f") .arg("pem") .assert() @@ -787,18 +787,18 @@ mod format_auto_detection { // Encaps without --format (auto-detect PEM -> succeeds) kylix() .args(["encaps", "--pub"]) - .arg(tmp.path().join("key.pub").to_str().unwrap()) + .arg(tmp.path().join("key.pub")) .arg("-o") - .arg(ct_path.to_str().unwrap()) + .arg(&ct_path) .assert() .success(); // Decaps with wrong explicit format (hex) should fail with helpful message kylix() .args(["decaps", "--key"]) - .arg(tmp.path().join("key.sec").to_str().unwrap()) + .arg(tmp.path().join("key.sec")) .arg("-i") - .arg(ct_path.to_str().unwrap()) + .arg(&ct_path) .arg("-f") .arg("hex") .assert() @@ -814,7 +814,7 @@ mod format_auto_detection { // Generate keys without --format (should default to hex) kylix() .args(["keygen", "-a", "ml-kem-768", "-o"]) - .arg(key_path.to_str().unwrap()) + .arg(&key_path) .assert() .success(); From 20b01c73b7c321fb4e245ac5a08ca5826a5cc011 Mon Sep 17 00:00:00 2001 From: Kiyoaki Tsurutani Date: Wed, 11 Feb 2026 23:13:09 +0900 Subject: [PATCH 03/10] refactor: centralize default output format and add PEM validation task - Extract OutputFormat::DEFAULT constant to avoid repeating Hex default across 4 command files (keygen, encaps, decaps, sign) - Add [L-6] PEM Label Validation to PLANS.md for future BEGIN/END label mismatch detection in decode_pem --- PLANS.md | 1 + kylix-cli/src/cli.rs | 5 +++++ kylix-cli/src/commands/decaps.rs | 2 +- kylix-cli/src/commands/encaps.rs | 2 +- kylix-cli/src/commands/keygen.rs | 2 +- kylix-cli/src/commands/sign.rs | 2 +- 6 files changed, 10 insertions(+), 4 deletions(-) diff --git a/PLANS.md b/PLANS.md index 44c78c7..0ae4cc5 100644 --- a/PLANS.md +++ b/PLANS.md @@ -37,6 +37,7 @@ CLI tool for kylix-pqc post-quantum cryptography library. | wolfSSL Support | LOW | Add wolfSSL as external benchmark tool | | [M-4] Deep Zeroization in encode/decode | MEDIUM | Zeroize intermediate strings in `encode_output`/`decode_input` (PEM wrapping, base64) | | [L-5] Windows ACL for Secret Keys | LOW | Enforce restrictive ACLs on secret key files on Windows (e.g. `windows-acl` crate) | +| [L-6] PEM Label Validation | LOW | Validate that BEGIN/END labels match in `decode_pem` (e.g. reject `BEGIN X / END Y`). Currently relies on downstream key-size validation as safety net | --- diff --git a/kylix-cli/src/cli.rs b/kylix-cli/src/cli.rs index 8a9e423..f75e768 100644 --- a/kylix-cli/src/cli.rs +++ b/kylix-cli/src/cli.rs @@ -454,3 +454,8 @@ pub(crate) enum OutputFormat { /// PEM format Pem, } + +impl OutputFormat { + /// Default format used for output when `--format` is not specified. + pub(crate) const DEFAULT: Self = Self::Hex; +} diff --git a/kylix-cli/src/commands/decaps.rs b/kylix-cli/src/commands/decaps.rs index 8247ac4..fe2e5d2 100644 --- a/kylix-cli/src/commands/decaps.rs +++ b/kylix-cli/src/commands/decaps.rs @@ -21,7 +21,7 @@ pub(crate) fn cmd_decaps( let sk_bytes = Zeroizing::new(decode_input(&sk_data, format)?); drop(sk_data); // zeroize raw key string immediately after decoding - let out_format = format.unwrap_or(OutputFormat::Hex); + let out_format = format.unwrap_or(OutputFormat::DEFAULT); let algo = Algorithm::detect_kem_from_sec_key(sk_bytes.len())?; if verbose { diff --git a/kylix-cli/src/commands/encaps.rs b/kylix-cli/src/commands/encaps.rs index edff1bd..2eb4d6b 100644 --- a/kylix-cli/src/commands/encaps.rs +++ b/kylix-cli/src/commands/encaps.rs @@ -18,7 +18,7 @@ pub(crate) fn cmd_encaps( let pk_data = fs::read_to_string(pubkey).context("Failed to read public key file")?; let pk_bytes = decode_input(&pk_data, format)?; - let out_format = format.unwrap_or(OutputFormat::Hex); + let out_format = format.unwrap_or(OutputFormat::DEFAULT); let algo = Algorithm::detect_kem_from_pub_key(pk_bytes.len())?; diff --git a/kylix-cli/src/commands/keygen.rs b/kylix-cli/src/commands/keygen.rs index f959c0f..4eacab0 100644 --- a/kylix-cli/src/commands/keygen.rs +++ b/kylix-cli/src/commands/keygen.rs @@ -41,7 +41,7 @@ pub(crate) fn cmd_keygen( let pk_size = pk_bytes.len(); let sk_size = sk_bytes.len(); - let out_format = format.unwrap_or(OutputFormat::Hex); + let out_format = format.unwrap_or(OutputFormat::DEFAULT); let pk_encoded = encode_output(&pk_bytes, out_format, pk_label); let sk_encoded = Zeroizing::new(encode_output(&sk_bytes, out_format, sk_label)); // sk_bytes is Zeroizing>, automatically zeroized on drop diff --git a/kylix-cli/src/commands/sign.rs b/kylix-cli/src/commands/sign.rs index 50b92af..ee077d9 100644 --- a/kylix-cli/src/commands/sign.rs +++ b/kylix-cli/src/commands/sign.rs @@ -22,7 +22,7 @@ pub(crate) fn cmd_sign( let sk_bytes = Zeroizing::new(decode_input(&sk_data, format)?); drop(sk_data); // zeroize raw key string immediately after decoding - let out_format = format.unwrap_or(OutputFormat::Hex); + let out_format = format.unwrap_or(OutputFormat::DEFAULT); // Use explicit algorithm if provided, otherwise detect from key size let algo = if let Some(a) = explicit_algo { From 959015434813e9ec9d4fc135a109891db343cece Mon Sep 17 00:00:00 2001 From: Kiyoaki Tsurutani Date: Wed, 11 Feb 2026 23:19:22 +0900 Subject: [PATCH 04/10] fix: address third round of PR review comments - Handle CRLF line endings in decode_pem by normalizing \r (Copilot) - Replace OutputFormat::DEFAULT with #[derive(Default)] + #[default] and use unwrap_or_default() for idiomatic Rust (Copilot) - Fix test_default_output_is_hex: add empty check and use floor_char_boundary for safe string slicing (Copilot) --- kylix-cli/src/cli.rs | 8 ++------ kylix-cli/src/commands/decaps.rs | 2 +- kylix-cli/src/commands/encaps.rs | 2 +- kylix-cli/src/commands/keygen.rs | 2 +- kylix-cli/src/commands/sign.rs | 2 +- kylix-cli/src/io.rs | 9 +++++++++ kylix-cli/tests/cli_integration.rs | 5 +++-- 7 files changed, 18 insertions(+), 12 deletions(-) diff --git a/kylix-cli/src/cli.rs b/kylix-cli/src/cli.rs index f75e768..d4947b5 100644 --- a/kylix-cli/src/cli.rs +++ b/kylix-cli/src/cli.rs @@ -445,17 +445,13 @@ impl Algorithm { } } -#[derive(Copy, Clone, PartialEq, Eq, ValueEnum)] +#[derive(Copy, Clone, Default, PartialEq, Eq, ValueEnum)] pub(crate) enum OutputFormat { /// Hexadecimal encoding + #[default] Hex, /// Base64 encoding Base64, /// PEM format Pem, } - -impl OutputFormat { - /// Default format used for output when `--format` is not specified. - pub(crate) const DEFAULT: Self = Self::Hex; -} diff --git a/kylix-cli/src/commands/decaps.rs b/kylix-cli/src/commands/decaps.rs index fe2e5d2..93ea67f 100644 --- a/kylix-cli/src/commands/decaps.rs +++ b/kylix-cli/src/commands/decaps.rs @@ -21,7 +21,7 @@ pub(crate) fn cmd_decaps( let sk_bytes = Zeroizing::new(decode_input(&sk_data, format)?); drop(sk_data); // zeroize raw key string immediately after decoding - let out_format = format.unwrap_or(OutputFormat::DEFAULT); + let out_format = format.unwrap_or_default(); let algo = Algorithm::detect_kem_from_sec_key(sk_bytes.len())?; if verbose { diff --git a/kylix-cli/src/commands/encaps.rs b/kylix-cli/src/commands/encaps.rs index 2eb4d6b..e4aa9fe 100644 --- a/kylix-cli/src/commands/encaps.rs +++ b/kylix-cli/src/commands/encaps.rs @@ -18,7 +18,7 @@ pub(crate) fn cmd_encaps( let pk_data = fs::read_to_string(pubkey).context("Failed to read public key file")?; let pk_bytes = decode_input(&pk_data, format)?; - let out_format = format.unwrap_or(OutputFormat::DEFAULT); + let out_format = format.unwrap_or_default(); let algo = Algorithm::detect_kem_from_pub_key(pk_bytes.len())?; diff --git a/kylix-cli/src/commands/keygen.rs b/kylix-cli/src/commands/keygen.rs index 4eacab0..911eaba 100644 --- a/kylix-cli/src/commands/keygen.rs +++ b/kylix-cli/src/commands/keygen.rs @@ -41,7 +41,7 @@ pub(crate) fn cmd_keygen( let pk_size = pk_bytes.len(); let sk_size = sk_bytes.len(); - let out_format = format.unwrap_or(OutputFormat::DEFAULT); + let out_format = format.unwrap_or_default(); let pk_encoded = encode_output(&pk_bytes, out_format, pk_label); let sk_encoded = Zeroizing::new(encode_output(&sk_bytes, out_format, sk_label)); // sk_bytes is Zeroizing>, automatically zeroized on drop diff --git a/kylix-cli/src/commands/sign.rs b/kylix-cli/src/commands/sign.rs index ee077d9..a4a2d4d 100644 --- a/kylix-cli/src/commands/sign.rs +++ b/kylix-cli/src/commands/sign.rs @@ -22,7 +22,7 @@ pub(crate) fn cmd_sign( let sk_bytes = Zeroizing::new(decode_input(&sk_data, format)?); drop(sk_data); // zeroize raw key string immediately after decoding - let out_format = format.unwrap_or(OutputFormat::DEFAULT); + let out_format = format.unwrap_or_default(); // Use explicit algorithm if provided, otherwise detect from key size let algo = if let Some(a) = explicit_algo { diff --git a/kylix-cli/src/io.rs b/kylix-cli/src/io.rs index 59bdcf5..7eb2242 100644 --- a/kylix-cli/src/io.rs +++ b/kylix-cli/src/io.rs @@ -39,6 +39,15 @@ fn is_hex(s: &str) -> bool { /// Decode a PEM-encoded string to raw bytes. fn decode_pem(data: &str) -> Result> { + // Normalize CRLF to LF so that trailing \r doesn't corrupt header/footer + // checks or base64 body content + let normalized; + let data = if data.contains('\r') { + normalized = data.replace('\r', ""); + &normalized + } else { + data + }; let lines: Vec<&str> = data.lines().collect(); if lines.len() < 3 || !lines[0].starts_with("-----BEGIN") diff --git a/kylix-cli/tests/cli_integration.rs b/kylix-cli/tests/cli_integration.rs index b0e86d1..bf940b9 100644 --- a/kylix-cli/tests/cli_integration.rs +++ b/kylix-cli/tests/cli_integration.rs @@ -820,11 +820,12 @@ mod format_auto_detection { let pub_content = fs::read_to_string(tmp.path().join("key.pub")).unwrap(); let trimmed = pub_content.trim(); - // Hex output should contain only hex characters + // Hex output should be non-empty and contain only hex characters + assert!(!trimmed.is_empty(), "Public key output should not be empty"); assert!( trimmed.chars().all(|c| c.is_ascii_hexdigit()), "Default output should be hex, got: {}...", - &trimmed[..trimmed.len().min(40)] + &trimmed[..trimmed.len().min(40).min(trimmed.floor_char_boundary(40))] ); } } From 54b0c1d41b4301e760e28fabc153b76564ec2e18 Mon Sep 17 00:00:00 2001 From: Kiyoaki Tsurutani Date: Wed, 11 Feb 2026 23:29:36 +0900 Subject: [PATCH 05/10] fix: clean up error message formatting and test assertion - Remove line continuation in error messages to avoid embedded indentation spaces in output (Copilot) - Simplify test prefix slice to trimmed.get(..40).unwrap_or(trimmed) instead of floor_char_boundary (Copilot) --- kylix-cli/src/io.rs | 6 ++---- kylix-cli/tests/cli_integration.rs | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/kylix-cli/src/io.rs b/kylix-cli/src/io.rs index 7eb2242..383b06b 100644 --- a/kylix-cli/src/io.rs +++ b/kylix-cli/src/io.rs @@ -72,12 +72,10 @@ pub(crate) fn decode_input(data: &str, format: Option) -> Result decode_pem(data), Some(OutputFormat::Hex) => hex::decode(data).context( - "Failed to decode as hex. If the input uses a different encoding, \ - remove --format or set it to the encoding used by the input: hex|base64|pem.", + "Failed to decode as hex. If the input uses a different encoding, remove --format or set it to the encoding used by the input: hex|base64|pem.", ), Some(OutputFormat::Base64) => BASE64.decode(data).context( - "Failed to decode as base64. If the input uses a different encoding, \ - remove --format or set it to the encoding used by the input: hex|base64|pem.", + "Failed to decode as base64. If the input uses a different encoding, remove --format or set it to the encoding used by the input: hex|base64|pem.", ), None => { // Auto-detect: PEM -> hex -> base64 diff --git a/kylix-cli/tests/cli_integration.rs b/kylix-cli/tests/cli_integration.rs index bf940b9..b767fd9 100644 --- a/kylix-cli/tests/cli_integration.rs +++ b/kylix-cli/tests/cli_integration.rs @@ -825,7 +825,7 @@ mod format_auto_detection { assert!( trimmed.chars().all(|c| c.is_ascii_hexdigit()), "Default output should be hex, got: {}...", - &trimmed[..trimmed.len().min(40).min(trimmed.floor_char_boundary(40))] + trimmed.get(..40).unwrap_or(trimmed) ); } } From f2c3fa2edc01cf495783f439d8ed1e61352dcf90 Mon Sep 17 00:00:00 2001 From: Kiyoaki Tsurutani Date: Thu, 12 Feb 2026 21:33:42 +0900 Subject: [PATCH 06/10] fix: stricter PEM validation, Cow normalization, and error message cleanup - Validate PEM BEGIN/END labels match and reject malformed footers - Use Cow for CRLF normalization in decode_pem - Extract format_mismatch_msg helper to deduplicate error strings - Improve auto-detect base64 fallback error to suggest --format --- kylix-cli/src/io.rs | 56 ++++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/kylix-cli/src/io.rs b/kylix-cli/src/io.rs index 383b06b..6802884 100644 --- a/kylix-cli/src/io.rs +++ b/kylix-cli/src/io.rs @@ -1,5 +1,6 @@ use anyhow::{bail, Context, Result}; use base64::{engine::general_purpose::STANDARD as BASE64, Engine}; +use std::borrow::Cow; use std::fs; #[cfg(unix)] use std::fs::OpenOptions; @@ -41,26 +42,50 @@ fn is_hex(s: &str) -> bool { fn decode_pem(data: &str) -> Result> { // Normalize CRLF to LF so that trailing \r doesn't corrupt header/footer // checks or base64 body content - let normalized; - let data = if data.contains('\r') { - normalized = data.replace('\r', ""); - &normalized + let data: Cow = if data.contains('\r') { + Cow::Owned(data.replace('\r', "")) } else { - data + Cow::Borrowed(data) }; let lines: Vec<&str> = data.lines().collect(); - if lines.len() < 3 - || !lines[0].starts_with("-----BEGIN") - || !lines.last().unwrap().starts_with("-----END") - { + if lines.len() < 3 { bail!("Invalid PEM format: expected -----BEGIN header, body lines, and -----END footer"); } + + // Validate and parse the BEGIN line: -----BEGIN