From 03849055b7676adf10ff7caa338adbe173e3fddf Mon Sep 17 00:00:00 2001 From: BlackHoleFox Date: Wed, 13 Mar 2024 13:47:55 -0700 Subject: [PATCH] Parse default Apple deployment target from SDK properties --- src/json.rs | 519 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 121 ++++++++---- 2 files changed, 601 insertions(+), 39 deletions(-) create mode 100644 src/json.rs diff --git a/src/json.rs b/src/json.rs new file mode 100644 index 000000000..8d5349d41 --- /dev/null +++ b/src/json.rs @@ -0,0 +1,519 @@ +// Vendored from `smoljson` bef592c5da1c3fe38b2462a8d231b0e0c8a86f80, with explicit permission +// from the author (`thomcc`). Minimized for cc/simplicity. Modifications and additions made to fit cc's needs. +#![allow(dead_code)] + +use std::borrow::Cow; +use std::boxed::Box; +use std::string::String; +/// First lifetime is for strings borrowed from the source. +/// Second lifetime is for strings borrowed from the parser. +#[derive(PartialEq, Debug, Clone)] +pub(crate) enum Token<'s> { + Null, + Bool(bool), + NumU(u64), + NumI(i64), + NumF(f64), + StrBorrow(&'s str), + StrOwn(Box), + Colon, + Comma, + ObjectBegin, + ObjectEnd, + ArrayBegin, + ArrayEnd, +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct Error(()); + +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str("JSON parse error") + } +} + +pub type Result = core::result::Result; + +pub(crate) struct Reader<'a> { + input: &'a str, + bytes: &'a [u8], + tok_start: usize, + pos: usize, + buf: String, + stash: Option>, +} + +impl<'a> Reader<'a> { + /// Create a reader which uses the [default `Dialect`](Dialect::DEFAULT). + pub fn new(input: &'a str) -> Self { + Self { + input, + bytes: input.as_bytes(), + pos: 0, + buf: String::new(), + tok_start: 0, + stash: None, + } + } + + #[inline] + pub fn position(&self) -> usize { + self.pos.min(self.bytes.len()) + } + + #[cold] + pub(super) fn err(&self) -> Error { + Error(()) + } + + /// Returns `Err` if there are any more non-whitespace/non-comment (if this + /// reader's dialect allows comments) characters in the input. + pub fn finish(mut self) -> Result<()> { + match self.next_token() { + Ok(Some(_)) => Err(self.err()), + Ok(None) => Ok(()), + Err(e) => Err(e), + } + } + + fn bnext_if(&mut self, b: u8) -> bool { + if self.pos < self.bytes.len() && self.bytes[self.pos] == b { + self.pos += 1; + true + } else { + false + } + } + + fn bnext(&mut self) -> Option { + if self.pos < self.bytes.len() { + let ch = self.bytes[self.pos]; + self.pos += 1; + Some(ch) + } else { + None + } + } + + fn bnext_or_err(&mut self) -> Result { + match self.bnext() { + Some(c) => Ok(c), + None => Err(self.err()), + } + } + + fn bpeek(&mut self) -> Option { + if self.pos < self.bytes.len() { + Some(self.bytes[self.pos]) + } else { + None + } + } + + fn bpeek_or_nul(&mut self) -> u8 { + self.bpeek().unwrap_or(b'\0') + } + + fn bump(&mut self) { + self.pos += 1; + debug_assert!(self.pos <= self.input.len()); + } + + fn finished(&self) -> bool { + self.pos >= self.bytes.len() + } + + pub(super) fn ref_stash(&self) -> Option<&Token<'a>> { + self.stash.as_ref() + } + + pub(super) fn mut_stash(&mut self) -> &mut Option> { + &mut self.stash + } + pub(super) fn take_stash(&mut self) -> Option> { + self.stash.take() + } + + pub(super) fn skipnpeek(&mut self) -> Result> { + debug_assert!(self.stash.is_none()); + self.skip_trivial()?; + Ok(self.bpeek()) + } + + fn skip_trivial(&mut self) -> Result<()> { + loop { + self.skip_ws_only(); + if !self.bnext_if(b'/') { + return Ok(()); + } + match self.bnext() { + Some(b'*') => self.skip_block_comment()?, + Some(b'/') => self.skip_line_comment(), + _ => return Err(self.err()), + } + } + } + + fn skip_line_comment(&mut self) { + let (mut p, bs) = (self.pos, self.bytes); + while p < bs.len() && bs[p] != b'\n' { + p += 1; + } + self.pos = p; + } + + fn skip_block_comment(&mut self) -> Result<()> { + let (mut p, bs) = (self.pos, self.bytes); + loop { + if p + 1 >= bs.len() { + self.pos = p; + return Err(self.err()); + } + if bs[p] == b'*' && bs[p + 1] == b'/' { + self.pos = p + 2; + return Ok(()); + } + p += 1; + } + } + + fn skip_ws_only(&mut self) { + let (mut p, bs) = (self.pos, self.bytes); + while p < bs.len() && matches!(bs[p], b'\n' | b' ' | b'\t' | b'\r') { + p += 1; + } + self.pos = p; + } + + fn cur_ch(&self) -> Option { + self.input[self.pos..].chars().next() + } + + fn single_hex_escape(&mut self) -> Result { + let mut acc = 0; + for _ in 0..4 { + let b = self.bnext_or_err()?; + let n = match b { + b'0'..=b'9' => b - b'0', + b'a'..=b'f' => b - b'a' + 10, + b'A'..=b'F' => b - b'A' + 10, + _ => return Err(self.err()), + }; + acc = acc * 16 + (n as u16); + } + Ok(acc) + } + + fn read_hex_escape(&mut self) -> Result<()> { + // todo: option where we reutrn an error (instead using replacement + // char) if unescaping produces unpaired surrogates. + use core::char::REPLACEMENT_CHARACTER as REPLACEMENT; + const LEAD: core::ops::Range = 0xd800..0xdc00; + const TRAIL: core::ops::Range = 0xdc00..0xe000; + + let lead = self.single_hex_escape()?; + if let Some(c) = core::char::from_u32(lead as u32) { + self.buf.push(c); + return Ok(()); + } + if TRAIL.contains(&lead) { + self.buf.push(REPLACEMENT); + return Ok(()); + } + debug_assert!(LEAD.contains(&lead)); + let p = self.pos; + let trail = if self.bytes[p..].starts_with(b"\\u") { + self.pos += 2; + self.single_hex_escape()? + } else { + self.buf.push(REPLACEMENT); + return Ok(()); + }; + if !TRAIL.contains(&trail) { + // rewind here so we follow algorithm 2 (max subparts of illegal + // sequence) for https://www.unicode.org/review/pr-121.html. + self.pos = p; + self.buf.push(REPLACEMENT); + return Ok(()); + } + let scalar = (((lead as u32 - 0xd800) << 10) | (trail as u32 - 0xdc00)) + 0x10000; + debug_assert!( + core::char::from_u32(scalar).is_some(), + r#""\u{:04x}\u{:04x}" => {:#x}"#, + lead, + trail, + scalar, + ); + // all well-formed surrogate pairs map to `char`s (e.g. unicode scalar + // values), so unwrap is fine + self.buf.push(core::char::from_u32(scalar).unwrap()); + Ok(()) + } + + fn expect_next(&mut self, next: &[u8]) -> Result<()> { + for &i in next { + if Some(i) != self.bnext() { + return Err(self.err()); + } + } + Ok(()) + } + + fn unescape_next(&mut self) -> Result<()> { + let b = self.bnext_or_err()?; + match b { + b'b' => self.buf.push('\x08'), + b'f' => self.buf.push('\x0c'), + b'n' => self.buf.push('\n'), + b'r' => self.buf.push('\r'), + b't' => self.buf.push('\t'), + b'\\' => self.buf.push('\\'), + b'/' => self.buf.push('/'), + b'\"' => self.buf.push('\"'), + b'u' => return self.read_hex_escape(), + _ => return Err(self.err()), + } + Ok(()) + } + + fn read_keyword(&mut self, id: &[u8], t: Token<'a>) -> Result> { + debug_assert_eq!(self.bytes[self.pos - 1], id[0]); + self.expect_next(&id[1..])?; + Ok(t) + } + + pub(crate) fn unpeek(&mut self, t: Token<'a>) { + assert!(self.stash.is_none()); + self.stash = Some(t); + } + pub(crate) fn next_token(&mut self) -> Result>> { + if let Some(t) = self.stash.take() { + return Ok(Some(t)); + } + self.skip_trivial()?; + if self.pos >= self.input.len() { + return Ok(None); + } + self.tok_start = self.pos; + let tok = match self.bnext_or_err()? { + b':' => return Ok(Some(Token::Colon)), + b',' => return Ok(Some(Token::Comma)), + b'{' => return Ok(Some(Token::ObjectBegin)), + b'}' => return Ok(Some(Token::ObjectEnd)), + b'[' => return Ok(Some(Token::ArrayBegin)), + b']' => return Ok(Some(Token::ArrayEnd)), + b'"' => self.read_string(), + b't' => self.read_keyword(b"true", Token::Bool(true)), + b'f' => self.read_keyword(b"false", Token::Bool(false)), + b'n' => self.read_keyword(b"null", Token::Null), + b'-' | b'0'..=b'9' => self.read_num(), + _ => return Err(self.err()), + }; + Ok(Some(tok?)) + } + + fn is_delim_byte(&self, b: u8) -> bool { + matches!(b, b',' | b'}' | b']' | b' ' | b'\t' | b'\n' | b'\r') + } + + fn read_num(&mut self) -> Result> { + let neg = self.bytes[self.tok_start] == b'-'; + let mut float = false; + while let Some(b) = self.bpeek() { + match b { + b'.' | b'e' | b'E' | b'+' | b'-' => { + float = true; + self.bump(); + } + b'0'..=b'9' => { + self.bump(); + } + b if self.is_delim_byte(b) => break, + _ => return Err(self.err()), + } + } + let text = &self.input[self.tok_start..self.pos]; + if !float { + if neg { + if let Ok(i) = text.parse::() { + debug_assert!(i < 0); + return Ok(Token::NumI(i)); + } + } else if let Ok(u) = text.parse::() { + return Ok(Token::NumU(u)); + } + } + if let Ok(v) = text.parse::() { + Ok(Token::NumF(v)) + } else { + Err(self.err()) + } + } + + fn read_string(&mut self) -> Result> { + self.buf.clear(); + let bs = self.bytes; + loop { + let mut p = self.pos; + let start = p; + while p < bs.len() && bs[p] != b'"' && bs[p] != b'\\' { + p += 1; + } + if p == bs.len() || !self.input.is_char_boundary(p) { + self.pos = p; + return Err(self.err()); + } + self.pos = p + 1; + if bs[p] == b'"' && self.buf.is_empty() { + // didn't need any unescaping. + return Ok(Token::StrBorrow(&self.input[start..p])); + } + self.buf.push_str(&self.input[start..p]); + if bs[p] == b'"' { + return Ok(Token::StrOwn(self.buf.clone().into_boxed_str())); + } + debug_assert_eq!(bs[p], b'\\'); + self.unescape_next()? + } + } +} + +macro_rules! tok_tester { + ($($func:ident matches $tok:ident);*) => {$( + pub(crate) fn $func(&mut self) -> Result<()> { + match self.next_token() { + Ok(Some(Token::$tok)) => Ok(()), + Err(e) => Err(e), + _ => Err(self.err()), + } + } + )*}; +} +impl<'a> Reader<'a> { + pub(crate) fn next(&mut self) -> Result> { + match self.next_token() { + Ok(Some(v)) => Ok(v), + Err(e) => Err(e), + _ => Err(self.err()), + } + } + tok_tester! { + array_begin matches ArrayBegin; + // array_end matches ArrayEnd; + obj_begin matches ObjectBegin; + // obj_end matches ObjectEnd; + comma matches Comma; + colon matches Colon; + null matches Null + } + pub(crate) fn comma_or_obj_end(&mut self) -> Result { + match self.next_token() { + Ok(Some(Token::Comma)) => Ok(true), + Ok(Some(Token::ObjectEnd)) => Ok(false), + Err(e) => Err(e), + _ => Err(self.err()), + } + } + pub(crate) fn comma_or_array_end(&mut self) -> Result { + match self.next_token() { + Ok(Some(Token::Comma)) => Ok(true), + Ok(Some(Token::ArrayEnd)) => Ok(false), + Err(e) => Err(e), + _ => Err(self.err()), + } + } + pub(crate) fn key(&mut self) -> Result> { + match self.next_token() { + Ok(Some(Token::StrBorrow(b))) => Ok(Cow::Borrowed(b)), + Ok(Some(Token::StrOwn(b))) => Ok(Cow::Owned(b.into())), + Err(e) => Err(e), + Ok(Some(_t)) => Err(self.err()), + _o => Err(self.err()), + } + } +} + +impl<'a> Reader<'a> { + fn read_str(&mut self) -> Result>> { + match self.next_token() { + Ok(Some(Token::StrBorrow(s))) => Ok(Some(Cow::Borrowed(s))), + Ok(Some(Token::StrOwn(s))) => Ok(Some(Cow::Owned(s.into()))), + Ok(Some(_)) => Ok(None), + Ok(None) => Err(self.err()), + Err(e) => Err(e), + } + } + + pub fn read_str_from_object( + &mut self, + key: &str, + parent_object: Option<&str>, + ) -> Result> { + let inside_nested_object = parent_object.is_some(); + if let Some(parent_name) = parent_object { + // If the field we want is inside a nested object, skip into that object + loop { + match self.read_str()? { + Some(value) => { + if value == Cow::Borrowed(parent_name) { + if self.next()? != Token::Colon { + return Err(self.err()); + } + if self.next()? != Token::ObjectBegin { + return Err(self.err()); + } + break; + } + } + None => continue, + } + } + } + + let mut nesting = false; + loop { + let value = match self.next()? { + Token::StrBorrow(s) => Cow::Borrowed(s), + Token::StrOwn(s) => Cow::Owned(s.into()), + Token::ObjectBegin => { + nesting = true; + continue; + } + Token::ObjectEnd => { + if nesting { + // Exit nested object, we know its `ObjectEnd` isn't for the seeked one.. + nesting = false; + } else if inside_nested_object || self.skipnpeek() == Ok(None) { + // Finding the end of the current object without finding a matching key is an error + // when a specific scope is provided. + // If this `ObjectEnd` was the last in the file, error too. + return Err(self.err()); + } + continue; + } + _ => continue, + }; + + if value != key { + continue; + } + + // If the parser is inside a nested object but the caller wanted something in the parent + // structure, don't read anything out of this object. + if nesting && !inside_nested_object { + continue; + } + + if self.next()? != Token::Colon { + return Err(self.err()); + } + + return match self.read_str() { + Ok(Some(val)) => Ok(val), + Ok(None) => Err(self.err()), + Err(e) => Err(e), + }; + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 8ef48305b..a3f516e73 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -233,6 +233,7 @@ use std::process::Child; use std::process::Command; use std::sync::{Arc, Mutex}; +mod json; #[cfg(feature = "parallel")] mod parallel; mod windows; @@ -292,7 +293,7 @@ pub struct Build { extra_warnings: Option, env_cache: Arc>>>>, apple_sdk_root_cache: Arc>>, - apple_versions_cache: Arc>>, + apple_versions_cache: Arc>>, emit_rerun_if_env_changed: bool, cached_compiler_family: Arc, ToolFamily>>>, } @@ -1919,9 +1920,9 @@ impl Build { map_darwin_target_from_rust_to_compiler_architecture(target) { let sdk_details = - apple_os_sdk_parts(AppleOs::Ios, &AppleArchSpec::Simulator("")); + apple_os_sdk_parts(AppleOs::Ios, AppleArchSpec::Simulator("")); let deployment_target = - self.apple_deployment_version(AppleOs::Ios, None, &sdk_details.sdk); + self.apple_deployment_version(AppleOs::Ios, &sdk_details); cmd.args.push( format!( "--target={}-apple-ios{}-simulator", @@ -1935,12 +1936,9 @@ impl Build { map_darwin_target_from_rust_to_compiler_architecture(target) { let sdk_details = - apple_os_sdk_parts(AppleOs::WatchOs, &AppleArchSpec::Simulator("")); - let deployment_target = self.apple_deployment_version( - AppleOs::WatchOs, - None, - &sdk_details.sdk, - ); + apple_os_sdk_parts(AppleOs::WatchOs, AppleArchSpec::Simulator("")); + let deployment_target = + self.apple_deployment_version(AppleOs::WatchOs, &sdk_details); cmd.args.push( format!( "--target={}-apple-watchos{}-simulator", @@ -1954,12 +1952,9 @@ impl Build { map_darwin_target_from_rust_to_compiler_architecture(target) { let sdk_details = - apple_os_sdk_parts(AppleOs::TvOs, &AppleArchSpec::Simulator("")); - let deployment_target = self.apple_deployment_version( - AppleOs::TvOs, - None, - &sdk_details.sdk, - ); + apple_os_sdk_parts(AppleOs::TvOs, AppleArchSpec::Simulator("")); + let deployment_target = + self.apple_deployment_version(AppleOs::TvOs, &sdk_details); cmd.args.push( format!( "--target={}-apple-tvos{}-simulator", @@ -1973,12 +1968,9 @@ impl Build { map_darwin_target_from_rust_to_compiler_architecture(target) { let sdk_details = - apple_os_sdk_parts(AppleOs::TvOs, &AppleArchSpec::Device("")); - let deployment_target = self.apple_deployment_version( - AppleOs::TvOs, - None, - &sdk_details.sdk, - ); + apple_os_sdk_parts(AppleOs::TvOs, AppleArchSpec::Device("")); + let deployment_target = + self.apple_deployment_version(AppleOs::TvOs, &sdk_details); cmd.args.push( format!("--target={}-apple-tvos{}", arch, deployment_target).into(), ); @@ -2587,8 +2579,8 @@ impl Build { } }; - let sdk_details = apple_os_sdk_parts(os, &arch); - let min_version = self.apple_deployment_version(os, Some(arch_str), &sdk_details.sdk); + let sdk_details = apple_os_sdk_parts(os, arch); + let min_version = self.apple_deployment_version(os, &sdk_details); match arch { AppleArchSpec::Device(_) if is_mac => { @@ -3603,30 +3595,73 @@ impl Build { Ok(ret) } - fn apple_deployment_version(&self, os: AppleOs, arch_str: Option<&str>, sdk: &str) -> String { + fn apple_deployment_version(&self, os: AppleOs, sdk_parts: &AppleSdkTargetParts) -> String { let default_deployment_from_sdk = || { + let is_catalyst = matches!(sdk_parts.arch, AppleArchSpec::Catalyst(_)); + // Needs to be both to distinguish between two targets inside the same SDK, like catalyst in the mac SDK. + let cache_key = (os, sdk_parts.arch); + let mut cache = self .apple_versions_cache .lock() .expect("apple_versions_cache lock failed"); - if let Some(ret) = cache.get(sdk) { + if let Some(ret) = cache.get(&cache_key) { return Some(ret.clone()); } - let version = run_output( - self.cmd("xcrun") - .arg("--show-sdk-platform-version") - .arg("--sdk") - .arg(sdk), - "xcrun", - &self.cargo_output, - ) - .ok()?; + let sdk_root = self + .apple_sdk_root(&sdk_parts.sdk) + .map(PathBuf::from) + .map_err(|e| { + self.cargo_output + .print_warning(&format_args!("{e}, SDK default version unusable")); + e + }) + .ok()?; + + let sdk_info = fs::read_to_string(sdk_root.join("SDKSettings.json")) + .map_err(|e| { + self.cargo_output.print_warning(&format_args!( + "Failed to read SDK properties ({e}), SDK default version unusable" + )); + e + }) + .ok()?; - let version = std::str::from_utf8(&version).ok()?.trim().to_owned(); + let mut sdk_parser = json::Reader::new(&sdk_info); - cache.insert(sdk.into(), version.clone()); + let version = sdk_parser + .read_str_from_object("DefaultDeploymentTarget", None) + .ok() + .or_else(|| { + self.cargo_output.print_warning(&format_args!( + "SDK was misformed, no SDK default version available" + )); + None + }) + .as_deref() + .map(ToString::to_string)?; + + // If cc is targeting catalyst, get the version map of macOS->iOS to find the actual default. + let version = if is_catalyst { + let mut map_parser = json::Reader::new(&sdk_info); + map_parser + .read_str_from_object(&version, Some("macOS_iOSMac")) + .ok() + .or_else(|| { + self.cargo_output.print_warning(&format_args!( + "SDK was misformed, no catalyst version map available" + )); + None + }) + .as_deref() + .map(ToString::to_string)? + } else { + version + }; + + cache.insert(cache_key, version.clone()); Some(version) }; @@ -3703,7 +3738,12 @@ impl Build { .and_then(maybe_cpp_version_baseline) .or_else(default_deployment_from_sdk) .unwrap_or_else(|| { - if arch_str == Some("aarch64") { + let arch = match sdk_parts.arch { + AppleArchSpec::Device(a) => a, + _ => unreachable!(), + }; + + if arch == "aarch64" { "11.0".into() } else { let default = "10.7"; @@ -3745,7 +3785,7 @@ fn fail(s: &str) -> ! { std::process::exit(1); } -#[derive(Clone, Copy, PartialEq)] +#[derive(Clone, Copy, PartialEq, Eq, Hash)] enum AppleOs { MacOs, Ios, @@ -3764,12 +3804,13 @@ impl std::fmt::Debug for AppleOs { } struct AppleSdkTargetParts { + arch: AppleArchSpec, sdk_prefix: &'static str, sim_prefix: &'static str, sdk: Cow<'static, str>, } -fn apple_os_sdk_parts(os: AppleOs, arch: &AppleArchSpec) -> AppleSdkTargetParts { +fn apple_os_sdk_parts(os: AppleOs, arch: AppleArchSpec) -> AppleSdkTargetParts { let (sdk_prefix, sim_prefix) = match os { AppleOs::MacOs => ("macosx", ""), AppleOs::Ios => ("iphone", "ios-"), @@ -3784,6 +3825,7 @@ fn apple_os_sdk_parts(os: AppleOs, arch: &AppleArchSpec) -> AppleSdkTargetParts }; AppleSdkTargetParts { + arch, sdk_prefix, sim_prefix, sdk, @@ -3791,6 +3833,7 @@ fn apple_os_sdk_parts(os: AppleOs, arch: &AppleArchSpec) -> AppleSdkTargetParts } #[allow(dead_code)] +#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] enum AppleArchSpec { Device(&'static str), Simulator(&'static str),