Skip to content

Commit

Permalink
Gimlet hangs on cold power-on after toolchain update (#1742)
Browse files Browse the repository at this point in the history
Fixes #1741 by:

  1. Bumping the stack size for the net task
  2. Changing i2c_driver to do its existing SCL wiggle dance on a
     controller reset
  3. Fixing the SPD retry login in gimlet_seq

Unrelatedly, this also grows the stack of the power task to provide
sufficient stack space to read the IBC blackbox.
  • Loading branch information
bcantrill authored Apr 10, 2024
1 parent 68f2c91 commit d663549
Show file tree
Hide file tree
Showing 9 changed files with 129 additions and 30 deletions.
2 changes: 1 addition & 1 deletion app/demo-stm32h7-nucleo/app-h743.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ task-slots = ["sys"]

[tasks.net]
name = "task-net"
stacksize = 3000
stacksize = 4000
priority = 2
max-sizes = {flash = 65536, ram = 8192, sram1 = 32768}
features = ["h743"]
Expand Down
2 changes: 1 addition & 1 deletion app/demo-stm32h7-nucleo/app-h753.toml
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ task-slots = ["sys"]

[tasks.net]
name = "task-net"
stacksize = 3000
stacksize = 4000
priority = 2
max-sizes = {flash = 131072, ram = 16384, sram1 = 32768}
features = ["h753"]
Expand Down
4 changes: 2 additions & 2 deletions app/gimlet/base.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ request_reset = ["hiffy", "control_plane_agent"]

[tasks.net]
name = "task-net"
stacksize = 6040
stacksize = 8000
priority = 5
features = ["mgmt", "h753", "gimlet", "vlan", "vpd-mac"]
max-sizes = {flash = 131072, ram = 65536, sram1 = 16384}
Expand Down Expand Up @@ -139,7 +139,7 @@ name = "task-power"
features = ["gimlet"]
priority = 6
max-sizes = {flash = 65536, ram = 16384 }
stacksize = 2800
stacksize = 3800
start = true
task-slots = ["i2c_driver", "sensor", "gimlet_seq"]
notifications = ["timer", "external_badness"]
Expand Down
2 changes: 1 addition & 1 deletion app/gimletlet/app.toml
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ notifications = ["hash-irq"]

[tasks.net]
name = "task-net"
stacksize = 6040
stacksize = 8000
priority = 3
features = ["h753", "vlan", "gimletlet-nic", "use-spi-core", "spi4"]
max-sizes = {flash = 131072, ram = 65536, sram1 = 16384}
Expand Down
2 changes: 1 addition & 1 deletion app/psc/base.toml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ task-slots = ["i2c_driver"]

[tasks.net]
name = "task-net"
stacksize = 6040
stacksize = 8000
priority = 4
features = ["mgmt", "h753", "psc", "vlan", "vpd-mac", "use-spi-core", "spi2"]
max-sizes = {flash = 131072, ram = 65536, sram1 = 16384}
Expand Down
4 changes: 2 additions & 2 deletions app/sidecar/base.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ task-slots = ["sys"]

[tasks.net]
name = "task-net"
stacksize = 6040
stacksize = 8000
priority = 5
features = ["mgmt", "h753", "sidecar", "vlan", "vpd-mac", "use-spi-core", "spi3"]
max-sizes = {flash = 131072, ram = 65536, sram1 = 16384}
Expand Down Expand Up @@ -278,7 +278,7 @@ name = "task-power"
features = ["sidecar"]
priority = 6
max-sizes = {flash = 32768, ram = 8192 }
stacksize = 2800
stacksize = 3800
start = true
task-slots = ["i2c_driver", "sensor", "sequencer"]
notifications = ["timer"]
Expand Down
66 changes: 53 additions & 13 deletions drv/gimlet-seq-server/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,16 @@ include!(concat!(env!("OUT_DIR"), "/i2c_config.rs"));
)]
mod payload;

#[derive(Copy, Clone, PartialEq, Count)]
enum I2cTxn {
SpdLoad(u8, u8),
SpdLoadTop(u8, u8),
VCoreOn,
VCoreOff,
SocOn,
SocOff,
}

#[derive(Copy, Clone, PartialEq, Count)]
enum Trace {
Ice40Rails(bool, bool),
Expand Down Expand Up @@ -111,11 +121,17 @@ enum Trace {
SpdBankAbsent(u8),
SpdAbsent(u8, u8, u8),
SpdDimmsFound(usize),
I2cFault {
retries_remaining: u8,
I2cError {
txn: I2cTxn,
#[count(children)]
code: i2c::ResponseCode,
},
I2cFault(I2cTxn),
I2cRetry {
#[count(children)]
txn: I2cTxn,
retries_remaining: u8,
},
StartFailed(#[count(children)] SeqError),
#[count(skip)]
None,
Expand Down Expand Up @@ -592,6 +608,7 @@ impl<S: SpiServer> NotificationHandler for ServerImpl<S> {
}

fn retry_i2c_txn<T, E>(
which: I2cTxn,
mut txn: impl FnMut() -> Result<T, E>,
) -> Result<T, i2c::ResponseCode>
where
Expand All @@ -604,15 +621,18 @@ where
Ok(x) => return Ok(x),
Err(e) => {
let code = e.into();
ringbuf_entry!(Trace::I2cFault {
retries_remaining,
code,
});
ringbuf_entry!(Trace::I2cError { txn: which, code });

if retries_remaining == 0 {
ringbuf_entry!(Trace::I2cFault(which));
return Err(code);
}

ringbuf_entry!(Trace::I2cRetry {
txn: which,
retries_remaining
});

retries_remaining -= 1;
}
}
Expand Down Expand Up @@ -1119,7 +1139,20 @@ fn read_spd_data_and_load_packrat(
// We'll store that byte and then read 255 more.
tmp[0] = first;

retry_i2c_txn(|| spd.read_into(&mut tmp[1..]))?;
let mut retried = false;

retry_i2c_txn(I2cTxn::SpdLoad(nbank, i), || {
if retried {
//
// If our read needs to be retried, we need to also reset
// ourselves back to the 0th byte.
//
_ = spd.read_reg::<u8, u8>(0)?;
}

retried = true;
spd.read_into(&mut tmp[1..])
})?;

packrat.set_spd_eeprom(ndx, false, 0, &tmp);
}
Expand All @@ -1146,9 +1179,16 @@ fn read_spd_data_and_load_packrat(
let spd = I2cDevice::new(i2c_task, controller, port, mux, mem);

let chunk = 128;
retry_i2c_txn(|| spd.read_reg_into::<u8>(0, &mut tmp[..chunk]))?;

retry_i2c_txn(|| spd.read_into(&mut tmp[chunk..]))?;
retry_i2c_txn(I2cTxn::SpdLoadTop(nbank, i), || {
//
// Both of these reads need to be in a single transaction from
// the perspective of the retry logic: if either fails, we
// must redo both.
//
spd.read_reg_into::<u8>(0, &mut tmp[..chunk])?;
spd.read_into(&mut tmp[chunk..])
})?;

packrat.set_spd_eeprom(ndx, true, 0, &tmp);
}
Expand Down Expand Up @@ -1302,8 +1342,8 @@ cfg_if::cfg_if! {
let (device, rail) = i2c_config::pmbus::vddcr_soc(i2c);
let mut vddcr_soc = Raa229618::new(&device, rail);

retry_i2c_txn(|| vdd_vcore.turn_off())?;
retry_i2c_txn(|| vddcr_soc.turn_off())?;
retry_i2c_txn(I2cTxn::VCoreOff, || vdd_vcore.turn_off())?;
retry_i2c_txn(I2cTxn::SocOff, || vddcr_soc.turn_off())?;
Ok(())
}

Expand All @@ -1317,8 +1357,8 @@ cfg_if::cfg_if! {
let (device, rail) = i2c_config::pmbus::vddcr_soc(i2c);
let mut vddcr_soc = Raa229618::new(&device, rail);

retry_i2c_txn(|| vdd_vcore.turn_on())?;
retry_i2c_txn(|| vddcr_soc.turn_on())?;
retry_i2c_txn(I2cTxn::VCoreOn, || vdd_vcore.turn_on())?;
retry_i2c_txn(I2cTxn::SocOn, || vddcr_soc.turn_on())?;
Ok(())
}

Expand Down
67 changes: 58 additions & 9 deletions drv/stm32xx-i2c-server/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,48 @@ fn reset_if_needed(
}
}

///
/// A variant of [`reset_if_needed`] that will also wiggle the SCL lines
/// via [`wiggle_scl`].
///
fn reset_and_wiggle_if_needed(
code: ResponseCode,
controller: &I2cController<'_>,
port: PortIndex,
muxes: &[I2cMux<'_>],
muxmap: &mut MuxMap,
pins: &[I2cPins],
) {
if reset_needed(code) {
let sys = SYS.get_task_id();
let sys = Sys::from(sys);

for pin in pins
.iter()
.filter(|p| p.controller == controller.controller)
.filter(|p| p.port == port)
{
wiggle_scl(&sys, pin.scl, pin.sda);

//
// [`wiggle_scl`] puts our pins in output (and input) mode; set
// them back to be configured for I2C before we reset.
//
for gpio_pin in &[pin.scl, pin.sda] {
sys.gpio_configure_alternate(
*gpio_pin,
OutputType::OpenDrain,
Speed::Low,
Pull::None,
pin.function,
);
}
}

reset(controller, port, muxes, muxmap);
}
}

include!(concat!(env!("OUT_DIR"), "/i2c_config.rs"));

type PortMap = FixedMap<Controller, PortIndex, { i2c_config::NCONTROLLERS }>;
Expand Down Expand Up @@ -487,12 +529,13 @@ fn main() -> ! {
}
}

reset_if_needed(
reset_and_wiggle_if_needed(
code,
controller,
port,
&muxes,
&mut muxmap,
&pins,
);
return Err(code);
}
Expand Down Expand Up @@ -588,17 +631,23 @@ fn configure_port(
}

///
/// When the system is reset without power loss, I2C can be in an arbitrary
/// state with respect to the bus -- and we can therefore come to life with a
/// transaction already in flight. It is very important that we abort any
/// such transaction: failure to do so will result in our first I2C
/// transaction being corrupted. (And especially because our first I2C
/// transactions may well be to disable segments on a mux, this can result in
/// nearly arbitrary mayhem down the road!) To do this, we engage in the
/// When the system is either reset without power loss (e.g., due to an SP
/// upgrade) or I2C is preempted longer than the 25ms I2C timeout (e.g., due
/// to a large process panicking and being dumped by jefe), I2C can be in an
/// arbitrary state with respect to the bus -- and we can therefore come to
/// life with a transaction already in flight. It is very important that we
/// abort any such transaction: failure to do so will result in our first I2C
/// transaction being corrupted. (And because our first I2C transaction on SP
/// boot may well be to disable segments on a mux, this can result in nearly
/// arbitrary mayhem down the road!) To do this, we engage in the
/// time-honored[0] tradition of "clocking through the problem": wiggling SCL
/// until we see SDA high, and then pulling SDA low and releasing SCL to
/// indicate a STOP condition. (Note that we need to do this up to 9 times to
/// assure that we have clocked through the entire transaction.)
/// assure that we have clocked through the entire transaction.) Our assumption
/// is that if SCL is being stretched by an errant target, it has been already
/// stretched beyond our timeout (25ms); if this is the case, us trying to
/// wiggle SCL here won't actually wiggle SCL -- but unless such a device is
/// isolated to a segment on a mux that we can reset, nothing will in fact help.
///
/// [0] Analog Devices. AN-686: Implementing an I2C Reset. 2003.
///
Expand Down
10 changes: 10 additions & 0 deletions task/jefe/src/dump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ enum Trace {
},
DumpRead(usize),
DumpDone(Result<(), humpty::DumpError<()>>),
DumpTime {
start: u64,
end: u64,
},
}

ringbuf!(Trace, 8, Trace::None);
Expand Down Expand Up @@ -178,6 +182,7 @@ fn dump_task_setup(
/// Once a task dump is set up, this function executes it
fn dump_task_run(base: u32, task: usize) -> Result<(), DumpAgentError> {
ringbuf_entry!(Trace::DumpStart { base });
let start = sys_get_timer().now;

//
// The humpty dance is your chance... to do the dump!
Expand Down Expand Up @@ -225,6 +230,11 @@ fn dump_task_run(base: u32, task: usize) -> Result<(), DumpAgentError> {
);

ringbuf_entry!(Trace::DumpDone(r));
ringbuf_entry!(Trace::DumpTime {
start,
end: sys_get_timer().now
});

r.map_err(|_| DumpAgentError::DumpFailed)?;
Ok(())
}
Expand Down

0 comments on commit d663549

Please sign in to comment.