Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integration Test: restart node if it failed to start in RandomlyKill #4625

Merged
merged 1 commit into from
Aug 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions test/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,16 @@ pub(crate) struct ProcessGuard {
pub killed: bool,
}

impl ProcessGuard {
    /// Reports whether the guarded child process is still running.
    ///
    /// Polls `self.child.try_wait()` and returns `true` only when the call
    /// succeeds without yielding an exit status. An available exit status or
    /// an error from the poll both count as "not alive".
    pub(crate) fn is_alive(&mut self) -> bool {
        matches!(self.child.try_wait(), Ok(None))
    }
}

impl Drop for ProcessGuard {
fn drop(&mut self) {
if !self.killed {
Expand Down Expand Up @@ -738,6 +748,15 @@ impl Node {
g.take()
}

/// Reports whether this node currently holds a process guard whose process
/// is still alive.
///
/// Returns `false` when no guard is stored; otherwise defers to the guard's
/// own `is_alive` check.
///
/// # Panics
///
/// Panics if acquiring write access to the guard slot fails (the `unwrap`
/// on `write()`).
pub(crate) fn is_alive(&mut self) -> bool {
    let mut slot = self.inner.guard.write().unwrap();
    match slot.as_mut() {
        Some(process) => process.is_alive(),
        None => false,
    }
}

/// Stops the node by taking its process guard (if any) and dropping it
/// immediately.
pub fn stop(&mut self) {
    let guard = self.take_guard();
    drop(guard);
}
Expand Down
13 changes: 12 additions & 1 deletion test/src/rpc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,13 +174,24 @@ impl RpcClient {
}

/// Waits for the RPC endpoint to become ready, treating a timeout as fatal.
///
/// Delegates to `wait_rpc_ready_internal` with a failure callback that
/// panics; the boolean result is intentionally discarded.
///
/// # Panics
///
/// Panics with `"wait rpc ready timeout"` when the failure callback is
/// invoked.
pub fn wait_rpc_ready(&self) {
    let _ready = self.wait_rpc_ready_internal(|| {
        panic!("wait rpc ready timeout");
    });
}

pub fn wait_rpc_ready_internal<F>(&self, fail: F) -> bool
where
F: Fn(),
{
let now = std::time::Instant::now();
while self.inner.get_tip_block_number().is_err() {
std::thread::sleep(std::time::Duration::from_millis(100));
if now.elapsed() > std::time::Duration::from_secs(60) {
panic!("wait rpc ready timeout");
fail();
return false;
}
}
true
}

pub fn get_block_template(
Expand Down
20 changes: 18 additions & 2 deletions test/src/specs/fault_injection/randomly_kill.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,23 @@ impl Spec for RandomlyKill {
fn run(&self, nodes: &mut Vec<Node>) {
let mut rng = thread_rng();
let node = &mut nodes[0];
for _ in 0..rng.gen_range(10..20) {
node.rpc_client().wait_rpc_ready();
let max_restart_times = rng.gen_range(10..20);

let mut node_crash_times = 0;

let mut randomly_kill_times = 0;
while randomly_kill_times < max_restart_times {
node.rpc_client().wait_rpc_ready_internal(|| {});

if !node.is_alive() {
node.start();
node_crash_times += 1;

if node_crash_times > 3 {
panic!("Node crash too many times");
}
}

let n = rng.gen_range(0..10);
// TODO: the kill of child process and mining are actually sequential here
// We need to find some way to do these two things in parallel.
Expand All @@ -25,6 +40,7 @@ impl Spec for RandomlyKill {
}
info!("Stop the node");
node.stop_gracefully();
randomly_kill_times += 1;
info!("Start the node");
node.start();
}
Expand Down
Loading