Skip to content

Commit 7c97fbe

Browse files
committed
Support cmd execution timeout in service mode
1 parent 4cd8d83 commit 7c97fbe

File tree

1 file changed

+89
-28
lines changed

1 file changed

+89
-28
lines changed

memcr.c

Lines changed: 89 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ static int rss_file;
119119
static int compress;
120120
static int checksum;
121121
static int service;
122+
static unsigned int timeout;
122123

123124

124125
#define BIT(x) (1ULL << x)
@@ -483,6 +484,7 @@ static int seize_pid(pid_t pid)
483484
int status;
484485
siginfo_t si;
485486

487+
printf("[+] seizing tid %d\n", pid);
486488
ret = ptrace(PTRACE_SEIZE, pid, NULL, 0);
487489
if (ret) {
488490
if (errno == ESRCH) {
@@ -495,6 +497,7 @@ static int seize_pid(pid_t pid)
495497
}
496498

497499
try_again:
500+
498501
ret = ptrace(PTRACE_INTERRUPT, pid, NULL, NULL);
499502
if (ret) {
500503
fprintf(stderr, "ptrace(PTRACE_INTERRUPT) pid %d: %m\n", pid);
@@ -535,6 +538,7 @@ static int seize_pid(pid_t pid)
535538
return 1;
536539
}
537540

541+
printf("[i] delivered signal %d to pid %d\n", si.si_signo, pid);
538542
goto try_again;
539543
}
540544

@@ -549,6 +553,25 @@ static int seize_pid(pid_t pid)
549553
return 0;
550554
}
551555

556+
/*
 * Detach from `pid` with ptrace(PTRACE_DETACH).
 *
 * Returns the value returned by ptrace() unchanged.
 */
static int unseize_pid(pid_t pid)
557+
{
558+
return ptrace(PTRACE_DETACH, pid, NULL, 0);
559+
}
560+
561+
/*
 * Call unseize_pid() on the first nr_threads entries of tids[] and then
 * reset nr_threads to 0.
 *
 * Returns the bitwise OR of all unseize_pid() results, i.e. 0 only when
 * every call returned 0.
 */
static int unseize_target(void)
562+
{
563+
int ret = 0;
564+
int i;
565+
566+
printf("[+] unseizing target\n");
567+
568+
for (i = 0; i < nr_threads; i++)
569+
/* keep going on failure; any non-zero result is folded into ret */
ret |= unseize_pid(tids[i]);
570+
nr_threads = 0;
571+
572+
return ret;
573+
}
574+
552575
static int seize_target(pid_t pid)
553576
{
554577
int ret;
@@ -573,25 +596,6 @@ static int seize_target(pid_t pid)
573596
return 0;
574597
}
575598

576-
static int unseize_pid(pid_t pid)
577-
{
578-
return ptrace(PTRACE_DETACH, pid, NULL, 0);
579-
}
580-
581-
static int unseize_target(void)
582-
{
583-
int ret = 0;
584-
int i;
585-
586-
printf("[+] unseizing target\n");
587-
588-
for (i = 0; i < nr_threads; i++)
589-
ret |= unseize_pid(tids[i]);
590-
nr_threads = 0;
591-
592-
return ret;
593-
}
594-
595599
static int parasite_socket_create(pid_t pid)
596600
{
597601
int pid_netns = -1;
@@ -688,6 +692,8 @@ static int __read(int fd, void *buf, size_t count, int (*check_peer_ok)(void), i
688692
continue;
689693

690694
break;
695+
} else if (errno == EINTR) {
696+
continue;
691697
}
692698

693699
if (silent == FALSE)
@@ -923,6 +929,20 @@ static void clear_pid_on_worker_exit_non_blocking(pid_t worker)
923929
}
924930
}
925931

932+
/*
 * Look up the `worker` value stored for `pid` in checkpoint_service_data[].
 *
 * The table is scanned while holding checkpoint_service_data_lock.
 *
 * Returns the `worker` field of the first entry whose `pid` matches, or
 * PID_INVALID when no entry matches.
 */
static int get_pid_worker(pid_t pid)
933+
{
934+
int worker = PID_INVALID;
935+
pthread_mutex_lock(&checkpoint_service_data_lock);
936+
for (int i=0; i<CHECKPOINTED_PIDS_LIMIT; ++i) {
937+
if (checkpoint_service_data[i].pid == pid) {
938+
worker = checkpoint_service_data[i].worker;
939+
break;
940+
}
941+
}
942+
pthread_mutex_unlock(&checkpoint_service_data_lock);
943+
return worker;
944+
}
945+
926946
static int can_checkpoint_pid(pid_t pid)
927947
{
928948
pthread_mutex_lock(&checkpoint_service_data_lock);
@@ -2470,24 +2490,37 @@ static void try_to_abort_checkpoint(pid_t pid)
24702490
}
24712491
}
24722492

2473-
static void checkpoint_procedure_service(int checkpointSocket, int cd)
2493+
static int checkpoint_procedure_service(int checkpointSocket, int cd, int pid, int worker_pid)
24742494
{
24752495
int ret;
24762496
struct service_response svc_resp;
24772497

2478-
fprintf(stdout, "[+] Service waiting for worker checkpoint...\n");
2498+
if (timeout) {
2499+
fprintf(stdout, "[+] Service waiting for worker checkpoint with timeout %d[s]...\n", timeout);
2500+
struct timeval rcv_timeout = { .tv_sec = timeout, .tv_usec = 0 };
2501+
ret = setsockopt(checkpointSocket, SOL_SOCKET, SO_RCVTIMEO, &rcv_timeout, sizeof(rcv_timeout));
2502+
if (ret < 0)
2503+
fprintf(stderr, "[-] Error setting socket timeout: %m, waiting forever!\n");
2504+
} else
2505+
fprintf(stdout, "[+] Service waiting for worker checkpoint...\n");
2506+
24792507
ret = _read(checkpointSocket, &svc_resp, sizeof(svc_resp)); // receive resp from child
24802508

24812509
if (ret == sizeof(svc_resp)) {
24822510
fprintf(stdout, "[+] Service received checkpoint response, informing client...\n");
24832511
send_response_to_client(cd, svc_resp.resp_code);
2512+
return svc_resp.resp_code;
24842513
} else {
24852514
fprintf(stderr, "[!] Error reading checkpoint response from worker!\n");
2515+
// unable to read response from worker, kill both
2516+
kill(pid, SIGKILL);
2517+
kill(worker_pid, SIGKILL);
24862518
send_response_to_client(cd, MEMCR_ERROR_GENERAL);
2519+
return MEMCR_ERROR_GENERAL;
24872520
}
24882521
}
24892522

2490-
static void restore_procedure_service(int cd, struct service_command svc_cmd)
2523+
static void restore_procedure_service(int cd, struct service_command svc_cmd, int worker_pid)
24912524
{
24922525
int rd, ret = 0;
24932526
struct service_response svc_resp;
@@ -2504,12 +2537,23 @@ static void restore_procedure_service(int cd, struct service_command svc_cmd)
25042537
ret = -1;
25052538
}
25062539

2507-
fprintf(stdout, "[+] Service waiting for worker to restore... \n");
2540+
if (timeout) {
2541+
fprintf(stdout, "[+] Service waiting for worker to restore with timeout %d[s]...\n", timeout);
2542+
struct timeval rcv_timeout = { .tv_sec = timeout, .tv_usec = 0 };
2543+
ret = setsockopt(rd, SOL_SOCKET, SO_RCVTIMEO, &rcv_timeout, sizeof(rcv_timeout));
2544+
if (ret < 0)
2545+
fprintf(stderr, "[-] Error setting socket timeout: %m, waiting forever!\n");
2546+
} else
2547+
fprintf(stdout, "[+] Service waiting for worker to restore... \n");
2548+
25082549
ret = _read(rd, &svc_resp, sizeof(struct service_response)); // read response from service
25092550
close(rd);
25102551

25112552
if (ret != sizeof(struct service_response)) {
25122553
fprintf(stderr, "[-] %s() read() svc_resp failed: ret %d\n", __func__, ret);
2554+
// unable to read response from worker, kill both
2555+
kill(svc_cmd.pid, SIGKILL);
2556+
kill(worker_pid, SIGKILL);
25132557
ret = -1;
25142558
}
25152559

@@ -2573,18 +2617,30 @@ static void *service_command_thread(void *ptr)
25732617
} else if (forkpid > 0) {
25742618
close(checkpoint_resp_sockets[1]);
25752619
set_pid_checkpointing(svc_ctx.svc_cmd.pid, checkpoint_resp_sockets[0]);
2576-
checkpoint_procedure_service(checkpoint_resp_sockets[0], svc_ctx.cd);
2577-
set_pid_checkpointed(svc_ctx.svc_cmd.pid, forkpid);
2620+
if (checkpoint_procedure_service(checkpoint_resp_sockets[0], svc_ctx.cd,
2621+
svc_ctx.svc_cmd.pid, forkpid))
2622+
clear_pid_checkpoint_data(svc_ctx.svc_cmd.pid);
2623+
else
2624+
set_pid_checkpointed(svc_ctx.svc_cmd.pid, forkpid);
2625+
25782626
close(checkpoint_resp_sockets[0]);
25792627
} else {
25802628
fprintf(stderr, "%s(): Fork error!\n", __func__);
2629+
clear_pid_checkpoint_data(svc_ctx.svc_cmd.pid);
25812630
}
25822631

25832632
break;
25842633
}
25852634
case MEMCR_RESTORE: {
25862635
fprintf(stdout, "[+] handling MEMCR_RESTORE for %d.\n", svc_ctx.svc_cmd.pid);
2587-
restore_procedure_service(svc_ctx.cd, svc_ctx.svc_cmd);
2636+
int worker_pid = get_pid_worker(svc_ctx.svc_cmd.pid);
2637+
if (worker_pid == PID_INVALID) {
2638+
fprintf(stderr, "%s(): Error, worker pid not found for %d!\n", __func__, svc_ctx.svc_cmd.pid);
2639+
send_response_to_client(svc_ctx.cd, MEMCR_ERROR_GENERAL);
2640+
close(svc_ctx.cd);
2641+
break;
2642+
}
2643+
restore_procedure_service(svc_ctx.cd, svc_ctx.svc_cmd, worker_pid);
25882644
clear_pid_checkpoint_data(svc_ctx.svc_cmd.pid);
25892645
break;
25902646
}
@@ -2800,7 +2856,8 @@ static void usage(const char *name, int status)
28002856
" -f --rss-file include file mapped memory\n" \
28012857
" -z --compress compress memory dump\n" \
28022858
" -c --checksum enable md5 checksum for memory dump\n" \
2803-
" -e --encrypt enable encryption of memory dump\n",
2859+
" -e --encrypt enable encryption of memory dump\n" \
2860+
" -t --timeout timeout in seconds for checkpoint/restore execution in service mode\n",
28042861
name);
28052862

28062863
exit(status);
@@ -2840,14 +2897,15 @@ int main(int argc, char *argv[])
28402897
{ "compress", 0, NULL, 'z'},
28412898
{ "checksum", 0, NULL, 'c'},
28422899
{ "encrypt", 2, 0, 'e'},
2900+
{ "timeout", 1, 0, 't'},
28432901
{ NULL, 0, NULL, 0 }
28442902
};
28452903

28462904
dump_dir = "/tmp";
28472905
parasite_socket_dir = NULL;
28482906
parasite_socket_use_netns = 0;
28492907

2850-
while ((opt = getopt_long(argc, argv, "hp:d:S:Nl:nmfzce::", long_options, &option_index)) != -1) {
2908+
while ((opt = getopt_long(argc, argv, "hp:d:S:Nl:nmfzce::t:", long_options, &option_index)) != -1) {
28512909
switch (opt) {
28522910
case 'h':
28532911
usage(argv[0], 0);
@@ -2896,6 +2954,9 @@ int main(int argc, char *argv[])
28962954
else if (optind < argc && argv[optind][0] != '-')
28972955
encrypt_arg = argv[optind++];
28982956
break;
2957+
case 't':
2958+
timeout = atoi(optarg);
2959+
break;
28992960
default: /* '?' */
29002961
usage(argv[0], 1);
29012962
}

0 commit comments

Comments
 (0)