Skip to content

Commit 0926596

Browse files
committed
Make PerfCounters robust to the task being killed while we're setting up its counters
1 parent b191809 commit 0926596

File tree

1 file changed

+39
-17
lines changed

1 file changed

+39
-17
lines changed

src/PerfCounters.cc

+39-17
Original file line number | Diff line number | Diff line change
@@ -291,6 +291,7 @@ static int64_t read_counter(ScopedFd& fd) {
291291
return val;
292292
}
293293

294+
// Can return a closed fd: if `tid > 0` and the task was just SIGKILLed, perf_event_open fails with ESRCH, which is tolerated here, so callers must check `is_open()`.
294295
static ScopedFd start_counter(pid_t tid, int group_fd,
295296
struct perf_event_attr* attr,
296297
bool* disabled_txcp = nullptr) {
@@ -299,7 +300,7 @@ static ScopedFd start_counter(pid_t tid, int group_fd,
299300
}
300301
attr->pinned = group_fd == -1;
301302
int fd = syscall(__NR_perf_event_open, attr, tid, -1, group_fd, PERF_FLAG_FD_CLOEXEC);
302-
if (0 >= fd && errno == EINVAL && attr->type == PERF_TYPE_RAW &&
303+
if (fd < 0 && errno == EINVAL && attr->type == PERF_TYPE_RAW &&
303304
(attr->config & IN_TXCP)) {
304305
// The kernel might not support IN_TXCP, so try again without it.
305306
struct perf_event_attr tmp_attr = *attr;
@@ -320,16 +321,25 @@ static ScopedFd start_counter(pid_t tid, int group_fd,
320321
}
321322
}
322323
}
323-
if (0 >= fd) {
324-
if (errno == EACCES) {
325-
CLEAN_FATAL() << "Permission denied to use 'perf_event_open'; are hardware perf events "
326-
"available? See https://github.com/rr-debugger/rr/wiki/Will-rr-work-on-my-system";
327-
}
328-
if (errno == ENOENT) {
329-
CLEAN_FATAL() << "Unable to open performance counter with 'perf_event_open'; "
330-
"are hardware perf events available? See https://github.com/rr-debugger/rr/wiki/Will-rr-work-on-my-system";
324+
if (fd < 0) {
325+
switch (errno) {
326+
case EACCES:
327+
CLEAN_FATAL() << "Permission denied to use 'perf_event_open'; are hardware perf events "
328+
"available? See https://github.com/rr-debugger/rr/wiki/Will-rr-work-on-my-system";
329+
break;
330+
case ENOENT:
331+
CLEAN_FATAL() << "Unable to open performance counter with 'perf_event_open'; "
332+
"are hardware perf events available? See https://github.com/rr-debugger/rr/wiki/Will-rr-work-on-my-system";
333+
break;
334+
case ESRCH:
335+
if (tid > 0) {
336+
break;
337+
}
338+
RR_FALLTHROUGH;
339+
default:
340+
FATAL() << "Failed to initialize counter";
341+
break;
331342
}
332-
FATAL() << "Failed to initialize counter";
333343
}
334344
return ScopedFd(fd);
335345
}
@@ -751,6 +761,9 @@ void PerfCounters::PTState::open(pid_t tid) {
751761
init_perf_event_attr(&attr, event_type, 0);
752762
attr.aux_watermark = 8 * 1024 * 1024;
753763
pt_perf_event_fd = start_counter(tid, -1, &attr);
764+
if (!pt_perf_event_fd.is_open()) {
765+
return;
766+
}
754767

755768
size_t page_size = sysconf(_SC_PAGESIZE);
756769
void* base = mmap(NULL, page_size + PT_PERF_DATA_SIZE,
@@ -783,6 +796,10 @@ struct PerfEventAux {
783796
};
784797

785798
size_t PerfCounters::PTState::flush() {
799+
if (!mmap_header) {
800+
return 0;
801+
}
802+
786803
uint64_t data_end = mmap_header->data_head;
787804
__sync_synchronize();
788805
volatile char* data_buf = reinterpret_cast<volatile char*>(mmap_header) +
@@ -889,13 +906,15 @@ void PerfCounters::start(Task* t, Ticks ticks_period) {
889906
fd_useless_counter = start_counter(tid, -1, &perf_attr.cycles);
890907
}
891908

892-
struct f_owner_ex own;
893-
own.type = F_OWNER_TID;
894-
own.pid = tid;
895-
if (fcntl(fd_ticks_interrupt, F_SETOWN_EX, &own)) {
896-
FATAL() << "Failed to SETOWN_EX ticks event fd";
909+
if (fd_ticks_interrupt.is_open()) {
910+
struct f_owner_ex own;
911+
own.type = F_OWNER_TID;
912+
own.pid = tid;
913+
if (fcntl(fd_ticks_interrupt, F_SETOWN_EX, &own)) {
914+
FATAL() << "Failed to SETOWN_EX ticks event fd";
915+
}
916+
make_counter_async(fd_ticks_interrupt, PerfCounters::TIME_SLICE_SIGNAL);
897917
}
898-
make_counter_async(fd_ticks_interrupt, PerfCounters::TIME_SLICE_SIGNAL);
899918

900919
if (pt_state) {
901920
pt_state->open(tid);
@@ -1044,7 +1063,10 @@ Ticks PerfCounters::read_ticks(Task* t, Error* error) {
10441063
uint64_t adjusted_counting_period =
10451064
counting_period +
10461065
(t->session().is_recording() ? recording_skid_size() : skid_size());
1047-
uint64_t interrupt_val = read_counter(fd_ticks_interrupt);
1066+
uint64_t interrupt_val = 0;
1067+
if (fd_ticks_interrupt.is_open()) {
1068+
interrupt_val = read_counter(fd_ticks_interrupt);
1069+
}
10481070
uint64_t ret;
10491071
if (!fd_ticks_measure.is_open()) {
10501072
if (fd_minus_ticks_measure.is_open()) {

0 commit comments

Comments
 (0)