
Commit c2cdd83

Failing at spinlock implementation
1 parent 5549802 commit c2cdd83

12 files changed: +415 -4 lines changed

runtime/include/bpftime_epoll.h

Lines changed: 215 additions & 0 deletions
@@ -0,0 +1,215 @@
+#pragma once
+/*
+ * Declarations for structs that are specific to Linux, added from
+ * <sys/epoll.h>, <linux/perf_event.h>, and <linux/types.h>.
+ */
+#include <cstdint> /* fixed-width integer types used below */
+
+union epoll_data {
+	void *ptr;
+	int fd;
+	uint32_t u32;
+	uint64_t u64;
+};
+
+typedef union epoll_data epoll_data_t;
+struct epoll_event {
+	uint32_t events;   /* Epoll events */
+	epoll_data_t data; /* User data variable */
+};
+
+typedef uint64_t __u64;
+typedef uint64_t __aligned_u64;
+typedef uint32_t __u32;
+typedef uint16_t __u16;
+typedef uint8_t __u8;
+
+typedef int32_t __s32;
+typedef int64_t __s64;
+
+struct perf_event_header {
+	__u32 type;
+	__u16 misc;
+	__u16 size;
+};
+
+/*
+ * https://github.com/torvalds/linux/blob/f06ce441457d4abc4d76be7acba26868a2d02b1c/include/uapi/linux/perf_event.h#L571
+ */
+struct perf_event_mmap_page {
+	__u32 version;        /* version number of this structure */
+	__u32 compat_version; /* lowest version this is compat with */
+
+	/*
+	 * Bits needed to read the hw events in user-space.
+	 *
+	 *   u32 seq, time_mult, time_shift, index, width;
+	 *   u64 count, enabled, running;
+	 *   u64 cyc, time_offset;
+	 *   s64 pmc = 0;
+	 *
+	 *   do {
+	 *     seq = pc->lock;
+	 *     barrier()
+	 *
+	 *     enabled = pc->time_enabled;
+	 *     running = pc->time_running;
+	 *
+	 *     if (pc->cap_usr_time && enabled != running) {
+	 *       cyc = rdtsc();
+	 *       time_offset = pc->time_offset;
+	 *       time_mult = pc->time_mult;
+	 *       time_shift = pc->time_shift;
+	 *     }
+	 *
+	 *     index = pc->index;
+	 *     count = pc->offset;
+	 *     if (pc->cap_user_rdpmc && index) {
+	 *       width = pc->pmc_width;
+	 *       pmc = rdpmc(index - 1);
+	 *     }
+	 *
+	 *     barrier();
+	 *   } while (pc->lock != seq);
+	 *
+	 * NOTE: for obvious reason this only works on self-monitoring
+	 * processes.
+	 */
+	__u32 lock;         /* seqlock for synchronization */
+	__u32 index;        /* hardware event identifier */
+	__s64 offset;       /* add to hardware event value */
+	__u64 time_enabled; /* time event active */
+	__u64 time_running; /* time event on cpu */
+	union {
+		__u64 capabilities;
+		struct {
+			__u64 cap_bit0 : 1, /* Always 0, deprecated, see commit 860f085b74e9 */
+				cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */
+
+				cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */
+				cap_user_time : 1, /* The time_{shift,mult,offset} fields are used */
+				cap_user_time_zero : 1, /* The time_zero field is used */
+				cap_user_time_short : 1, /* the time_{cycle,mask} fields are used */
+				cap_____res : 58;
+		};
+	};
+
+	/*
+	 * If cap_user_rdpmc this field provides the bit-width of the value
+	 * read using the rdpmc() or equivalent instruction. This can be used
+	 * to sign extend the result like:
+	 *
+	 *   pmc <<= 64 - width;
+	 *   pmc >>= 64 - width; // signed shift right
+	 *   count += pmc;
+	 */
+	__u16 pmc_width;
+
+	/*
+	 * If cap_usr_time the below fields can be used to compute the time
+	 * delta since time_enabled (in ns) using rdtsc or similar.
+	 *
+	 *   u64 quot, rem;
+	 *   u64 delta;
+	 *
+	 *   quot = (cyc >> time_shift);
+	 *   rem = cyc & (((u64)1 << time_shift) - 1);
+	 *   delta = time_offset + quot * time_mult +
+	 *           ((rem * time_mult) >> time_shift);
+	 *
+	 * Where time_offset, time_mult, time_shift and cyc are read in the
+	 * seqcount loop described above. This delta can then be added to
+	 * enabled and possible running (if index), improving the scaling:
+	 *
+	 *   enabled += delta;
+	 *   if (index)
+	 *     running += delta;
+	 *
+	 *   quot = count / running;
+	 *   rem = count % running;
+	 *   count = quot * enabled + (rem * enabled) / running;
+	 */
+	__u16 time_shift;
+	__u32 time_mult;
+	__u64 time_offset;
+	/*
+	 * If cap_usr_time_zero, the hardware clock (e.g. TSC) can be calculated
+	 * from sample timestamps.
+	 *
+	 *   time = timestamp - time_zero;
+	 *   quot = time / time_mult;
+	 *   rem = time % time_mult;
+	 *   cyc = (quot << time_shift) + (rem << time_shift) / time_mult;
+	 *
+	 * And vice versa:
+	 *
+	 *   quot = cyc >> time_shift;
+	 *   rem = cyc & (((u64)1 << time_shift) - 1);
+	 *   timestamp = time_zero + quot * time_mult +
+	 *               ((rem * time_mult) >> time_shift);
+	 */
+	__u64 time_zero;
+
+	__u32 size; /* Header size up to __reserved[] fields. */
+	__u32 __reserved_1;
+
+	/*
+	 * If cap_usr_time_short, the hardware clock is less than 64bit wide
+	 * and we must compute the 'cyc' value, as used by cap_usr_time, as:
+	 *
+	 *   cyc = time_cycles + ((cyc - time_cycles) & time_mask)
+	 *
+	 * NOTE: this form is explicitly chosen such that cap_usr_time_short
+	 * is a correction on top of cap_usr_time, and code that doesn't
+	 * know about cap_usr_time_short still works under the assumption
+	 * the counter doesn't wrap.
+	 */
+	__u64 time_cycles;
+	__u64 time_mask;
+
+	/*
+	 * Hole for extension of the self monitor capabilities
+	 */
+	__u8 __reserved[116*8]; /* align to 1k. */
+
+	/*
+	 * Control data for the mmap() data buffer.
+	 *
+	 * User-space reading the @data_head value should issue an smp_rmb(),
+	 * after reading this value.
+	 *
+	 * When the mapping is PROT_WRITE the @data_tail value should be
+	 * written by userspace to reflect the last read data, after issuing
+	 * an smp_mb() to separate the data read from the ->data_tail store.
+	 * In this case the kernel will not over-write unread data.
+	 *
+	 * See perf_output_put_handle() for the data ordering.
+	 *
+	 * data_{offset,size} indicate the location and size of the perf record
+	 * buffer within the mmapped area.
+	 */
+	__u64 data_head;   /* head in the data section */
+	__u64 data_tail;   /* user-space written tail */
+	__u64 data_offset; /* where the buffer starts */
+	__u64 data_size;   /* data buffer size */
+
+	/*
+	 * AUX area is defined by aux_{offset,size} fields that should be set
+	 * by the userspace, so that
+	 *
+	 *   aux_offset >= data_offset + data_size
+	 *
+	 * prior to mmap()ing it. Size of the mmap()ed area should be aux_size.
+	 *
+	 * Ring buffer pointers aux_{head,tail} have the same semantics as
+	 * data_{head,tail} and same ordering rules apply.
+	 */
+	__u64 aux_head;
+	__u64 aux_tail;
+	__u64 aux_offset;
+	__u64 aux_size;
+};
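
The data_{head,tail} comments above spell out the contract for user-space readers. As a sketch only (not part of this commit): a reader loop over the mmap'ed ring could look like the following, where the data_area pointer and handle_record callback are hypothetical, and the acquire/release pair stands in for the smp_rmb()/smp_mb() the struct comments call for.

// Hypothetical reader over the perf mmap ring; uses the structs above.
inline void drain_ring(perf_event_mmap_page *page, void *data_area,
		       void (*handle_record)(const perf_event_header *))
{
	// Acquire-load pairs with the kernel's publication of data_head
	// (the smp_rmb() the struct comment asks for after reading it).
	__u64 head = __atomic_load_n(&page->data_head, __ATOMIC_ACQUIRE);
	__u64 tail = page->data_tail;
	while (tail < head) {
		const perf_event_header *hdr =
			reinterpret_cast<const perf_event_header *>(
				static_cast<char *>(data_area) +
				(tail % page->data_size));
		// NOTE: a real reader must also copy out records that wrap
		// past the end of the ring; omitted here for brevity.
		handle_record(hdr);
		tail += hdr->size;
	}
	// Release-store of data_tail (the smp_mb() mentioned above) tells
	// the kernel these slots may be overwritten.
	__atomic_store_n(&page->data_tail, tail, __ATOMIC_RELEASE);
}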

runtime/include/bpftime_shm.hpp

Lines changed: 2 additions & 0 deletions
@@ -13,6 +13,8 @@
 #include <ebpf-vm.h>
 #if __linux__
 #include <sys/epoll.h>
+#elif __APPLE__
+#include "bpftime_epoll.h"
 #endif

 namespace bpftime

runtime/include/platform_utils.hpp

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <cstdlib>
+#include <functional>
+
+#if defined(__linux__)
+#include <sched.h>
+#elif defined(__APPLE__) && defined(__MACH__)
+#include <sys/types.h> /* pid_t */
+#include <sys/sysctl.h>
+#include <pthread.h>
+typedef int cpu_set_t; /* NOTE: an int bitmask supports at most 32 CPUs */
+
+inline void CPU_ZERO(cpu_set_t *set) {
+	*set = 0;
+}
+
+inline void CPU_SET(int cpu, cpu_set_t *set) {
+	*set |= (1 << cpu);
+}
+
+inline int CPU_ISSET(int cpu, const cpu_set_t *set) {
+	return (*set & (1 << cpu)) != 0;
+}
+int sched_getaffinity(pid_t pid, size_t cpusetsize, cpu_set_t *mask);
+int sched_setaffinity(pid_t pid, size_t cpusetsize, const cpu_set_t *mask);
+#else
+#error "Unsupported platform"
+#endif
+
+int get_current_cpu();
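
This header only declares the macOS shims; their definitions are not among the files shown in this commit. A minimal sketch of plausible definitions, assuming the (lossy) fallback of reporting CPU 0 and treating affinity as a no-op, since macOS exposes no public sched_getcpu() or affinity-setting call:

#include "platform_utils.hpp"

#if defined(__APPLE__) && defined(__MACH__)
// macOS has no sched_getcpu(); returning 0 keeps per-CPU map logic
// functional (if degraded) by funnelling everything through one slot.
int get_current_cpu() {
	return 0;
}

// Affinity is not controllable through a public macOS API, so these
// shims report a single-CPU mask and accept requests without acting.
int sched_getaffinity(pid_t, size_t, cpu_set_t *mask) {
	CPU_ZERO(mask);
	CPU_SET(0, mask);
	return 0;
}

int sched_setaffinity(pid_t, size_t, const cpu_set_t *) {
	return 0;
}
#else
int get_current_cpu() {
	return sched_getcpu();
}
#endif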

runtime/include/spinlock.hpp

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+#ifndef SPINLOCK_HPP
+#define SPINLOCK_HPP
+
+#include <atomic>
+
+class Spinlock {
+    public:
+	std::atomic_flag lock = ATOMIC_FLAG_INIT;
+
+	// The member initializer above already puts the flag in the clear
+	// state; re-initializing it in a constructor init list with
+	// ATOMIC_FLAG_INIT is not portable, so the constructor is defaulted.
+	Spinlock() = default;
+
+	// Deleted copy constructor to prevent copying
+	Spinlock(const Spinlock&) = delete;
+
+	void spin_lock() {
+		while (lock.test_and_set(std::memory_order_acquire)) {
+			// busy-wait until the holder clears the flag
+		}
+	}
+
+	void spin_unlock() {
+		lock.clear(std::memory_order_release);
+	}
+};
+
+// Global functions to match the pthread_spin_lock/unlock interface
+inline void spin_lock(Spinlock* spinlock) {
+	spinlock->spin_lock();
+}
+
+inline void spin_unlock(Spinlock* spinlock) {
+	spinlock->spin_unlock();
+}
+
+#endif // SPINLOCK_HPP
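
A quick sanity check for the class (illustrative only, not part of the commit): two threads increment a shared counter through the free functions; without the lock the final count would be unreliable.

#include <iostream>
#include <thread>
#include "spinlock.hpp"

int main() {
	Spinlock lk;
	long counter = 0;
	auto worker = [&] {
		for (int i = 0; i < 100000; ++i) {
			spin_lock(&lk);
			++counter; // critical section
			spin_unlock(&lk);
		}
	};
	std::thread a(worker), b(worker);
	a.join();
	b.join();
	std::cout << counter << "\n"; // expect 200000
}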

runtime/include/spinlock_wrapper.hpp

Lines changed: 79 additions & 0 deletions
@@ -0,0 +1,79 @@
+#ifndef SPINLOCK_WRAPPER_HPP
+#define SPINLOCK_WRAPPER_HPP
+
+#include <pthread.h>
+#include <unistd.h> // POSIX option macros such as _POSIX_SPIN_LOCKS live here
+#include <atomic>
+
+// Include the custom spinlock implementation
+#include "spinlock.hpp"
+
+// Define the custom spinlock type and functions only if the platform does
+// not provide POSIX spin locks (e.g. macOS)
+#if !defined(_POSIX_SPIN_LOCKS) || _POSIX_SPIN_LOCKS <= 0
+typedef Spinlock pthread_spinlock_t; // custom spinlock type
+
+// Non-volatile versions
+inline int pthread_spin_init(pthread_spinlock_t* lock, int pshared) {
+	(void)pshared; // suppress unused parameter warning
+	lock->lock.clear();
+	return 0;
+}
+
+inline int pthread_spin_destroy(pthread_spinlock_t* lock) {
+	(void)lock; // suppress unused parameter warning
+	return 0;
+}
+
+inline int pthread_spin_lock(pthread_spinlock_t* lock) {
+	spin_lock(lock);
+	return 0;
+}
+
+inline int pthread_spin_unlock(pthread_spinlock_t* lock) {
+	spin_unlock(lock);
+	return 0;
+}
+
+// Volatile versions (const_cast also strips volatile)
+inline int pthread_spin_init(volatile pthread_spinlock_t* lock, int pshared) {
+	(void)pshared; // suppress unused parameter warning
+	const_cast<pthread_spinlock_t*>(lock)->lock.clear();
+	return 0;
+}
+
+inline int pthread_spin_destroy(volatile pthread_spinlock_t* lock) {
+	(void)lock; // suppress unused parameter warning
+	return 0;
+}
+
+inline int pthread_spin_lock(volatile pthread_spinlock_t* lock) {
+	spin_lock(const_cast<pthread_spinlock_t*>(lock));
+	return 0;
+}
+
+inline int pthread_spin_unlock(volatile pthread_spinlock_t* lock) {
+	spin_unlock(const_cast<pthread_spinlock_t*>(lock));
+	return 0;
+}
+
+#else
+// Provide volatile overloads that forward to the standard library functions
+
+inline int pthread_spin_init(volatile pthread_spinlock_t* lock, int pshared) {
+	return pthread_spin_init(const_cast<pthread_spinlock_t*>(lock), pshared);
+}
+
+inline int pthread_spin_destroy(volatile pthread_spinlock_t* lock) {
+	return pthread_spin_destroy(const_cast<pthread_spinlock_t*>(lock));
+}
+
+inline int pthread_spin_lock(volatile pthread_spinlock_t* lock) {
+	return pthread_spin_lock(const_cast<pthread_spinlock_t*>(lock));
+}
+
+inline int pthread_spin_unlock(volatile pthread_spinlock_t* lock) {
+	return pthread_spin_unlock(const_cast<pthread_spinlock_t*>(lock));
+}
+
+#endif // _POSIX_SPIN_LOCKS
+
+#endif // SPINLOCK_WRAPPER_HPP
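
With either branch, call sites keep the usual pthread spin-lock surface, so the same code compiles on Linux (native spin locks) and macOS (the Spinlock shim). A minimal illustration, not part of the commit:

#include "spinlock_wrapper.hpp"

int main() {
	pthread_spinlock_t lock;
	pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE); // pshared ignored by the shim
	pthread_spin_lock(&lock);
	// ... critical section ...
	pthread_spin_unlock(&lock);
	pthread_spin_destroy(&lock);
	return 0;
}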

runtime/src/bpf_map/map_common_def.hpp

Lines changed: 3 additions & 3 deletions
@@ -11,7 +11,7 @@
 #include <boost/interprocess/managed_shared_memory.hpp>
 #include <boost/interprocess/containers/vector.hpp>
 #include <functional>
-#include <sched.h>
+#include "platform_utils.hpp"

 namespace bpftime
 {
@@ -23,14 +23,14 @@ using bytes_vec = boost::interprocess::vector<uint8_t, bytes_vec_allocator>;
 template <class T>
 static inline T ensure_on_current_cpu(std::function<T(int cpu)> func)
 {
-	return func(sched_getcpu());
+	return func(get_current_cpu());
 }

 template <class T>
 static inline T ensure_on_certain_cpu(int cpu, std::function<T()> func)
 {
 	static thread_local int currcpu = -1;
-	if (currcpu == sched_getcpu()) {
+	if (currcpu == get_current_cpu()) {
 		return func(currcpu);
 	}
 	cpu_set_t orig, set;
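
The swap from sched_getcpu() to get_current_cpu() keeps these per-CPU helpers platform-neutral. For context, a standalone sketch of how ensure_on_current_cpu is meant to be used; get_current_cpu() is stubbed here, and the per-CPU counter array is hypothetical:

#include <cstdio>
#include <functional>

// Stand-alone sketch of the helper above: run `func` with the id of the CPU
// the calling thread is on. In the tree, get_current_cpu() comes from
// platform_utils.hpp; it is stubbed to 0 here so the example is runnable.
static int get_current_cpu() { return 0; }

template <class T>
static inline T ensure_on_current_cpu(std::function<T(int cpu)> func)
{
	return func(get_current_cpu());
}

int main()
{
	static long per_cpu_counter[128] = {}; // hypothetical per-CPU slots
	long v = ensure_on_current_cpu<long>([](int cpu) {
		return ++per_cpu_counter[cpu];
	});
	std::printf("counter on this cpu: %ld\n", v);
}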
