Skip to content

Commit 3169161

Browse files
author
Lars T Hansen
committed
Some progress
1 parent c417ba2 commit 3169161

File tree

7 files changed

+234
-89
lines changed

7 files changed

+234
-89
lines changed

manual-tests/sessions/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
.PHONY: all clean
22

33
PROGRAMS=sonar-session sonar-job sonar-worker
4-
CXXFLAGS=-O2
4+
CFLAGS=-Wall
5+
CXXFLAGS=-O2 -Wall
56

67
all: $(PROGRAMS)
78
clean:
Lines changed: 85 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,81 +1,106 @@
1-
# WALLTIME must have the execution time in seconds
2-
31
BEGIN {
4-
delete session[0]
5-
delete job[0]
6-
delete bash[0]
7-
WALLTIME=strtonum(WALLTIME)
2+
# Arguably keeping all the times is unnecessary, we just need min
3+
# and max, but it's OK for debugging.
4+
delete job[0] # job[pid][...] is cputime for jobs
5+
delete bash[0] # bash[pid][...] is cputime for interactive bash shells
6+
delete run[0] # run[pid][...] is cputime for run_test.sh shells
7+
WALLTIME=strtonum(WALLTIME) # Execution time in seconds of topmost sonar-job
8+
NUMJOBS=strtonum(NUMJOBS) # Number of jobs we should see
89
}
910

10-
# For each line, parse the cputime_sec if present and bucket the output by command
1111
{
12-
time=0
13-
ix=index($0, ",cputime_sec=")
14-
if (ix > 0) {
15-
s=substr($0, ix+13)
16-
ix=index(s, ",")
17-
if (ix > 0) {
18-
s=substr(s, 0, ix-1)
19-
}
20-
time=strtonum(s)
21-
}
22-
if (index($0, ",cmd=sonar-session") > 0) {
23-
session[length(session)]=time
24-
} else if (index($0, ",cmd=sonar-job") > 0) {
25-
job[length(job)]=time
26-
} else if (index($0, ",cmd=bash") > 0) {
27-
ix=index($0, ",pid=")
28-
if (ix > 0) {
29-
s=substr($0, ix+5)
30-
ix=index(s, ",")
31-
if (ix > 0) {
32-
s=substr(s, 0, ix-1)
33-
}
34-
if (isarray(bash[s])) {
35-
bash[s][length(bash[s])]=time
36-
} else {
37-
bash[s][0]=time
38-
}
39-
}
12+
time=number_field($0, "cputime_sec")
13+
pid=number_field($0, "pid")
14+
cmd=string_field($0, "cmd")
15+
switch (cmd) {
16+
case /.*sonar-job/:
17+
if (isarray(job[pid]))
18+
job[pid][length(job[pid])]=time
19+
else
20+
job[pid][0]=time
21+
break
22+
case /.*bash/:
23+
if (isarray(bash[pid]))
24+
bash[pid][length(bash[pid])]=time
25+
else
26+
bash[pid][0]=time
27+
break
28+
case /.*run_test/:
29+
if (isarray(run[pid]))
30+
run[pid][length(run[pid])]=time
31+
else
32+
run[pid][0]=time
33+
break
4034
}
4135
}
4236

43-
# Check that the bucket values are sane
4437
END {
45-
if (length(session) == 0) {
46-
print "No sessions!"
47-
exit 1
48-
}
49-
diff=session[length(session)-1] - session[0]
50-
if (diff > 2) {
51-
print "Session cost " diff " is too high"
52-
for ( x in session ) {
53-
print " " session[x]
54-
}
55-
exit 1
56-
}
57-
58-
if (length(job) == 0) {
59-
print("no jobs!")
60-
exit 1
38+
fail=0
39+
if (length(job) != NUMJOBS) {
40+
print "Wrong number of jobs, expected " NUMJOBS " got " length(job)
41+
fail=1
6142
}
62-
diff=job[length(job)-1] - job[0]
63-
if (diff < WALLTIME/2) {
64-
print "Job cost " diff " is too small for wall time " WALLTIME
65-
for ( x in job ) {
66-
print " " job[x]
43+
for (pid in job) {
44+
diff=job[pid][length(job[pid])-1] - job[pid][0]
45+
if (diff < WALLTIME/2) {
46+
print "Job " pid ": Job cost " diff " is too small for wall time " WALLTIME
47+
for ( x in job[pid] ) {
48+
print " " job[pid][x]
49+
}
50+
fail=1
6751
}
68-
exit 1
6952
}
7053

7154
for (pid in bash) {
7255
diff=bash[pid][length(bash[pid])-1] - bash[pid][0]
7356
if (diff > 2) {
74-
print "Bash cost " diff " is too high"
57+
print "Bash " pid ": Shell cost " diff " is too high"
7558
for ( x in bash[pid] ) {
7659
print " " bash[pid][x]
7760
}
78-
exit 1
61+
fail=1
7962
}
8063
}
64+
65+
if (length(run) == 0) {
66+
print "No test runners!"
67+
fail=1
68+
}
69+
70+
for (pid in run) {
71+
diff=run[pid][length(run[pid])-1] - run[pid][0]
72+
if (diff > 2) {
73+
print "Run " pid ": Shell cost " diff " is too high"
74+
for ( x in run[pid] ) {
75+
print " " run[pid][x]
76+
}
77+
fail=1
78+
}
79+
}
80+
81+
if (fail) {
82+
exit 1
83+
}
84+
}
85+
86+
# Numeric value of the ",tag=value" field of `input`.
#
#   input - record text to search
#   tag   - field name, without the surrounding "," and "="
#   s     - local scratch variable (awk idiom: extra parameter)
#
# Returns strtonum() of the field text, or 0 when string_field() yields
# the empty string (field absent or empty).
function number_field(input, tag,    s) {
    s = string_field(input, tag)
    return (s == "") ? 0 : strtonum(s)
}
92+
93+
# Text value of the ",tag=value" field of `input`.
#
#   input        - record text to search
#   tag          - field name, without the surrounding "," and "="
#   s, pat, ix   - local scratch variables (awk idiom: extra parameters)
#
# Returns the text that follows the first occurrence of ",tag=" up to, but
# not including, the next ","; if no further "," exists the rest of `input`
# is returned.  Returns "" when ",tag=" does not occur.  Because the search
# pattern begins with ",", a field at the very start of the record is never
# matched.
function string_field(input, tag,    s, pat, ix) {
    s = ""
    pat = "," tag "="
    ix = index(input, pat)
    if (ix > 0) {
        # Slice the argument that was searched, not the current record $0,
        # so the function is correct for any caller-supplied text.
        s = substr(input, ix + length(pat))
        ix = index(s, ",")
        if (ix > 0) {
            # awk string positions are 1-based.  Starting at 0 makes gawk
            # shorten the result by one character, dropping the last
            # character of the value.
            s = substr(s, 1, ix - 1)
        }
    }
    return s
}
106+

manual-tests/sessions/run_test.sh

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,27 @@
33
# Usage:
44
# run_test.sh path-to-sonar-binary
55
#
6-
# This must be run from the directory that has sonar-session.c, sonar-job.c, sonar-worker.cc,
7-
# Makefile and check-output.awk.
6+
# This tests the accumulation of cpu time to nested jobs.
7+
#
8+
# A job is a Unix "process group". Within a process group, the cpu time of children accrue to the
9+
# process group leader as the children terminate (are waited for). But when a process in a group
10+
# forks off a new process group - creates a subjob - the cpu time of the subjob should not accrue
11+
# to the parent job.
12+
#
13+
# To test this:
14+
#
15+
# - this script runs `sonar-job N` where N >= 0
16+
# - if N > 0, that program forks off `sonar-job N-1`
17+
# - each job is its own process group
18+
# - each sonar-job then runs some copies of sonar-worker, the cpu time of which will accrue to the job
19+
# - sonar runs in the background to observe the goings-on
20+
# - in the end, when the topmost sonar-job returns, we want to check that each sonar-job has gotten
21+
# cpu time accrued to it corresponding to the sum of its workers, not including subjobs, and
22+
# also that this script has not been charged for the work of any of the jobs.
23+
# - this script is called run_test.sh and this is known to the awk processing script, q.v.
24+
#
25+
# This script must be run from the directory that has sonar-job.c, sonar-worker.cc, Makefile and
26+
# check-output.awk.
827

928
set -e
1029
if (( $# != 1 )); then
@@ -16,6 +35,7 @@ SONARBIN=$1
1635
# Config these, if you must
1736
SLEEPTIME=3
1837
WAITTIME=10
38+
NUMJOBS=2
1939
OUTFILE=sonar-output.txt
2040

2141
# Compile things as necessary
@@ -24,19 +44,26 @@ make --quiet
2444
# Run sonar in the background, every few seconds, and try to capture no more than necessary.
2545
rm -f $OUTFILE
2646
( while true ; do
27-
$SONARBIN ps | grep -E ",user=$LOGNAME," | grep -E ',cmd=(sonar|bash)' >> $OUTFILE
47+
$SONARBIN ps --batchless --exclude-system-jobs | grep -E ',cmd=(sonar|run_test|bash)' >> $OUTFILE
2848
sleep $SLEEPTIME
2949
done ) &
3050
SONARPID=$!
3151

32-
# Run the new session root
33-
echo "The test can take 60-90s on 2024-era hardware"
52+
# Fork off a new job tree - with parameter N, we should get N+1 levels
53+
echo "The test will take several minutes"
3454
then=$(date +%s)
35-
./sonar-session
55+
./sonar-job $((NUMJOBS - 1))
3656
now=$(date +%s)
3757

58+
# Wait for session time to be accrued to this shell
59+
echo "Waiting $((SLEEPTIME * 2))s in run_test (#1) for things to settle..."
60+
sleep $((SLEEPTIME * 2))
3861
kill -TERM $SONARPID
3962

40-
# Now process the output. See check-output.c for details.
41-
gawk -vWALLTIME=$((now - then)) -f check-output.awk $OUTFILE
63+
# New output may arrive late after the script has been killed
64+
echo "Waiting $((SLEEPTIME * 2))s in run_test (#2) for things to settle..."
65+
sleep $((SLEEPTIME * 2))
66+
67+
# Now process the output.
68+
gawk -vNUMJOBS=$NUMJOBS -vWALLTIME=$((now - then)) -f check-output.awk $OUTFILE
4269
echo "Everything is fine"

manual-tests/sessions/sonar-job.c

Lines changed: 50 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
/* The job of this program is to be the parent process for a bunch of worker processes, which we run
22
* serially as that provides the best signal. */
33

4+
#include <assert.h>
45
#include <errno.h>
56
#include <stdio.h>
67
#include <stdlib.h>
@@ -11,20 +12,61 @@
1112
#define ITERATIONS 5
1213

1314
int main(int argc, char **argv) {
15+
/* If this is not the top of a process group, make it one */
16+
if (getpid() != getpgid(0)) {
17+
if (setpgid(0, 0) == -1) {
18+
perror("Trying to create a new process group");
19+
exit(1);
20+
}
21+
}
22+
assert(getpid() == getpgid(0));
23+
24+
/* If subjobs are wanted, create them */
25+
pid_t subjob = 0;
26+
if (argc > 1) {
27+
int n = atoi(argv[1]);
28+
if (n > 0) {
29+
switch (subjob = fork()) {
30+
case -1:
31+
perror("Trying to fork a subjob");
32+
exit(1);
33+
case 0: {
34+
char buf[20];
35+
sprintf(buf, "%d", n-1);
36+
execl("sonar-job","sonar-job",buf,NULL);
37+
perror("Trying to exec a subjob");
38+
exit(1);
39+
}
40+
}
41+
}
42+
}
43+
44+
/* Do the work */
1445
for ( int i=0 ; i < ITERATIONS ; i++ ) {
15-
pid_t child = fork();
16-
if (child == (pid_t)-1) {
46+
pid_t worker;
47+
switch (worker = fork()) {
48+
case -1:
1749
perror("Trying to fork a new process for sonar-worker");
1850
exit(1);
19-
}
20-
if (child == 0) {
51+
case 0:
2152
execl("sonar-worker", "sonar-worker", NULL);
2253
perror("Trying to exec sonar-worker");
2354
exit(1);
24-
}
25-
if (wait(NULL) == -1) {
26-
perror("Waiting for sonar-worker");
27-
exit(1);
55+
default:
56+
if (waitpid(worker, NULL, 0) == -1) {
57+
perror("Waiting for sonar-worker");
58+
exit(1);
59+
}
2860
}
2961
}
62+
63+
/* Wait for the subjob to be done */
64+
if (subjob > 0) {
65+
waitpid(subjob, NULL, 0);
66+
}
67+
68+
/* Wait a bit, so that information from the terminated job can be accounted to the current
69+
process. */
70+
printf("Waiting 10s in sonar-job for things to settle...\n");
71+
sleep(10);
3072
}

manual-tests/sessions/sonar-session.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
#include <sys/wait.h>
99

1010
int main(int argc, char **argv) {
11-
/* When the shell starts this process it may make it a session leader, otherwise, become one. */
11+
/* When the shell starts this process it may make it a process group leader, otherwise, become one. */
1212
if (getpid() != getpgid(0)) {
13-
if (setsid() == (pid_t)-1) {
14-
perror("Trying to become session leader");
13+
if (setpgid(0, 0) == -1) {
14+
perror("Trying to become process group leader");
1515
exit(1);
1616
}
1717
}
@@ -36,6 +36,6 @@ int main(int argc, char **argv) {
3636

3737
/* Wait a bit, so that information from the terminated job can be accounted to the current
3838
process. */
39-
printf("Waiting 10s for things to settle...\n");
39+
printf("Waiting 10s in sonar-session for things to settle...\n");
4040
sleep(10);
4141
}

manual-tests/sessions/sonar-worker.cc

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@ void init_matrix(T mat[N][M], int scheme) {
2020
}
2121

2222
void mmul(float A[N][N], float B[N][N], float C[N][N]) {
23-
for ( int kk=0 ; kk < N ; kk+=S ) {
24-
for ( int jj=0 ; jj < N ; jj+=S ) {
25-
for ( int i=0 ; i < N ; i++ ) {
26-
for ( int j=jj ; j < jj + S ; j++ ) {
23+
for ( size_t kk=0 ; kk < N ; kk+=S ) {
24+
for ( size_t jj=0 ; jj < N ; jj+=S ) {
25+
for ( size_t i=0 ; i < N ; i++ ) {
26+
for ( size_t j=jj ; j < jj + S ; j++ ) {
2727
float sum = C[i][j];
28-
for ( int k=kk ; k < kk + S ; k++ ) {
28+
for ( size_t k=kk ; k < kk + S ; k++ ) {
2929
sum += A[i][k] * B[k][j];
3030
}
3131
C[i][j] = sum;
@@ -46,8 +46,8 @@ int main(int argc, char** argv) {
4646

4747
// Use the result
4848
float sum = 0.0f;
49-
for ( int j=0 ; j < N ; j++ ) {
50-
for ( int i=0 ; i < N ; i++ ) {
49+
for ( size_t j=0 ; j < N ; j++ ) {
50+
for ( size_t i=0 ; i < N ; i++ ) {
5151
sum += C[j][i];
5252
}
5353
}

0 commit comments

Comments
 (0)