-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathjson-named-users-rsp.R
180 lines (149 loc) · 5.92 KB
/
json-named-users-rsp.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
#! /usr/local/bin/Rscript
# Load jsonlite package
library(jsonlite)
# ---- Script defaults ----
# When the script is run non-interactively these values are replaced by the
# parsed command-line arguments further down.

# Default NDJSON audit log to read
json_path <- "/var/lib/rstudio-server/audit/r-sessions/r-sessions.jsonl"

# Reporting window: from 365 days ago up to tomorrow
min_date <- as.POSIXct(Sys.Date() - 365)
max_date <- as.POSIXct(Sys.Date() + 1)

# Default CSV path for the per-user counts. The timestamp is rendered with an
# explicit format() so the file name is always
# rsp-user-counts-YYYY-MM-DD-HH:MM:SS.csv; pasting Sys.time() directly goes
# through as.character(), whose output for date-times differs between R
# versions (newer releases append fractional seconds).
csv_path <- paste0(
  "./rsp-user-counts-",
  format(Sys.time(), "%Y-%m-%d-%H:%M:%S"),
  ".csv"
)

# Report active users per month (TRUE) or one overall total (FALSE)
monthly <- TRUE

# Emit progress messages through print_debug()
debug <- FALSE
# Debug printing helper.
#
# msg: value(s) passed to cat(); a space and a newline follow the message.
#
# Output is produced only while the script-level `debug` flag is TRUE;
# otherwise the call is a no-op. Returns NULL invisibly in both cases.
print_debug <- function(msg) {
  verbose <- debug
  if (verbose) {
    cat(msg, "\n", sep = " ")
  }
}
# Report the dimensions of `dat` through print_debug().
#
# dat: any object dim() understands (a data frame in this script).
#
# The message reads "<tab>Data dimensions: <rows> x <cols>"; whether it is
# actually shown is decided by print_debug().
print_dims <- function(dat) {
  shape <- paste0(nrow(dat), " x ", ncol(dat))
  print_debug(paste0("\tData dimensions: ", shape))
}
# Count events of one type per user per month.
#
# log_data: data frame with at least the columns `type`, `timestamp`,
#           `month` and `username`.
# metric:   event type to keep (matched against `log_data$type`); it is also
#           used as the name of the count column in the result.
#
# Returns a data frame with the columns `user`, `month`, <metric> and
# `product`, one row per user/month combination produced by table() (zero
# counts included). When no event of the requested type exists the result
# simply has zero rows.
count_metric <- function(log_data, metric) {
  print_debug(paste0("Calculating ", metric, " counts ----"))

  print_debug(paste0("\tFiltering to ", metric, " events"))
  # %in% never returns NA, so records with a missing type are dropped rather
  # than being turned into all-NA rows by logical subsetting.
  is_metric <- log_data$type %in% metric

  print_debug("\tSelecting only timestamp, month, and username")
  log_data <- log_data[
    is_metric,
    c("timestamp", "month", "username"),
    drop = FALSE
  ]
  print_dims(log_data)

  # Count sessions per user per month
  print_debug(paste0("\tCounting ", metric, " events per user per month"))
  user_metric_counts <- as.data.frame(
    table(log_data$username, log_data$month)
  )
  names(user_metric_counts) <- c("user", "month", metric)
  # A scalar cannot be assigned as a column of a zero-row data frame, so the
  # label is repeated to the actual row count (possibly zero).
  user_metric_counts$product <- rep(
    "RStudio Server Pro",
    nrow(user_metric_counts)
  )

  print_debug(paste0("Finished calculating ", metric, " counts ----"))
  user_metric_counts
}
# Parse arguments if run as CLI.
# Every option defaults to the script-level value defined above; after
# parsing, the script-level variables are overwritten with what was supplied.
if (!interactive()) {
  library(argparser, quietly = TRUE)

  p <- arg_parser(
    description = "Active RStudio Server Pro User Counts from NDJSON input."
  )
  p <- add_argument(
    parser = p,
    arg = "--json-path",
    help = "Path to NDJSON input file",
    type = "character",
    default = json_path
  )
  p <- add_argument(
    parser = p,
    arg = "--min-date",
    help = "Minimum date to compute user counts",
    type = "character",
    default = format(min_date, "%Y-%m-%d")
  )
  p <- add_argument(
    parser = p,
    arg = "--max-date",
    help = "Maximum date to compute user counts",
    type = "character",
    default = format(max_date, "%Y-%m-%d")
  )
  p <- add_argument(
    parser = p,
    arg = "--output",
    help = "Path to write .csv file of user counts",
    type = "character",
    default = csv_path
  )
  p <- add_argument(
    parser = p,
    arg = "--monthly",
    help = "Count active users by month",
    flag = TRUE
  )
  p <- add_argument(
    parser = p,
    arg = "--debug",
    help = "Enable debug output",
    flag = TRUE
  )

  argv <- parse_args(p)

  json_path <- argv$json_path
  min_date <- as.POSIXct(argv$min_date)
  # --max-date has to be read back as well; without this assignment the
  # option is accepted on the command line but has no effect.
  max_date <- as.POSIXct(argv$max_date)
  csv_path <- argv$output
  # NOTE(review): both flags replace the script-level defaults, so `monthly`
  # is presumably FALSE on the command line unless --monthly is passed -
  # confirm against argparser's flag semantics.
  monthly <- argv$monthly
  debug <- argv$debug
}
# Main pipeline: read the NDJSON audit log, count session_start and
# auth_login events per user and month, write the per-user counts to
# `csv_path` and print the active-user summary. Any error is reported on
# standard output and the script exits with status 1.
tryCatch({
  # Read each line of JSON data
  print_debug(paste0("Reading NDJSON data from: ", json_path))
  # Given a path, readLines() opens and closes the connection itself, so
  # nothing is left open when reading fails.
  lines <- readLines(json_path)

  # Parse each line as JSON; blank or unparsable lines become NULL and are
  # dropped by rbind() below.
  records <- lapply(lines, function(line) {
    # Skip empty lines
    if (nchar(trimws(line)) == 0) return(NULL)
    tryCatch({
      record <- fromJSON(line)
      if (is.null(record)) return(NULL)
      # Convert to data frame row
      as.data.frame(record, stringsAsFactors = FALSE)
    }, error = function(e) {
      print_debug(paste("Skipping malformed JSON line:", line))
      NULL
    })
  })

  # NOTE: rbind() needs every record to expose the same fields.
  log_data <- do.call(rbind, records)
  # Without this guard an empty or fully malformed file would be reported as
  # "Missing required columns", which hides the real cause.
  if (is.null(log_data) || nrow(log_data) == 0) {
    stop("No valid JSON records found in ", json_path)
  }
  print_dims(log_data)

  # Verify required columns exist
  required_cols <- c("timestamp", "type", "username")
  missing_cols <- setdiff(required_cols, names(log_data))
  if (length(missing_cols) > 0) {
    stop(
      "Missing required columns in JSON: ",
      paste(missing_cols, collapse = ", ")
    )
  }

  # Convert timestamp from numeric; the division by 1000 treats the stored
  # value as milliseconds since the epoch.
  print_debug("Converting timestamp")
  log_data$timestamp <- as.POSIXct(
    log_data$timestamp / 1000,
    origin = "1970-01-01"
  )
  print_dims(log_data)

  # Keep events inside [min_date, max_date]. Rows with a missing timestamp
  # are excluded explicitly; a bare logical index would turn them into
  # all-NA rows.
  print_debug(paste0(
    "Filtering to events between ", format(min_date),
    " and ", format(max_date)
  ))
  in_window <- !is.na(log_data$timestamp) &
    log_data$timestamp >= min_date &
    log_data$timestamp <= max_date
  log_data <- log_data[in_window, , drop = FALSE]
  print_dims(log_data)

  # Extract month and year
  print_debug("Extracting month from timestamp")
  log_data$month <- format(log_data$timestamp, format = "%Y-%m")

  # Count session_start events
  session_counts <- count_metric(log_data, "session_start")
  # Count auth_login events
  login_counts <- count_metric(log_data, "auth_login")

  # Combine data. The join keys are spelled out and the count columns are
  # renamed by name, so the result does not depend on column positions.
  print_debug("Combining login and session counts")
  all_counts <- merge(
    session_counts,
    login_counts,
    by = c("user", "month", "product"),
    all = TRUE
  )
  names(all_counts)[names(all_counts) == "session_start"] <- "sessions"
  names(all_counts)[names(all_counts) == "auth_login"] <- "logins"
  # A user/month present in only one of the two tables has no count in the
  # other one
  all_counts$sessions[is.na(all_counts$sessions)] <- 0
  all_counts$logins[is.na(all_counts$logins)] <- 0

  # Create active column indicating the user logged in OR started a session
  print_debug("Identifying active users")
  all_counts$active <- all_counts$sessions > 0 | all_counts$logins > 0

  if (monthly) {
    # Count monthly active users
    print_debug("Counting monthly active users")
    counts <- unique(
      all_counts[all_counts$active, c("user", "month", "active")]
    )
    counts <- as.data.frame(table(counts$month))
    names(counts) <- c("Month", "Active User Count")
  } else {
    # Single total over the whole reporting window
    counts <- all_counts[all_counts$active, "user"]
    counts <- paste0(
      length(unique(counts)),
      " unique Posit Workbench named users between ",
      format(min_date, "%Y-%m-%d"), " and ", format(max_date, "%Y-%m-%d")
    )
  }

  # Write CSV (always the per-user table, not the printed summary)
  print_debug(paste0("Writing user counts data to ", csv_path))
  write.csv(all_counts, csv_path, row.names = FALSE)

  # Print final user counts
  print(counts, row.names = FALSE)
}, error = function(e) {
  cat("Error: ", conditionMessage(e), "\n")
  quit(status = 1)
})