Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions synthetic-multi-round-qa/multi-round-qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ class WorkloadConfig:
# Whether to include user id in request header
enable_user_id: bool

# Whether strictly cap active sessions at num_users
enforce_strict_concurrent_users: bool = False


@dataclass
class UserConfig:
Expand Down Expand Up @@ -457,6 +460,10 @@ def __init__(
if self.use_sharegpt:
self._load_sharegpt_data()

self.enforce_strict_concurrent_users = (
workload_config.enforce_strict_concurrent_users
)

def _load_sharegpt_data(self):
with open("ShareGPT.json", "r", encoding="utf-8") as file:
self.sharegpt_data = json.load(file)
Expand Down Expand Up @@ -499,15 +506,28 @@ def _remove_finished_sessions(self):
self.session_summaries.append(session.summary())
self.sessions = [s for s in self.sessions if not s.finished]

def _can_join_user(self, timestamp: float) -> bool:
# No new user session if gap_between_users time interval not meets
if timestamp - self.last_user_join <= self.gap_between_users:
return False

# No user seession if active user count is less than configured
if (
self.enforce_strict_concurrent_users
and len(self.sessions) >= self.workload_config.num_users
):
return False
return True

def step(self, timestamp: float, executor: RequestExecutor):
if self.need_ramp_up:
self._ramp_up(timestamp, self.ramp_up_time)

if self.start_time is None:
self.start_time = timestamp

# New user session only joins when meets time interval and active user count is less than configured
if timestamp - self.last_user_join > self.gap_between_users and len(self.sessions) < self.workload_config.num_users:
# Check if can join new user session
if self._can_join_user(timestamp):
new_session = self._create_user_session()
if new_session is not None:
self.last_user_join = timestamp
Expand Down Expand Up @@ -704,6 +724,11 @@ def parse_arguments() -> WorkloadConfig:
parser.add_argument(
"--sharegpt", action="store_true", help="Whether to use ShareGPT dataset"
)
parser.add_argument(
"--enforce-strict-concurrent-users",
action="store_true",
help="Strictly enforce concurrent users count to match --num-users",
)
args = parser.parse_args()
return args

Expand Down Expand Up @@ -750,6 +775,7 @@ def main():
qps=args.qps,
model=args.model,
enable_user_id=args.request_with_user_id,
enforce_strict_concurrent_users=args.enforce_strict_concurrent_users,
)

manager = UserSessionManager(
Expand Down
Loading