Skip to content

Commit b520016

Browse files
authored
Merge pull request #8 from exasol/multinode
Multinode
2 parents 09dd3a9 + fa68dfe commit b520016

38 files changed

+3741
-550
lines changed

benchkit/cli.py

Lines changed: 283 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,244 @@ def status(
282282
_show_configs_summary(config_files)
283283

284284

285+
@app.command()
def check(
    config: str = typer.Option(..., "--config", "-c", help="Path to config YAML file"),
    verbose: bool = typer.Option(
        False, "--verbose", "-v", help="Show all configuration details"
    ),
) -> None:
    """Check and display configuration file contents.

    Loads the file through ``load_config``, prints a status panel, and then
    either lists the load/validation errors or prints one panel per
    configuration section (project, environment, systems, workload,
    execution, report).

    Args:
        config: Path to the YAML configuration file.
        verbose: When set, also list every ``setup`` parameter of each system
            (values of keys containing "password" are masked).

    Raises:
        typer.Exit: With code 1 when the file is missing, fails to load or
            validate, or loads to ``None``.
    """
    import re
    from pathlib import Path

    from rich.markup import escape
    from rich.panel import Panel
    from rich.text import Text

    def _cell(value: Any, default: str = "-") -> str:
        """Return *value* as a markup-safe table cell, or *default* for None."""
        # Table.add_row rejects non-renderable objects (e.g. ints), and
        # unescaped "[...]" in user data would be parsed as Rich markup.
        return default if value is None else escape(str(value))

    def _kv_table() -> Table:
        """Build the borderless two-column key/value table used per section."""
        table = Table(show_header=False, box=None, padding=(0, 2))
        table.add_column("Key", style="bold")
        table.add_column("Value")
        return table

    def _status_panel(label: str, color: str) -> Panel:
        """Build the header panel showing the config path and its status."""
        status_text = Text()
        status_text.append("Configuration: ", style="bold")
        status_text.append(str(config_path), style="cyan")
        status_text.append("\nStatus: ", style="bold")
        status_text.append(label, style=f"{color} bold")
        return Panel(status_text, border_style=color)

    config_path = Path(config)

    # Try to load and validate the config
    validation_errors: list[str] = []
    cfg: dict[str, Any] | None = None

    try:
        cfg = load_config(config)
    except FileNotFoundError as e:
        console.print(
            Panel(
                "[red]Configuration file not found:[/red] "
                f"{escape(str(config_path))}",
                title="Configuration Check",
                border_style="red",
            )
        )
        raise typer.Exit(1) from e
    except ValueError as e:
        # Validation failures are reported through the exception message
        validation_errors.append(str(e))
    except Exception as e:
        # CLI boundary: report anything else as an error instead of a traceback
        validation_errors.append(f"Unexpected error: {e}")

    # A loader that returns nothing must not be reported as "Valid" first.
    if cfg is None and not validation_errors:
        validation_errors.append("Configuration could not be loaded (no data)")

    # Display header with validation status
    if validation_errors or cfg is None:
        console.print(_status_panel("✗ Invalid", "red"))

        console.print("\n[red bold]Errors found:[/red bold]")
        for i, error in enumerate(validation_errors, 1):
            # Clean up the error message; escape it so bracketed details in
            # validation output are printed instead of being eaten as markup
            cleaned = error.replace("Invalid configuration: ", "")
            console.print(f" {i}. {escape(cleaned)}")

        raise typer.Exit(1)

    # Config is valid - display it
    console.print(_status_panel("✓ Valid", "green"))

    # Display Project section
    project_table = _kv_table()
    project_table.add_row("ID", _cell(cfg.get("project_id")))
    project_table.add_row("Title", _cell(cfg.get("title")))
    project_table.add_row("Author", _cell(cfg.get("author")))
    console.print(Panel(project_table, title="Project", border_style="blue"))

    # Display Environment section ("or {}" tolerates an explicit `env: null`)
    env = cfg.get("env") or {}
    env_table = _kv_table()
    env_table.add_row("Mode", _cell(env.get("mode"), "local"))
    if env.get("region"):
        env_table.add_row("Region", _cell(env.get("region")))
    env_table.add_row(
        "External DB Access", "Yes" if env.get("allow_external_database_access") else "No"
    )
    if env.get("ssh_key_name"):
        env_table.add_row("SSH Key", _cell(env.get("ssh_key_name")))
    console.print(Panel(env_table, title="Environment", border_style="blue"))

    # Display Systems section
    systems = cfg.get("systems") or []
    instances = env.get("instances") or {}

    # Collect all system names for IP variable validation display.
    # Unnamed systems are skipped here, matching the "unnamed" fallback below.
    system_names_upper = {
        str(s["name"]).upper() for s in systems if s.get("name") is not None
    }

    # $<SYSTEM>_PRIVATE_IP / $<SYSTEM>_PUBLIC_IP references (without the "$")
    ip_var_pattern = re.compile(r"^([A-Z_][A-Z0-9_]*)_(PRIVATE|PUBLIC)_IP$")
    ip_fields = ["host", "host_addrs", "host_external_addrs"]

    def _ip_var_is_valid(var: str) -> bool:
        """Return False only when *var* names a system that is not configured."""
        match = ip_var_pattern.match(var[1:])  # Remove $
        return not (match and match.group(1) not in system_names_upper)

    systems_content = []
    for i, system in enumerate(systems, 1):
        name = system.get("name", "unnamed")
        kind = system.get("kind", "unknown")
        version = system.get("version", "unknown")
        setup = system.get("setup") or {}
        method = setup.get("method", "default")
        node_count = setup.get("node_count", 1)

        system_lines = [
            f"[bold cyan]{i}. {_cell(name)}[/bold cyan]",
            f" Kind: {_cell(kind)}",
            f" Version: {_cell(version)}",
            f" Method: {_cell(method)}",
        ]

        # Node count
        node_desc = "single-node" if node_count == 1 else f"{node_count}-node cluster"
        system_lines.append(f" Nodes: {node_count} ({node_desc})")

        # Instance type if available
        instance_config = instances.get(name) or {}
        if instance_config.get("instance_type"):
            system_lines.append(
                f" Instance: {_cell(instance_config.get('instance_type'))}"
            )

        # IP variables: only plain string values starting with "$" are checked
        ip_vars = [
            value
            for value in (setup.get(field, "") for field in ip_fields)
            if isinstance(value, str) and value.startswith("$")
        ]

        if ip_vars:
            # Check if IP vars are valid
            all_valid = all(_ip_var_is_valid(var) for var in ip_vars)
            status = "[green]✓[/green]" if all_valid else "[red]✗[/red]"
            shown_vars = ", ".join(escape(var) for var in ip_vars)
            system_lines.append(f" IP Vars: {shown_vars} {status}")

        # Verbose mode: show all setup params
        if verbose:
            system_lines.append(" [dim]Setup:[/dim]")
            for key, value in setup.items():
                if key not in ["method", "node_count"]:
                    # Mask passwords
                    if "password" in str(key).lower():
                        value = "********"
                    system_lines.append(f" {_cell(key)}: {_cell(value, 'None')}")

        systems_content.append("\n".join(system_lines))

    console.print(
        Panel(
            "\n\n".join(systems_content),
            title=f"Systems ({len(systems)})",
            border_style="blue",
        )
    )

    # Display Workload section
    workload = cfg.get("workload") or {}
    workload_table = _kv_table()
    workload_table.add_row("Name", _cell(workload.get("name")))
    sf = workload.get("scale_factor", 1)
    workload_table.add_row("Scale Factor", f"{sf} ({sf} GB for TPC-H)")
    workload_table.add_row("Data Format", _cell(workload.get("data_format"), "csv"))
    workload_table.add_row("Runs/Query", str(workload.get("runs_per_query", 3)))
    workload_table.add_row("Warmup Runs", str(workload.get("warmup_runs", 1)))

    # Query info (ids are stringified so non-string entries cannot break join)
    queries = workload.get("queries") or {}
    include = queries.get("include") or []
    exclude = queries.get("exclude") or []
    if include:
        included = ", ".join(escape(str(q)) for q in include)
        workload_table.add_row("Queries", f"Include: {included}")
    elif exclude:
        excluded = ", ".join(escape(str(q)) for q in exclude)
        workload_table.add_row("Queries", f"All except: {excluded}")
    else:
        workload_table.add_row("Queries", "All (22 queries for TPC-H)")

    # Multiuser
    multiuser = workload.get("multiuser")
    if multiuser and multiuser.get("enabled", True):
        num_streams = multiuser.get("num_streams", 1)
        randomize = multiuser.get("randomize", False)
        random_seed = multiuser.get("random_seed", "-")
        workload_table.add_row(
            "Multiuser",
            f"{num_streams} streams, randomize: {randomize}, seed: {random_seed}",
        )

    console.print(Panel(workload_table, title="Workload", border_style="blue"))

    # Display Execution section (only shown when parallel execution is on)
    execution = cfg.get("execution") or {}
    if execution.get("parallel"):
        exec_table = _kv_table()
        exec_table.add_row("Parallel", "Yes")
        if execution.get("max_workers"):
            exec_table.add_row("Max Workers", str(execution.get("max_workers")))
        console.print(Panel(exec_table, title="Execution", border_style="blue"))

    # Display Report section
    report = cfg.get("report") or {}
    if report:
        report_table = _kv_table()
        report_table.add_row("Output", _cell(report.get("output_path")))
        report_table.add_row("Figures", _cell(report.get("figures_dir")))

        # Charts enabled: every chart except the latency CDF defaults to on
        chart_flags = [
            ("boxplots", report.get("show_boxplots", True)),
            ("latency CDF", report.get("show_latency_cdf")),
            ("bar chart", report.get("show_bar_chart", True)),
            ("heatmap", report.get("show_heatmap", True)),
        ]
        charts = [label for label, enabled in chart_flags if enabled]
        report_table.add_row("Charts", ", ".join(charts) if charts else "none")

        console.print(Panel(report_table, title="Report Settings", border_style="blue"))

    console.print("\n[green]Configuration is valid and ready to use.[/green]")
521+
522+
285523
@app.command()
286524
def infra(
287525
action: str = typer.Argument(..., help="Action: plan, apply, destroy"),
@@ -819,21 +1057,38 @@ def _probe_remote_systems(config: dict[str, Any], outdir: Path) -> bool:
8191057
outputs = result.outputs or {}
8201058

8211059
# Extract instance information from new terraform output format
822-
instances = {}
1060+
# After Terraform fix, IPs are always lists: ["ip"] for single-node, ["ip1", "ip2"] for multinode
1061+
instances_to_probe = (
1062+
[]
1063+
) # List of (system_name, node_idx, public_ip, private_ip)
8231064

824-
# New format: system_public_ips = {"exasol": "ip", "clickhouse": "ip"}
1065+
# New format: system_public_ips = {"exasol": ["ip"], "clickhouse": ["ip1", "ip2"]}
8251066
# Note: _parse_terraform_outputs already extracted the "value" field
8261067
if "system_public_ips" in outputs:
8271068
public_ips = outputs["system_public_ips"] or {}
8281069
private_ips = outputs.get("system_private_ips", {}) or {}
8291070

830-
for system_name, public_ip in public_ips.items():
831-
instances[system_name] = {
832-
"public_ip": public_ip,
833-
"private_ip": private_ips.get(system_name),
834-
}
1071+
for system_name, public_ip_list in public_ips.items():
1072+
private_ip_list = private_ips.get(system_name)
1073+
1074+
# Handle both list and single IP (backward compatibility)
1075+
if isinstance(public_ip_list, list):
1076+
for idx, public_ip in enumerate(public_ip_list):
1077+
private_ip = (
1078+
private_ip_list[idx]
1079+
if isinstance(private_ip_list, list)
1080+
else private_ip_list
1081+
)
1082+
instances_to_probe.append(
1083+
(system_name, idx, public_ip, private_ip)
1084+
)
1085+
else:
1086+
# Backward compatibility: single IP (not a list)
1087+
instances_to_probe.append(
1088+
(system_name, 0, public_ip_list, private_ip_list)
1089+
)
8351090

836-
if not instances:
1091+
if not instances_to_probe:
8371092
console.print("[yellow]No instances found in terraform outputs[/yellow]")
8381093
return False
8391094

@@ -848,19 +1103,31 @@ def _probe_remote_systems(config: dict[str, Any], outdir: Path) -> bool:
8481103
ssh_key_path = os.path.expanduser(ssh_key_path)
8491104

8501105
success_count = 0
851-
total_instances = len(instances)
852-
853-
for system_name, instance_info in instances.items():
854-
public_ip = instance_info["public_ip"]
855-
console.print(f"[blue]Probing {system_name} ({public_ip})...[/blue]")
1106+
total_instances = len(instances_to_probe)
1107+
1108+
for system_name, node_idx, public_ip, private_ip in instances_to_probe:
1109+
# Show node index for multinode systems
1110+
node_label = (
1111+
f"-node{node_idx}"
1112+
if any(
1113+
s == system_name and i != node_idx
1114+
for s, i, _, _ in instances_to_probe
1115+
)
1116+
else ""
1117+
)
1118+
console.print(
1119+
f"[blue]Probing {system_name}{node_label} ([{public_ip}])...[/blue]"
1120+
)
8561121

8571122
if _probe_single_remote_system(
858-
system_name, public_ip, ssh_key_path, ssh_user, outdir
1123+
f"{system_name}{node_label}", public_ip, ssh_key_path, ssh_user, outdir
8591124
):
860-
console.print(f"[green]✓ {system_name} probe completed[/green]")
1125+
console.print(
1126+
f"[green]✓ {system_name}{node_label} probe completed[/green]"
1127+
)
8611128
success_count += 1
8621129
else:
863-
console.print(f"[red]✗ {system_name} probe failed[/red]")
1130+
console.print(f"[red]✗ {system_name}{node_label} probe failed[/red]")
8641131

8651132
console.print(
8661133
f"[blue]Completed {success_count}/{total_instances} system probes[/blue]"

0 commit comments

Comments
 (0)