Skip to content

Commit

Permalink
lint ufmt fix
Browse files Browse the repository at this point in the history
  • Loading branch information
amirakb89 committed Jan 16, 2025
1 parent 5bb4730 commit e2808f3
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 17 deletions.
3 changes: 2 additions & 1 deletion fbgemm_gpu/bench/bench_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,8 @@ def benchmark_requests(
# Run at least one warmup iteration to avoid the long cudaLaunchKernel time
# for the first kernel. warmup_ms is prioritized over num_warmups, so the
# extra iteration is only added to num_warmups when warmup_ms is not set.
if (warmup_ms is None):

if warmup_ms is None:
num_warmups = num_warmups + 1 if num_warmups >= 0 else 1

# warm-up the GPU before profiling
Expand Down
33 changes: 17 additions & 16 deletions fbgemm_gpu/bench/split_table_batched_embeddings_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,7 @@
logging.basicConfig(level=logging.DEBUG)


def kineto_trace_profiler(
p: profile,
trace_info: tuple[str, str, str, str]
) -> float:
def kineto_trace_profiler(p: profile, trace_info: tuple[str, str, str, str]) -> float:
phase, trace_url, tbe_type, kern_name = trace_info
p.export_chrome_trace(
trace_url.format(tbe_type=tbe_type, phase=phase, ospid=os.getpid())
Expand Down Expand Up @@ -1169,13 +1166,15 @@ def nbit_cpu( # noqa C901
default="{tbe_type}_tbe_{phase}_trace_{ospid}.json",
)
@click.option(
"--warmup-runs", default=2,
help="Number of warmup runs. Ignored if --warmup-ms is set.")
"--warmup-runs",
default=2,
help="Number of warmup runs. Ignored if --warmup-ms is set.",
)
@click.option(
"--warmup-ms",
type=int,
default=None,
help="Warmup duration in milliseconds. Disables the --run-nums option."
help="Warmup duration in milliseconds. Disables the --run-nums option.",
)
def nbit_device( # noqa C901
alpha: float,
Expand Down Expand Up @@ -1393,7 +1392,7 @@ def context_factory(on_trace_ready: Callable[[profile], None]):
indices.int(),
offsets.int(),
per_sample_weights,
)
),
)

with context_factory(
Expand All @@ -1411,7 +1410,7 @@ def context_factory(on_trace_ready: Callable[[profile], None]):
)

if export_trace:
kernel_time = time_dict['kernel_time']
kernel_time = time_dict["kernel_time"]
bandwidth = read_write_bytes / kernel_time / 1.0e3

logging.info(
Expand Down Expand Up @@ -1536,13 +1535,15 @@ def context_factory(on_trace_ready: Callable[[profile], None]):
default="{tbe_type}_tbe_spec_{phase}_trace_{ospid}.json",
)
@click.option(
"--warmup-runs", default=2,
help="Number of warmup runs. Ignored if --warmup-ms is set.")
"--warmup-runs",
default=2,
help="Number of warmup runs. Ignored if --warmup-ms is set.",
)
@click.option(
"--warmup-ms",
type=int,
default=None,
help="Warmup duration in milliseconds. Disables the --run-nums option."
help="Warmup duration in milliseconds. Disables the --run-nums option.",
)
def nbit_device_with_spec( # noqa C901
alpha: float,
Expand Down Expand Up @@ -1760,11 +1761,11 @@ def nbit_device_with_spec( # noqa C901
per_sample_weights,
),
check_median=check_median,
warmup_ms=warmup_ms
warmup_ms=warmup_ms,
)

# copy the request of last iteration for kineto profile benchmark
if (i == runs_of_iters - 1):
if i == runs_of_iters - 1:
kineto_request = requests

# free up memory
Expand Down Expand Up @@ -1815,7 +1816,7 @@ def context_factory(on_trace_ready: Callable[[profile], None]):
indices.int(),
offsets.int(),
per_sample_weights,
)
),
)

with context_factory(
Expand All @@ -1833,7 +1834,7 @@ def context_factory(on_trace_ready: Callable[[profile], None]):
)

if export_trace:
kernel_time = time_dict['kernel_time']
kernel_time = time_dict["kernel_time"]
bandwidth = read_write_bytes / kernel_time / 1.0e3

logging.info(
Expand Down

0 comments on commit e2808f3

Please sign in to comment.