@@ -6,7 +6,10 @@
 
 import pytest
 from fixtures.log_helper import log
-from fixtures.neon_fixtures import NeonEnvBuilder, generate_uploads_and_deletions
+from fixtures.neon_fixtures import (
+    NeonEnvBuilder,
+    generate_uploads_and_deletions,
+)
 from fixtures.pageserver.http import PageserverApiException
 from fixtures.utils import wait_until
 from fixtures.workload import Workload
@@ -142,6 +145,10 @@ def test_sharding_compaction(
         "image_layer_creation_check_threshold": 0,
     }
 
+    # Disable compression, as we can't estimate the size of layers with compression enabled
+    # TODO: implement eager layer cutting during compaction
+    neon_env_builder.pageserver_config_override = "image_compression='disabled'"
+
     neon_env_builder.num_pageservers = 1 if shard_count is None else shard_count
     env = neon_env_builder.init_start(
         initial_tenant_conf=TENANT_CONF,
@@ -320,3 +327,87 @@ def assert_broken():
         or 0
     ) == 0
     assert not env.pageserver.log_contains(".*Circuit breaker failure ended.*")
+
+
+@pytest.mark.parametrize("enabled", [True, False])
+def test_image_layer_compression(neon_env_builder: NeonEnvBuilder, enabled: bool):
+    tenant_conf = {
+        # small checkpointing and compaction targets to ensure we generate many upload operations
+        "checkpoint_distance": f"{128 * 1024}",
+        "compaction_threshold": "1",
+        "compaction_target_size": f"{128 * 1024}",
+        # no PITR horizon, we specify the horizon when we request on-demand GC
+        "pitr_interval": "0s",
+        # disable background compaction and GC. We invoke it manually when we want it to happen.
+        "gc_period": "0s",
+        "compaction_period": "0s",
+        # create image layers as eagerly as possible
+        "image_creation_threshold": "1",
+        "image_layer_creation_check_threshold": "0",
+    }
+
+    # Explicitly enable/disable compression, rather than relying on the default
+    if enabled:
+        neon_env_builder.pageserver_config_override = "image_compression='zstd'"
+    else:
+        neon_env_builder.pageserver_config_override = "image_compression='disabled'"
+
+    env = neon_env_builder.init_start(initial_tenant_conf=tenant_conf)
+
+    tenant_id = env.initial_tenant
+    timeline_id = env.initial_timeline
+
+    pageserver = env.pageserver
+    ps_http = env.pageserver.http_client()
+    with env.endpoints.create_start(
+        "main", tenant_id=tenant_id, pageserver_id=pageserver.id
+    ) as endpoint:
+        endpoint.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)")
+        # Generate around 400k worth of easily compressible data to store
+        for v in range(100):
+            endpoint.safe_psql(
+                f"INSERT INTO foo (id, val) VALUES ({v}, repeat('abcde{v:0>3}', 500))"
+            )
+    # run compaction to create image layers
+    ps_http.timeline_checkpoint(tenant_id, timeline_id, wait_until_uploaded=True)
+
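+    # Check the layer map: compaction should have produced image layers as well as deltas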
+    layer_map = ps_http.layer_map_info(tenant_id, timeline_id)
+    image_layer_count = 0
+    delta_layer_count = 0
+    for layer in layer_map.historic_layers:
+        if layer.kind == "Image":
+            image_layer_count += 1
+        elif layer.kind == "Delta":
+            delta_layer_count += 1
+    assert image_layer_count > 0
+    assert delta_layer_count > 0
+
+    log.info(f"images: {image_layer_count}, deltas: {delta_layer_count}")
+
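+    # The pageserver tracks bytes into/out of image compression as metric counters;
+    # comparing them tells us whether (and how well) compression actually ran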
+    bytes_in = pageserver.http_client().get_metric_value(
+        "pageserver_compression_image_in_bytes_total"
+    )
+    bytes_out = pageserver.http_client().get_metric_value(
+        "pageserver_compression_image_out_bytes_total"
+    )
+    assert bytes_in is not None
+    assert bytes_out is not None
+    log.info(f"Compression ratio: {bytes_out/bytes_in} ({bytes_in} in, {bytes_out} out)")
+
+    if enabled:
+        # We are writing highly compressible repetitive plain text, so expect excellent compression
+        EXPECT_RATIO = 0.2
+        assert bytes_out / bytes_in < EXPECT_RATIO
+    else:
+        # Nothing should be compressed if we disabled it.
+        assert bytes_out >= bytes_in
+
+    # Destroy the endpoint and create a new one to reset the caches
+    with env.endpoints.create_start(
+        "main", tenant_id=tenant_id, pageserver_id=pageserver.id
+    ) as endpoint:
+        for v in range(100):
+            res = endpoint.safe_psql(
+                f"SELECT count(*) FROM foo WHERE id={v} and val=repeat('abcde{v:0>3}', 500)"
+            )
+            assert res[0][0] == 1