Skip to content

Commit c96e801

Browse files
arpad-mjcspkoivunej
authored
Enable zstd in tests (#8368)
Successor of #8288, just enable zstd in tests. Also adds a test that creates easily compressible data. Part of #5431 --------- Co-authored-by: John Spray <john@neon.tech> Co-authored-by: Joonas Koivunen <joonas@neon.tech>
1 parent 5a77276 commit c96e801

File tree

5 files changed

+122
-1
lines changed

5 files changed

+122
-1
lines changed

pageserver/src/metrics.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,22 @@ pub(crate) static CIRCUIT_BREAKERS_UNBROKEN: Lazy<IntCounter> = Lazy::new(|| {
610610
.expect("failed to define a metric")
611611
});
612612

613+
/// Running total of uncompressed image bytes handed to image layer writers.
///
/// The image layer writer accumulates `img.len()` for every image it is given
/// and adds that tally to this counter right before the layer index is written
/// out. Compare against `COMPRESSION_IMAGE_OUTPUT_BYTES` to derive the
/// compression ratio.
pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES: Lazy<IntCounter> = Lazy::new(|| {
614+
register_int_counter!(
615+
"pageserver_compression_image_in_bytes_total",
616+
"Size of uncompressed data written into image layers"
617+
)
618+
.expect("failed to define a metric")
619+
});
620+
621+
/// Running total of image bytes as actually stored in image layers, i.e. after
/// any compression was applied.
///
/// The image layer writer increments this by the blob writer's size minus one
/// `PAGE_SZ` (the header page) right before the layer index is written out.
/// Compare against `COMPRESSION_IMAGE_INPUT_BYTES` to derive the compression
/// ratio.
pub(crate) static COMPRESSION_IMAGE_OUTPUT_BYTES: Lazy<IntCounter> = Lazy::new(|| {
622+
register_int_counter!(
623+
"pageserver_compression_image_out_bytes_total",
624+
"Size of compressed image layer written"
625+
)
626+
.expect("failed to define a metric")
627+
});
628+
613629
pub(crate) mod initial_logical_size {
614630
use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
615631
use once_cell::sync::Lazy;

pageserver/src/tenant/storage_layer/image_layer.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -738,6 +738,9 @@ struct ImageLayerWriterInner {
738738
key_range: Range<Key>,
739739
lsn: Lsn,
740740

741+
// Total uncompressed bytes passed into put_image
742+
uncompressed_bytes: u64,
743+
741744
blob_writer: BlobWriter<false>,
742745
tree: DiskBtreeBuilder<BlockBuf, KEY_SIZE>,
743746
}
@@ -793,6 +796,7 @@ impl ImageLayerWriterInner {
793796
lsn,
794797
tree: tree_builder,
795798
blob_writer,
799+
uncompressed_bytes: 0,
796800
};
797801

798802
Ok(writer)
@@ -811,6 +815,7 @@ impl ImageLayerWriterInner {
811815
) -> anyhow::Result<()> {
812816
ensure!(self.key_range.contains(&key));
813817
let compression = self.conf.image_compression;
818+
self.uncompressed_bytes += img.len() as u64;
814819
let (_img, res) = self
815820
.blob_writer
816821
.write_blob_maybe_compressed(img, ctx, compression)
@@ -836,6 +841,11 @@ impl ImageLayerWriterInner {
836841
let index_start_blk =
837842
((self.blob_writer.size() + PAGE_SZ as u64 - 1) / PAGE_SZ as u64) as u32;
838843

844+
// Calculate compression ratio
845+
let compressed_size = self.blob_writer.size() - PAGE_SZ as u64; // Subtract PAGE_SZ for header
846+
crate::metrics::COMPRESSION_IMAGE_INPUT_BYTES.inc_by(self.uncompressed_bytes);
847+
crate::metrics::COMPRESSION_IMAGE_OUTPUT_BYTES.inc_by(compressed_size);
848+
839849
let mut file = self.blob_writer.into_inner();
840850

841851
// Write out the index

test_runner/fixtures/neon_fixtures.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1158,6 +1158,7 @@ def __init__(self, config: NeonEnvBuilder):
11581158
"listen_http_addr": f"localhost:{pageserver_port.http}",
11591159
"pg_auth_type": pg_auth_type,
11601160
"http_auth_type": http_auth_type,
1161+
"image_compression": "zstd",
11611162
}
11621163
if self.pageserver_virtual_file_io_engine is not None:
11631164
ps_cfg["virtual_file_io_engine"] = self.pageserver_virtual_file_io_engine

test_runner/regress/test_compaction.py

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66

77
import pytest
88
from fixtures.log_helper import log
9-
from fixtures.neon_fixtures import NeonEnvBuilder, generate_uploads_and_deletions
9+
from fixtures.neon_fixtures import (
10+
NeonEnvBuilder,
11+
generate_uploads_and_deletions,
12+
)
1013
from fixtures.pageserver.http import PageserverApiException
1114
from fixtures.utils import wait_until
1215
from fixtures.workload import Workload
@@ -142,6 +145,10 @@ def test_sharding_compaction(
142145
"image_layer_creation_check_threshold": 0,
143146
}
144147

148+
# Disable compression, as we can't estimate the size of layers with compression enabled
149+
# TODO: implement eager layer cutting during compaction
150+
neon_env_builder.pageserver_config_override = "image_compression='disabled'"
151+
145152
neon_env_builder.num_pageservers = 1 if shard_count is None else shard_count
146153
env = neon_env_builder.init_start(
147154
initial_tenant_conf=TENANT_CONF,
@@ -320,3 +327,87 @@ def assert_broken():
320327
or 0
321328
) == 0
322329
assert not env.pageserver.log_contains(".*Circuit breaker failure ended.*")
330+
331+
332+
@pytest.mark.parametrize("enabled", [True, False])
def test_image_layer_compression(neon_env_builder: NeonEnvBuilder, enabled: bool):
    """
    Check that the pageserver's image layer compression setting takes effect.

    Writes highly repetitive rows, forces a checkpoint so that image layers are
    created, and then compares the pageserver's compression input/output byte
    counters: with zstd enabled the output must be far smaller than the input,
    with compression disabled it must not be smaller at all. Finally the rows
    are read back through a fresh endpoint to confirm they are still intact.

    :param neon_env_builder: environment builder fixture; its pageserver config
        override is set here to pin the compression mode.
    :param enabled: whether image compression is switched to 'zstd' (True) or
        'disabled' (False).
    """
    tenant_conf = {
        # small checkpointing and compaction targets to ensure we generate many upload operations
        "checkpoint_distance": f"{128 * 1024}",
        "compaction_threshold": "1",
        "compaction_target_size": f"{128 * 1024}",
        # no PITR horizon, we specify the horizon when we request on-demand GC
        "pitr_interval": "0s",
        # disable background compaction and GC. We invoke it manually when we want it to happen.
        "gc_period": "0s",
        "compaction_period": "0s",
        # create image layers as eagerly as possible
        "image_creation_threshold": "1",
        "image_layer_creation_check_threshold": "0",
    }

    # Explicitly enable/disable compression, rather than using default
    if enabled:
        neon_env_builder.pageserver_config_override = "image_compression='zstd'"
    else:
        neon_env_builder.pageserver_config_override = "image_compression='disabled'"

    env = neon_env_builder.init_start(initial_tenant_conf=tenant_conf)

    tenant_id = env.initial_tenant
    timeline_id = env.initial_timeline

    pageserver = env.pageserver
    ps_http = env.pageserver.http_client()
    with env.endpoints.create_start(
        "main", tenant_id=tenant_id, pageserver_id=pageserver.id
    ) as endpoint:
        endpoint.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)")
        # Generate around 800k worth of easily compressible data to store
        for v in range(100):
            endpoint.safe_psql(
                f"INSERT INTO foo (id, val) VALUES ({v}, repeat('abcde{v:0>3}', 500))"
            )
    # run compaction to create image layers
    ps_http.timeline_checkpoint(tenant_id, timeline_id, wait_until_uploaded=True)

    # Both layer kinds must exist, otherwise the compression counters below
    # would not reflect any image layer writes.
    layer_map = ps_http.layer_map_info(tenant_id, timeline_id)
    image_layer_count = sum(1 for layer in layer_map.historic_layers if layer.kind == "Image")
    delta_layer_count = sum(1 for layer in layer_map.historic_layers if layer.kind == "Delta")
    assert image_layer_count > 0
    assert delta_layer_count > 0

    log.info(f"images: {image_layer_count}, deltas: {delta_layer_count}")

    # Reuse the HTTP client created above instead of constructing new ones.
    bytes_in = ps_http.get_metric_value("pageserver_compression_image_in_bytes_total")
    bytes_out = ps_http.get_metric_value("pageserver_compression_image_out_bytes_total")
    assert bytes_in is not None
    assert bytes_out is not None
    # Report the input counter as "in" and the output counter as "out".
    log.info(f"Compression ratio: {bytes_out/bytes_in} ({bytes_in} in, {bytes_out} out)")

    if enabled:
        # We are writing highly compressible repetitive plain text, expect excellent compression
        EXPECT_RATIO = 0.2
        assert bytes_out / bytes_in < EXPECT_RATIO
    else:
        # Nothing should be compressed if we disabled it.
        assert bytes_out >= bytes_in

    # Destroy the endpoint and create a new one to reset the caches
    with env.endpoints.create_start(
        "main", tenant_id=tenant_id, pageserver_id=pageserver.id
    ) as endpoint:
        # Every row must still match exactly what was inserted.
        for v in range(100):
            res = endpoint.safe_psql(
                f"SELECT count(*) FROM foo WHERE id={v} and val=repeat('abcde{v:0>3}', 500)"
            )
            assert res[0][0] == 1

test_runner/regress/test_disk_usage_eviction.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,9 @@ def _eviction_env(
230230
neon_env_builder.num_pageservers = num_pageservers
231231
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
232232

233+
# Disable compression support for EvictionEnv to get larger layer sizes
234+
neon_env_builder.pageserver_config_override = "image_compression='disabled'"
235+
233236
# initial tenant will not be present on this pageserver
234237
env = neon_env_builder.init_configs()
235238
env.start()

0 commit comments

Comments
 (0)