diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 398ff08a..00000000 --- a/.coveragerc +++ /dev/null @@ -1,2 +0,0 @@ -[run] -branch = True diff --git a/changelog.d/.gitkeep b/changelog.d/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/changelog.d/20250611_120003_jb_PL_133453_rbd_whole_object.rst b/changelog.d/20250611_120003_jb_PL_133453_rbd_whole_object.rst new file mode 100644 index 00000000..e09f7ac4 --- /dev/null +++ b/changelog.d/20250611_120003_jb_PL_133453_rbd_whole_object.rst @@ -0,0 +1,3 @@ +.. A new scriv changelog fragment. + +- Fix whole object detection (PL-133453) diff --git a/poetry.lock b/poetry.lock index 992f365d..ff3fc0c4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1558,21 +1558,21 @@ pytest = ">=2.2" [[package]] name = "pytest-cov" -version = "4.1.0" +version = "6.1.1" description = "Pytest plugin for measuring coverage." optional = false -python-versions = ">=3.7" +python-versions = ">=3.9" files = [ - {file = "pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6"}, - {file = "pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a"}, + {file = "pytest_cov-6.1.1-py3-none-any.whl", hash = "sha256:bddf29ed2d0ab6f4df17b4c55b0a657287db8684af9c42ea546b21b1041b3dde"}, + {file = "pytest_cov-6.1.1.tar.gz", hash = "sha256:46935f7aaefba760e716c2ebfbe1c216240b9592966e7da99ea8292d4d3e2a0a"}, ] [package.dependencies] -coverage = {version = ">=5.2.1", extras = ["toml"]} +coverage = {version = ">=7.5", extras = ["toml"]} pytest = ">=4.6" [package.extras] -testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] [[package]] name = "pytest-flake8" @@ -2093,4 +2093,4 @@ test = ["zope.testing", "zope.testrunner"] [metadata] lock-version = "2.0" python-versions = "~3.12" -content-hash = "2f80dba3d3b4fea6bed7f68b24aa7181ba3f8bec4ca88490e4056ea2f1bae502" +content-hash = "73bcebf14e85baeaa6d7095055b58fabf870d60245546ec1a4034020e78966d6" diff --git a/pyproject.toml b/pyproject.toml index e3f21f8e..2ad849a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,17 @@ version = "literal: pyproject.toml: tool.poetry.version" entry_title_template = "{% if version %}{{ version }} {% endif %}({{ date.strftime('%Y-%m-%d') }})" categories = "" + +[tool.pytest.ini_options] +addopts = "--timeout=30 --tb=native --cov=src --cov-report=html --cov-config=pyproject.toml src -r w" +markers = "slow: This is a non-unit test and thus is not run by default. Use ``-m slow`` to run these, or ``-m 1`` to run all tests." +log_level = "NOTSET" +asyncio_mode = "auto" + +[tool.coverage.run] +branch = true +omit = [ "*/tests/*" ] + [tool.poetry] name = "backy" version = "2.6.0.dev0" @@ -64,7 +75,7 @@ pytest = "^7.4.0" pytest-aiohttp = "^1.0.4" pytest-asyncio = "^0.23.3" pytest-cache = "^1.0" -pytest-cov = "^4.1.0" +pytest-cov = "^6.1.0" pytest-flake8 = "^1.1.1" pytest-timeout = "^2.1.0" scriv = "^1.3.1" diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index d3c553f2..00000000 --- a/pytest.ini +++ /dev/null @@ -1,9 +0,0 @@ -[pytest] -addopts = --timeout=30 --tb=native --cov=src --cov-report=html src -r w -markers = slow: This is a non-unit test and thus is not run by default. Use ``-m slow`` to run these, or ``-m 1`` to run all tests. -log_level = NOTSET -asyncio_mode = auto - - -filterwarnings = - ignore::DeprecationWarning:telnetlib3.*: diff --git a/src/backy/rbd/rbd.py b/src/backy/rbd/rbd.py index b5575bca..469b2739 100644 --- a/src/backy/rbd/rbd.py +++ b/src/backy/rbd/rbd.py @@ -1,4 +1,5 @@ import contextlib +import functools import json import struct import subprocess @@ -8,23 +9,7 @@ from structlog.stdlib import BoundLogger from backy.ext_deps import RBD -from backy.utils import CHUNK_SIZE, punch_hole - - -def detect_whole_object_support(): - result = subprocess.run( - ["rbd", "help", "export-diff"], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - check=True, - ) - return "--whole-object" in result.stdout.decode("ascii") - - -try: - CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF = detect_whole_object_support() -except Exception: - CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF = False +from backy.utils import CHUNK_SIZE class RBDClient(object): @@ -80,6 +65,10 @@ def _rbd_stream(self, cmd: list[str]) -> Iterator[IO[bytes]]: if rc: raise subprocess.CalledProcessError(rc, proc.args) + @functools.cached_property + def _supports_whole_object(self): + return "--whole-object" in self._rbd(["help", "export-diff"]) + def exists(self, snapspec: str): try: return self._rbd(["info", snapspec], format="json") @@ -148,7 +137,7 @@ def snap_rm(self, image): @contextlib.contextmanager def export_diff(self, new: str, old: str) -> Iterator["RBDDiffV1"]: - if CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF: + if self._supports_whole_object: EXPORT_WHOLE_OBJECT = ["--whole-object"] else: EXPORT_WHOLE_OBJECT = [] @@ -316,7 +305,7 @@ def integrate(self, target, snapshot_from, snapshot_to): for record in self.read_data(): target.seek(record.start) if isinstance(record, Zero): - punch_hole(target, target.tell(), record.length) + target.write(b"\x00" * record.length) elif isinstance(record, Data): for chunk in record.stream(): target.write(chunk) diff --git a/src/backy/rbd/tests/conftest.py b/src/backy/rbd/tests/conftest.py index e5f93e35..de9ef6e0 100644 --- a/src/backy/rbd/tests/conftest.py +++ b/src/backy/rbd/tests/conftest.py @@ -4,7 +4,6 @@ import pytest -import backy.rbd.rbd from backy.rbd import RBDClient @@ -242,11 +241,8 @@ def unmap(self, device): @pytest.fixture(params=[CephJewelCLI, CephLuminousCLI, CephNautilusCLI]) def rbdclient(request, tmp_path, monkeypatch, log): - monkeypatch.setattr( - backy.rbd.rbd, "CEPH_RBD_SUPPORTS_WHOLE_OBJECT_DIFF", True - ) - client = RBDClient(log) + client._supports_whole_object = True client._ceph_cli = request.param(tmp_path) return client diff --git a/src/backy/rbd/tests/test_ceph.py b/src/backy/rbd/tests/test_ceph.py index d7592c48..e13cba39 100644 --- a/src/backy/rbd/tests/test_ceph.py +++ b/src/backy/rbd/tests/test_ceph.py @@ -15,7 +15,7 @@ from backy.rbd.rbd import RBDDiffV1 from backy.revision import Revision -BLOCK = backy.utils.PUNCH_SIZE +BLOCK = backy.utils.CHUNK_SIZE with open(Path(__file__).parent / "nodata.rbddiff", "rb") as f: SAMPLE_RBDDIFF = f.read() diff --git a/src/backy/tests/test_utils.py b/src/backy/tests/test_utils.py index 77d31622..1f3e2cff 100644 --- a/src/backy/tests/test_utils.py +++ b/src/backy/tests/test_utils.py @@ -1,7 +1,6 @@ import asyncio import datetime import os -import sys from zoneinfo import ZoneInfo import pytest @@ -13,11 +12,8 @@ SafeFile, TimeOut, TimeOutError, - _fake_fallocate, - copy_overwrite, files_are_equal, files_are_roughly_equal, - punch_hole, ) @@ -324,33 +320,6 @@ def test_roughly_compare_files_timeout(tmp_path): assert not files_are_roughly_equal(open("a", "rb"), open("b", "rb")) -def test_copy_overwrite_correctly_makes_sparse_file(tmp_path): - # Create a test file that contains random data, then we insert - # blocks of zeroes. copy_overwrite will not break them and will make the - # file sparse. - source_name = str(tmp_path / "input") - with open(source_name, "wb") as f: - f.write(b"12345" * 1024 * 100) - f.seek(1024 * 16) - f.write(b"\x00" * 1024 * 16) - with open(source_name, "rb") as source: - target_name = str(tmp_path / "output") - with open(target_name, "wb") as target: - # To actually ensure that we punch holes and truncate, lets - # fill the file with a predictable pattern that is non-zero and - # longer than the source. - target.write(b"1" * 1024 * 150) - with open(target_name, "r+b") as target: - copy_overwrite(source, target) - with open(source_name, "rb") as source_current: - with open(target_name, "rb") as target_current: - assert source_current.read() == target_current.read() - if sys.platform == "linux2": - assert os.stat(source_name).st_blocks > os.stat(target_name).st_blocks - else: - assert os.stat(source_name).st_blocks >= os.stat(target_name).st_blocks - - def test_unmocked_now_returns_time_time_float(): before = datetime.datetime.now(ZoneInfo("UTC")) now = backy.utils.now() @@ -358,46 +327,6 @@ def test_unmocked_now_returns_time_time_float(): assert before <= now <= after -@pytest.fixture -def testfile(tmp_path): - fn = str(tmp_path / "myfile") - with open(fn, "wb") as f: - f.write(b"\xde\xad\xbe\xef" * 32) - return fn - - -def test_punch_hole(testfile): - with open(testfile, "r+b") as f: - f.seek(0) - punch_hole(f, 2, 4) - f.seek(0) - assert f.read(8) == b"\xde\xad\x00\x00\x00\x00\xbe\xef" - - -def test_punch_hole_needs_length(testfile): - with pytest.raises(IOError): - with open(testfile, "r+b") as f: - punch_hole(f, 10, 0) - - -def test_punch_hole_needs_writable_file(testfile): - with pytest.raises(OSError): - with open(testfile, "rb") as f: - punch_hole(f, 0, 1) - - -def test_punch_hole_needs_nonnegative_offset(testfile): - with pytest.raises(OSError): - with open(testfile, "r+b") as f: - punch_hole(f, -1, 1) - - -def test_fake_fallocate_only_punches_holes(testfile): - with pytest.raises(NotImplementedError): - with open(testfile, "r+b") as f: - _fake_fallocate(f, 0, 0, 10) - - def test_timeout(capsys): timeout = TimeOut(0.05, 0.01) while timeout.tick(): diff --git a/src/backy/utils.py b/src/backy/utils.py index c3a479ff..c94e7553 100644 --- a/src/backy/utils.py +++ b/src/backy/utils.py @@ -1,8 +1,6 @@ import asyncio import base64 import contextlib -import ctypes -import ctypes.util import datetime import hashlib import mmap @@ -39,8 +37,6 @@ GiB = MiB * 1024 TiB = GiB * 1024 -# 64 kiB blocks are a good compromise between sparsiness and fragmentation. -PUNCH_SIZE = 4 * MiB CHUNK_SIZE = 4 * MiB END = object() @@ -243,58 +239,6 @@ def zeroes(size): z.close() -@report_status -def copy_overwrite(source: IO, target: IO): - """Efficiently overwrites `target` with a copy of `source`. - - Identical regions won't be touched so this is COW-friendly. Assumes - that `target` is a shallow copy of a previous version of `source`. - - Assumes that `target` exists and is open in read-write mode. - """ - punch_size = PUNCH_SIZE - source.seek(0, 2) - size = source.tell() - if size > 500 * GiB: - punch_size *= 10 - source.seek(0) - yield size / punch_size - try: - posix_fadvise(source.fileno(), 0, 0, os.POSIX_FADV_SEQUENTIAL) # type: ignore - posix_fadvise(target.fileno(), 0, 0, os.POSIX_FADV_SEQUENTIAL) # type: ignore - except Exception: - pass - with zeroes(punch_size) as z: - while True: - startpos = source.tell() - chunk = source.read(punch_size) - if not chunk: - break - target.seek(startpos) - compare = target.read(len(chunk)) - if not compare or chunk != compare: - if chunk == z: - target.flush() - punch_hole(target, startpos, len(chunk)) - else: - target.seek(startpos) - target.write(chunk) - yield - - size = source.tell() - target.flush() - try: - target.truncate(size) - except OSError: - pass # truncate may not be supported, i.e. on special files - try: - os.fsync(target) - except OSError: - pass # fsync may not be supported, i.e. on special files - yield END - yield size - - @report_status def copy(source: IO, target: IO): """Efficiently overwrites `target` with a copy of `source`. @@ -394,7 +338,7 @@ def files_are_roughly_equal( size = a.seek(0, os.SEEK_END) if size != (size_b := b.seek(0, os.SEEK_END)): log.error( - "files-roughly-equal-size-mismatch", + "files-roughly-equal-size-diff", size_a=size, size_b=size_b, ) @@ -594,69 +538,6 @@ def tick(self): return True -# Adapted from -# https://github.com/trbs/fallocate/issues/4 - - -log = structlog.stdlib.get_logger() - -FALLOC_FL_KEEP_SIZE = 0x01 -FALLOC_FL_PUNCH_HOLE = 0x02 - - -def _fake_fallocate(fd, mode, offset, len_): - log.debug("fallocate-non-hole-punching") - if len_ <= 0: - raise IOError("fallocate: length must be positive") - if mode & FALLOC_FL_PUNCH_HOLE: - old = fd.tell() - fd.seek(offset) - fd.write(b"\x00" * len_) - fd.seek(old) - else: - raise NotImplementedError( - "fake fallocate() supports only hole punching" - ) - - -def _make_fallocate(): - libc_name = ctypes.util.find_library("c") - libc = ctypes.CDLL(libc_name, use_errno=True) - _fallocate = libc.fallocate - c_off_t = ctypes.c_size_t - _fallocate.restype = ctypes.c_int - _fallocate.argtypes = [ctypes.c_int, ctypes.c_int, c_off_t, c_off_t] - - def fallocate(fd, mode, offset, len_): - if len_ <= 0: - raise IOError("fallocate: length must be positive") - res = _fallocate(fd.fileno(), mode, offset, len_) - if res != 0: - errno = ctypes.get_errno() - raise OSError(errno, "fallocate: " + os.strerror(errno)) - - return fallocate - - -try: - fallocate = _make_fallocate() -except AttributeError: # pragma: no cover - fallocate = _fake_fallocate - - -def punch_hole(f, offset, len_): - """Ensure that the specified byte range is zeroed. - - Depending on the availability of fallocate(), this is either - delegated to the kernel or done manualy. - """ - params = (f, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, offset, len_) - try: - fallocate(*params) - except OSError: - _fake_fallocate(*params) - - class BackyJSONEncoder(JSONEncoder): def default(self, o: Any) -> Any: if hasattr(o, "to_dict"):