diff --git a/src/borg/archive.py b/src/borg/archive.py index 79ff01d967..11deb8bd8c 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -2,6 +2,7 @@ import errno import json import os +import posixpath import stat import sys import time @@ -1243,8 +1244,8 @@ def __init__( @contextmanager def create_helper(self, path, st, status=None, hardlinkable=True, strip_prefix=None): if strip_prefix is not None: - assert not path.endswith(os.sep) - if strip_prefix.startswith(path + os.sep): + assert not path.endswith("/") + if strip_prefix.startswith(path + "/"): # still on a directory level that shall be stripped - do not create an item for this! yield None, "x", False, None return @@ -1547,7 +1548,7 @@ def s_to_ns(s): # if the tar has names starting with "./", normalize them like borg create also does. # ./dir/file must become dir/file in the borg archive. - normalized_path = os.path.normpath(tarinfo.name) + normalized_path = posixpath.normpath(tarinfo.name) item = Item( path=make_path_safe(normalized_path), mode=tarinfo.mode | type, @@ -1608,7 +1609,7 @@ def process_symlink(self, *, tarinfo, status, type): def process_hardlink(self, *, tarinfo, status, type): with self.create_helper(tarinfo, status, type) as (item, status): # create a not hardlinked borg item, reusing the chunks, see HardLinkManager.__doc__ - normalized_path = os.path.normpath(tarinfo.linkname) + normalized_path = posixpath.normpath(tarinfo.linkname) safe_path = make_path_safe(normalized_path) chunks = self.hlm.retrieve(safe_path) if chunks is not None: diff --git a/src/borg/archiver/create_cmd.py b/src/borg/archiver/create_cmd.py index c5616f04e8..cbbdefc3cc 100644 --- a/src/borg/archiver/create_cmd.py +++ b/src/borg/archiver/create_cmd.py @@ -3,6 +3,7 @@ import argparse import logging import os +import posixpath import stat import subprocess import time @@ -16,11 +17,11 @@ from ..cache import Cache from ..constants import * # NOQA from ..compress import CompressionSpec -from ..helpers import 
comment_validator, ChunkerParams, PathSpec +from ..helpers import comment_validator, ChunkerParams, FilesystemPathSpec from ..helpers import archivename_validator, FilesCacheMode from ..helpers import eval_escapes from ..helpers import timestamp, archive_ts_now -from ..helpers import get_cache_dir, os_stat, get_strip_prefix +from ..helpers import get_cache_dir, os_stat, get_strip_prefix, slashify from ..helpers import dir_is_tagged from ..helpers import log_multi from ..helpers import basic_json_data, json_print @@ -106,8 +107,9 @@ def create_inner(archive, cache, fso): pipe_bin = sys.stdin.buffer pipe = TextIOWrapper(pipe_bin, errors="surrogateescape") for path in iter_separated(pipe, paths_sep): + path = slashify(path) strip_prefix = get_strip_prefix(path) - path = os.path.normpath(path) + path = posixpath.normpath(path) try: with backup_io("stat"): st = os_stat(path=path, parent_fd=None, name=None, follow_symlinks=False) @@ -160,7 +162,7 @@ def create_inner(archive, cache, fso): continue strip_prefix = get_strip_prefix(path) - path = os.path.normpath(path) + path = posixpath.normpath(path) try: with backup_io("stat"): st = os_stat(path=path, parent_fd=None, name=None, follow_symlinks=False) @@ -489,7 +491,7 @@ def _rec_walk( path=path, fd=child_fd, st=st, strip_prefix=strip_prefix ) for tag_name in tag_names: - tag_path = os.path.join(path, tag_name) + tag_path = posixpath.join(path, tag_name) self._rec_walk( path=tag_path, parent_fd=child_fd, @@ -523,7 +525,7 @@ def _rec_walk( with backup_io("scandir"): entries = helpers.scandir_inorder(path=path, fd=child_fd) for dirent in entries: - normpath = os.path.normpath(os.path.join(path, dirent.name)) + normpath = posixpath.normpath(posixpath.join(path, dirent.name)) self._rec_walk( path=normpath, parent_fd=child_fd, @@ -962,5 +964,5 @@ def build_parser_create(self, subparsers, common_parser, mid_common_parser): subparser.add_argument("name", metavar="NAME", type=archivename_validator, help="specify the archive name") 
subparser.add_argument( - "paths", metavar="PATH", nargs="*", type=PathSpec, action="extend", help="paths to archive" + "paths", metavar="PATH", nargs="*", type=FilesystemPathSpec, action="extend", help="paths to archive" ) diff --git a/src/borg/archiver/extract_cmd.py b/src/borg/archiver/extract_cmd.py index a842dd21d6..a3885a0c11 100644 --- a/src/borg/archiver/extract_cmd.py +++ b/src/borg/archiver/extract_cmd.py @@ -1,7 +1,6 @@ import sys import argparse import logging -import os import stat from ._common import with_repository, with_archive @@ -60,7 +59,7 @@ def do_extract(self, args, repository, manifest, archive): for item in archive.iter_items(): orig_path = item.path if strip_components: - stripped_path = os.sep.join(orig_path.split(os.sep)[strip_components:]) + stripped_path = "/".join(orig_path.split("/")[strip_components:]) if not stripped_path: continue item.path = stripped_path diff --git a/src/borg/archiver/help_cmd.py b/src/borg/archiver/help_cmd.py index 40237aed1b..32523e30e9 100644 --- a/src/borg/archiver/help_cmd.py +++ b/src/borg/archiver/help_cmd.py @@ -35,11 +35,14 @@ class HelpMixIn: start with ``src``. - When you back up relative paths like ``../../src``, the archived paths start with ``src``. + - On native Windows, archived absolute paths look like ``C/Windows/System32``. Borg supports different pattern styles. To define a non-default style for a specific pattern, prefix it with two characters followed by a colon ':' (i.e. ``fm:path/*``, ``sh:path/**``). + Note: Windows users must only use forward slashes in patterns, not backslashes. + The default pattern style for ``--exclude`` differs from ``--pattern``, see below. `Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector ``fm:`` @@ -48,8 +51,8 @@ class HelpMixIn: any number of characters, '?' matching any single character, '[...]' matching any single character specified, including ranges, and '[!...]' matching any character not specified. 
For the purpose of these patterns, - the path separator (backslash for Windows and '/' on other systems) is not - treated specially. Wrap meta-characters in brackets for a literal + the path separator (forward slash '/') is not treated specially. + Wrap meta-characters in brackets for a literal match (i.e. ``[?]`` to match the literal character '?'). For a path to match a pattern, the full path must match, or it must match from the start of the full path to just before a path separator. Except @@ -69,9 +72,7 @@ class HelpMixIn: `Regular expressions <https://docs.python.org/3/library/re.html>`_, selector ``re:`` Unlike shell patterns, regular expressions are not required to match the full path and any substring match is sufficient. It is strongly recommended to - anchor patterns to the start ('^'), to the end ('$') or both. Path - separators (backslash for Windows and '/' on other systems) in paths are - always normalized to a forward slash '/' before applying a pattern. + anchor patterns to the start ('^'), to the end ('$') or both. Path prefix, selector ``pp:`` This pattern style is useful to match whole subdirectories. The pattern @@ -103,6 +104,15 @@ class HelpMixIn: cannot supply ``re:`` patterns. Further, ensure that ``sh:`` and ``fm:`` patterns only contain a handful of wildcards at most. + .. note:: + + **Windows path handling**: All paths in Borg archives use forward slashes (``/``) + as path separators, regardless of the platform. When creating archives on Windows, + backslashes from filesystem paths are automatically converted to forward slashes. + When extracting archives created on POSIX systems that contain literal backslashes + in filenames (which is rare, but possible), the backslash character is replaced + with ``%`` on Windows to prevent misinterpretation as a path separator. + Exclusions can be passed via the command line option ``--exclude``. When used from within a shell, the patterns should be quoted to protect them from expansion. 
diff --git a/src/borg/helpers/__init__.py b/src/borg/helpers/__init__.py index ca19f5c890..86648ab7f5 100644 --- a/src/borg/helpers/__init__.py +++ b/src/borg/helpers/__init__.py @@ -20,7 +20,7 @@ from .fs import ensure_dir, join_base_dir, get_socket_filename from .fs import get_security_dir, get_keys_dir, get_base_dir, get_cache_dir, get_config_dir, get_runtime_dir from .fs import dir_is_tagged, dir_is_cachedir, remove_dotdot_prefixes, make_path_safe, scandir_inorder -from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, get_strip_prefix, umount +from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, get_strip_prefix, umount, slashify from .fs import O_, flags_dir, flags_special_follow, flags_special, flags_base, flags_normal, flags_noatime from .fs import HardLinkManager from .misc import sysinfo, log_multi, consume @@ -28,7 +28,15 @@ from .parseformat import bin_to_hex, hex_to_bin, safe_encode, safe_decode from .parseformat import text_to_json, binary_to_json, remove_surrogates, join_cmd from .parseformat import eval_escapes, decode_dict, positive_int_validator, interval -from .parseformat import PathSpec, SortBySpec, ChunkerParams, FilesCacheMode, partial_format, DatetimeWrapper +from .parseformat import ( + PathSpec, + FilesystemPathSpec, + SortBySpec, + ChunkerParams, + FilesCacheMode, + partial_format, + DatetimeWrapper, +) from .parseformat import format_file_size, parse_file_size, FileSize from .parseformat import sizeof_fmt, sizeof_fmt_iec, sizeof_fmt_decimal, Location, text_validator from .parseformat import format_line, replace_placeholders, PlaceholderError, relative_time_marker_validator diff --git a/src/borg/helpers/fs.py b/src/borg/helpers/fs.py index 66e1ecdfbf..fa95dbf24c 100644 --- a/src/borg/helpers/fs.py +++ b/src/borg/helpers/fs.py @@ -249,6 +249,11 @@ def make_path_safe(path): For reasons of security, a ValueError is raised should `path` contain any '..' elements. """ + if "\\.." 
in path or "..\\" in path: + raise ValueError(f"unexpected '..' element in path {path!r}") + + path = percentify(path) + path = path.lstrip("/") if path.startswith("../") or "/../" in path or path.endswith("/..") or path == "..": raise ValueError(f"unexpected '..' element in path {path!r}") @@ -256,6 +261,26 @@ def make_path_safe(path): return path +def slashify(path): + """ + Replace backslashes with forward slashes if running on Windows. + + Use case: we always want to use forward slashes, even on Windows. + """ + return path.replace("\\", "/") if is_win32 else path + + +def percentify(path): + """ + Replace backslashes with percent signs if running on Windows. + + Use case: if an archived path contains backslashes (a backslash is not a path separator on POSIX + and can appear as a normal character in POSIX paths), we need to replace them with percent + signs to make the path usable on Windows. + """ + return path.replace("\\", "%") if is_win32 else path + + def get_strip_prefix(path): # similar to how rsync does it, we allow users to give paths like: # /this/gets/stripped/./this/is/kept @@ -265,7 +290,7 @@ def get_strip_prefix(path): pos = path.find("/./") # detect slashdot hack if pos > 0: # found a prefix to strip! make sure it ends with one "/"! - return os.path.normpath(path[:pos]) + os.sep + return posixpath.normpath(path[:pos]) + "/" else: # no or empty prefix, nothing to strip! return None @@ -276,15 +301,14 @@ def get_strip_prefix(path): def remove_dotdot_prefixes(path): """ - Remove '../'s at the beginning of `path`. Additionally, - the path is made relative. + Remove '../'s at the beginning of `path`. Additionally, the path is made relative. - `path` is expected to be normalized already (e.g. via `os.path.normpath()`). + `path` is expected to be normalized already (e.g. via `posixpath.normpath()`). 
""" + assert "\\" not in path if is_win32: if len(path) > 1 and path[1] == ":": path = path.replace(":", "", 1) - path = path.replace("\\", "/") path = path.lstrip("/") path = _dotdot_re.sub("", path) diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py index afb1b0cab7..e536526a33 100644 --- a/src/borg/helpers/parseformat.py +++ b/src/borg/helpers/parseformat.py @@ -18,17 +18,17 @@ from string import Formatter from ..logger import create_logger -from ..platformflags import is_win32 logger = create_logger() from .errors import Error -from .fs import get_keys_dir, make_path_safe +from .fs import get_keys_dir, make_path_safe, slashify from .msgpack import Timestamp from .time import OutputTimestamp, format_time, safe_timestamp from .. import __version__ as borg_version from .. import __version_tuple__ as borg_version_tuple from ..constants import * # NOQA +from ..platformflags import is_win32 if TYPE_CHECKING: from ..item import ItemDiff @@ -335,6 +335,12 @@ def PathSpec(text): return text +def FilesystemPathSpec(text): + if not text: + raise argparse.ArgumentTypeError("Empty strings are not accepted as paths.") + return slashify(text) + + def SortBySpec(text): from ..manifest import AI_HUMAN_SORT_KEYS @@ -558,7 +564,8 @@ def _parse(self, text): m = self.local_re.match(text) if m: self.proto = "file" - self.path = os.path.abspath(os.path.normpath(m.group("path"))) + path = m.group("path") + self.path = slashify(os.path.abspath(path)) if is_win32 else os.path.abspath(path) return True return False diff --git a/src/borg/item.pyx b/src/borg/item.pyx index e477473f5d..2ee622864f 100644 --- a/src/borg/item.pyx +++ b/src/borg/item.pyx @@ -7,7 +7,7 @@ from cpython.bytes cimport PyBytes_AsStringAndSize from .constants import ITEM_KEYS, ARCHIVE_KEYS from .helpers import StableDict from .helpers import format_file_size -from .helpers.fs import assert_sanitized_path, to_sanitized_path +from .helpers.fs import assert_sanitized_path, to_sanitized_path, 
percentify, slashify from .helpers.msgpack import timestamp_to_int, int_to_timestamp, Timestamp from .helpers.time import OutputTimestamp, safe_timestamp @@ -265,7 +265,7 @@ cdef class Item(PropDict): path = PropDictProperty(str, 'surrogate-escaped str', encode=assert_sanitized_path, decode=to_sanitized_path) source = PropDictProperty(str, 'surrogate-escaped str') # legacy borg 1.x. borg 2: see .target - target = PropDictProperty(str, 'surrogate-escaped str') + target = PropDictProperty(str, 'surrogate-escaped str', encode=slashify, decode=percentify) user = PropDictProperty(str, 'surrogate-escaped str') group = PropDictProperty(str, 'surrogate-escaped str') diff --git a/src/borg/legacyrepository.py b/src/borg/legacyrepository.py index 930c209a95..504556ba65 100644 --- a/src/borg/legacyrepository.py +++ b/src/borg/legacyrepository.py @@ -5,6 +5,7 @@ import stat import struct import time +from pathlib import Path from collections import defaultdict from configparser import ConfigParser from functools import partial @@ -27,7 +28,6 @@ from .repoobj import RepoObj from .checksums import crc32, StreamingXXH64 from .crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError -from .repository import _local_abspath_to_file_url logger = create_logger(__name__) @@ -191,8 +191,9 @@ class PathPermissionDenied(Error): exit_mcode = 21 def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True, send_log_cb=None): - self.path = os.path.abspath(path) - self._location = Location(_local_abspath_to_file_url(self.path)) + p = Path(path).absolute() + self.path = str(p) + self._location = Location(p.as_uri()) self.version = None # long-running repository methods which emit log or progress output are responsible for calling # the ._send_log method periodically to get log and progress output transferred to the borg client diff --git a/src/borg/patterns.py b/src/borg/patterns.py index c7e5ad225b..c1f8f57277 100644 --- a/src/borg/patterns.py +++ 
b/src/borg/patterns.py @@ -1,6 +1,6 @@ import argparse import fnmatch -import os.path +import posixpath import re import sys import unicodedata @@ -142,7 +142,7 @@ def match(self, path): in self.fallback is returned (defaults to None). """ - path = normalize_path(path).lstrip(os.path.sep) + path = normalize_path(path).lstrip("/") # do a fast lookup for full path matches (note: we do not count such matches): non_existent = object() value = self._path_full_patterns.get(path, non_existent) @@ -215,7 +215,7 @@ class PathFullPattern(PatternBase): PREFIX = "pf" def _prepare(self, pattern): - self.pattern = os.path.normpath(pattern).lstrip(os.path.sep) # sep at beginning is removed + self.pattern = posixpath.normpath(pattern).lstrip("/") # / at beginning is removed def _match(self, path): return path == self.pattern @@ -236,12 +236,10 @@ class PathPrefixPattern(PatternBase): PREFIX = "pp" def _prepare(self, pattern): - sep = os.path.sep - - self.pattern = (os.path.normpath(pattern).rstrip(sep) + sep).lstrip(sep) # sep at beginning is removed + self.pattern = (posixpath.normpath(pattern).rstrip("/") + "/").lstrip("/") # / at beginning is removed def _match(self, path): - return (path + os.path.sep).startswith(self.pattern) + return (path + "/").startswith(self.pattern) class FnmatchPattern(PatternBase): @@ -252,19 +250,19 @@ class FnmatchPattern(PatternBase): PREFIX = "fm" def _prepare(self, pattern): - if pattern.endswith(os.path.sep): - pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + "*" + os.path.sep + if pattern.endswith("/"): + pattern = posixpath.normpath(pattern).rstrip("/") + "/*/" else: - pattern = os.path.normpath(pattern) + os.path.sep + "*" + pattern = posixpath.normpath(pattern) + "/*" - self.pattern = pattern.lstrip(os.path.sep) # sep at beginning is removed + self.pattern = pattern.lstrip("/") # / at beginning is removed # fnmatch and re.match both cache compiled regular expressions. # Nevertheless, this is about 10 times faster. 
self.regex = re.compile(fnmatch.translate(self.pattern)) def _match(self, path): - return self.regex.match(path + os.path.sep) is not None + return self.regex.match(path + "/") is not None class ShellPattern(PatternBase): @@ -275,18 +273,16 @@ class ShellPattern(PatternBase): PREFIX = "sh" def _prepare(self, pattern): - sep = os.path.sep - - if pattern.endswith(sep): - pattern = os.path.normpath(pattern).rstrip(sep) + sep + "**" + sep + "*" + sep + if pattern.endswith("/"): + pattern = posixpath.normpath(pattern).rstrip("/") + "/**/*/" else: - pattern = os.path.normpath(pattern) + sep + "**" + sep + "*" + pattern = posixpath.normpath(pattern) + "/**/*" - self.pattern = pattern.lstrip(sep) # sep at beginning is removed + self.pattern = pattern.lstrip("/") # / at beginning is removed self.regex = re.compile(shellpattern.translate(self.pattern)) def _match(self, path): - return self.regex.match(path + os.path.sep) is not None + return self.regex.match(path + "/") is not None class RegexPattern(PatternBase): @@ -295,14 +291,11 @@ class RegexPattern(PatternBase): PREFIX = "re" def _prepare(self, pattern): - self.pattern = pattern # sep at beginning is NOT removed + self.pattern = pattern # / at beginning is NOT removed self.regex = re.compile(pattern) def _match(self, path): - # Normalize path separators - if os.path.sep != "/": - path = path.replace(os.path.sep, "/") - + assert "\\" not in path return self.regex.search(path) is not None diff --git a/src/borg/repository.py b/src/borg/repository.py index 2fa873ede0..c0829aaf98 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -1,6 +1,6 @@ import os -import sys import time +from pathlib import Path from borgstore.store import Store from borgstore.store import ObjectNotFound as StoreObjectNotFound @@ -106,11 +106,11 @@ def __init__( if isinstance(path_or_location, Location): location = path_or_location if location.proto == "file": - url = _local_abspath_to_file_url(location.path) + url = 
Path(location.path).as_uri() else: url = location.processed # location as given by user, processed placeholders else: - url = _local_abspath_to_file_url(os.path.abspath(path_or_location)) + url = Path(path_or_location).absolute().as_uri() location = Location(url) self._location = location self.url = url @@ -566,16 +566,3 @@ def store_delete(self, name, *, deleted=False): def store_move(self, name, new_name=None, *, delete=False, undelete=False, deleted=False): self._lock_refresh() return self.store.move(name, new_name, delete=delete, undelete=undelete, deleted=deleted) - - -def _local_abspath_to_file_url(path: str) -> str: - """Create a file URL from a local, absolute path. - - Expects `path` to be an absolute path on the local filesystem, e.g.: - - POSIX: `/foo/bar` - - Windows: `c:/foo/bar` (or `c:\foo\bar`) - The easiest way to ensure this is for the caller to pass `path` through `os.path.abspath` first. - """ - if sys.platform in ("win32", "msys", "cygwin"): - path = "/" + path.replace("\\", "/") - return "file://%s" % path diff --git a/src/borg/testsuite/archiver/lock_cmds_test.py b/src/borg/testsuite/archiver/lock_cmds_test.py index a1f66be317..7727e764fc 100644 --- a/src/borg/testsuite/archiver/lock_cmds_test.py +++ b/src/borg/testsuite/archiver/lock_cmds_test.py @@ -2,6 +2,7 @@ import subprocess import sys import time +from pathlib import Path import pytest @@ -9,7 +10,6 @@ from . 
import cmd, generate_archiver_tests, RK_ENCRYPTION from ...helpers import CommandError from ...platformflags import is_haiku, is_win32 -from ...repository import _local_abspath_to_file_url pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary") # NOQA @@ -24,7 +24,7 @@ def test_break_lock(archivers, request): def test_with_lock(tmp_path): repo_path = tmp_path / "repo" env = os.environ.copy() - env["BORG_REPO"] = _local_abspath_to_file_url(str(repo_path.absolute())) + env["BORG_REPO"] = Path(repo_path).as_uri() # test debug output: print("sys.path: %r" % sys.path) print("PYTHONPATH: %s" % env.get("PYTHONPATH", "")) diff --git a/src/borg/testsuite/storelocking_test.py b/src/borg/testsuite/storelocking_test.py index d39f055b2b..143a99a0e1 100644 --- a/src/borg/testsuite/storelocking_test.py +++ b/src/borg/testsuite/storelocking_test.py @@ -1,10 +1,10 @@ import time +from pathlib import Path import pytest from borgstore.store import Store -from ..repository import _local_abspath_to_file_url from ..storelocking import Lock, NotLocked, LockTimeout ID1 = "foo", 1, 1 @@ -13,8 +13,7 @@ @pytest.fixture() def lockstore(tmp_path): - lockstore_path = tmp_path / "lockstore" - store = Store(_local_abspath_to_file_url(str(lockstore_path.absolute())), levels={"locks/": [0]}) + store = Store(Path(tmp_path / "lockstore").as_uri(), levels={"locks/": [0]}) store.create() with store: yield store