From 585242ef11d197fb0486551c8dab5d1c5e02ac9c Mon Sep 17 00:00:00 2001 From: mferrera Date: Mon, 20 Nov 2023 08:45:41 +0100 Subject: [PATCH] `eclcompress`: deprecate providing a keyword regex --- docs/scripts/eclcompress.rst | 4 +- src/subscript/eclcompress/eclcompress.py | 59 +++++------------------- tests/test_eclcompress.py | 39 ---------------- 3 files changed, 13 insertions(+), 89 deletions(-) diff --git a/docs/scripts/eclcompress.rst b/docs/scripts/eclcompress.rst index cf279a71c..7c2e4130f 100644 --- a/docs/scripts/eclcompress.rst +++ b/docs/scripts/eclcompress.rst @@ -76,9 +76,7 @@ Notes compressed file into account. - Eclipse loading time of the compressed file is probably reduced by the same factor as the compression factor. -- Only known compressable keywords are compressed by default. If you wish - to specify particular keywords or a regex instead this can be provided - directly through the command line. +- Only known compressable keywords are compressed. Possible improvements diff --git a/src/subscript/eclcompress/eclcompress.py b/src/subscript/eclcompress/eclcompress.py index 2cacabf95..d55f906de 100755 --- a/src/subscript/eclcompress/eclcompress.py +++ b/src/subscript/eclcompress/eclcompress.py @@ -6,12 +6,10 @@ import itertools import logging import os -import re import shutil import textwrap -import warnings from pathlib import Path -from typing import List, Optional, Pattern, Tuple, Union +from typing import List, Tuple, Union import subscript @@ -79,7 +77,6 @@ def eclcompress( files: Union[str, List[str]], keeporiginal: bool = False, dryrun: bool = False, - eclkw_regexp: Optional[str] = None, ) -> int: """Run-length encode a set of grdecl files. @@ -89,8 +86,7 @@ def eclcompress( files: Filenames to be compressed keeporiginal: Whether to copy the original to a backup file dryrun: If true, only print compression efficiency - eclkw_regexp: Regular expression for locating Eclipse keywords. - Default is [A-Z]{2-8}$ + Returns: Number of bytes saved by compression. """ @@ -98,12 +94,6 @@ def eclcompress( if not isinstance(files, list): files = [files] # List with one element - if eclkw_regexp: - warnings.warn( - "Providing a keyword regex will be removed in Komodo 2023.11.", - DeprecationWarning, - ) - totalsavings = 0 for filename in files: @@ -137,7 +127,7 @@ def eclcompress( # Index the list of strings (the file contents) by the line numbers # where Eclipse keywords start, and where the first data record of the keyword # ends (compression is not attempted in record 2 and onwards for any keyword) - keywordsets = find_keyword_sets(filelines, eclkw_regexp=eclkw_regexp) + keywordsets = find_keyword_sets(filelines) if not keywordsets: logger.info( @@ -299,9 +289,7 @@ def compress_multiple_keywordsets( return compressedlines -def find_keyword_sets( - filelines: List[str], eclkw_regexp: Optional[Union[str, Pattern[str]]] = None -) -> List[Tuple[int, int]]: +def find_keyword_sets(filelines: List[str]) -> List[Tuple[int, int]]: """Parse list of strings, looking for Eclipse data sets that we want. Example: @@ -335,9 +323,6 @@ def find_keyword_sets( Args: filelines: Eclipse deck lines (not necessarily complete decks) - eclkw_regexp: Regular expression for locating Eclipse keywords. - Default is None, in which it uses a predefined allowlist of - keywords Return: 2-tuples, with start and end line indices for datasets to @@ -346,26 +331,19 @@ def find_keyword_sets( """ keywordsets = [] kwstart = None - if eclkw_regexp: - eclkw_regexp = re.compile(eclkw_regexp) for lineidx, line in enumerate(filelines): line = line.strip() if not line: continue - if eclkw_regexp: - if re.match(eclkw_regexp, line) and line not in DENYLIST_KEYWORDS: - kwstart = lineidx - continue - else: - # Remove embracing quotes if in a multi-keyword - keyword = line.split(" ")[0].strip("'") - if keyword in ALLOWLIST_KEYWORDS: - kwstart = lineidx - if "/" in line: - keywordsets.append((kwstart, lineidx)) - kwstart = None - continue + # Remove embracing quotes if in a multi-keyword + keyword = line.split(" ")[0].strip("'") + if keyword in ALLOWLIST_KEYWORDS: + kwstart = lineidx + if "/" in line: + keywordsets.append((kwstart, lineidx)) + kwstart = None + continue if kwstart is not None and line[0:2] == "--": # This means we found a comment section within a data set # In that case it is vital to preserve the current line @@ -453,14 +431,6 @@ def get_parser() -> argparse.ArgumentParser: "no files are specified on the command line." ), ) - parser.add_argument( - "--eclkw_regexp", - help=( - "Regular expression to determine which Eclipse keyword " - "to recognize. Default is None, using instead a list of known " - "compressable keywords." - ), - ) parser.add_argument( "--version", action="version", @@ -506,7 +476,6 @@ def main(): args.files, keeporiginal=args.keeporiginal, dryrun=args.dryrun, - eclkw_regexp=args.eclkw_regexp, ) @@ -515,7 +484,6 @@ def main_eclcompress( wildcardfile: str, keeporiginal: bool = False, dryrun: bool = False, - eclkw_regexp: Optional[str] = None, ) -> None: """Implements the command line functionality @@ -525,8 +493,6 @@ def main_eclcompress( keeporiginal: Whether a backup file should be left behind dryrun: Nothing written to disk, only statistics for compression printed to terminal. - eclkw_regexp: Regular expression for locating Eclipse keywords. - Default is None """ # A list of wildcards on the command line should always be compressed: if grdeclfiles: @@ -558,7 +524,6 @@ def main_eclcompress( globbedfiles, keeporiginal=keeporiginal, dryrun=dryrun, - eclkw_regexp=eclkw_regexp, ) savings_mb = savings / 1024.0 / 1024.0 print(f"eclcompress finished. Saved {savings_mb:.1f} Mb from compression") diff --git a/tests/test_eclcompress.py b/tests/test_eclcompress.py index 27d6229b7..e8ca5ea62 100644 --- a/tests/test_eclcompress.py +++ b/tests/test_eclcompress.py @@ -622,45 +622,6 @@ def test_glob_patterns(tmp_path): parse_wildcardfile("notthere") -def test_eclkw_regexp(tmp_path, mocker): - """Test that custom regular expressions can be supplied to compress - otherwise unknown (which implies no compression) keywords""" - os.chdir(tmp_path) - - uncompressed_str = "G1\n0 0 0 0 0 0 0 0 0 0 0 0 0\n/" - - # Nothing is found by default here. - assert not find_keyword_sets(uncompressed_str.split()) - - # Only if we specify a regexp catching this odd keyword name: - - kw_sets = find_keyword_sets(uncompressed_str.split(), eclkw_regexp="G1") - kwend_idx = len(uncompressed_str.split()) - 1 - assert kw_sets == [(0, kwend_idx)] - assert compress_multiple_keywordsets(kw_sets, uncompressed_str.split()) == [ - "G1", - " 13*0", - "/", - ] - - Path("g1.grdecl").write_text(uncompressed_str, encoding="utf8") - - # Alternative regexpes that should also work with this G1: - kw_sets = find_keyword_sets( - uncompressed_str.split(), eclkw_regexp="[A-Z]{1-8}$" - ) == [(0, kwend_idx)] - - kw_sets = find_keyword_sets( - uncompressed_str.split(), eclkw_regexp="[A-Z0-9]{2-8}$" - ) == [(0, kwend_idx)] - - mocker.patch("sys.argv", ["eclcompress", "g1.grdecl", "--eclkw_regexp", "G1"]) - main() - compressed = Path("g1.grdecl").read_text(encoding="utf8") - assert "File compressed with eclcompress" in compressed - assert "13*0" in compressed - - def test_binary_example_file(tmp_path, mocker): """Test that a particular binary file is not touched by eclcompress