[SNOW-1732087] Fix doctest file descriptor capture. (#2490)
Fixes the phase0 doctests with `--ast-enabled` by switching the CI runner to
macOS and by replacing various module-level `logging.<func>` calls with a
file-specific `_logger`, as in the rest of the code base. The doctests
currently do not pass under Ubuntu (errors: spurious `<BLANKLINE>` output and
interference with the logging module, cf. JIRA); Windows is untested.

Other:
- Warn about a missing `unparser.jar` only when `--update-expectations` is passed.
- Use a file-specific logger instead of the global root logger (see the sketch below).
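
As an illustration (not part of the commit), a minimal sketch of the logging pattern being switched to; only the pattern itself is taken from the diffs below:

import logging

# Module-level helpers such as logging.warning(...) log via the root logger
# and, if the root logger has no handlers yet, implicitly call
# logging.basicConfig(), which binds a StreamHandler to the sys.stderr of that
# moment. Under doctest/pytest, which swap the standard streams, the handler
# can end up writing to a captured file descriptor.
logging.warning("goes through the root logger")

# The pattern used in the rest of the code base: a file-specific logger that
# participates in the dotted logger hierarchy without configuring the root.
_logger = logging.getLogger(__name__)
_logger.warning("goes through the '%s' logger", __name__)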
sfc-gh-lspiegelberg authored Oct 23, 2024
1 parent e843c05 commit 15cabe0
Showing 11 changed files with 39 additions and 31 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/precommit.yml
@@ -307,7 +307,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ ubuntu-latest ]
+        os: [ macos-latest ]
         python-version: [ "3.9"]
         cloud-provider: [ aws ]
     steps:
3 changes: 2 additions & 1 deletion src/conftest.py
@@ -11,7 +11,8 @@
 
 from snowflake.snowpark import Session
 
-logging.getLogger("snowflake.connector").setLevel(logging.ERROR)
+_logger = logging.getLogger("snowflake.connector")
+_logger.setLevel(logging.ERROR)
 
 RUNNING_ON_GH = os.getenv("GITHUB_ACTIONS") == "true"
 TEST_SCHEMA = "GH_JOB_{}".format(str(uuid.uuid4()).replace("-", "_"))
4 changes: 3 additions & 1 deletion src/snowflake/snowpark/_internal/analyzer/cte_utils.py
@@ -17,6 +17,8 @@
     random_name_for_temp_object,
 )
 
+_logger = logging.getLogger(__name__)
+
 if TYPE_CHECKING:
     from snowflake.snowpark._internal.analyzer.select_statement import Selectable
     from snowflake.snowpark._internal.analyzer.snowflake_plan import SnowflakePlan
@@ -155,5 +157,5 @@ def encode_id(
     try:
         return hashlib.sha256(string.encode()).hexdigest()[:10]
     except Exception as ex:
-        logging.warning(f"Encode SnowflakePlan ID failed: {ex}")
+        _logger.warning(f"Encode SnowflakePlan ID failed: {ex}")
         return None
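
A side note on why the per-file loggers compose with package-level configuration (illustrative, not part of the diff): loggers named via __name__ inherit their effective level from their dotted-name ancestors.

import logging

# The module logger introduced in the hunk above:
_logger = logging.getLogger("snowflake.snowpark._internal.analyzer.cte_utils")

# One call on the package logger governs every module logger underneath it.
logging.getLogger("snowflake.snowpark").setLevel(logging.ERROR)
assert _logger.getEffectiveLevel() == logging.ERROR  # inherited from the parent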
11 changes: 6 additions & 5 deletions src/snowflake/snowpark/_internal/temp_table_auto_cleaner.py
@@ -8,10 +8,11 @@
 
 from snowflake.snowpark._internal.analyzer.snowflake_plan_node import SnowflakeTable
 
+_logger = logging.getLogger(__name__)
+
 if TYPE_CHECKING:
     from snowflake.snowpark.session import Session  # pragma: no cover
 
-
 DROP_TABLE_STATEMENT_PARAM_NAME = "auto_clean_up_temp_table"
 
 
@@ -53,13 +54,13 @@ def _delete_ref_count(self, name: str) -> None:  # pragma: no cover
         ):
             self.drop_table(name)
         elif self.ref_count_map[name] < 0:
-            logging.debug(
+            _logger.debug(
                 f"Unexpected reference count {self.ref_count_map[name]} for table {name}"
             )
 
     def drop_table(self, name: str) -> None:  # pragma: no cover
         common_log_text = f"temp table {name} in session {self.session.session_id}"
-        logging.debug(f"Ready to drop {common_log_text}")
+        _logger.debug(f"Ready to drop {common_log_text}")
         query_id = None
         try:
             async_job = self.session.sql(
@@ -68,10 +69,10 @@ def drop_table(self, name: str) -> None:  # pragma: no cover
                 block=False, statement_params={DROP_TABLE_STATEMENT_PARAM_NAME: name}
             )
             query_id = async_job.query_id
-            logging.debug(f"Dropping {common_log_text} with query id {query_id}")
+            _logger.debug(f"Dropping {common_log_text} with query id {query_id}")
         except Exception as ex:  # pragma: no cover
             warning_message = f"Failed to drop {common_log_text}, exception: {ex}"
-            logging.warning(warning_message)
+            _logger.warning(warning_message)
             if query_id is None:
                 # If no query_id is available, it means the query haven't been accepted by gs,
                 # and it won't occur in our job_etl_view, send a separate telemetry for recording.
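
Because the cleaner now logs through logging.getLogger(__name__), its drop-table diagnostics can be enabled in isolation; a minimal sketch, assuming a root handler is wanted via logging.basicConfig:

import logging

logging.basicConfig()  # attach a handler so emitted records become visible
logging.getLogger(
    "snowflake.snowpark._internal.temp_table_auto_cleaner"
).setLevel(logging.DEBUG)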
10 changes: 5 additions & 5 deletions src/snowflake/snowpark/_internal/utils.py
@@ -55,7 +55,7 @@
 except ImportError:
     ResultMetadataV2 = ResultMetadata
 
-logger = logging.getLogger("snowflake.snowpark")
+_logger = logging.getLogger("snowflake.snowpark")
 
 STAGE_PREFIX = "@"
 SNOWURL_PREFIX = "snow://"
@@ -208,7 +208,7 @@ def _pandas_importer():  # noqa: E302
         # since we enable relative imports without dots this import gives us an issues when ran from test directory
         from pandas import DataFrame  # NOQA
     except ImportError as e:
-        logger.error(f"pandas is not installed {e}")
+        _logger.error(f"pandas is not installed {e}")
     return pandas
 
 
@@ -683,7 +683,7 @@ def __init__(self, warning_times: int) -> None:
 
     def warning(self, text: str) -> None:
         if self.count < self.warning_times:
-            logger.warning(text)
+            _logger.warning(text)
             self.count += 1
 
 
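A hypothetical usage sketch of the rate-limiting helper above (the instantiation and message are illustrative):

helper = WarningHelper(warning_times=3)
for _ in range(10):
    helper.warning("falling back to a slower execution path")  # logged 3 times, then silent
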
@@ -724,7 +724,7 @@ def infer_ast_enabled_from_global_sessions(func: Callable) -> bool:
         pass
     finally:
         if session is None:
-            logging.debug(
+            _logger.debug(
                 f"Could not retrieve default session "
                 f"for function {func.__qualname__}, capturing AST by default."
             )
@@ -973,7 +973,7 @@ def get_aliased_option_name(
     upper_key = key.strip().upper()
     aliased_key = alias_map.get(upper_key, upper_key)
     if aliased_key != upper_key:
-        logger.warning(
+        _logger.warning(
            f"Option '{key}' is aliased to '{aliased_key}'. You may see unexpected behavior."
            " Please refer to format specific options for more information"
3 changes: 1 addition & 2 deletions src/snowflake/snowpark/dataframe.py
@@ -5,7 +5,6 @@
 
 import copy
 import itertools
-import logging
 import re
 import sys
 from collections import Counter
@@ -4525,7 +4524,7 @@ def create_or_replace_dynamic_table(
             isinstance(self._session._conn, MockServerConnection)
             and self._session._conn._suppress_not_implemented_error
         ):
-            logging.error(
+            _logger.error(
                 "create_or_replace_dynamic_table not supported in local testing mode, returning empty result."
             )
             # Allow AST tests to pass.
src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py
@@ -44,6 +44,8 @@
 else:
     from collections.abc import Iterable
 
+_logger = logging.getLogger(__name__)
+
 
 @dataclass(frozen=True)
 class OrderingColumn:
@@ -653,7 +655,7 @@ def select(
         dataframe_ref.cached_snowflake_quoted_identifiers_tuple = tuple(
             new_column_identifiers
         )
-        logging.debug(
+        _logger.debug(
             f"The Snowpark DataFrame in DataFrameReference with id={dataframe_ref._id} is updated"
         )
 
src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
@@ -1784,7 +1784,7 @@ def index(self) -> Union["pd.Index", native_pd.MultiIndex]:
         """
         if self.is_multiindex():
             # Lazy multiindex is not supported
-            logging.warning(
+            _logger.warning(
                 "Lazy MultiIndex is not supported. MultiIndex values are evaluated eagerly and pulled out of Snowflake."
             )
             return self._modin_frame.index_columns_pandas_index()
@@ -3375,7 +3375,7 @@ def sort_rows_by_column_values(
             raise ValueError(f"sort kind must be 'stable' or None (got '{kind}')")
         # Do not show warning for 'quicksort' as this the default option.
         if kind not in ("stable", "quicksort"):
-            logging.warning(
+            _logger.warning(
                 f"choice of sort algorithm '{kind}' is ignored. sort kind must be 'stable', 'quicksort', or None"
             )
 
@@ -13689,7 +13689,7 @@ def sample(
             )
         else:
             sampled_odf = frame.ordered_dataframe.sample(n=n, frac=frac)
-        logging.warning(
+        _logger.warning(
             "Snowpark pandas `sample` will create a temp table for sampled results to keep it deterministic."
         )
         res = SnowflakeQueryCompiler(
5 changes: 2 additions & 3 deletions src/snowflake/snowpark/session.py
@@ -8,7 +8,6 @@
 import decimal
 import inspect
 import json
-import logging
 import os
 import re
 import sys
@@ -1558,7 +1557,7 @@ def _get_dependency_packages(
                     f"but not on your local environment."
                 )
         except Exception as ex:  # pragma: no cover
-            logging.warning(
+            _logger.warning(
                 "Failed to get the local distribution of package %s: %s",
                 package_name,
                 ex,
@@ -3020,7 +3019,7 @@ def create_dataframe(
             )
             schema_query = f"SELECT * FROM {self.get_fully_qualified_name_if_possible(temp_table_name)}"
         except ProgrammingError as e:
-            logging.debug(
+            _logger.debug(
                 f"Cannot create temp table for specified non-nullable schema, fall back to using schema "
                 f"string from select query. Exception: {str(e)}"
             )
6 changes: 4 additions & 2 deletions tests/ast/conftest.py
@@ -36,11 +36,13 @@ def pytest_configure(config):
     pytest.unparser_jar = config.getoption("--unparser-jar")
     if not os.path.exists(pytest.unparser_jar):
         pytest.unparser_jar = None
-        logging.error(
+    pytest.update_expectations = config.getoption("--update-expectations")
+
+    if pytest.unparser_jar is None and pytest.update_expectations:
+        raise RuntimeError(
             f"Unparser JAR not found at {pytest.unparser_jar}. "
             f"Please set the correct path with --unparser-jar or SNOWPARK_UNPARSER_JAR."
         )
-    pytest.update_expectations = config.getoption("--update-expectations")
 
 
 class TestTables:
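
For context, a sketch of how the two pytest options referenced above are typically declared; only the option names and the SNOWPARK_UNPARSER_JAR fallback come from the diff, while the defaults and help texts are assumptions:

import os

def pytest_addoption(parser):
    parser.addoption(
        "--unparser-jar",
        default=os.getenv("SNOWPARK_UNPARSER_JAR"),  # assumed env-var fallback
        help="Path to the Unparser JAR used to render expected output.",
    )
    parser.addoption(
        "--update-expectations",
        action="store_true",
        default=False,
        help="Regenerate expected test outputs; requires the Unparser JAR.",
    )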
16 changes: 9 additions & 7 deletions tests/ast/test_ast_driver.py
@@ -28,6 +28,8 @@
     textproto_to_request,
 )
 
+_logger = logging.getLogger(__name__)
+
 TEST_DIR = pathlib.Path(__file__).parent
 
 DATA_DIR = TEST_DIR / "data"
@@ -201,7 +203,7 @@ def run_test(session, tables):
 
 @pytest.mark.parametrize("test_case", load_test_cases(), ids=idfn)
 def test_ast(session, tables, test_case):
-    logging.info(f"Testing AST encoding with protobuf {google.protobuf.__version__}.")
+    _logger.info(f"Testing AST encoding with protobuf {google.protobuf.__version__}.")
 
     actual, base64_str = run_test(
         session, tables, test_case.filename.replace(".", "_"), test_case.source
@@ -263,7 +265,7 @@ def test_ast(session, tables, test_case):
             line for line in differ.compare(actual_lines, expected_lines)
         ]
 
-        logging.error(
+        _logger.error(
            "expected vs. actual encoded protobuf:\n" + "\n".join(diffed_lines)
         )
 
@@ -278,7 +280,7 @@ def override_time_zone(tz_name: str = "EST") -> None:
 
     tz = dateutil.tz.gettz(tz_name)
     tz_code = tz.tzname(datetime.datetime.now())
-    logging.debug(f"Overriding time zone to {tz_name} ({tz_code}).")
+    _logger.debug(f"Overriding time zone to {tz_name} ({tz_code}).")
 
     if platform.system() != "Windows":
         # This works only under Unix systems.
@@ -301,7 +303,7 @@ def override_time_zone(tz_name: str = "EST") -> None:
         # Possible modification code (not working):
         # Use direct msvcrt.dll override (only for this process, does not work for child processes).
         # cf. https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/tzset?view=msvc-170
-        logging.debug(
+        _logger.debug(
             f"Windows current time (before msvcrt set): {datetime.datetime.now()}"
         )
 
@@ -310,18 +312,18 @@ def override_time_zone(tz_name: str = "EST") -> None:
         cdll.msvcrt._putenv(f"TZ={tz_code}")
         cdll.msvcrt._tzset()
         # If working, we would expect this to show output adjusted to the timezone referred to by tz_code.
-        logging.debug(
+        _logger.debug(
             f"Windows current time (after msvcrt set): {datetime.datetime.now()}"
         )
         # Other python libraries would have been updated then as well.
         from tzlocal import get_localzone
 
-        logging.debug(
+        _logger.debug(
             f"Windows: tzlocal={get_localzone()} TZ={env_tz}, will be using TZ when encoding for AST."
         )
 
     tz_name = datetime.datetime.now(tzlocal()).tzname()
-    logging.debug(f"Local time zone is now: {tz_name}.")
+    _logger.debug(f"Local time zone is now: {tz_name}.")
 
 
 if __name__ == "__main__":
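
The Unix branch elided above relies on time.tzset() re-reading TZ from the environment; a minimal standalone sketch of that mechanism (the printed zone name is what the AST encoder then observes):

import datetime
import os
import time

os.environ["TZ"] = "EST"
time.tzset()  # Unix-only; re-reads TZ, hence the Windows special-casing above
print(datetime.datetime.now().astimezone().tzname())  # -> "EST"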