[SNOW-1732087] Fix doctest file descriptor capture. (#2490)
Fixes the phase0 doctests with `--ast-enabled` by switching the CI runner to
macOS and by replacing various module-level `logging.<func>` calls with a
file-specific `_logger`, as in the rest of the code base. The doctests
currently do not pass under Ubuntu (errors: spurious `<BLANKLINE>` output and
interference with the logging module, cf. JIRA); Windows is untested.

Other:
- Warn about a missing `unparser.jar` only when `--update-expectations` is passed.
- Use a file-specific logger instead of the global root logger (see the sketch below).
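
As an illustration (not part of the commit), a minimal sketch of the logging pattern being switched to; only the pattern itself is taken from the diffs below:

import logging

# Module-level helpers such as logging.warning(...) log via the root logger
# and, if the root logger has no handlers yet, implicitly call
# logging.basicConfig(), which binds a StreamHandler to the sys.stderr of that
# moment. Under doctest/pytest, which swap the standard streams, the handler
# can end up writing to a captured file descriptor.
logging.warning("goes through the root logger")

# The pattern used in the rest of the code base: a file-specific logger that
# participates in the dotted logger hierarchy without configuring the root.
_logger = logging.getLogger(__name__)
_logger.warning("goes through the '%s' logger", __name__)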
sfc-gh-lspiegelberg authored Oct 23, 2024
1 parent e843c05 commit 15cabe0
Showing 11 changed files with 39 additions and 31 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/precommit.yml
@@ -307,7 +307,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ ubuntu-latest ]
+        os: [ macos-latest ]
         python-version: [ "3.9"]
         cloud-provider: [ aws ]
     steps:
3 changes: 2 additions & 1 deletion src/conftest.py
@@ -11,7 +11,8 @@
 
 from snowflake.snowpark import Session
 
-logging.getLogger("snowflake.connector").setLevel(logging.ERROR)
+_logger = logging.getLogger("snowflake.connector")
+_logger.setLevel(logging.ERROR)
 
 RUNNING_ON_GH = os.getenv("GITHUB_ACTIONS") == "true"
 TEST_SCHEMA = "GH_JOB_{}".format(str(uuid.uuid4()).replace("-", "_"))
4 changes: 3 additions & 1 deletion src/snowflake/snowpark/_internal/analyzer/cte_utils.py
@@ -17,6 +17,8 @@
     random_name_for_temp_object,
 )
 
+_logger = logging.getLogger(__name__)
+
 if TYPE_CHECKING:
     from snowflake.snowpark._internal.analyzer.select_statement import Selectable
     from snowflake.snowpark._internal.analyzer.snowflake_plan import SnowflakePlan
@@ -155,5 +157,5 @@ def encode_id(
     try:
         return hashlib.sha256(string.encode()).hexdigest()[:10]
     except Exception as ex:
-        logging.warning(f"Encode SnowflakePlan ID failed: {ex}")
+        _logger.warning(f"Encode SnowflakePlan ID failed: {ex}")
         return None
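
A side note on why the per-file loggers compose with package-level configuration (illustrative, not part of the diff): loggers named via __name__ inherit their effective level from their dotted-name ancestors.

import logging

# The module logger introduced in the hunk above:
_logger = logging.getLogger("snowflake.snowpark._internal.analyzer.cte_utils")

# One call on the package logger governs every module logger underneath it.
logging.getLogger("snowflake.snowpark").setLevel(logging.ERROR)
assert _logger.getEffectiveLevel() == logging.ERROR  # inherited from the parent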
11 changes: 6 additions & 5 deletions src/snowflake/snowpark/_internal/temp_table_auto_cleaner.py
@@ -8,10 +8,11 @@
 
 from snowflake.snowpark._internal.analyzer.snowflake_plan_node import SnowflakeTable
 
+_logger = logging.getLogger(__name__)
+
 if TYPE_CHECKING:
     from snowflake.snowpark.session import Session  # pragma: no cover
 
-
 DROP_TABLE_STATEMENT_PARAM_NAME = "auto_clean_up_temp_table"
 
 
@@ -53,13 +54,13 @@ def _delete_ref_count(self, name: str) -> None:  # pragma: no cover
         ):
             self.drop_table(name)
         elif self.ref_count_map[name] < 0:
-            logging.debug(
+            _logger.debug(
                 f"Unexpected reference count {self.ref_count_map[name]} for table {name}"
             )
 
     def drop_table(self, name: str) -> None:  # pragma: no cover
         common_log_text = f"temp table {name} in session {self.session.session_id}"
-        logging.debug(f"Ready to drop {common_log_text}")
+        _logger.debug(f"Ready to drop {common_log_text}")
         query_id = None
         try:
             async_job = self.session.sql(
@@ -68,10 +69,10 @@ def drop_table(self, name: str) -> None:  # pragma: no cover
                 block=False, statement_params={DROP_TABLE_STATEMENT_PARAM_NAME: name}
             )
             query_id = async_job.query_id
-            logging.debug(f"Dropping {common_log_text} with query id {query_id}")
+            _logger.debug(f"Dropping {common_log_text} with query id {query_id}")
         except Exception as ex:  # pragma: no cover
             warning_message = f"Failed to drop {common_log_text}, exception: {ex}"
-            logging.warning(warning_message)
+            _logger.warning(warning_message)
             if query_id is None:
                 # If no query_id is available, it means the query haven't been accepted by gs,
                 # and it won't occur in our job_etl_view, send a separate telemetry for recording.
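
Because the cleaner now logs through logging.getLogger(__name__), its drop-table diagnostics can be enabled in isolation; a minimal sketch, assuming a root handler is wanted via logging.basicConfig:

import logging

logging.basicConfig()  # attach a handler so emitted records become visible
logging.getLogger(
    "snowflake.snowpark._internal.temp_table_auto_cleaner"
).setLevel(logging.DEBUG)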
10 changes: 5 additions & 5 deletions src/snowflake/snowpark/_internal/utils.py
@@ -55,7 +55,7 @@
 except ImportError:
     ResultMetadataV2 = ResultMetadata
 
-logger = logging.getLogger("snowflake.snowpark")
+_logger = logging.getLogger("snowflake.snowpark")
 
 STAGE_PREFIX = "@"
 SNOWURL_PREFIX = "snow://"
@@ -208,7 +208,7 @@ def _pandas_importer():  # noqa: E302
         # since we enable relative imports without dots this import gives us an issues when ran from test directory
         from pandas import DataFrame  # NOQA
     except ImportError as e:
-        logger.error(f"pandas is not installed {e}")
+        _logger.error(f"pandas is not installed {e}")
     return pandas
 
 
@@ -683,7 +683,7 @@ def __init__(self, warning_times: int) -> None:
 
     def warning(self, text: str) -> None:
         if self.count < self.warning_times:
-            logger.warning(text)
+            _logger.warning(text)
             self.count += 1
 
 
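A hypothetical usage sketch of the rate-limiting helper above (the instantiation and message are illustrative):

helper = WarningHelper(warning_times=3)
for _ in range(10):
    helper.warning("falling back to a slower execution path")  # logged 3 times, then silent
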
@@ -724,7 +724,7 @@ def infer_ast_enabled_from_global_sessions(func: Callable) -> bool:
         pass
     finally:
         if session is None:
-            logging.debug(
+            _logger.debug(
                 f"Could not retrieve default session "
                 f"for function {func.__qualname__}, capturing AST by default."
             )
@@ -973,7 +973,7 @@ def get_aliased_option_name(
     upper_key = key.strip().upper()
     aliased_key = alias_map.get(upper_key, upper_key)
     if aliased_key != upper_key:
-        logger.warning(
+        _logger.warning(
            f"Option '{key}' is aliased to '{aliased_key}'. You may see unexpected behavior."
            " Please refer to format specific options for more information"
3 changes: 1 addition & 2 deletions src/snowflake/snowpark/dataframe.py
@@ -5,7 +5,6 @@
 
 import copy
 import itertools
-import logging
 import re
 import sys
 from collections import Counter
@@ -4525,7 +4524,7 @@ def create_or_replace_dynamic_table(
             isinstance(self._session._conn, MockServerConnection)
             and self._session._conn._suppress_not_implemented_error
         ):
-            logging.error(
+            _logger.error(
                 "create_or_replace_dynamic_table not supported in local testing mode, returning empty result."
             )
             # Allow AST tests to pass.
src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py
@@ -44,6 +44,8 @@
 else:
     from collections.abc import Iterable
 
+_logger = logging.getLogger(__name__)
+
 
 @dataclass(frozen=True)
 class OrderingColumn:
@@ -653,7 +655,7 @@ def select(
         dataframe_ref.cached_snowflake_quoted_identifiers_tuple = tuple(
             new_column_identifiers
         )
-        logging.debug(
+        _logger.debug(
             f"The Snowpark DataFrame in DataFrameReference with id={dataframe_ref._id} is updated"
         )
 
src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
@@ -1784,7 +1784,7 @@ def index(self) -> Union["pd.Index", native_pd.MultiIndex]:
         """
         if self.is_multiindex():
             # Lazy multiindex is not supported
-            logging.warning(
+            _logger.warning(
                 "Lazy MultiIndex is not supported. MultiIndex values are evaluated eagerly and pulled out of Snowflake."
             )
             return self._modin_frame.index_columns_pandas_index()
@@ -3375,7 +3375,7 @@ def sort_rows_by_column_values(
             raise ValueError(f"sort kind must be 'stable' or None (got '{kind}')")
         # Do not show warning for 'quicksort' as this the default option.
         if kind not in ("stable", "quicksort"):
-            logging.warning(
+            _logger.warning(
                 f"choice of sort algorithm '{kind}' is ignored. sort kind must be 'stable', 'quicksort', or None"
             )
 
@@ -13689,7 +13689,7 @@ def sample(
             )
         else:
             sampled_odf = frame.ordered_dataframe.sample(n=n, frac=frac)
-        logging.warning(
+        _logger.warning(
             "Snowpark pandas `sample` will create a temp table for sampled results to keep it deterministic."
         )
         res = SnowflakeQueryCompiler(
5 changes: 2 additions & 3 deletions src/snowflake/snowpark/session.py
@@ -8,7 +8,6 @@
 import decimal
 import inspect
 import json
-import logging
 import os
 import re
 import sys
@@ -1558,7 +1557,7 @@ def _get_dependency_packages(
                     f"but not on your local environment."
                 )
         except Exception as ex:  # pragma: no cover
-            logging.warning(
+            _logger.warning(
                 "Failed to get the local distribution of package %s: %s",
                 package_name,
                 ex,
@@ -3020,7 +3019,7 @@ def create_dataframe(
             )
             schema_query = f"SELECT * FROM {self.get_fully_qualified_name_if_possible(temp_table_name)}"
         except ProgrammingError as e:
-            logging.debug(
+            _logger.debug(
                 f"Cannot create temp table for specified non-nullable schema, fall back to using schema "
                 f"string from select query. Exception: {str(e)}"
             )
6 changes: 4 additions & 2 deletions tests/ast/conftest.py
@@ -36,11 +36,13 @@ def pytest_configure(config):
     pytest.unparser_jar = config.getoption("--unparser-jar")
     if not os.path.exists(pytest.unparser_jar):
         pytest.unparser_jar = None
-        logging.error(
+    pytest.update_expectations = config.getoption("--update-expectations")
+
+    if pytest.unparser_jar is None and pytest.update_expectations:
+        raise RuntimeError(
             f"Unparser JAR not found at {pytest.unparser_jar}. "
             f"Please set the correct path with --unparser-jar or SNOWPARK_UNPARSER_JAR."
         )
-    pytest.update_expectations = config.getoption("--update-expectations")
 
 
 class TestTables:
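
For context, a sketch of how the two pytest options referenced above are typically declared; only the option names and the SNOWPARK_UNPARSER_JAR fallback come from the diff, while the defaults and help texts are assumptions:

import os

def pytest_addoption(parser):
    parser.addoption(
        "--unparser-jar",
        default=os.getenv("SNOWPARK_UNPARSER_JAR"),  # assumed env-var fallback
        help="Path to the Unparser JAR used to render expected output.",
    )
    parser.addoption(
        "--update-expectations",
        action="store_true",
        default=False,
        help="Regenerate expected test outputs; requires the Unparser JAR.",
    )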
16 changes: 9 additions & 7 deletions tests/ast/test_ast_driver.py
@@ -28,6 +28,8 @@
     textproto_to_request,
 )
 
+_logger = logging.getLogger(__name__)
+
 TEST_DIR = pathlib.Path(__file__).parent
 
 DATA_DIR = TEST_DIR / "data"
@@ -201,7 +203,7 @@ def run_test(session, tables):
 
 @pytest.mark.parametrize("test_case", load_test_cases(), ids=idfn)
 def test_ast(session, tables, test_case):
-    logging.info(f"Testing AST encoding with protobuf {google.protobuf.__version__}.")
+    _logger.info(f"Testing AST encoding with protobuf {google.protobuf.__version__}.")
 
     actual, base64_str = run_test(
         session, tables, test_case.filename.replace(".", "_"), test_case.source
@@ -263,7 +265,7 @@ def test_ast(session, tables, test_case):
             line for line in differ.compare(actual_lines, expected_lines)
         ]
 
-        logging.error(
+        _logger.error(
            "expected vs. actual encoded protobuf:\n" + "\n".join(diffed_lines)
         )
 
@@ -278,7 +280,7 @@ def override_time_zone(tz_name: str = "EST") -> None:
 
     tz = dateutil.tz.gettz(tz_name)
     tz_code = tz.tzname(datetime.datetime.now())
-    logging.debug(f"Overriding time zone to {tz_name} ({tz_code}).")
+    _logger.debug(f"Overriding time zone to {tz_name} ({tz_code}).")
 
     if platform.system() != "Windows":
         # This works only under Unix systems.
@@ -301,7 +303,7 @@ def override_time_zone(tz_name: str = "EST") -> None:
         # Possible modification code (not working):
         # Use direct msvcrt.dll override (only for this process, does not work for child processes).
         # cf. https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/tzset?view=msvc-170
-        logging.debug(
+        _logger.debug(
             f"Windows current time (before msvcrt set): {datetime.datetime.now()}"
         )
 
@@ -310,18 +312,18 @@ def override_time_zone(tz_name: str = "EST") -> None:
         cdll.msvcrt._putenv(f"TZ={tz_code}")
         cdll.msvcrt._tzset()
         # If working, we would expect this to show output adjusted to the timezone referred to by tz_code.
-        logging.debug(
+        _logger.debug(
             f"Windows current time (after msvcrt set): {datetime.datetime.now()}"
         )
         # Other python libraries would have been updated then as well.
         from tzlocal import get_localzone
 
-        logging.debug(
+        _logger.debug(
             f"Windows: tzlocal={get_localzone()} TZ={env_tz}, will be using TZ when encoding for AST."
         )
 
     tz_name = datetime.datetime.now(tzlocal()).tzname()
-    logging.debug(f"Local time zone is now: {tz_name}.")
+    _logger.debug(f"Local time zone is now: {tz_name}.")
 
 
 if __name__ == "__main__":
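
The Unix branch elided above relies on time.tzset() re-reading TZ from the environment; a minimal standalone sketch of that mechanism (the printed zone name is what the AST encoder then observes):

import datetime
import os
import time

os.environ["TZ"] = "EST"
time.tzset()  # Unix-only; re-reads TZ, hence the Windows special-casing above
print(datetime.datetime.now().astimezone().tzname())  # -> "EST"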