diff --git a/.changes/next-release/enhancement-Useragent-93485.json b/.changes/next-release/enhancement-Useragent-93485.json new file mode 100644 index 0000000000..09bb785413 --- /dev/null +++ b/.changes/next-release/enhancement-Useragent-93485.json @@ -0,0 +1,5 @@ +{ + "type": "feature", + "category": "Useragent", + "description": "Update User-Agent header format" +} diff --git a/botocore/args.py b/botocore/args.py index b844fce1d9..7f4766bf76 100644 --- a/botocore/args.py +++ b/botocore/args.py @@ -28,6 +28,7 @@ from botocore.regions import EndpointResolverBuiltins as EPRBuiltins from botocore.regions import EndpointRulesetResolver from botocore.signers import RequestSigner +from botocore.useragent import UserAgentString from botocore.utils import ensure_boolean, is_s3_accelerate_url logger = logging.getLogger(__name__) @@ -55,6 +56,9 @@ 'us-west-1', 'us-west-2', ] +# Maximum allowed length of the ``user_agent_appid`` config field. Longer +# values result in a warning-level log message. +USERAGENT_APPID_MAXLEN = 50 class ClientArgsCreator: @@ -66,13 +70,17 @@ def __init__( loader, exceptions_factory, config_store, + user_agent_creator=None, ): self._event_emitter = event_emitter - self._user_agent = user_agent self._response_parser_factory = response_parser_factory self._loader = loader self._exceptions_factory = exceptions_factory self._config_store = config_store + if user_agent_creator is None: + self._session_ua_creator = UserAgentString.from_environment() + else: + self._session_ua_creator = user_agent_creator def get_client_args( self, @@ -159,6 +167,13 @@ def get_client_args( event_emitter, ) + # Copy the session's user agent factory and adds client configuration. 
+ client_ua_creator = self._session_ua_creator.with_client_config( + new_config + ) + supplied_ua = client_config.user_agent if client_config else None + new_config._supplied_user_agent = supplied_ua + return { 'serializer': serializer, 'endpoint': endpoint, @@ -171,6 +186,7 @@ def get_client_args( 'partition': partition, 'exceptions_factory': self._exceptions_factory, 'endpoint_ruleset_resolver': ruleset_resolver, + 'user_agent_creator': client_ua_creator, } def compute_client_args( @@ -193,14 +209,6 @@ def compute_client_args( if raw_value is not None: parameter_validation = ensure_boolean(raw_value) - # Override the user agent if specified in the client config. - user_agent = self._user_agent - if client_config is not None: - if client_config.user_agent is not None: - user_agent = client_config.user_agent - if client_config.user_agent_extra is not None: - user_agent += ' %s' % client_config.user_agent_extra - s3_config = self.compute_s3_config(client_config) endpoint_config = self._compute_endpoint_config( service_name=service_name, @@ -211,13 +219,23 @@ def compute_client_args( s3_config=s3_config, ) endpoint_variant_tags = endpoint_config['metadata'].get('tags', []) + + # Some third-party libraries expect the final user-agent string in + # ``client.meta.config.user_agent``. To maintain backwards + # compatibility, the preliminary user-agent string (before any Config + # object modifications and without request-specific user-agent + # components) is stored in the new Config object's ``user_agent`` + # property but not used by Botocore itself. + preliminary_ua_string = self._session_ua_creator.with_client_config( + client_config + ).to_string() # Create a new client config to be passed to the client based # on the final values. We do not want the user to be able # to try to modify an existing client with a client config. 
config_kwargs = dict( region_name=endpoint_config['region_name'], signature_version=endpoint_config['signature_version'], - user_agent=user_agent, + user_agent=preliminary_ua_string, ) if 'dualstack' in endpoint_variant_tags: config_kwargs.update(use_dualstack_endpoint=True) @@ -234,9 +252,12 @@ def compute_client_args( client_cert=client_config.client_cert, inject_host_prefix=client_config.inject_host_prefix, tcp_keepalive=client_config.tcp_keepalive, + user_agent_extra=client_config.user_agent_extra, + user_agent_appid=client_config.user_agent_appid, ) self._compute_retry_config(config_kwargs) self._compute_connect_timeout(config_kwargs) + self._compute_user_agent_appid_config(config_kwargs) s3_config = self.compute_s3_config(client_config) is_s3_service = self._is_s3_service(service_name) @@ -249,7 +270,6 @@ def compute_client_args( return { 'service_name': service_name, 'parameter_validation': parameter_validation, - 'user_agent': user_agent, 'endpoint_config': endpoint_config, 'protocol': protocol, 'config_kwargs': config_kwargs, @@ -646,3 +666,19 @@ def compute_endpoint_resolver_builtin_defaults( ), EPRBuiltins.SDK_ENDPOINT: given_endpoint, } + + def _compute_user_agent_appid_config(self, config_kwargs): + user_agent_appid = config_kwargs.get('user_agent_appid') + if user_agent_appid is None: + user_agent_appid = self._config_store.get_config_variable( + 'user_agent_appid' + ) + if ( + user_agent_appid is not None + and len(user_agent_appid) > USERAGENT_APPID_MAXLEN + ): + logger.warning( + 'The configured value for user_agent_appid exceeds the ' + f'maximum length of {USERAGENT_APPID_MAXLEN} characters.' 
+ ) + config_kwargs['user_agent_appid'] = user_agent_appid diff --git a/botocore/client.py b/botocore/client.py index 7180779414..ec34bf7c0f 100644 --- a/botocore/client.py +++ b/botocore/client.py @@ -39,6 +39,7 @@ from botocore.model import ServiceModel from botocore.paginate import Paginator from botocore.retries import adaptive, standard +from botocore.useragent import UserAgentString from botocore.utils import ( CachedProperty, EventbridgeSignerSetter, @@ -91,6 +92,7 @@ def __init__( response_parser_factory=None, exceptions_factory=None, config_store=None, + user_agent_creator=None, ): self._loader = loader self._endpoint_resolver = endpoint_resolver @@ -105,6 +107,7 @@ def __init__( # config and environment variables (and potentially more in the # future). self._config_store = config_store + self._user_agent_creator = user_agent_creator def create_client( self, @@ -481,6 +484,7 @@ def _get_client_args( self._loader, self._exceptions_factory, config_store=self._config_store, + user_agent_creator=self._user_agent_creator, ) return args_creator.get_client_args( service_model, @@ -840,6 +844,7 @@ def __init__( partition, exceptions_factory, endpoint_ruleset_resolver=None, + user_agent_creator=None, ): self._serializer = serializer self._endpoint = endpoint @@ -859,6 +864,13 @@ def __init__( ) self._exceptions_factory = exceptions_factory self._exceptions = None + self._user_agent_creator = user_agent_creator + if self._user_agent_creator is None: + self._user_agent_creator = ( + UserAgentString.from_environment().with_client_config( + self._client_config + ) + ) self._register_handlers() def __getattr__(self, item): @@ -996,7 +1008,7 @@ def _convert_to_request_dict( if headers is not None: request_dict['headers'].update(headers) if set_user_agent_header: - user_agent = self._client_config.user_agent + user_agent = self._user_agent_creator.to_string() else: user_agent = None prepare_request_dict( diff --git a/botocore/config.py b/botocore/config.py index 
049ad47535..6ce25f8b60 100644 --- a/botocore/config.py +++ b/botocore/config.py @@ -38,6 +38,12 @@ class Config: :param user_agent_extra: The value to append to the current User-Agent header value. + :type user_agent_appid: str + :param user_agent_appid: A value that gets included in the User-Agent + string in the format "app/{user_agent_appid}". Allowed characters are + ASCII alphanumerics and ``!$%&'*+-.^_`|~``. All other characters will + be replaced by a ``-``. + :type connect_timeout: float or int :param connect_timeout: The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 @@ -201,6 +207,7 @@ class Config: ('signature_version', None), ('user_agent', None), ('user_agent_extra', None), + ('user_agent_appid', None), ('connect_timeout', DEFAULT_TIMEOUT), ('read_timeout', DEFAULT_TIMEOUT), ('parameter_validation', True), diff --git a/botocore/configprovider.py b/botocore/configprovider.py index 6f1d6cf0e7..e2ebd5efe3 100644 --- a/botocore/configprovider.py +++ b/botocore/configprovider.py @@ -139,6 +139,7 @@ # We can't have a default here for v1 because we need to defer to # whatever the defaults are in _retry.json. 'max_attempts': ('max_attempts', 'AWS_MAX_ATTEMPTS', None, int), + 'user_agent_appid': ('sdk_ua_app_id', 'AWS_SDK_UA_APP_ID', None, None), } # A mapping for the s3 specific configuration vars. These are the configuration # vars that typically go in the s3 section of the config file. 
This mapping diff --git a/botocore/session.py b/botocore/session.py index 444f60e8b7..9aa596bdda 100644 --- a/botocore/session.py +++ b/botocore/session.py @@ -64,12 +64,16 @@ from botocore.model import ServiceModel from botocore.parsers import ResponseParserFactory from botocore.regions import EndpointResolver +from botocore.useragent import UserAgentString from botocore.utils import ( EVENT_ALIASES, IMDSRegionProvider, validate_region_name, ) +from botocore.compat import HAS_CRT # noqa + + logger = logging.getLogger(__name__) @@ -165,6 +169,7 @@ def _register_components(self): self._register_monitor() self._register_default_config_resolver() self._register_smart_defaults_factory() + self._register_user_agent_creator() def _register_event_emitter(self): self._components.register_component('event_emitter', self._events) @@ -263,6 +268,10 @@ def _register_monitor(self): 'monitor', self._create_csm_monitor ) + def _register_user_agent_creator(self): + uas = UserAgentString.from_environment() + self._components.register_component('user_agent_creator', uas) + def _create_csm_monitor(self): if self.get_config_variable('csm_enabled'): client_id = self.get_config_variable('csm_client_id') @@ -283,12 +292,8 @@ def _create_csm_monitor(self): return None def _get_crt_version(self): - try: - import awscrt - - return awscrt.__version__ - except AttributeError: - return "Unknown" + user_agent_creator = self.get_component('user_agent_creator') + return user_agent_creator._crt_version or 'Unknown' @property def available_profiles(self): @@ -953,6 +958,15 @@ def create_client( endpoint_resolver = self._get_internal_component('endpoint_resolver') exceptions_factory = self._get_internal_component('exceptions_factory') config_store = self.get_component('config_store') + user_agent_creator = self.get_component('user_agent_creator') + # Session configuration values for the user agent string are applied + # just before each client creation because they may have been modified + # at any 
time between session creation and client creation. + user_agent_creator.set_session_config( + session_user_agent_name=self.user_agent_name, + session_user_agent_version=self.user_agent_version, + session_user_agent_extra=self.user_agent_extra, + ) defaults_mode = self._resolve_defaults_mode(config, config_store) if defaults_mode != 'legacy': smart_defaults_factory = self._get_internal_component( @@ -972,6 +986,7 @@ def create_client( response_parser_factory, exceptions_factory, config_store, + user_agent_creator=user_agent_creator, ) client = client_creator.create_client( service_name=service_name, diff --git a/botocore/useragent.py b/botocore/useragent.py new file mode 100644 index 0000000000..8cfc731ee6 --- /dev/null +++ b/botocore/useragent.py @@ -0,0 +1,490 @@ +# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +""" +NOTE: All classes and functions in this module are considered private and are +subject to abrupt breaking changes. Please do not use them directly. + +To modify the User-Agent header sent by botocore, use one of these +configuration options: +* The ``AWS_SDK_UA_APP_ID`` environment variable. +* The ``sdk_ua_app_id`` setting in the shared AWS config file. +* The ``user_agent_appid`` field in the :py:class:`botocore.config.Config`. +* The ``user_agent_extra`` field in the :py:class:`botocore.config.Config`. 
+ +""" +import os +import platform +from copy import copy +from string import ascii_letters, digits +from typing import NamedTuple, Optional + +from botocore import __version__ as botocore_version +from botocore.compat import HAS_CRT + +_USERAGENT_ALLOWED_CHARACTERS = ascii_letters + digits + "!$%&'*+-.^_`|~" +_USERAGENT_ALLOWED_OS_NAMES = ( + 'windows', + 'linux', + 'macos', + 'android', + 'ios', + 'watchos', + 'tvos', + 'other', +) +_USERAGENT_PLATFORM_NAME_MAPPINGS = {'darwin': 'macos'} +# The name by which botocore is identified in the User-Agent header. While most +# AWS SDKs follow a naming pattern of "aws-sdk-*", botocore and boto3 continue +# using their existing values. Uses uppercase "B" with all other characters +# lowercase. +_USERAGENT_SDK_NAME = 'Botocore' + + +def sanitize_user_agent_string_component(raw_str, allow_hash): + """Replaces all not allowed characters in the string with a dash ("-"). + + Allowed characters are ASCII alphanumerics and ``!$%&'*+-.^_`|~``. If + ``allow_hash`` is ``True``, "#" is also allowed. + + :type raw_str: str + :param raw_str: The input string to be sanitized. + + :type allow_hash: bool + :param allow_hash: Whether "#" is considered an allowed character. + """ + return ''.join( + c + if c in _USERAGENT_ALLOWED_CHARACTERS or (allow_hash and c == '#') + else '-' + for c in raw_str + ) + + +class UserAgentComponent(NamedTuple): + """ + Component of a Botocore User-Agent header string in the standard format. + + Each component consists of a prefix, a name, and a value. In the string + representation these are combined in the format ``prefix/name#value``. + + This class is considered private and is subject to abrupt breaking changes. 
+ """ + + prefix: str + name: str + value: Optional[str] = None + + def to_string(self): + """Create string like 'prefix/name#value' from a UserAgentComponent.""" + clean_prefix = sanitize_user_agent_string_component( + self.prefix, allow_hash=True + ) + clean_name = sanitize_user_agent_string_component( + self.name, allow_hash=False + ) + if self.value is None or self.value == '': + return f'{clean_prefix}/{clean_name}' + clean_value = sanitize_user_agent_string_component( + self.value, allow_hash=True + ) + return f'{clean_prefix}/{clean_name}#{clean_value}' + + +class RawStringUserAgentComponent: + """ + UserAgentComponent interface wrapper around ``str``. + + Use for User-Agent header components that are not constructed from + prefix+name+value but instead are provided as strings. No sanitization is + performed. + """ + + def __init__(self, value): + self._value = value + + def to_string(self): + return self._value + + +class UserAgentString: + """ + Generator for AWS SDK User-Agent header strings. + + The User-Agent header format contains information from session, client, and + request context. ``UserAgentString`` provides methods for collecting the + information and ``to_string`` for assembling it into the standardized + string format. + + Example usage: + + ua_session = UserAgentString.from_environment() + ua_session.set_session_config(...) + ua_client = ua_session.with_client_config(Config(...)) + ua_string = ua_client.to_string() + + For testing or when information from all sources is available at the same + time, the methods can be chained: + + ua_string = ( + UserAgentString + .from_environment() + .set_session_config(...) 
+ .with_client_config(Config(...)) + .to_string() + ) + + """ + + def __init__( + self, + platform_name, + platform_version, + platform_machine, + python_version, + python_implementation, + execution_env, + crt_version=None, + ): + """ + :type platform_name: str + :param platform_name: Name of the operating system or equivalent + platform name. Should be sourced from :py:meth:`platform.system`. + :type platform_version: str + :param platform_version: Version of the operating system or equivalent + platform name. Should be sourced from :py:meth:`platform.release`. + :type platform_machine: str + :param platform_machine: Processor architecture or machine type. For + example "x86_64". Should be sourced from :py:meth:`platform.machine`. + :type python_version: str + :param python_version: Version of the python implementation as str. + Should be sourced from :py:meth:`platform.python_version`. + :type python_implementation: str + :param python_implementation: Name of the python implementation. + Should be sourced from :py:meth:`platform.python_implementation`. + :type execution_env: str + :param execution_env: The value of the AWS execution environment. + Should be sourced from the ``AWS_EXECUTION_ENV`` environment + variable. + :type crt_version: str + :param crt_version: Version string of awscrt package, if installed. 
+ """ + self._platform_name = platform_name + self._platform_version = platform_version + self._platform_machine = platform_machine + self._python_version = python_version + self._python_implementation = python_implementation + self._execution_env = execution_env + self._crt_version = crt_version + + # Components that can be added with ``set_session_config()`` + self._session_user_agent_name = None + self._session_user_agent_version = None + self._session_user_agent_extra = None + + self._client_config = None + self._uses_paginator = None + self._uses_waiter = None + self._uses_resource = None + + @classmethod + def from_environment(cls): + crt_version = None + if HAS_CRT: + crt_version = _get_crt_version() or 'Unknown' + return cls( + platform_name=platform.system(), + platform_version=platform.release(), + platform_machine=platform.machine(), + python_version=platform.python_version(), + python_implementation=platform.python_implementation(), + execution_env=os.environ.get('AWS_EXECUTION_ENV'), + crt_version=crt_version, + ) + + def set_session_config( + self, + session_user_agent_name, + session_user_agent_version, + session_user_agent_extra, + ): + """ + Set the user agent configuration values that apply at session level. + + :param session_user_agent_name: The user agent name configured in the + :py:class:`botocore.session.Session` object. For backwards + compatibility, this will always be at the beginning of the + User-Agent string, together with ``session_user_agent_version``. + :param session_user_agent_version: The user agent version configured in + the :py:class:`botocore.session.Session` object. + :param session_user_agent_extra: The user agent "extra" configured in + the :py:class:`botocore.session.Session` object. 
+ """ + self._session_user_agent_name = session_user_agent_name + self._session_user_agent_version = session_user_agent_version + self._session_user_agent_extra = session_user_agent_extra + return self + + def with_client_config(self, client_config): + """ + Create a copy with all original values and client-specific values. + + :type client_config: botocore.config.Config + :param client_config: The client configuration object. + """ + cp = copy(self) + cp._client_config = client_config + return cp + + def to_string(self): + """ + Build User-Agent header string from the object's properties. + """ + config_ua_override = None + if self._client_config: + if hasattr(self._client_config, '_supplied_user_agent'): + config_ua_override = self._client_config._supplied_user_agent + else: + config_ua_override = self._client_config.user_agent + + if config_ua_override is not None: + return self._build_legacy_ua_string(config_ua_override) + + components = [ + *self._build_sdk_metadata(), + RawStringUserAgentComponent('ua/2.0'), + *self._build_os_metadata(), + *self._build_architecture_metadata(), + *self._build_language_metadata(), + *self._build_execution_env_metadata(), + *self._build_feature_metadata(), + *self._build_config_metadata(), + *self._build_app_id(), + *self._build_extra(), + ] + return ' '.join([comp.to_string() for comp in components]) + + def _build_sdk_metadata(self): + """ + Build the SDK name and version component of the User-Agent header. + + For backwards-compatibility both session-level and client-level config + of custom tool names are honored. If this removes the Botocore + information from the start of the string, Botocore's name and version + are included as a separate field with "md" prefix. 
+ """ + sdk_md = [] + if ( + self._session_user_agent_name + and self._session_user_agent_version + and ( + self._session_user_agent_name != _USERAGENT_SDK_NAME + or self._session_user_agent_version != botocore_version + ) + ): + sdk_md.extend( + [ + UserAgentComponent( + self._session_user_agent_name, + self._session_user_agent_version, + ), + UserAgentComponent( + 'md', _USERAGENT_SDK_NAME, botocore_version + ), + ] + ) + else: + sdk_md.append( + UserAgentComponent(_USERAGENT_SDK_NAME, botocore_version) + ) + + if self._crt_version is not None: + sdk_md.append( + UserAgentComponent('md', 'awscrt', self._crt_version) + ) + + return sdk_md + + def _build_os_metadata(self): + """ + Build the OS/platform components of the User-Agent header string. + + For recognized platform names that match or map to an entry in the list + of standardized OS names, a single component with prefix "os" is + returned. Otherwise, one component "os/other" is returned and a second + with prefix "md" and the raw platform name. + + String representations of example return values: + * ``os/macos#10.13.6`` + * ``os/linux`` + * ``os/other`` + * ``os/other md/foobar#1.2.3`` + """ + if self._platform_name is None: + return [UserAgentComponent('os', 'other')] + + plt_name_lower = self._platform_name.lower() + if plt_name_lower in _USERAGENT_ALLOWED_OS_NAMES: + os_family = plt_name_lower + elif plt_name_lower in _USERAGENT_PLATFORM_NAME_MAPPINGS: + os_family = _USERAGENT_PLATFORM_NAME_MAPPINGS[plt_name_lower] + else: + os_family = None + + if os_family is not None: + return [ + UserAgentComponent('os', os_family, self._platform_version) + ] + else: + return [ + UserAgentComponent('os', 'other'), + UserAgentComponent( + 'md', self._platform_name, self._platform_version + ), + ] + + def _build_architecture_metadata(self): + """ + Build architecture component of the User-Agent header string. + + Returns the machine type with prefix "md" and name "arch", if one is + available. 
Common values include "x86_64", "arm64", "i386". + """ + if self._platform_machine: + return [ + UserAgentComponent( + 'md', 'arch', self._platform_machine.lower() + ) + ] + return [] + + def _build_language_metadata(self): + """ + Build the language components of the User-Agent header string. + + Returns the Python version in a component with prefix "lang" and name + "python". The Python implementation (e.g. CPython, PyPy) is returned as + separate metadata component with prefix "md" and name "pyimpl". + + String representation of an example return value: + ``lang/python#3.10.4 md/pyimpl#CPython`` + """ + lang_md = [ + UserAgentComponent('lang', 'python', self._python_version), + ] + if self._python_implementation: + lang_md.append( + UserAgentComponent('md', 'pyimpl', self._python_implementation) + ) + return lang_md + + def _build_execution_env_metadata(self): + """ + Build the execution environment component of the User-Agent header. + + Returns a single component prefixed with "exec-env", usually sourced + from the environment variable AWS_EXECUTION_ENV. + """ + if self._execution_env: + return [UserAgentComponent('exec-env', self._execution_env)] + else: + return [] + + def _build_feature_metadata(self): + """ + Build the features components of the User-Agent header string. + + Botocore currently does not report any features. This may change in a + future version. + """ + return [] + + def _build_config_metadata(self): + """ + Build the configuration components of the User-Agent header string. + + Returns a list of components with prefix "cfg" followed by the config + setting name and its value. Tracked configuration settings may be + added or removed in future versions. 
+ """ + if not self._client_config or not self._client_config.retries: + return [] + retry_mode = self._client_config.retries.get('mode') + cfg_md = [UserAgentComponent('cfg', 'retry-mode', retry_mode)] + if self._client_config.endpoint_discovery_enabled: + cfg_md.append(UserAgentComponent('cfg', 'endpoint-discovery')) + return cfg_md + + def _build_app_id(self): + """ + Build app component of the User-Agent header string. + + Returns a single component with prefix "app" and value sourced from the + ``user_agent_appid`` field in :py:class:`botocore.config.Config` or + the ``sdk_ua_app_id`` setting in the shared configuration file, or the + ``AWS_SDK_UA_APP_ID`` environment variable. These are the recommended + ways for apps built with Botocore to insert their identifier into the + User-Agent header. + """ + if self._client_config and self._client_config.user_agent_appid: + return [ + UserAgentComponent('app', self._client_config.user_agent_appid) + ] + else: + return [] + + def _build_extra(self): + """User agent string components based on legacy "extra" settings. + + Creates components from the session-level and client-level + ``user_agent_extra`` setting, if present. Both are passed through + verbatim and should be appended at the end of the string. + + The preferred way to inject application-specific information into + botocore's User-Agent header string is the ``user_agent_appid`` field + in :py:class:`botocore.config.Config`. The ``AWS_SDK_UA_APP_ID`` + environment variable and the ``sdk_ua_app_id`` configuration file + setting are alternative ways to set the ``user_agent_appid`` config. 
+ """ + extra = [] + if self._session_user_agent_extra: + extra.append( + RawStringUserAgentComponent(self._session_user_agent_extra) + ) + if self._client_config and self._client_config.user_agent_extra: + extra.append( + RawStringUserAgentComponent( + self._client_config.user_agent_extra + ) + ) + return extra + + def _build_legacy_ua_string(self, config_ua_override): + components = [config_ua_override] + if self._session_user_agent_extra: + components.append(self._session_user_agent_extra) + if self._client_config.user_agent_extra: + components.append(self._client_config.user_agent_extra) + return ' '.join(components) + + +def _get_crt_version(): + """ + This function is considered private and is subject to abrupt breaking + changes. + """ + try: + import awscrt + + return awscrt.__version__ + except AttributeError: + return None diff --git a/tests/functional/test_useragent.py b/tests/functional/test_useragent.py new file mode 100644 index 0000000000..d69451253c --- /dev/null +++ b/tests/functional/test_useragent.py @@ -0,0 +1,301 @@ +# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +import logging +from itertools import product + +import pytest + +from botocore import __version__ as botocore_version +from botocore.config import Config +from tests import ClientHTTPStubber + + +class UACapHTTPStubber(ClientHTTPStubber): + """ + Wrapper for ClientHTTPStubber that captures UA header from one request. 
+ """ + + def __init__(self, obj_with_event_emitter): + super().__init__(obj_with_event_emitter, strict=False) + self.add_response() # expect exactly one request + + @property + def captured_ua_string(self): + if len(self.requests) > 0: + return self.requests[0].headers['User-Agent'].decode() + return None + + +@pytest.mark.parametrize( + 'sess_name, sess_version, sess_extra, cfg_extra, cfg_appid', + # Produce every combination of User-Agent related config settings other + # than Config.user_agent which will always be set in this test. + product( + ('sess_name', None), + ('sess_version', None), + ('sess_extra', None), + ('cfg_extra', None), + ('cfg_appid', None), + ), +) +def test_user_agent_from_config_replaces_default( + sess_name, + sess_version, + sess_extra, + cfg_extra, + cfg_appid, + patched_session, +): + # Config.user_agent replaces all parts of the regular User-Agent header + # format except for itself and "extras" set in Session and Config. This + # behavior exists to maintain backwards compatibility for clients who + # expect an exact User-Agent header value. 
+ expected_str = 'my user agent str' + if sess_name: + patched_session.user_agent_name = sess_name + if sess_version: + patched_session.user_agent_version = sess_version + if sess_extra: + patched_session.user_agent_extra = sess_extra + expected_str += f' {sess_extra}' + client_cfg = Config( + user_agent='my user agent str', + user_agent_extra=cfg_extra, + user_agent_appid=cfg_appid, + ) + if cfg_extra: + expected_str += f' {cfg_extra}' + client_s3 = patched_session.create_client('s3', config=client_cfg) + with UACapHTTPStubber(client_s3) as stub_client: + client_s3.list_buckets() + + assert stub_client.captured_ua_string == expected_str + + +@pytest.mark.parametrize( + 'sess_name, sess_version, cfg_appid', + # Produce every combination of User-Agent related config settings other + # than Config.user_agent which is never set in this test + # (``test_user_agent_from_config_replaces_default`` covers all cases where + # it is set) and Session.user_agent_extra and Config.user_agent_extra + # which both are always set in this test + product( + ('sess_name', None), + ('sess_version', None), + ('cfg_appid', None), + ), +) +def test_user_agent_includes_extra( + sess_name, + sess_version, + cfg_appid, + patched_session, +): + # Libraries and apps can use the ``Config.user_agent_extra`` and + # ``Session.user_agent_extra`` to append arbitrary data to the User-Agent + # header. Unless Config.user_agent is also set, these two fields should + # always appear at the end of the header value. 
+ if sess_name: + patched_session.user_agent_name = sess_name + if sess_version: + patched_session.user_agent_version = sess_version + patched_session.user_agent_extra = "sess_extra" + client_cfg = Config( + user_agent=None, + user_agent_extra='cfg_extra', + user_agent_appid=cfg_appid, + ) + client_s3 = patched_session.create_client('s3', config=client_cfg) + with UACapHTTPStubber(client_s3) as stub_client: + client_s3.list_buckets() + + assert stub_client.captured_ua_string.endswith(' sess_extra cfg_extra') + + +@pytest.mark.parametrize( + 'sess_name, sess_version, sess_extra, cfg_extra', + # Produce every combination of User-Agent related config settings other + # than Config.user_agent which is never set in this test and + # Config.user_agent_appid which is always set in this test. + product( + ('sess_name', None), + ('sess_version', None), + ('sess_extra', None), + ('cfg_extra', None), + ), +) +def test_user_agent_includes_appid( + sess_name, + sess_version, + sess_extra, + cfg_extra, + patched_session, +): + # The User-Agent header string should always include the value set in + # ``Config.user_agent_appid``, unless ``Config.user_agent`` is also set + # (this latter case is covered in ``test_user_agent_from_config_replaces_default``). 
+ if sess_name: + patched_session.user_agent_name = sess_name + if sess_version: + patched_session.user_agent_version = sess_version + if sess_extra: + patched_session.user_agent_extra = sess_extra + client_cfg = Config( + user_agent=None, + user_agent_appid='123456', + user_agent_extra=cfg_extra, + ) + client_s3 = patched_session.create_client('s3', config=client_cfg) + with UACapHTTPStubber(client_s3) as stub_client: + client_s3.list_buckets() + + uafields = stub_client.captured_ua_string.split(' ') + assert 'app/123456' in uafields + + +def test_user_agent_long_appid_yields_warning(patched_session, caplog): + # user_agent_appid config values longer than 50 characters should result + # in a warning + sixtychars = '000000000011111111112222222222333333333344444444445555555555' + assert len(sixtychars) > 50 + client_cfg = Config(user_agent_appid=sixtychars) + client_s3 = patched_session.create_client('s3', config=client_cfg) + with UACapHTTPStubber(client_s3): + with caplog.at_level(logging.INFO): + client_s3.list_buckets() + + assert ( + 'The configured value for user_agent_appid exceeds the maximum length' + in caplog.text + ) + + +def test_user_agent_appid_gets_sanitized(patched_session, caplog): + # Parentheses are not valid characters in the user agent string + badchars = '1234(' + client_cfg = Config(user_agent_appid=badchars) + client_s3 = patched_session.create_client('s3', config=client_cfg) + + with UACapHTTPStubber(client_s3) as stub_client: + with caplog.at_level(logging.INFO): + client_s3.list_buckets() + + # the disallowed "(" character should be replaced by a dash + uafields = stub_client.captured_ua_string.split(' ') + assert 'app/1234-' in uafields + + +def test_boto3_user_agent(patched_session): + # emulate Boto3's behavior + botocore_info = f'Botocore/{patched_session.user_agent_version}' + if patched_session.user_agent_extra: + patched_session.user_agent_extra += ' ' + botocore_info + else: + patched_session.user_agent_extra = botocore_info + 
patched_session.user_agent_name = 'Boto3' + patched_session.user_agent_version = '9.9.9' # Boto3 version + + client_s3 = patched_session.create_client('s3') + with UACapHTTPStubber(client_s3) as stub_client: + client_s3.list_buckets() + # The user agent string should start with "Boto3/9.9.9" from the setting + # above, followed by Botocore's version info as metadata ("md/..."). + assert stub_client.captured_ua_string.startswith( + f'Boto3/9.9.9 md/Botocore#{botocore_version} ' + ) + # The regular User-Agent header components for platform, language, ... + # should also be present: + assert ' ua/2.0 ' in stub_client.captured_ua_string + assert ' os/' in stub_client.captured_ua_string + assert ' lang/' in stub_client.captured_ua_string + assert ' cfg/' in stub_client.captured_ua_string + + +def test_awscli_v1_user_agent(patched_session): + # emulate behavior from awscli.clidriver._set_user_agent_for_session + patched_session.user_agent_name = 'aws-cli' + patched_session.user_agent_version = '1.1.1' + patched_session.user_agent_extra = f'botocore/{botocore_version}' + + client_s3 = patched_session.create_client('s3') + with UACapHTTPStubber(client_s3) as stub_client: + client_s3.list_buckets() + + # The user agent string should start with "aws-cli/1.1.1" from the setting + # above, followed by Botocore's version info as metadata ("md/..."). + assert stub_client.captured_ua_string.startswith( + f'aws-cli/1.1.1 md/Botocore#{botocore_version} ' + ) + # The regular User-Agent header components for platform, language, ... 
+ # should also be present: + assert ' ua/2.0 ' in stub_client.captured_ua_string + assert ' os/' in stub_client.captured_ua_string + assert ' lang/' in stub_client.captured_ua_string + assert ' cfg/' in stub_client.captured_ua_string + + +def test_awscli_v2_user_agent(patched_session): + # emulate behavior from awscli.clidriver._set_user_agent_for_session + patched_session.user_agent_name = 'aws-cli' + patched_session.user_agent_version = '2.2.2' + patched_session.user_agent_extra = 'sources/x86_64' + # awscli.clidriver.AWSCLIEntrypoint._run_driver + patched_session.user_agent_extra += ' prompt/off' + # from awscli.clidriver.ServiceOperation._add_customization_to_user_agent + patched_session.user_agent_extra += ' command/service-name.op-name' + + client_s3 = patched_session.create_client('s3') + with UACapHTTPStubber(client_s3) as stub_client: + client_s3.list_buckets() + # The user agent string should start with "aws-cli/2.2.2" from the setting + # above, followed by Botocore's version info as metadata ("md/..."). + assert stub_client.captured_ua_string.startswith( + f'aws-cli/2.2.2 md/Botocore#{botocore_version} ' + ) + assert stub_client.captured_ua_string.endswith( + ' sources/x86_64 prompt/off command/service-name.op-name' + ) + # The regular User-Agent header components for platform, language, ... + # should also be present: + assert ' ua/2.0 ' in stub_client.captured_ua_string + assert ' os/' in stub_client.captured_ua_string + assert ' lang/' in stub_client.captured_ua_string + assert ' cfg/' in stub_client.captured_ua_string + + +def test_s3transfer_user_agent(patched_session): + # emulate behavior from s3transfer ClientFactory + cfg = Config(user_agent_extra='s3transfer/0.1.2 processpool') + client = patched_session.create_client('s3', config=cfg) + # s3transfer tests make assertions against the _modified_ `user_agent` field + # in ``client.meta.config.user_agent``. See for example + # ``tests.unit.test_processpool.TestClientFactory`` in s3transfer. 
+ assert 'processpool' in client.meta.config.user_agent + + +def test_chalice_user_agent(patched_session): + # emulate behavior from chalice's cli.factory._add_chalice_user_agent + suffix = '{}/{}'.format( + patched_session.user_agent_name, + patched_session.user_agent_version, + ) + patched_session.user_agent_name = 'aws-chalice' + patched_session.user_agent_version = '0.1.2' + patched_session.user_agent_extra = suffix + client_s3 = patched_session.create_client('s3') + + with UACapHTTPStubber(client_s3) as stub_client: + client_s3.list_buckets() + assert stub_client.captured_ua_string.startswith( + f'aws-chalice/0.1.2 md/Botocore#{botocore_version} ' + ) diff --git a/tests/unit/test_args.py b/tests/unit/test_args.py index 832a689e12..e82226b000 100644 --- a/tests/unit/test_args.py +++ b/tests/unit/test_args.py @@ -19,6 +19,7 @@ from botocore.configprovider import ConfigValueStore from botocore.hooks import HierarchicalEmitter from botocore.model import ServiceModel +from botocore.useragent import UserAgentString from tests import mock, unittest @@ -26,8 +27,23 @@ class TestCreateClientArgs(unittest.TestCase): def setUp(self): self.event_emitter = mock.Mock(HierarchicalEmitter) self.config_store = ConfigValueStore() + user_agent_creator = UserAgentString( + platform_name=None, + platform_version=None, + platform_machine=None, + python_version=None, + python_implementation=None, + execution_env=None, + crt_version=None, + ) self.args_create = args.ClientArgsCreator( - self.event_emitter, None, None, None, None, self.config_store + event_emitter=self.event_emitter, + user_agent=None, + response_parser_factory=None, + loader=None, + exceptions_factory=None, + config_store=self.config_store, + user_agent_creator=user_agent_creator, ) self.service_name = 'ec2' self.region = 'us-west-2' @@ -518,6 +534,15 @@ class TestEndpointResolverBuiltins(unittest.TestCase): def setUp(self): event_emitter = mock.Mock(HierarchicalEmitter) self.config_store = ConfigValueStore() + 
user_agent_creator = UserAgentString( + platform_name=None, + platform_version=None, + platform_machine=None, + python_version=None, + python_implementation=None, + execution_env=None, + crt_version=None, + ) self.args_create = args.ClientArgsCreator( event_emitter=event_emitter, user_agent=None, @@ -525,6 +550,7 @@ def setUp(self): loader=None, exceptions_factory=None, config_store=self.config_store, + user_agent_creator=user_agent_creator, ) self.bridge = ClientEndpointBridge( endpoint_resolver=mock.Mock(), diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index eb7a30787b..c868615b8d 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -35,6 +35,7 @@ UnknownSignatureVersionError, ) from botocore.stub import Stubber +from botocore.useragent import UserAgentString from tests import mock, unittest @@ -169,6 +170,7 @@ def create_client_creator( endpoint_prefix=None, exceptions_factory=None, config_store=None, + user_agent_creator=None, ): if event_emitter is None: event_emitter = hooks.HierarchicalEmitter() @@ -187,6 +189,14 @@ def create_client_creator( exceptions_factory = ClientExceptionsFactory() if config_store is None: config_store = self.config_store + if user_agent_creator is None: + user_agent_creator = ( + UserAgentString.from_environment().set_session_config( + session_user_agent_name='MyUserAgent', + session_user_agent_version='1.2.3-rc5', + session_user_agent_extra=None, + ) + ) creator = client.ClientCreator( self.loader, self.resolver, @@ -197,6 +207,7 @@ def create_client_creator( response_parser_factory, exceptions_factory, config_store, + user_agent_creator, ) return creator @@ -647,7 +658,7 @@ def test_client_user_agent_in_request(self): k.lower(): v for k, v in self.endpoint.make_request.call_args[0][1].items() } - self.assertEqual(params['headers']['User-Agent'], 'user-agent') + self.assertIn('MyUserAgent/1.2.3', params['headers']['User-Agent']) def test_client_custom_user_agent_in_request(self): creator = 
self.create_client_creator() @@ -673,7 +684,7 @@ def test_client_custom_user_agent_extra_in_request(self): ) service_client.test_operation(Foo='one') headers = self.endpoint.make_request.call_args[0][1]['headers'] - self.assertEqual(headers['User-Agent'], 'user-agent extrastuff') + self.assertTrue(headers['User-Agent'].endswith('extrastuff')) def test_client_registers_request_created_handler(self): event_emitter = self.create_mock_emitter() diff --git a/tests/unit/test_useragent.py b/tests/unit/test_useragent.py new file mode 100644 index 0000000000..640a2f7469 --- /dev/null +++ b/tests/unit/test_useragent.py @@ -0,0 +1,182 @@ +# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +import platform + +import pytest + +from botocore import __version__ as botocore_version +from botocore.config import Config +from botocore.useragent import ( + UserAgentString, + sanitize_user_agent_string_component, +) + +from .. 
import requires_crt + + +@pytest.mark.parametrize( + 'raw_str, allow_hash, expected_str', + [ + ('foo', False, 'foo'), + ('foo', True, 'foo'), + ('ExampleFramework (1.2.3)', False, 'ExampleFramework--1.2.3-'), + ('foo#1.2.3', False, 'foo-1.2.3'), + ('foo#1.2.3', True, 'foo#1.2.3'), + ('', False, ''), + ('', True, ''), + ('', False, ''), + ('#', False, '-'), + ('#', True, '#'), + (' ', False, '-'), + (' ', False, '--'), + ('@=[]{ }/\\øß©', True, '------------'), + ( + 'Java_HotSpot_(TM)_64-Bit_Server_VM/25.151-b12', + True, + 'Java_HotSpot_-TM-_64-Bit_Server_VM-25.151-b12', + ), + ], +) +def test_sanitize_ua_string_component(raw_str, allow_hash, expected_str): + actual_str = sanitize_user_agent_string_component(raw_str, allow_hash) + assert actual_str == expected_str + + +def test_basic_user_agent_string(): + ua = UserAgentString( + platform_name='linux', + platform_version='1.2.3-foo', + platform_machine='x86_64', + python_version='3.8.20', + python_implementation='Dpython', + execution_env='AWS_Lambda_python3.8', + crt_version='Unknown', + ).with_client_config( + Config(retries={'mode': 'legacy'}, user_agent_appid='fooapp') + ) + + actual = ua.to_string() + expected = ( + f'Botocore/{botocore_version} ' + 'md/awscrt#Unknown ' + 'ua/2.0 ' + 'os/linux#1.2.3-foo ' + 'md/arch#x86_64 ' + 'lang/python#3.8.20 ' + 'md/pyimpl#Dpython ' + 'exec-env/AWS_Lambda_python3.8 ' + 'cfg/retry-mode#legacy ' + 'app/fooapp' + ) + assert actual == expected + + +def test_shared_test_case(): + # This test case is shared across AWS SDKs. 
+ + uas = UserAgentString( + platform_name="Linux", + platform_version="5.4.228-131.415.AMZN2.X86_64", + platform_machine="", + python_version="4.3.2", + python_implementation=None, + execution_env='lambda', + ).with_client_config( + Config(user_agent_appid='123456', retries={'mode': 'standard'}) + ) + actual = uas.to_string().split(' ') + expected_in_exact_order = [ + f"Botocore/{botocore_version}", + "ua/2.0", + "os/linux#5.4.228-131.415.AMZN2.X86_64", + "lang/python#4.3.2", + "exec-env/lambda", + ] + expected_in_any_order = [ + "cfg/retry-mode#standard", + "app/123456", + ] + for el in [*expected_in_exact_order, *expected_in_any_order]: + assert el in actual + + indices = [actual.index(el) for el in expected_in_exact_order] + assert indices == list(sorted(indices)), 'Elements were found out of order' + + +def test_user_agent_string_with_missing_information(): + # Even when collecting information from the environment fails completely, + # some minimal string should be generated. + uas = UserAgentString( + platform_name=None, + platform_version=None, + platform_machine=None, + python_version=None, + python_implementation=None, + execution_env=None, + crt_version=None, + ).with_client_config(Config()) + actual = uas.to_string() + assert actual == f'Botocore/{botocore_version} ua/2.0 os/other lang/python' + + +def test_from_environment(monkeypatch): + monkeypatch.setenv('AWS_EXECUTION_ENV', 'lambda') + monkeypatch.setattr(platform, 'system', lambda: 'Linux') + monkeypatch.setattr( + platform, 'release', lambda: '5.4.228-131.415.AMZN2.X86_64' + ) + monkeypatch.setattr(platform, 'python_version', lambda: '4.3.2') + monkeypatch.setattr(platform, 'python_implementation', lambda: 'Cpython') + + uas = UserAgentString.from_environment() + + assert uas._execution_env == 'lambda' + assert uas._platform_name == 'Linux' + assert uas._platform_version == '5.4.228-131.415.AMZN2.X86_64' + assert uas._python_version == '4.3.2' + assert uas._python_implementation == 'Cpython' + + 
+@requires_crt() +def test_from_environment_can_read_crt_version(monkeypatch): + import awscrt + + monkeypatch.setattr(awscrt, '__version__', 'a.b.c') + uas = UserAgentString.from_environment() + assert uas._crt_version == 'a.b.c' + + +def test_from_environment_with_most_values_not_available(monkeypatch): + # Asserts that ``None`` values are properly passed through to the + # UserAgentString class. There are separate tests to assert that + # ``UserAgentString.to_string()`` can handle ``None`` values. + monkeypatch.delenv('AWS_EXECUTION_ENV', raising=False) + monkeypatch.setattr(platform, 'system', lambda: None) + monkeypatch.setattr(platform, 'release', lambda: None) + monkeypatch.setattr(platform, 'python_version', lambda: None) + monkeypatch.setattr(platform, 'python_implementation', lambda: None) + + uas = UserAgentString.from_environment() + + assert uas._execution_env is None + assert uas._platform_name is None + assert uas._platform_version is None + assert uas._python_version is None + assert uas._python_implementation is None + + +def test_from_environment_unknown_platform(monkeypatch): + monkeypatch.setattr(platform, 'system', lambda: 'FooOS') + monkeypatch.setattr(platform, 'release', lambda: '0.0.1') + uas = UserAgentString.from_environment() + assert ' os/other md/FooOS#0.0.1 ' in uas.to_string()