Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enabled TLS certificate verification in get_vault_token() #357

Merged
merged 6 commits into from
Feb 23, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@ on:
branches: [ master ]
pull_request:
branches: [ master ]
workflow_dispatch:

jobs:
build:
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# BigFlow changelog

## Version 1.6.0

### Fixed

* Enabled vault endpoint TLS certificate verification by default for `bf build` and `bf deploy` commands. This fixes the MITM attack vulnerability. Kudos to Konstantin Weddige for reporting.

### Breaking changes

* Default vault endpoint TLS certificate verification for `bf build` and `bf deploy` may fail in some environments. Use `-vev`/`--vault-endpoint-verify` option to disable or provide path to custom trusted certificates or CA certificates. Disabling makes execution vulnerable for MITM attacks and is discouraged - do it only when justified and in trusted environments. See [https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification](https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification) for details.

## Version 1.5.4

### Changed
2 changes: 1 addition & 1 deletion bigflow/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.5.4'
__version__ = '1.6.0.dev1'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should be changed to '1.6.0'?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do on a release branch

2 changes: 2 additions & 0 deletions bigflow/build/operate.py
Original file line number Diff line number Diff line change
@@ -95,6 +95,7 @@ def _build_docker_image(
auth_method=cache_params.auth_method or bigflow.deploy.AuthorizationType.LOCAL_ACCOUNT,
vault_endpoint=cache_params.vault_endpoint,
vault_secret=cache_params.vault_secret,
vault_endpoint_verify=cache_params.vault_endpoint_verify
)

for image in (cache_params.cache_from_image or []):
@@ -120,6 +121,7 @@ class BuildImageCacheParams:
vault_secret: str | None = None
cache_from_version: list[str] | None = None
cache_from_image: list[str] | None = None
vault_endpoint_verify: str | bool | None = None


def build_image(
28 changes: 25 additions & 3 deletions bigflow/cli.py
Original file line number Diff line number Diff line change
@@ -13,8 +13,7 @@
from importlib import import_module
from pathlib import Path
from types import ModuleType
from typing import Tuple, Iterator
from typing import Optional
from typing import Tuple, Iterator, Optional
import fnmatch

import bigflow as bf
@@ -384,6 +383,13 @@ def _add_parsers_common_arguments(parser):


def _add_auth_parsers_arguments(parser):
class VaultEndpointVerifyAction(argparse.Action):
def __call__(self, parser, args, values, option_string=None):
if values in ['true', 'false']:
setattr(args, self.dest, values == 'true')
else:
setattr(args, self.dest, str(values))

parser.add_argument('-a', '--auth-method',
type=bigflow.deploy.AuthorizationType,
default='local_account',
@@ -399,6 +405,17 @@ def _add_auth_parsers_arguments(parser):
'Required if auth-method is vault. '
'If not set, will be read from deployment_config.py.'
)
parser.add_argument('-vev', '--vault-endpoint-verify',
type=str,
action=VaultEndpointVerifyAction,
help='Can be "true", "false", a path to certificate PEM file or a path to '
'directory with PEM files (see the link for details). '
'Enables/disables vault endpoint TLS certificate verification. Enabled by default. '
'Disabling makes execution vulnerable for MITM attacks - do it only when justified and in trusted environments. '
'For details see: https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification',
dest='vault_endpoint_verify',
default=True
)
parser.add_argument('-vs', '--vault-secret',
type=str,
help='Vault secret token. '
@@ -514,7 +531,7 @@ def _resolve_vault_endpoint(args):
def _resolve_property(args, property_name, ignore_value_error=False):
try:
cli_atr = getattr(args, property_name)
if cli_atr:
if cli_atr or cli_atr is False:
return cli_atr
else:
config = import_deployment_config(_resolve_deployment_config_path(args), property_name)
@@ -533,6 +550,7 @@ def _cli_deploy_dags(args):
clear_dags_folder=args.clear_dags_folder,
auth_method=args.auth_method,
vault_endpoint=_resolve_vault_endpoint(args),
vault_endpoint_verify=_resolve_property(args, 'vault_endpoint_verify', ignore_value_error=True),
vault_secret=vault_secret,
project_id=_resolve_property(args, 'gcp_project_id')
)
@@ -543,13 +561,15 @@ def _cli_deploy_image(args):
docker_repository = _resolve_property(args, 'docker_repository')
vault_secret = _resolve_property(args, 'vault_secret', ignore_value_error=True)
vault_endpoint = _resolve_vault_endpoint(args)
vault_endpoint_verify = _resolve_property(args, 'vault_endpoint_verify', ignore_value_error=True)
image_tar_path = args.image_tar_path if args.image_tar_path else find_image_file()

bigflow.deploy.deploy_docker_image(
image_tar_path=image_tar_path,
auth_method=args.auth_method,
docker_repository=docker_repository,
vault_endpoint=vault_endpoint,
vault_endpoint_verify=vault_endpoint_verify,
vault_secret=vault_secret,
)

@@ -579,12 +599,14 @@ def _grab_image_cache_params(args):
logger.debug("Image caching is requested - create build image cache params obj")
vault_secret = _resolve_property(args, 'vault_secret', ignore_value_error=True)
vault_endpoint = _resolve_vault_endpoint(args)
vault_endpoint_verify = _resolve_property(args, 'vault_endpoint_verify', ignore_value_error=True)
return bigflow.build.operate.BuildImageCacheParams(
auth_method=args.auth_method,
vault_endpoint=vault_endpoint,
vault_secret=vault_secret,
cache_from_version=args.cache_from_version,
cache_from_image=args.cache_from_image,
vault_endpoint_verify=vault_endpoint_verify
)
else:
logger.debug("No caching is requested - so just disable it completly")
28 changes: 20 additions & 8 deletions bigflow/deploy.py
Original file line number Diff line number Diff line change
@@ -42,6 +42,7 @@ def deploy_docker_image(
docker_repository: str,
auth_method: AuthorizationType = AuthorizationType.LOCAL_ACCOUNT,
vault_endpoint: T.Optional[str] = None,
vault_endpoint_verify: str | bool | None = None,
vault_secret: T.Optional[str] = None,
) -> str:
if image_tar_path.endswith(".toml"):
@@ -53,6 +54,7 @@ def deploy_docker_image(
docker_repository,
auth_method,
vault_endpoint,
vault_endpoint_verify,
vault_secret,
)

@@ -62,6 +64,7 @@ def _deploy_docker_image_from_local_repo(
docker_repository: str,
auth_method: AuthorizationType = AuthorizationType.LOCAL_ACCOUNT,
vault_endpoint: str | None = None,
vault_endpoint_verify: str | bool | None = None,
vault_secret: str | None = None,
) -> str:

@@ -81,6 +84,7 @@ def _deploy_docker_image_from_local_repo(
docker_repository=docker_repository,
auth_method=auth_method,
vault_endpoint=vault_endpoint,
vault_endpoint_verify=vault_endpoint_verify,
vault_secret=vault_secret,
image_id=image_id,
build_ver=build_ver,
@@ -92,6 +96,7 @@ def _deploy_docker_image_from_fs(
docker_repository: str,
auth_method: AuthorizationType = AuthorizationType.LOCAL_ACCOUNT,
vault_endpoint: str | None = None,
vault_endpoint_verify: str | bool | None = None,
vault_secret: str | None = None,
) -> str:
build_ver = bf_commons.decode_version_number_from_file_name(Path(image_tar_path))
@@ -105,6 +110,7 @@ def _deploy_docker_image_from_fs(
image_id=image_id,
auth_method=auth_method,
vault_endpoint=vault_endpoint,
vault_endpoint_verify=vault_endpoint_verify,
vault_secret=vault_secret,
)
finally:
@@ -118,14 +124,15 @@ def _deploy_image_loaded_to_local_registry(
image_id: str,
auth_method: AuthorizationType,
vault_endpoint: str | None = None,
vault_endpoint_verify: str | bool | None = None,
vault_secret: str | None = None,
) -> str:
docker_image = docker_repository + ":" + build_ver
docker_image_latest = docker_repository + ":latest"
tag_image(image_id, docker_repository, "latest")

logger.info("Deploying docker image tag=%s auth_method=%s", docker_image, auth_method)
authenticate_to_registry(auth_method, vault_endpoint, vault_secret)
authenticate_to_registry(auth_method, vault_endpoint, vault_secret, vault_endpoint_verify)
bf_commons.run_process(['docker', 'push', docker_image])
bf_commons.run_process(['docker', 'push', docker_image_latest])

@@ -136,13 +143,14 @@ def authenticate_to_registry(
auth_method: AuthorizationType,
vault_endpoint: T.Optional[str] = None,
vault_secret: T.Optional[str] = None,
vault_endpoint_verify: str | bool | None = None,
):
logger.info("Authenticating to registry with auth_method=%s", auth_method)

if auth_method == AuthorizationType.LOCAL_ACCOUNT:
bf_commons.run_process(['gcloud', 'auth', 'configure-docker'])
elif auth_method == AuthorizationType.VAULT:
oauthtoken = get_vault_token(vault_endpoint, vault_secret)
oauthtoken = get_vault_token(vault_endpoint, vault_secret, vault_endpoint_verify)
bf_commons.run_process(
['docker', 'login', '-u', 'oauth2accesstoken', '--password-stdin', 'https://eu.gcr.io'],
input=oauthtoken,
@@ -156,9 +164,10 @@ def check_images_exist(
auth_method: AuthorizationType,
vault_endpoint: T.Optional[str] = None,
vault_secret: T.Optional[str] = None,
vault_endpoint_verify: str | bool | None = None
):
logger.info("Checking if images used in DAGs exist in the registry")
authenticate_to_registry(auth_method, vault_endpoint, vault_secret)
authenticate_to_registry(auth_method, vault_endpoint, vault_secret, vault_endpoint_verify)
missing_images = set()
for image in images:
found_images = bf_commons.run_process(['docker', 'manifest', 'inspect', image], check=False, verbose=False)
@@ -189,19 +198,21 @@ def deploy_dags_folder(
clear_dags_folder: bool = False,
auth_method: AuthorizationType = AuthorizationType.LOCAL_ACCOUNT,
vault_endpoint: T.Optional[str] = None,
vault_endpoint_verify: str | bool | None = None,
vault_secret: T.Optional[str] = None,
gs_client: T.Optional[storage.Client] = None,
) -> str:
images = get_image_tags_from_image_version_file(dags_dir)
if images:
check_images_exist(auth_method=auth_method,
vault_endpoint=vault_endpoint,
vault_endpoint_verify=vault_endpoint_verify,
vault_secret=vault_secret,
images=images)

logger.info("Deploying DAGs folder, auth_method=%s, clear_dags_folder=%s, dags_dir=%s", auth_method, clear_dags_folder, dags_dir)

client = gs_client or create_storage_client(auth_method, project_id, vault_endpoint, vault_secret)
client = gs_client or create_storage_client(auth_method, project_id, vault_endpoint, vault_secret, vault_endpoint_verify)
bucket = client.bucket(dags_bucket)

if clear_dags_folder:
@@ -246,30 +257,31 @@ def create_storage_client(
project_id: str,
vault_endpoint: str,
vault_secret: str,
vault_endpoint_verify: str | bool | None = None
) -> storage.Client:
if auth_method == AuthorizationType.LOCAL_ACCOUNT:
return storage.Client(project=project_id)
elif auth_method == AuthorizationType.VAULT:
oauthtoken = get_vault_token(vault_endpoint, vault_secret)
oauthtoken = get_vault_token(vault_endpoint, vault_secret, vault_endpoint_verify)
return storage.Client(project=project_id, credentials=credentials.Credentials(oauthtoken))
else:
raise ValueError(f"unsupported auth_method: {auth_method!r}")


def get_vault_token(vault_endpoint: str, vault_secret: str) -> str:
def get_vault_token(vault_endpoint: str, vault_secret: str, vault_endpoint_verify: str | bool | None = True) -> str:
if not vault_endpoint:
raise ValueError('vault_endpoint is required')
if not vault_secret:
raise ValueError('vault_secret is required')

headers = {'X-Vault-Token': vault_secret}
response = requests.get(vault_endpoint, headers=headers, verify=False)
response = requests.get(vault_endpoint, headers=headers, verify=vault_endpoint_verify)

logger.info("get oauth token from %s status_code=%s", vault_endpoint, response.status_code)
if response.status_code != 200:
logger.info(response.text)
raise ValueError(
'Could not get vault token, response code: {}'.format(
response.status_code))

logger.info("get oauth token from %s status_code=%s", vault_endpoint, response.status_code)
return response.json()['data']['token']
3 changes: 2 additions & 1 deletion requirements.in
Original file line number Diff line number Diff line change
@@ -12,4 +12,5 @@ bs4
pytest
pytest-html
pytest-cov
pytest-github-actions-annotate-failures
pytest-github-actions-annotate-failures
parameterized
12 changes: 7 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# *** autogenerated: don't edit ***
# $source-hash: sha256:758f72d93f463f46b9823c68917482b898b517bf66baaa0dc72b6b6dd534aab7
# $source-file: requirements.in
#
# This file is autogenerated by pip-compile with python 3.7
# To update, run:
#
# pip-compile requirements.in
#
# run 'bigflow build-requirements requirements.in' to update this file

apache-beam[gcp]==2.36.0
# via -r requirements/dataflow_extras.txt
attrs==22.1.0
@@ -205,6 +205,8 @@ packaging==21.3
# pytest
pandas==1.3.5
# via -r requirements/bigquery_extras.txt
parameterized==0.8.1
# via -r requirements.in
pep517==0.13.0
# via build
pexpect==4.8.0
Loading