Glue - Implemented create_crawler, get_crawler, get_crawlers, delete_crawler. Updated IMPLEMENTATION_COVERAGE.md.
mwoods-familiaris committed Aug 25, 2021
1 parent 29b0122 commit 9234252
Showing 6 changed files with 494 additions and 10 deletions.
20 changes: 10 additions & 10 deletions IMPLEMENTATION_COVERAGE.md
@@ -1027,7 +1027,7 @@

## batch
<details>
- <summary>78% implemented</summary>
+ <summary>84% implemented</summary>

- [X] cancel_job
- [X] create_compute_environment
@@ -3797,7 +3797,7 @@
- [ ] modify_vpc_endpoint_connection_notification
- [ ] modify_vpc_endpoint_service_configuration
- [ ] modify_vpc_endpoint_service_permissions
- - [ ] modify_vpc_peering_connection_options
+ - [X] modify_vpc_peering_connection_options
- [X] modify_vpc_tenancy
- [ ] modify_vpn_connection
- [ ] modify_vpn_connection_options
@@ -4904,7 +4904,7 @@

## glue
<details>
- <summary>4% implemented</summary>
+ <summary>7% implemented</summary>

- [ ] batch_create_partition
- [ ] batch_delete_connection
@@ -4923,7 +4923,7 @@
- [ ] check_schema_version_validity
- [ ] create_classifier
- [ ] create_connection
- - [ ] create_crawler
+ - [X] create_crawler
- [X] create_database
- [ ] create_dev_endpoint
- [ ] create_job
@@ -4942,7 +4942,7 @@
- [ ] delete_column_statistics_for_partition
- [ ] delete_column_statistics_for_table
- [ ] delete_connection
- - [ ] delete_crawler
+ - [X] delete_crawler
- [ ] delete_database
- [ ] delete_dev_endpoint
- [ ] delete_job
@@ -4966,9 +4966,9 @@
- [ ] get_column_statistics_for_table
- [ ] get_connection
- [ ] get_connections
- - [ ] get_crawler
+ - [X] get_crawler
- [ ] get_crawler_metrics
- - [ ] get_crawlers
+ - [X] get_crawlers
- [ ] get_data_catalog_encryption_settings
- [X] get_database
- [X] get_databases
@@ -10589,7 +10589,7 @@

## ssm
<details>
- <summary>16% implemented</summary>
+ <summary>17% implemented</summary>

- [X] add_tags_to_resource
- [ ] associate_ops_item_related_item
@@ -10626,7 +10626,7 @@
- [ ] describe_automation_step_executions
- [ ] describe_available_patches
- [X] describe_document
- - [ ] describe_document_permission
+ - [X] describe_document_permission
- [ ] describe_effective_instance_associations
- [ ] describe_effective_patches_for_patch_baseline
- [ ] describe_instance_associations_status
@@ -10692,7 +10692,7 @@
- [ ] list_resource_compliance_summaries
- [ ] list_resource_data_sync
- [X] list_tags_for_resource
- - [ ] modify_document_permission
+ - [X] modify_document_permission
- [ ] put_compliance_items
- [ ] put_inventory
- [X] put_parameter
12 changes: 12 additions & 0 deletions moto/glue/exceptions.py
@@ -28,6 +28,11 @@ def __init__(self):
super(PartitionAlreadyExistsException, self).__init__("Partition")


class CrawlerAlreadyExistsException(AlreadyExistsException):
def __init__(self):
super(CrawlerAlreadyExistsException, self).__init__("Crawler")


class EntityNotFoundException(GlueClientError):
def __init__(self, msg):
super(GlueClientError, self).__init__("EntityNotFoundException", msg)
@@ -48,6 +53,13 @@ def __init__(self):
super(PartitionNotFoundException, self).__init__("Cannot find partition.")


class CrawlerNotFoundException(EntityNotFoundException):
def __init__(self, crawler):
super(CrawlerNotFoundException, self).__init__(
"Crawler %s not found." % crawler
)


class VersionNotFoundException(EntityNotFoundException):
def __init__(self):
super(VersionNotFoundException, self).__init__("Version not found.")
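
For context, a missing crawler surfaces to boto3 callers under Glue's EntityNotFoundException error code, since CrawlerNotFoundException subclasses EntityNotFoundException above. A minimal sketch of what a caller would observe (the crawler name is illustrative, and moto's mock_glue decorator is assumed):

import boto3
from botocore.exceptions import ClientError
from moto import mock_glue


@mock_glue
def demo_crawler_not_found():
    client = boto3.client("glue", region_name="us-east-1")
    try:
        client.get_crawler(Name="nonexistent-crawler")
    except ClientError as exc:
        # CrawlerNotFoundException serializes to the EntityNotFoundException
        # error code, matching the real Glue API.
        assert exc.response["Error"]["Code"] == "EntityNotFoundException"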
152 changes: 152 additions & 0 deletions moto/glue/models.py
@@ -7,6 +7,8 @@
from collections import OrderedDict
from .exceptions import (
JsonRESTError,
CrawlerAlreadyExistsException,
CrawlerNotFoundException,
DatabaseAlreadyExistsException,
DatabaseNotFoundException,
TableAlreadyExistsException,
@@ -20,6 +22,7 @@
class GlueBackend(BaseBackend):
def __init__(self):
self.databases = OrderedDict()
self.crawlers = OrderedDict()

def create_database(self, database_name, database_input):
if database_name in self.databases:
@@ -67,6 +70,59 @@ def delete_table(self, database_name, table_name):
raise TableNotFoundException(table_name)
return {}

def create_crawler(
self,
name,
role,
database_name,
description,
targets,
schedule,
classifiers,
table_prefix,
schema_change_policy,
recrawl_policy,
lineage_configuration,
configuration,
crawler_security_configuration,
tags,
):
if name in self.crawlers:
raise CrawlerAlreadyExistsException()

crawler = FakeCrawler(
name=name,
role=role,
database_name=database_name,
description=description,
targets=targets,
schedule=schedule,
classifiers=classifiers,
table_prefix=table_prefix,
schema_change_policy=schema_change_policy,
recrawl_policy=recrawl_policy,
lineage_configuration=lineage_configuration,
configuration=configuration,
crawler_security_configuration=crawler_security_configuration,
tags=tags,
)
self.crawlers[name] = crawler

def get_crawler(self, name):
try:
return self.crawlers[name]
except KeyError:
raise CrawlerNotFoundException(name)

def get_crawlers(self):
        return list(self.crawlers.values())

def delete_crawler(self, name):
try:
del self.crawlers[name]
except KeyError:
raise CrawlerNotFoundException(name)


class FakeDatabase(BaseModel):
def __init__(self, database_name, database_input):
@@ -177,4 +233,100 @@ def as_dict(self):
return obj


class FakeCrawler(BaseModel):
def __init__(
self,
name,
role,
database_name,
description,
targets,
schedule,
classifiers,
table_prefix,
schema_change_policy,
recrawl_policy,
lineage_configuration,
configuration,
crawler_security_configuration,
tags,
):
self.name = name
self.role = role
self.database_name = database_name
self.description = description
self.targets = targets
self.schedule = schedule
self.classifiers = classifiers
self.table_prefix = table_prefix
self.schema_change_policy = schema_change_policy
self.recrawl_policy = recrawl_policy
self.lineage_configuration = lineage_configuration
self.configuration = configuration
self.crawler_security_configuration = crawler_security_configuration
self.tags = tags
self.state = "READY"
self.creation_time = datetime.utcnow()
self.last_updated = self.creation_time
self.version = 1
self.crawl_elapsed_time = 0
self.last_crawl_info = None

def as_dict(self):
last_crawl = self.last_crawl_info.as_dict() if self.last_crawl_info else None
data = {
"Name": self.name,
"Role": self.role,
"Targets": self.targets,
"DatabaseName": self.database_name,
"Description": self.description,
"Classifiers": self.classifiers,
"RecrawlPolicy": self.recrawl_policy,
"SchemaChangePolicy": self.schema_change_policy,
"LineageConfiguration": self.lineage_configuration,
"State": self.state,
"TablePrefix": self.table_prefix,
"CrawlElapsedTime": self.crawl_elapsed_time,
"CreationTime": self.creation_time.isoformat(),
"LastUpdated": self.last_updated.isoformat(),
"LastCrawl": last_crawl,
"Version": self.version,
"Configuration": self.configuration,
"CrawlerSecurityConfiguration": self.crawler_security_configuration,
}

if self.schedule:
data["Schedule"] = {
"ScheduleExpression": self.schedule,
"State": "SCHEDULED",
}

return data


class LastCrawlInfo(BaseModel):
def __init__(
self, error_message, log_group, log_stream, message_prefix, start_time, status,
):
self.error_message = error_message
self.log_group = log_group
self.log_stream = log_stream
self.message_prefix = message_prefix
self.start_time = start_time
self.status = status

def as_dict(self):
return {
"ErrorMessage": self.error_message,
"LogGroup": self.log_group,
"LogStream": self.log_stream,
"MessagePrefix": self.message_prefix,
"StartTime": self.start_time,
"Status": self.status,
}


glue_backend = GlueBackend()
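
As a sanity check, the backend can be exercised directly, bypassing the HTTP layer. A rough sketch (the crawler name, role ARN, database name, and S3 path are made up; optional fields are passed as None, mirroring the keyword arguments of create_crawler above):

backend = GlueBackend()
backend.create_crawler(
    name="example-crawler",
    role="arn:aws:iam::123456789012:role/GlueRole",
    database_name="example_db",
    description=None,
    targets={"S3Targets": [{"Path": "s3://example-bucket/prefix"}]},
    schedule=None,
    classifiers=None,
    table_prefix=None,
    schema_change_policy=None,
    recrawl_policy=None,
    lineage_configuration=None,
    configuration=None,
    crawler_security_configuration=None,
    tags=None,
)
# New crawlers start in the READY state with version 1.
assert backend.get_crawler("example-crawler").state == "READY"
assert len(backend.get_crawlers()) == 1
backend.delete_crawler("example-crawler")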
35 changes: 35 additions & 0 deletions moto/glue/responses.py
@@ -274,3 +274,38 @@ def batch_delete_partition(self):
out["Errors"] = errors_output

return json.dumps(out)

def create_crawler(self):
self.glue_backend.create_crawler(
name=self.parameters.get("Name"),
role=self.parameters.get("Role"),
database_name=self.parameters.get("DatabaseName"),
description=self.parameters.get("Description"),
targets=self.parameters.get("Targets"),
schedule=self.parameters.get("Schedule"),
classifiers=self.parameters.get("Classifiers"),
table_prefix=self.parameters.get("TablePrefix"),
schema_change_policy=self.parameters.get("SchemaChangePolicy"),
recrawl_policy=self.parameters.get("RecrawlPolicy"),
lineage_configuration=self.parameters.get("LineageConfiguration"),
configuration=self.parameters.get("Configuration"),
crawler_security_configuration=self.parameters.get(
"CrawlerSecurityConfiguration"
),
tags=self.parameters.get("Tags"),
)
return ""

def get_crawler(self):
name = self.parameters.get("Name")
crawler = self.glue_backend.get_crawler(name)
return json.dumps({"Crawler": crawler.as_dict()})

def get_crawlers(self):
crawlers = self.glue_backend.get_crawlers()
return json.dumps({"Crawlers": [crawler.as_dict() for crawler in crawlers]})

def delete_crawler(self):
name = self.parameters.get("Name")
self.glue_backend.delete_crawler(name)
return ""
40 changes: 40 additions & 0 deletions tests/test_glue/helpers.py
@@ -106,3 +106,43 @@ def get_partition(client, database_name, table_name, values):
return client.get_partition(
DatabaseName=database_name, TableName=table_name, PartitionValues=values
)


def create_crawler(
client, crawler_name, crawler_role=None, crawler_targets=None, **kwargs
):
optional_param_map = {
"database_name": "DatabaseName",
"description": "Description",
"schedule": "Schedule",
"classifiers": "Classifiers",
"table_prefix": "TablePrefix",
"schema_change_policy": "SchemaChangePolicy",
"recrawl_policy": "RecrawlPolicy",
"lineage_configuration": "LineageConfiguration",
"configuration": "Configuration",
"crawler_security_configuration": "CrawlerSecurityConfiguration",
"tags": "Tags",
}

params = {
boto3_key: kwargs.get(key)
for key, boto3_key in optional_param_map.items()
if kwargs.get(key) is not None
}

if crawler_role is None:
crawler_role = "arn:aws:iam::123456789012:role/Glue/Role"

if crawler_targets is None:
crawler_targets = {
"S3Targets": [],
"JdbcTargets": [],
"MongoDBTargets": [],
"DynamoDBTargets": [],
"CatalogTargets": [],
}

return client.create_crawler(
Name=crawler_name, Role=crawler_role, Targets=crawler_targets, **params,
)
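
A test using the helper might look like this sketch (the test body and table_prefix value are illustrative; the relative import assumes the test module lives alongside helpers.py):

import boto3
from moto import mock_glue

from .helpers import create_crawler


@mock_glue
def test_create_crawler_with_defaults():
    client = boto3.client("glue", region_name="us-east-1")
    # Only the name is required; the helper supplies a default role ARN and
    # empty target lists, and forwards snake_case optional kwargs as their
    # CamelCase boto3 equivalents.
    create_crawler(client, "example-crawler", table_prefix="dev_")
    crawler = client.get_crawler(Name="example-crawler")["Crawler"]
    assert crawler["TablePrefix"] == "dev_"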