From a62a9c705941ee09c16c306992d8716a01b92bb1 Mon Sep 17 00:00:00 2001 From: James Armes Date: Thu, 5 Sep 2024 14:59:36 -0400 Subject: [PATCH 01/14] Added documentation with mkdocs. --- .editorconfig | 10 ++ .github/pull_request_template.md | 1 + doc/api.md | 55 ++++++++- doc/{ => api}/destinations.md | 0 doc/{ => api}/sources.md | 0 doc/architecture.md | 129 ++++++++++++++++++++ doc/architecture/api.md | 198 +++++++++++++++++++++++++++++++ doc/architecture/destinations.md | 37 ++++++ doc/architecture/jobs.md | 51 ++++++++ doc/architecture/sources.md | 37 ++++++ doc/architecture/worker.md | 113 ++++++++++++++++++ doc/database.md | 2 +- doc/index.md | 1 + doc/license.md | 1 + doc/worker.md | 17 --- lib/source/base.rb | 26 +++- mkdocs.yaml | 85 +++++++++++++ 17 files changed, 738 insertions(+), 25 deletions(-) create mode 100644 .editorconfig rename doc/{ => api}/destinations.md (100%) rename doc/{ => api}/sources.md (100%) create mode 100644 doc/architecture.md create mode 100644 doc/architecture/api.md create mode 100644 doc/architecture/destinations.md create mode 100644 doc/architecture/jobs.md create mode 100644 doc/architecture/sources.md create mode 100644 doc/architecture/worker.md create mode 100644 doc/index.md create mode 100644 doc/license.md create mode 100644 mkdocs.yaml diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..028f8cb --- /dev/null +++ b/.editorconfig @@ -0,0 +1,10 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +indent_style = space +indent_size = 2 +insert_final_newline = true +max_line_length = 80 +trim_trailing_whitespace = true diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 5bef95b..db9a8e7 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -17,5 +17,6 @@ CCAP-XXX #### ✅ Completion tasks +- [ ] Updated documentation - [ ] Added relevant tests - [ ] Meets acceptance criteria diff --git a/doc/api.md b/doc/api.md index 
63d6739..141615e 100644 --- a/doc/api.md +++ b/doc/api.md @@ -1,5 +1,56 @@ # Document Transfer Service API +```mermaid +sequenceDiagram + critical Request a document transfer + activate Consumer + activate API + activate Database + Consumer->>API: Request transfer + API->>Database: Record transfer + API->>Database: Enqueue background job + API->>Consumer: Return transfer id + deactivate Consumer + end + + critical Background job processing + activate Worker + loop until success or max attempts + Worker->>Database: Reserve job + activate Source + Worker->>Source: Fetch document + deactivate Source + activate Destination + Worker->>Destination: Transfer document + deactivate Destination + alt Transfer succeeded + Worker->>Database: Update request status + Worker->>Database: Mark job complete + else Transfer failed + alt Max attempts reached + rect rgb(191, 223, 255) + Worker->>Database: Update request status + Worker->>Database: Mark job failed + end + else Retry transfer + Worker->>Database: Mark job failed + end + end + end + deactivate Worker + end + + opt Check transfer status + activate Consumer + Consumer->>API: Check transfer status + API->>Database: Retrieve transfer status + API->>Consumer: Return transfer status + deactivate Database + deactivate API + deactivate Consumer + end +``` + Interacting with the Document Transfer Service is done through a RESTful API. All requests and responses should be in JSON format, unless otherwise indicated. @@ -73,7 +124,7 @@ middleware for rack. The logger utilizes [semantic logging][semantic_logger] in JSON format to provide easily parsable log entries. 
[authentication]: ./api/authentication.md -[destination]: ./destinations.md +[destination]: ./api/destinations.md [semantic_logger]: https://logger.rocketjob.io/ -[source]: ./sources.md +[source]: ./api/sources.md [spec]: ../openapi.yaml diff --git a/doc/destinations.md b/doc/api/destinations.md similarity index 100% rename from doc/destinations.md rename to doc/api/destinations.md diff --git a/doc/sources.md b/doc/api/sources.md similarity index 100% rename from doc/sources.md rename to doc/api/sources.md diff --git a/doc/architecture.md b/doc/architecture.md new file mode 100644 index 0000000..7ff3818 --- /dev/null +++ b/doc/architecture.md @@ -0,0 +1,129 @@ +# Architecture + +The Document Transfer Service is designed to be as lightweight as possible. As +such, we have chosen to use [grape] as the API framework. Grape is simple +microservice framework for ruby. It's opinionated, but less so than Rails, with +minimal dependencies. + +## System Context + +Although the exposed APIs are meant to be simple and generic, and could +therefore be used by any system, the primary use case is to support the +application process for benefits within America's [safety net]. As such, we'll +use this as our context for system documentation. + +Within this larger system, the Document Transfer Service acts as a component +dedicated to the secure transfer of documents between the applicant and the +benefits system. This could be a state run system, or a third party system +authorized to process benefits applications. 
+ +```plantuml +!include https://raw.githubusercontent.com/plantuml-stdlib/C4-PlantUML/master/C4_Container.puml + +title Document Transfer Service - System Context +AddRelTag("optional", $textColor="gray", $lineColor="gray", $lineStyle = DashedLine()) + +Person(applicant, "Applicant") +System_Ext(benefits_app, "Benefits Application", "Digital application for benefits") +System(doc_transfer, "Document Transfer Service", "A microservice to securely transfer documents") +System_Ext(benefits_system, "Benefits System", "System that processes benefits applications") + +Rel_R(applicant, benefits_app, "Applies") +Rel_R(benefits_app, doc_transfer, "Requests document transfer") +Rel_D(doc_transfer, benefits_system, "Sends") +Rel(benefits_app, benefits_system, "Submits", $tags="optional") +Rel(benefits_system, applicant, "Notifies", $tags="optional") + +footer Last updated 2024-09-04 for Document Transfer Service v1.0.0 +``` + +The benefits application is typically a web form application that simplifies the +benefits application process for the applicant. The applicant will upload +their required documents to this system, which will then make an API call to +the Document Transfer Service to transfer the documents to the appropriate +destination. Typically, this call will be done asynchronously, so the applicant +is not waiting for the transfer to complete.
+ +## The application + +The application can be broken down into three [containers][c4-containers]: + +- The [**API**][api] that handles the requests +- The [**worker**][worker] that processes background jobs +- The **database** that stores the transfer requests + +```plantuml +!include https://raw.githubusercontent.com/plantuml-stdlib/C4-PlantUML/master/C4_Container.puml + +title Document Transfer Service - Container Diagram +AddRelTag("optional", $textColor="gray", $lineColor="gray", $lineStyle = DashedLine()) + +Person(applicant, "Applicant") +System_Ext(benefits_app, "Benefits Application", "Digital application for benefits") +System_Ext(benefits_system, "Benefits System", "System that processes benefits applications") + +System_Boundary(doc_transfer, "Document Transfer Service") { + Container(api, "API", "Handles incoming requests") + ContainerQueue(worker, "Worker", "Processes document transfers") + ContainerDb(postgres, "PostgreSQL", "Stores transfer requests") + + Rel(api, postgres, "Record and queue transfer requests") + Rel(worker, postgres, "Process background jobs") +} + +Rel_R(applicant, benefits_app, "Applies") +Rel_D(benefits_app, api, "Requests document transfer") +Rel_L(worker, benefits_system, "Sends") +Rel(benefits_app, benefits_system, "Submits", $tags="optional") +Rel_U(benefits_system, applicant, "Notifies", $tags="optional") + +footer Last updated 2024-09-04 for Document Transfer Service v1.0.0 +``` + +### API + +The API is the entry point for the service. It handles incoming requests, +records them in the database, and queues the transfer job for the worker to +process. + +See the [API Architecture][api] for more details. + +### Worker + +The worker is responsible for processing the background jobs. It reads the +job queue from the database and processes jobs as they are available. Transfer +jobs are responsible for retrieving the source document and transferring it to +the destination. 
+ +See the [Worker Architecture][worker] for more details. + +### Database + +!!! warning "Sensitive Data Warning" + + While the service does not intend to store any PII (Personally + Identifiable Information) or other sensitive data, any data passed in the source + and destination parameters will be stored until the transfer has completed. + +The Document Transfer service is designed to work with a [PostgreSQL] database. +Other database backends may work, but only the PostgreSQL drivers are included. + +The database is used to store consumer details -- such as name and +authentication keys -- and the transfer requests. The service _does not_ store +or otherwise handle the contents of the documents being transferred. + +Request data is stored in two pieces: the request itself and the background job +that performs the transfer. This allows the service to track the status of +transfers, while retaining the source and destination information for a minimum +length of time as jobs are deleted once they have been completed. + +See the [database] documentation for more details on how the database is +configured. + +[api]: architecture/api.md +[c4-containers]: https://c4model.com/#ContainerDiagram +[database]: database.md +[grape]: https://github.com/ruby-grape/grape +[Postgresql]: https://www.postgresql.org/ +[safety net]: https://codeforamerica.org/programs/social-safety-net/ +[worker]: architecture/worker.md diff --git a/doc/architecture/api.md b/doc/architecture/api.md new file mode 100644 index 0000000..92d4923 --- /dev/null +++ b/doc/architecture/api.md @@ -0,0 +1,198 @@ +# API Architecture + +The Document Transfer Service is designed to be as lightweight as possible. As +such, we have chosen to use [grape] as the API framework. Grape is simple +microservice framework for ruby. It's opinionated, but less so than Rails, with +minimal dependencies. 
+ +# Components + +The API can be broken down into the following components: + +* **Rack**: HTTP server +* **Auth**: Authentication +* **Grape**: API framework +* **Endpoint**: API endpoint controller +* **Job**: Document transfer job +* **Sequel**: Database ORM + +```plantuml +!include https://raw.githubusercontent.com/plantuml-stdlib/C4-PlantUML/master/C4_Component.puml + +title Document Transfer Service - Component Diagram +AddRelTag("optional", $textColor="gray", $lineColor="gray", $lineStyle = DashedLine()) +AddBoundaryTag("system", $borderColor="#1168bd", $fontColor="#1168bd", $bgColor="transparent") +AddBoundaryTag("container", $borderColor="#438dd5", $fontColor="#438dd5", $bgColor="transparent") + +System_Ext(benefits_app, "Benefits Application", "Digital application for benefits") +System_Ext(benefits_system, "Benefits System", "System that processes benefits applications") + +System_Boundary(doc_transfer, "Document Transfer Service", $tags="system") { + Container_Boundary(api, "API", $tags="container") { + Component(rack, "Rack", "HTTP server") + Component(auth, "Auth", "Authentication") + Component(grape, "Grape", "API framework") + Component(endpoint, "Endpoint", "API endpoint controller") + Component(job, "Job", "Document transfer job") + Component(sequel, "Sequel", "Database ORM") + + Rel_D(rack, auth, "Authorizes") + Rel_R(rack, grape, "Routes") + Rel_D(grape, endpoint, "Handles") + Rel_R(endpoint, job, "Queues") + Rel(endpoint, sequel, "Records") + Rel(job, sequel, "Queues") + Rel(auth, sequel, "Verifies") + } + + ContainerQueue(worker, "Worker", "Processes document transfers") + ContainerDb(postgres, "PostgreSQL", "Stores transfer requests") + + Rel_R(sequel, postgres, "Read/Write") + Rel_D(worker, postgres, "Process background jobs") +} + +Rel_D(benefits_app, rack, "Requests document transfer") +Rel_R(benefits_app, benefits_system, "Submits", $tags="optional") +Rel_U(worker, benefits_system, "Sends") + +footer Last updated 2024-09-04 for Document 
Transfer Service v1.0.0 +``` + +## Rack + +[Rack] is a minimal interface between web servers supporting Ruby and Ruby +frameworks. It is used by Grape to handle incoming HTTP requests. + +Additionally, Rack is responsible for inserting our middleware into the request. +This middleware is used to handle common tasks such as logging, error handling, +and authentication. + +### Middleware + +!!! warning "Middleware Loading Order" + + The middleware is loaded in the order it is defined in the `config.ru` file. + This is important as the order can affect how the middleware interacts with + the request. + + For example, the `CorrelationId` and `RequestId` middleware should be loaded + before the `RequestLogging` middleware to ensure these ids are available. + +The following middleware is used by the API: + +* **AuthKey**: Handles authentication for restricted endpoints. +* **CorrelationId**: Adds a correlation ID to the request headers for logging + and tracing, if not already present. This id will be passed to requests made + to other systems. +* **Instrument**: Custom StatsD instrumentation for the API. +* **OpenTelemetry::Instrumentation::Rack::Instrumentation**: Instrumentation + for [OpenTelemetry]. +* **Rack::RewindableInput::Middleware**: Rewinds the input stream so it can be + read multiple times. +* **RequestId**: Adds a unique request ID to the request headers for logging + and tracing. +* **RequestLogging**: [Semantic logging][semantic-logger] of incoming requests. + +## Auth + +!!! warning "Key Expiration" + + Auth keys will automatically expire after 90 days. This is to ensure that + keys are rotated regularly and to prevent unauthorized access. + + It is important to keep track of key expiration dates and to create new keys + before the old ones expire in order to prevent service interruptions. + +!!!
note "Permissions" + + Although multiple Consumers and auth keys can be created, the API lacks + the ability to restrict access to specific endpoints, sources, or + destinations. All endpoints are accessible to all Consumers with a valid + key. + +Authentication is handled by the `AuthKey` middleware. This middleware checks +the `authorization` header for a valid `realm` and API key. + +In order to authenticate, the following conditions must be met: + +1. The `authorization` header must be present +1. The `realm` must be a UUID that matches a Consumer record in the database +1. The API key must match an active, non-expired key for the Consumer + +```mermaid +flowchart + A[Request] --> B{Authorization Header} + B -->|Present| C{Realm} + C -->|Present| D{Consumer} + D -->|Exists| E{Auth Key} + E -->|Valid| F[Authenticated] + E -->|Expired| G[Unauthorized] + E -->|Invalid| G[Unauthorized] + D -->|Does Not Exist| G[Unauthorized] + C -->|Not Present| G[Unauthorized] + E -->|Not Present| G[Unauthorized] + B -->|Not Present| G[Unauthorized] +``` + +See the [authentication] documentation for more details on authenticating as a +consumer. + +## Grape + +[Grape] is a simple microservice framework for ruby. It's opinionated, but less +so than Rails, with minimal dependencies. + +The application for the API is defined in `DocumentTransfer::API::API`. This +class is responsible for defining global settings for the API, such as the +supported formats, error handling, and [OpenAPI] documentation. + +Endpoints are mounted by the application, where grape will handle routing the +request to the appropriate endpoint. + +## Endpoint + +Endpoints are where the API logic is defined. Each endpoint is responsible for +handling a specific set of requests related to their domain. For example, the +`/transfer` endpoint is responsible for initiating a document transfer (`POST`) +and retrieving the status of a transfer (`GET`).
+ +Currently available endpoints are: + +* **`DocumentTransfer::API::Health`**: System health checks +* **`DocumentTransfer::API::Transfer`**: Document transfer requests + +## Job + +!!! danger "Work in Progress" + + This functionality is still a work in progress and is not yet implemented. + + Requests are currently processed synchronously, blocking the caller, and + without built-in retries. + +Jobs are tasks that are queued for background processing. In the case of the +API, jobs are used to process document transfers. When a transfer request is +received, a job is created and queued for the worker to process. + +This job is responsible for retrieving the source document and transferring it +to the destination. It handles retries with exponential backoff, logging, and +error handling. This allows for a more resilient system that can recover from +temporary failures at either the source or destination. + +# TODO: More information about jobs? Cron jobs? + +## Sequel + +[Sequel] is a simple, flexible, and powerful SQL database access toolkit for +Ruby. It is used by the API to interact with the database without having to +write raw SQL queries. + +[authentication]: ../api/authentication.md +[grape]: https://github.com/ruby-grape/grape +[openapi]: https://www.openapis.org/ +[opentelemetry]: https://opentelemetry.io/ +[rack]: https://github.com/rack/rack +[semantic-logger]: https://logger.rocketjob.io/ +[sequel]: https://sequel.jeremyevans.net/ +[statsd]: https://github.com/statsd/statsd diff --git a/doc/architecture/destinations.md b/doc/architecture/destinations.md new file mode 100644 index 0000000..40748e6 --- /dev/null +++ b/doc/architecture/destinations.md @@ -0,0 +1,37 @@ +# Destination Architecture + +**Destinations** represent the target location for a document transfer. They are +responsible for ensuring the document is successfully delivered. Like [sources], +destinations are designed to be easily extensible.
+ +For information on specifying a destination for a transfer request, see +[Document Destinations][destinations]. + +## Implementation + +Destinations are defined in `lib/destination`. Each destination type is +represented by a class that extends `DocumentTransfer::Destination::Base`. The +base class provides common functionality, along with a consistent interface for +all destination types to implement. + +To create a new destination type, create a new class that extends +`DocumentTransfer::Destination::Base` and implements the following methods: + +* `#transfer`: Transfers the source document to the destination + +To add your new destination to the system, update the factory method in +`DocumentTransfer::Destination.load`. If your destination adds new parameters, +update the endpoint at `DocumentTransfer::API::Transfer` and the destination +configuration at `DocumentTransfer::Config::Destination`. + +## Destination types + +The currently available destination types are: + +* `DocumentTransfer::Destination::OneDrive`: Transfer a document to a [OneDrive] + or [SharePoint] folder. + +[destinations]: ../api/destinations.md +[onedrive]: https://www.microsoft.com/en-us/microsoft-365/onedrive/onedrive-for-business +[sharepoint]: https://www.microsoft.com/en-us/microsoft-365/sharepoint/collaboration +[sources]: sources.md diff --git a/doc/architecture/jobs.md b/doc/architecture/jobs.md new file mode 100644 index 0000000..a237e45 --- /dev/null +++ b/doc/architecture/jobs.md @@ -0,0 +1,51 @@ +# Job Architecture + +**Jobs** are the background tasks that are processed by the worker. There are +multiple types of jobs, each with its own responsibilities. While some jobs are +queued to be run immediately, others are scheduled to run recurring at a set +interval. + +## Implementation + +Individual jobs are defined in `lib/job`. To implement a new, non-recurring job, +create a new class that extends `DocumentTransfer::Job::Base` and implements the +`#perform` method. 
The `#perform` method should contain the logic to be executed +when the job is processed. + +To create a new recurring job, create a new class under `lib/job/cron` that +extends `DocumentTransfer::Job::Cron::Base` and implements the `#perform` method +as described above. Additionally, set `self.cron_expression` at the top of your +class to a valid [cron expression][cron]. + +The base classes will handle queuing, initializing the `logger`, and recording +metrics. To add your new job to the system, require the class in +`DocumentTransfer::Job.load`. + +Cron jobs are automatically scheduled at boot time by the api and worker. To +ensure your job is scheduled, make sure it uses the +`DocumentTransfer::Job::Cron` namespace. You can find the scheduling logic in +`DocumentTransfer::Job.schedule`. + +## Job types + +The currently available job types are: + +* `DocumentTransfer::Job::Cron::ExpireKey`: Deactivates expired authentication + keys + +### Cron::ExpireKey + +!!! note + + The authentication component of the API ensures keys are not expired before + authorizing a request. This job is a safety net to ensure that any keys that + have expired are deactivated, and helps to keep the database clean and + accurate. + +The `DocumentTransfer::Job::Cron::ExpireKey` job is responsible for deactivating +expired authentication keys. It is scheduled to run once a day at midnight UTC. + +This job will search for any keys that have expired and are still marked as +`active`. It will update the `active` flag to `false` for all matching keys. + +[cron]: https://crontab.guru/ diff --git a/doc/architecture/sources.md b/doc/architecture/sources.md new file mode 100644 index 0000000..cb886bd --- /dev/null +++ b/doc/architecture/sources.md @@ -0,0 +1,37 @@ +# Sources Architecture + +**Sources** represent the origin of a document. They are responsible for +providing the document content and metadata for a transfer.
The source +architecture is designed to be extensible, allowing for the addition of new +source types with minimal changes to existing code. + +For information on specifying a source for a transfer request, see [Document +Sources][sources]. + +## Implementation + +Sources are defined in `lib/source`. Each source type is represented by a class +that extends `DocumentTransfer::Source::Base`. The base class provides common +functionality, along with a consistent interface for all source types to implement. + +To create a new source type, create a new class that extends +`DocumentTransfer::Source::Base` and implements the following methods: + +* `#fetch`: Fetches the document content from the source +* `#filename`: Returns the filename of the document at the source +* `#mime_type`: Returns the MIME type of the document +* `#size`: Returns the size of the document in bytes + +To add your new source to the system, update the factory method in +`DocumentTransfer::Source.load`. If your source adds new parameters, update the +endpoint at `DocumentTransfer::API::Transfer` and the source configuration at +`DocumentTransfer::Config::Source`. + +## Source types + +The currently available source types are: + +* `DocumentTransfer::Source::Url`: Represents a document available at a remote + URL + +[sources]: ../api/sources.md diff --git a/doc/architecture/worker.md b/doc/architecture/worker.md new file mode 100644 index 0000000..93b48bb --- /dev/null +++ b/doc/architecture/worker.md @@ -0,0 +1,113 @@ +# Worker Architecture + +The **Worker** is responsible for processing all background jobs. It is a key +component of the Document Transfer Service, as it is responsible for the actual +transfer of documents between the source and destination. + +We use [Delayed::Job][delayed_job] to manage background jobs. It uses the same +PostgreSQL database as the rest of the system to store job information through a +custom backend implementation.
+ +# Components + +The worker can be broken down into the following components: + +* **Delayed::Job**: Background job processing +* **Job**: Background task to be processed +* **Source**: Document source +* **Destination**: Document destination +* **Sequel**: Database ORM + +```plantuml +!include https://raw.githubusercontent.com/plantuml-stdlib/C4-PlantUML/master/C4_Component.puml + +title Document Transfer Service - Worker Component Diagram +AddRelTag("optional", $textColor="gray", $lineColor="gray", $lineStyle = DashedLine()) +AddBoundaryTag("system", $borderColor="#1168bd", $fontColor="#1168bd", $bgColor="transparent") +AddBoundaryTag("container", $borderColor="#438dd5", $fontColor="#438dd5", $bgColor="transparent") + +System_Ext(benefits_app, "Benefits Application", "Digital application for benefits") +System_Ext(benefits_system, "Benefits System", "System that processes benefits applications") + +System_Boundary(doc_transfer, "Document Transfer Service") { + Container(api, "API", "Handles incoming requests") + ContainerDb(postgres, "PostgreSQL", "Stores transfer requests") + + Container_Boundary(worker, "Worker") { + Component(delayed_job, "Delayed::Job", "Background job processing") + Component(sequel, "Sequel", "Database ORM") + Component(job, "Job", "Document transfer job") + Component(source, "Source", "Document source") + Component(destination, "Destination", "Document destination") + + Rel_D(delayed_job, sequel, "Claims job") + Rel_L(sequel, postgres, "Read/Write") + Rel_R(delayed_job, job, "Processes") + Rel_D(job, source, "Retrieves") + Rel_R(job, destination, "Transfers") + Rel_D(job, sequel, "Updates request") + Rel_U(destination, benefits_system, "Sends") + } + + Rel_D(api, postgres, "Records") +} + +Rel_R(benefits_app, benefits_system, "Submits", $tags="optional") +Rel_D(benefits_app, api, "Requests") + +footer Last updated 2024-09-04 for Document Transfer Service v1.0.0 +``` + +## Delayed::Job + +!!! 
note "Alternative Backends" + + We chose a background processing library that uses a database to store job + information. While other backends such as Valkey or Dragonfly may allow for + faster queue processing, our current traffic volume does not require such + optimizations, and we prefer the simplicity and resiliency of a + database-backed solution. + +[Delayed::Job][delayed_job] is a background job processing library for Ruby that +utilizes a database to store job information. It is run as a daemon process that +polls the database for new jobs to process. Jobs are claimed by a worker +instance, locking it to prevent other workers from processing the same job. + +If a job fails, **Delayed::Job** will automatically add the job back to the +queue so that it can be retried, using an [exponential backoff][backoff] +strategy. This increases the likelihood of the job succeeding on subsequent +attempts in the case of transient failures -- such as the source or destination +being temporarily unavailable. + +If a job succeeds, **Delayed::Job** will remove the job from the queue. + +## Job + +The **job** component is a background task that is processed by the worker. The +data required to process the job is deserialized automatically when it is loaded +by the worker. + +The actions performed by the job differ depending on the type of job. For +details on how the different job types are processed, see the +[Jobs Architecture][jobs]. + +## Source + +Used by document transfer jobs to retrieve the source document. The source +component is responsible for fetching the document from the source location, +as well as exposing necessary metadata about the document. + +See the [Sources Architecture][sources] for more details. + +## Destination + +Used by document transfer jobs to transfer the document to the configured +destination. + +See the [Destinations Architecture][destinations] for more details. 
+ +[backoff]: https://en.wikipedia.org/wiki/Exponential_backoff +[delayed_job]: https://github.com/collectiveidea/delayed_job +[destinations]: destinations.md +[jobs]: jobs.md +[sources]: sources.md diff --git a/doc/database.md b/doc/database.md index b773c84..4479e77 100644 --- a/doc/database.md +++ b/doc/database.md @@ -53,7 +53,7 @@ will drop all existing tables and recreate the schema. ```bash bundle exec rake db:reset -```` +``` Finally, you can drop the entire database with: diff --git a/doc/index.md b/doc/index.md new file mode 100644 index 0000000..612c7a5 --- /dev/null +++ b/doc/index.md @@ -0,0 +1 @@ +--8<-- "README.md" diff --git a/doc/license.md b/doc/license.md new file mode 100644 index 0000000..f409d45 --- /dev/null +++ b/doc/license.md @@ -0,0 +1 @@ +--8<-- "LICENSE" diff --git a/doc/worker.md b/doc/worker.md index 7c6747f..8402beb 100644 --- a/doc/worker.md +++ b/doc/worker.md @@ -31,22 +31,6 @@ the following environment variables: |------------------------|---------------------------------------------------|---------| | `QUEUE_STATS_INTERVAL` | Interval, in seconds, to report queue statistics. | `30` | -## Implementation - -Individual jobs are defined in `lib/job`. To implement a new, non-recurring job, -create a new class that extends `DocumentTransfer::Job::Base` and implements the -`#perform` method. The `#perform` method should contain the logic to be executed -when the job is processed. - -To create a new recurring job, create a new class under `lib/job/cron` that -extends `DocumentTransfer::Job::Cron::Base` and implements the `#perform` as -described above. Additionally, set `self.cron_expression` at the top of your -class to a valid [cron expression][cron]. - -The base classes will handle queuing, initialize a `logger`, and record metrics. -To add your new job to the system, require the class in -`DocumentTransfer::Job.load`. 
- ## Instrumentation The following metrics are collected and reported by StatsD regularly (default is @@ -75,6 +59,5 @@ The following [rake] tasks are available to help with managing the queue: - `rake jobs:queue` - Print information about the queue in JSON format. - `rake jobs:schedule` - Schedule all recurring jobs. -[cron]: https://crontab.guru/ [delayed_job]: https://github.com/collectiveidea/delayed_job [rake]: https://ruby.github.io/rake/ diff --git a/lib/source/base.rb b/lib/source/base.rb index 27cdf82..fb14db1 100644 --- a/lib/source/base.rb +++ b/lib/source/base.rb @@ -4,11 +4,13 @@ module DocumentTransfer module Source # Base class for sources. # - # @abstract Subclass and override {#filename} and {#mime_type} to implement a source. + # @abstract Subclass and override {#fetch}, {#filename}, {#mime_type}, and + # {#size} to implement a source. class Base # Initializes the source. # - # @param config [DocumentTransfer::Config::Source] Configuration for the source. + # @param config [DocumentTransfer::Config::Source] Configuration for the + # source. def initialize(config) @config = config end @@ -17,7 +19,8 @@ def initialize(config) # # @return [String] The document content. # - # @raise [NotImplementedError] If the method is not implemented by the subclass. + # @raise [NotImplementedError] If the method is not implemented by the + # subclass. # @raise [SourceError] If the document cannot be retrieved. def fetch raise NotImplementedError @@ -27,7 +30,8 @@ def fetch # # @return [String] # - # @raise [NotImplementedError] If the method is not implemented by the subclass. + # @raise [NotImplementedError] If the method is not implemented by the + # subclass. # @raise [SourceError] If the size cannot be retrieved. def filename raise NotImplementedError @@ -37,11 +41,23 @@ def filename # # @return [String] # - # @raise [NotImplementedError] If the method is not implemented by the subclass. 
+ # @raise [NotImplementedError] If the method is not implemented by the + # subclass. # @raise [SourceError] If the mime-type cannot be retrieved. def mime_type raise NotImplementedError end + + # Returns the size of the document, in bytes. + # + # @return [Integer] + # + # @raise [NotImplementedError] If the method is not implemented by the + # subclass. + # @raise [SourceError] If the size cannot be retrieved. + def size + raise NotImplementedError + end end end end diff --git a/mkdocs.yaml b/mkdocs.yaml new file mode 100644 index 0000000..12750b3 --- /dev/null +++ b/mkdocs.yaml @@ -0,0 +1,85 @@ +site_name: Document Transfer Service +repo_url: https://github.com/codeforamerica/document-transfer-service +edit_uri: edit/main/doc/ +docs_dir: doc + +plugins: + - search + +markdown_extensions: + - admonition + - footnotes + - plantuml_markdown + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.snippets + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format + - pymdownx.tabbed: + alternate_style: true + +theme: + name: material + icon: + logo: material/puzzle + palette: + scheme: slate + features: + - content.code.copy + - content.code.select + - content.action.edit + - navigation.path + - navigation.top + - toc.follow + - toc.integrate + - search.highlight + - search.suggest + +extra: + generator: false + social: + - icon: material/web + link: https://codeforamerica.org/ + name: Code for America website + - icon: fontawesome/brands/github + link: https://github.com/codeforamerica + name: Code for America on GitHub + - icon: fontawesome/brands/threads + link: https://www.threads.net/@codeforamerica + name: Code for America on Threads + - icon: fontawesome/brands/x-twitter + link: https://www.twitter.com/codeforamerica + name: Code for America on X (formerly Twitter) + +nav: + - Home: index.md + -
Usage: + - API: + - API Reference: api.md + - Authentication: api/authentication.md + - Sources: api/sources.md + - Destinations: api/destinations.md + - Background Jobs: worker.md + - Operation: + - Database: database.md + - Developer Console: console.md + - Create Auth Key: runbooks/create_auth_key.md + - Developing: + - Architecture: + - System: architecture.md + - API: architecture/api.md + - Worker: architecture/worker.md + - Jobs: architecture/jobs.md + - Sources: architecture/sources.md + - Destinations: architecture/destinations.md + - About: + # Include changelog. + - License: license.md + +copyright: Produced by Code for America under the MIT license. From 33c4f06871c15fa6fe55fb3d339f41276aec1c64 Mon Sep 17 00:00:00 2001 From: James Armes Date: Thu, 5 Sep 2024 15:02:04 -0400 Subject: [PATCH 02/14] Added link to jobs architecture --- doc/architecture/api.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/architecture/api.md b/doc/architecture/api.md index 92d4923..73ae9c0 100644 --- a/doc/architecture/api.md +++ b/doc/architecture/api.md @@ -180,7 +180,7 @@ to the destination. It handles retries with exponential backoff, logging, and error handling. This allows for a more resilient system that can recover from temporary failures at either the source or destination. -# TODO: More information about jobs? Cron jobs? +For more information on jobs, see the [Jobs Architecture][jobs] documentation. ## Sequel @@ -190,6 +190,7 @@ write raw SQL queries. [authentication]: ../api/authentication.md [grape]: https://github.com/ruby-grape/grape +[jobs]: jobs.md [openapi]: https://www.openapis.org/ [opentelemetry]: https://opentelemetry.io/ [rack]: https://github.com/rack/rack From 731bf66a0cababbb2a7d4f933b001700e499a8a1 Mon Sep 17 00:00:00 2001 From: James Armes Date: Thu, 5 Sep 2024 18:50:02 -0400 Subject: [PATCH 03/14] Updated links to code from documentation. 
--- README.md | 22 +++++++++++----------- doc/api.md | 2 +- doc/database.md | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 05456b6..67776c3 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ service. Using docker compose, the application code will be mounted from your local system to the running container. This will allow you to make changes to the code and see them reflected in the running service without having to rebuild the -image. Using docker compose will launch the api, the worker, and a database. +image. Using docker compose will launch the api, the worker, and a database. To run the service with docker compose, make sure you have [Docker Desktop] installed and run the following: @@ -132,16 +132,16 @@ bundle exec rake db:drop See the [API documentation][api] for information on how to interact with the service. -[.env]: ./sample.env -[api]: ./doc/api.md -[create-key]: ./doc/runbooks/create_auth_key.md -[database]: ./doc/database.md -[destination]: ./doc/destinations.md -[Dockerfile]: ./Dockerfile -[docker compose]: ./docker-compose.yaml +[.env]: https://github.com/codeforamerica/document-transfer-service/blob/main/sample.env +[api]: ./api.md +[create-key]: ./runbooks/create_auth_key.md +[database]: ./database.md +[destination]: ./api/destinations.md +[Dockerfile]: https://github.com/codeforamerica/document-transfer-service/blob/main/Dockerfile +[docker compose]: https://github.com/codeforamerica/document-transfer-service/blob/main/docker-compose.yaml [Docker Desktop]: https://docs.docker.com/desktop/ [omz]: https://ohmyz.sh/ -[ruby-version]: ./.ruby-version +[ruby-version]: https://github.com/codeforamerica/document-transfer-service/blob/main/.ruby-version [rvm]: https://rvm.io/ -[source]: ./doc/sources.md -[worker]: ./doc/worker.md +[source]: ./api/sources.md +[worker]: ./worker.md diff --git a/doc/api.md b/doc/api.md index 141615e..72d1d9d 100644 --- a/doc/api.md +++ b/doc/api.md @@ -127,4 +127,4 
@@ JSON format to provide easily parsable log entries. [destination]: ./api/destinations.md [semantic_logger]: https://logger.rocketjob.io/ [source]: ./api/sources.md -[spec]: ../openapi.yaml +[spec]: https://github.com/codeforamerica/document-transfer-service/blob/main/openapi.yaml diff --git a/doc/database.md b/doc/database.md index 4479e77..0a18916 100644 --- a/doc/database.md +++ b/doc/database.md @@ -67,7 +67,7 @@ Note that both `db:reset` and `db:drop` will refuse to run in production. The service uses the [sequel] gem to manage the database schema via [migrations][sequel-migrations]. These migrations are stored in the -[`db/migrate`][migrate] directory and are prefixed with a timestamp to ensure +[`db/migrations`][migrate] directory and are prefixed with a timestamp to ensure they run in proper order and avoid collisions. Migrations can be run using the following command: @@ -83,7 +83,7 @@ number: bundle exec rake db:migrate\[202407082156] ``` -[migrate]: ./db/migrate +[migrate]: https://github.com/codeforamerica/document-transfer-service/tree/main/db/migrations [migrations]: #schema-and-migrations [rake]: https://ruby.github.io/rake/ [sequel]: https://sequel.jeremyevans.net/ From 6ba39c03b08d0b1796fd1ee05e8575fb60b04522 Mon Sep 17 00:00:00 2001 From: James Armes Date: Thu, 5 Sep 2024 18:51:15 -0400 Subject: [PATCH 04/14] Added documentation for bootstrap --- .gitignore | 3 +- doc/bootstrap.md | 84 +++++++++++++++++++++++++++++++++++++ lib/bootstrap/api.rb | 17 ++++---- lib/bootstrap/console.rb | 21 ++++------ lib/bootstrap/mode.rb | 32 ++++++++++++++ lib/bootstrap/rake.rb | 15 +++---- lib/bootstrap/stage/base.rb | 3 ++ lib/bootstrap/worker.rb | 17 ++++---- mkdocs.yaml | 1 + 9 files changed, 151 insertions(+), 42 deletions(-) create mode 100644 doc/bootstrap.md create mode 100644 lib/bootstrap/mode.rb diff --git a/.gitignore b/.gitignore index fe48e4f..138a60b 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,8 @@ !sample.env coverage/ log/ +out/ +site/ # 
Ignore Byebug command history file. .byebug_history @@ -21,4 +23,3 @@ log/ .idea/dataSources.xml *.iml *.iws -out/ diff --git a/doc/bootstrap.md b/doc/bootstrap.md new file mode 100644 index 0000000..ad9cf7a --- /dev/null +++ b/doc/bootstrap.md @@ -0,0 +1,84 @@ +# Bootstrap System + +Before the system can be run, the components must be properly initialized. The +**bootstrap system** provides a modular way to initialize the required system +components. Bootstrap modes can include one or more reusable stages, and are +called by the various system entry points. + +## Modes + +Bootstrap modes define one or more stages that are executed in order. Modes can +be used to define the initialization process for different system entry points. + +The following modes are available: + +* `API`: Initializes components required to run the API +* `Console`: Initializes components required to use the developer console +* `Rake`: Initializes components required to run rake tasks +* `Worker`: Initializes components required to run the worker + +### Implementation + +Bootstrap modes are defined in the `lib/bootstrap` directory. Each mode is represented by a +class that extends `DocumentTransfer::Bootstrap::Mode`. + +To create a new bootstrap mode, create a new class that extends +`DocumentTransfer::Bootstrap::Mode` and implement the `#bootstrap` method. This +method should execute the stages required to initialize the system, along with +performing any other necessary startup tasks. + +!!! tip "Stage Ordering" + + The order in which stages are executed is determined by the order in which + they are defined in the `#bootstrap` method. Some stages may depend on + others being executed first; however, there is no built-in dependency + management for stages.
+ +For example, to initialize the logger and a database, you would define your +`#bootstrap` method as follows: + +```ruby +def bootstrap + Stage::Logger.new(config).bootstrap + Stage::Database.new(config).bootstrap +end +``` + +Note that `config` comes from the base class and is an [application +configuration object][config]. + +## Stages + +Stages are the individual components that make up a bootstrap mode. Each stage +is responsible for initializing a specific component of the system. + +!!! tip "Reusable Stages" + + Stages are designed to be reusable across multiple bootstrap modes. This + allows you to define a stage once and use it in multiple modes. + +The following stages are currently implemented: + +* `Database`: Initializes the database connection +* `Jobs`: Initializes the job queue and ensures recurring jobs are scheduled +* `Logger`: Initializes the logger +* `Models`: Load models into memory. This is useful for preloading models in + environments where lazy loading is not desired. +* `Prompt`: Configures the prompt for the developer console +* `RakeTasks`: Loads custom rake tasks +* `Telemetry`: Configures OpenTelemetry for distributed tracing +* `Worker`: Creates a thread to monitor the job queue + +## Implementation + +Bootstrap stages are defined in the `lib/bootstrap/stage` directory. Each stage +is represented by a class that extends +`DocumentTransfer::Bootstrap::Stage::Base`. + +To create a new bootstrap stage, create a new class that extends +`DocumentTransfer::Bootstrap::Stage::Base` and implement the `#bootstrap` +method. This method should perform the necessary initialization tasks for the +component the stage is responsible for. 
+ + +[config]: configuration.md diff --git a/lib/bootstrap/api.rb b/lib/bootstrap/api.rb index 6cdb8c1..7ad91da 100644 --- a/lib/bootstrap/api.rb +++ b/lib/bootstrap/api.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require_relative 'mode' require_relative 'stage/database' require_relative 'stage/logger' require_relative 'stage/models' @@ -9,17 +10,13 @@ module DocumentTransfer module Bootstrap # Boostrap the API. - class API - def initialize(config) - @config = config - end - + class API < Mode def bootstrap - Stage::Logger.new(@config).bootstrap - Stage::Database.new(@config).bootstrap - Stage::Models.new(@config).bootstrap - Stage::Jobs.new(@config).bootstrap - Stage::Telemetry.new(@config).bootstrap + Stage::Logger.new(config).bootstrap + Stage::Database.new(config).bootstrap + Stage::Models.new(config).bootstrap + Stage::Jobs.new(config).bootstrap + Stage::Telemetry.new(config).bootstrap end end end diff --git a/lib/bootstrap/console.rb b/lib/bootstrap/console.rb index 22685eb..9d36dcc 100644 --- a/lib/bootstrap/console.rb +++ b/lib/bootstrap/console.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require_relative 'mode' require_relative 'stage/database' require_relative 'stage/logger' require_relative 'stage/jobs' @@ -10,18 +11,14 @@ module DocumentTransfer module Bootstrap # Boostrap the console. 
- class Console - def initialize(config) - @config = config - end - - def bootstrap - Stage::Prompt.new(@config).bootstrap - Stage::Logger.new(@config).bootstrap - Stage::Database.new(@config).bootstrap - Stage::Models.new(@config).bootstrap - Stage::Jobs.new(@config).bootstrap - Stage::Telemetry.new(@config).bootstrap + class Console < Mode + def bootstrap # rubocop:disable Metrics/AbcSize + Stage::Prompt.new(config).bootstrap + Stage::Logger.new(config).bootstrap + Stage::Database.new(config).bootstrap + Stage::Models.new(config).bootstrap + Stage::Jobs.new(config).bootstrap + Stage::Telemetry.new(config).bootstrap end end end diff --git a/lib/bootstrap/mode.rb b/lib/bootstrap/mode.rb new file mode 100644 index 0000000..986f9c5 --- /dev/null +++ b/lib/bootstrap/mode.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require_relative 'stage/database' +require_relative 'stage/jobs' +require_relative 'stage/logger' +require_relative 'stage/rake_tasks' + +module DocumentTransfer + module Bootstrap + # Base class for bootstrapping the application. + # + # @abstract Subclass and implement {#bootstrap} to define a bootstrap mode. + class Mode + attr_reader :config + + # Initialize the mode. + # + # @param config [DocumentTransfer::Config::Application] The application + # configuration. + def initialize(config) + @config = config + end + + # Execute the bootstrap process. + # + # @raise [NotImplementedError] If the method is not implemented. + def bootstrap + raise NotImplementedError + end + end + end +end diff --git a/lib/bootstrap/rake.rb b/lib/bootstrap/rake.rb index 7d075e5..ee5b019 100644 --- a/lib/bootstrap/rake.rb +++ b/lib/bootstrap/rake.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require_relative 'mode' require_relative 'stage/database' require_relative 'stage/jobs' require_relative 'stage/logger' @@ -8,26 +9,22 @@ module DocumentTransfer module Bootstrap # Boostrap the API.
- class Rake + class Rake < Mode include SemanticLogger::Loggable - def initialize(config) - @config = config - end - def bootstrap - Stage::Logger.new(@config).bootstrap + Stage::Logger.new(config).bootstrap # We may not have a database available when running rake tasks. Try to # bootstrap it, but don't fail if it's not available. begin - Stage::Database.new(@config).bootstrap - Stage::Jobs.new(@config).bootstrap + Stage::Database.new(config).bootstrap + Stage::Jobs.new(config).bootstrap rescue Sequel::DatabaseConnectionError => e warn("Database connection error: #{e.message}") end - Stage::RakeTasks.new(@config).bootstrap + Stage::RakeTasks.new(config).bootstrap end end end diff --git a/lib/bootstrap/stage/base.rb b/lib/bootstrap/stage/base.rb index 96ae6c1..e82577f 100644 --- a/lib/bootstrap/stage/base.rb +++ b/lib/bootstrap/stage/base.rb @@ -6,6 +6,9 @@ module DocumentTransfer module Bootstrap module Stage # Base class for bootstrap stages. + # + # @abstract Subclass and implement {#bootstrap} to define a bootstrap + # stage. class Base include SemanticLogger::Loggable diff --git a/lib/bootstrap/worker.rb b/lib/bootstrap/worker.rb index bdf1984..7185983 100644 --- a/lib/bootstrap/worker.rb +++ b/lib/bootstrap/worker.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require_relative 'mode' require_relative 'stage/database' require_relative 'stage/jobs' require_relative 'stage/logger' @@ -9,17 +10,13 @@ module DocumentTransfer module Bootstrap # Boostrap the worker. 
- class Worker - def initialize(config) - @config = config - end - + class Worker < Mode def bootstrap - Stage::Logger.new(@config).bootstrap - Stage::Database.new(@config).bootstrap - Stage::Jobs.new(@config).bootstrap - Stage::Telemetry.new(@config).bootstrap - Stage::Worker.new(@config).bootstrap + Stage::Logger.new(config).bootstrap + Stage::Database.new(config).bootstrap + Stage::Jobs.new(config).bootstrap + Stage::Telemetry.new(config).bootstrap + Stage::Worker.new(config).bootstrap end end end diff --git a/mkdocs.yaml b/mkdocs.yaml index 12750b3..a331fbb 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -78,6 +78,7 @@ nav: - Jobs: architecture/jobs.md - Sources: architecture/sources.md - Destinations: architecture/destinations.md + - Bootstrap: bootstrap.md - About: # Include changelog. - License: license.md From 90490f22730f7a52de31190c91e0c1c752778725 Mon Sep 17 00:00:00 2001 From: James Armes Date: Fri, 6 Sep 2024 12:11:18 -0400 Subject: [PATCH 05/14] Added documentation for configuration. 
--- Gemfile.lock | 4 + doc/configuration.md | 42 ++++++++++ document-transfer-service.gemspec | 1 + lib/config/base.rb | 16 +--- lib/config/destination.rb | 2 +- lib/config/dsl.rb | 51 ------------ lib/config/from_environment.rb | 41 ---------- lib/config/validation.rb | 29 ------- mkdocs.yaml | 1 + .../unit/document_transfer/config/dsl_spec.rb | 77 ------------------- 10 files changed, 53 insertions(+), 211 deletions(-) create mode 100644 doc/configuration.md delete mode 100644 lib/config/dsl.rb delete mode 100644 lib/config/from_environment.rb delete mode 100644 lib/config/validation.rb delete mode 100644 spec/unit/document_transfer/config/dsl_spec.rb diff --git a/Gemfile.lock b/Gemfile.lock index fef1bad..118c8df 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -13,6 +13,7 @@ PATH activesupport (~> 7.1.0) adal (~> 1.0) bcrypt (~> 3.1) + configsl (~> 1.0) daemons (~> 1.4) delayed_job (~> 4.1) faraday (~> 2.9) @@ -58,6 +59,8 @@ GEM bigdecimal (3.1.8) coderay (1.1.3) concurrent-ruby (1.3.3) + configsl (1.0.1) + facets (~> 3.1) connection_pool (2.4.1) csv (3.3.0) daemons (1.4.1) @@ -83,6 +86,7 @@ GEM zeitwerk (~> 2.6) et-orbi (1.2.11) tzinfo + facets (3.1.0) factory_bot (6.4.6) activesupport (>= 5.0.0) faker (3.4.1) diff --git a/doc/configuration.md b/doc/configuration.md new file mode 100644 index 0000000..56935a0 --- /dev/null +++ b/doc/configuration.md @@ -0,0 +1,42 @@ +# Configuration + +Configuration is an important part of any system. We utilize configuration not +just to boot the application, but also to define source and destinations based +on a transfer request's parameters. + +## Implementation + +We utilize [configsl], which provides a simple DSL (Domain Specific Language) +to define configuration using classes. This allows us to define configuration +in a more declarative and human-readable way. + +Configuration classes are defined in the `lib/config` directory. 
Each class +extends `DocumentTransfer::Config::Base` which provides some basic functionality +at instantiation. + +To create a new configuration, create a new class that extends +`DocumentTransfer::Config::Base`, and use the configsl DSL to define your +options. + +```ruby +class MyConfig < DocumentTransfer::Config::Base + option :my_option, type: String, default: 'default value' + option :required_option, type: Symbol, required: true, enum: [:a, :b, :c] + + # You can also define nested configuration. + option :nested_config, type: MyNestedConfig, required: true +end +``` + +For more details on the syntax, see the [configsl] documentation. + +To create a new instance of your configuration, you can instantiate it directly, +load it from environment variables, or read it from a file. + +```ruby +config = MyConfig.new(my_option: 'value', required_option: :a, nested_config: { ... }) +config = MyConfig.from_environment +config = MyConfig.from_file('path/to/file.yaml') +``` + +[configsl]: https://github.com/jamesiarmes/configsl diff --git a/document-transfer-service.gemspec b/document-transfer-service.gemspec index 8f4d145..058cb93 100644 --- a/document-transfer-service.gemspec +++ b/document-transfer-service.gemspec @@ -27,6 +27,7 @@ Gem::Specification.new do |s| s.add_dependency 'activesupport', '~> 7.1.0' s.add_dependency 'adal', '~> 1.0' s.add_dependency 'bcrypt', '~> 3.1' + s.add_dependency 'configsl', '~> 1.0' s.add_dependency 'daemons', '~> 1.4' s.add_dependency 'delayed_job', '~> 4.1' s.add_dependency 'faraday', '~> 2.9' diff --git a/lib/config/base.rb b/lib/config/base.rb index 47b8d4b..d610d8e 100644 --- a/lib/config/base.rb +++ b/lib/config/base.rb @@ -1,23 +1,15 @@ # frozen_string_literal: true -require_relative 'dsl' -require_relative 'from_environment' -require_relative 'validation' +require 'configsl' module DocumentTransfer module Config - class InvalidConfigurationError < ArgumentError; end - # Base class for configuration. 
- class Base - include DSL - include FromEnvironment - include Validation - + class Base < ConfigSL::Config def initialize(params = {}) - @params = params + super - validate + validate! end end end diff --git a/lib/config/destination.rb b/lib/config/destination.rb index 35ab711..0c4d959 100644 --- a/lib/config/destination.rb +++ b/lib/config/destination.rb @@ -8,7 +8,7 @@ module Config class Destination < Base option :type, type: Symbol, enum: [:onedrive], required: true option :path, type: String, default: '' - option :filename, type: String + option :filename, type: String, default: '' end end end diff --git a/lib/config/dsl.rb b/lib/config/dsl.rb deleted file mode 100644 index ba68ac8..0000000 --- a/lib/config/dsl.rb +++ /dev/null @@ -1,51 +0,0 @@ -# frozen_string_literal: true - -module DocumentTransfer - module Config - # DSL for configuration - module DSL - def self.included(base) - base.extend ClassMethods - end - - def options - self.class.options - end - - def method_missing(name, *args, &) - super unless options.key?(name) - - @params[name] = format_value(name, args.first) if args.any? - format_value(name, @params[name] || options[name]&.[](:default)) - end - - def respond_to_missing?(name, include_private = false) - options.key?(name) || super - end - - def format_value(option, value) - return value if value.is_a?(options[option][:type]) - - case options[option][:type].name.to_sym - when :Integer then value.to_i - when :Symbol then value.to_sym - when :String then value.to_s - else value - end - end - - # Required class methods for the config DSL. - # - # @todo Can we do this without using class variables? 
- module ClassMethods - def option(name, opts = {}) - options.merge!({ name => opts }) - end - - def options - @options ||= {} - end - end - end - end -end diff --git a/lib/config/from_environment.rb b/lib/config/from_environment.rb deleted file mode 100644 index 735eb36..0000000 --- a/lib/config/from_environment.rb +++ /dev/null @@ -1,41 +0,0 @@ -# frozen_string_literal: true - -module DocumentTransfer - module Config - # Supports configuration through environment variables. - # - # This module should be loaded after the DSL module. - # - # By default, options will be read from environment variables with the same - # name, capitalized. For example, an option named `database_url` will be - # read as 'DATABASE_URL'. To override this behavior, set the `env_variable` - # when defining the option. - module FromEnvironment - def self.included(base) - base.extend ClassMethods - end - - # Class methods necessary for loading configuration from the environment. - module ClassMethods - # Create a new instance of the class using values from the environment. - # - # @return [self] The new config object - def from_environment - params = options.transform_values do |opts| - ENV.fetch(opts[:env_variable], opts[:default]) - end - - new(params) - end - - # Override the options method to ensure each option has an environment - # variable defined. - def options - super.each do |name, opts| - opts[:env_variable] = opts[:env_variable] || name.to_s.upcase - end - end - end - end - end -end diff --git a/lib/config/validation.rb b/lib/config/validation.rb deleted file mode 100644 index 95120c7..0000000 --- a/lib/config/validation.rb +++ /dev/null @@ -1,29 +0,0 @@ -# frozen_string_literal: true - -module DocumentTransfer - module Config - # Validator for configuration. - module Validation - def validate - errors = validate_required + validate_values - raise InvalidConfigurationError, errors.join("\n") unless errors.empty? 
- end - - def validate_required - errors = options.select do |name, opts| - opts[:required] && @params[name].nil? - end - - errors.empty? ? [] : ["Missing required options: #{errors.keys.join(', ')}"] - end - - def validate_values - options.each_with_object([]) do |(name, opts), errors| - next unless opts[:enum] - - errors << "Invalid value for #{name}: #{@params[name]}" unless opts[:enum].include?(send(name)) - end - end - end - end -end diff --git a/mkdocs.yaml b/mkdocs.yaml index a331fbb..92919b2 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -79,6 +79,7 @@ nav: - Sources: architecture/sources.md - Destinations: architecture/destinations.md - Bootstrap: bootstrap.md + - Configuration: configuration.md - About: # Include changelog. - License: license.md diff --git a/spec/unit/document_transfer/config/dsl_spec.rb b/spec/unit/document_transfer/config/dsl_spec.rb deleted file mode 100644 index 27a12f5..0000000 --- a/spec/unit/document_transfer/config/dsl_spec.rb +++ /dev/null @@ -1,77 +0,0 @@ -# frozen_string_literal: true - -require_relative '../../../../lib/config/dsl' - -describe DocumentTransfer::Config::DSL do - subject(:config) do - Class.new do - include DocumentTransfer::Config::DSL - - def initialize(params = {}) - @params = params - end - - option :name, type: String, required: true - option :enum, type: Symbol, enum: %i[one two three], required: true - option :optional, type: String, default: 'default' - end - end - - describe '.option' do - it 'adds an option' do - expect(config.instance_variable_get(:@options)).to have_key(:name) - end - - it 'adds all options' do - expect(config.instance_variable_get(:@options)[:enum]).to eq( - type: Symbol, enum: %i[one two three], required: true - ) - end - end - - describe '.options' do - it 'returns the options' do - expect(config.options.keys).to eq(%i[name enum optional]) - end - end - - describe '#options' do - subject(:instance) { config.new } - - it 'returns the options' do - expect(instance.options.keys).to 
eq(%i[name enum optional]) - end - end - - describe '#method_missing' do - subject(:instance) { config.new(name: 'rspec-config', enum: 'two') } - - it 'returns the value' do - expect(instance.name).to eq('rspec-config') - end - - it 'returns the default value' do - expect(instance.optional).to eq('default') - end - - it 'returns the formatted value' do - expect(instance.enum).to eq(:two) - end - - it 'raises an error for unknown options' do - expect { instance.unknown }.to raise_error(NoMethodError) - end - end - - describe '#format_value' do - subject(:instance) { config.new } - - it 'returns an expected string' do - expect(instance.format_value(:name, :rspec)).to be_a(String) - end - - it 'returns the symbol' do - expect(instance.format_value(:enum, 'three')).to be_a(Symbol) - end - end -end From b8e587f4a53089e408b5590dd8896899198d5b46 Mon Sep 17 00:00:00 2001 From: James Armes Date: Fri, 6 Sep 2024 12:33:55 -0400 Subject: [PATCH 06/14] Added document deploy. --- .github/workflows/docs.yaml | 36 ++++++++++++++++++++++++++++++++++++ .rubocop.yml | 1 + 2 files changed, 37 insertions(+) create mode 100644 .github/workflows/docs.yaml diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 0000000..7b84ab5 --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,36 @@ +on: + pull_request: + workflow_dispatch: + push: + branches: + - main + +jobs: + deploy: + name: Deploy Documentation + environment: ${{ env.ENVIRONMENT || 'dev' }} + env: + BUCKET_NAME: ${{ env.BUCKET_NAME || 'dev.docs.cfa.codes' }} + PREFIX: ${{ env.PREFIX || 'document-transfer-service' }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-east-1 + - uses: actions/setup-python@v5 + with: + python-version: 3.x + - run: echo 
"cache_id=$(date --utc '+%V')" >> $GITHUB_ENV + - uses: actions/cache@v4 + with: + key: mkdocs-material-${{ env.cache_id }} + path: .cache + restore-keys: | + mkdocs-material- + - run: pip install mkdocs-material pymdown-extensions + - run: mkdocs build + - run: aws s3 sync ./site "s3://${BUCKET_NAME}/${PREFIX}" --recursive diff --git a/.rubocop.yml b/.rubocop.yml index 5dec583..32edd91 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -14,6 +14,7 @@ AllCops: - 'db/*schema.rb' - 'out/**/*' - 'vendor/**/*' + - 'doc/**/*' # Transient properties get mistaken for associations in FactoryBot. # See https://github.com/rubocop/rubocop-factory_bot/issues/73 From 7c05c3c168387615824f7740484af88179a982fd Mon Sep 17 00:00:00 2001 From: James Armes Date: Fri, 6 Sep 2024 12:42:30 -0400 Subject: [PATCH 07/14] Added document deploy. --- .github/workflows/docs.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 7b84ab5..ce6e458 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -1,6 +1,12 @@ on: pull_request: workflow_dispatch: + inputs: + environment: + description: 'Environment to deploy to' + default: 'dev' + required: true + type: environment push: branches: - main @@ -8,7 +14,7 @@ on: jobs: deploy: name: Deploy Documentation - environment: ${{ env.ENVIRONMENT || 'dev' }} + environment: ${{ input.environment || 'dev' }} env: BUCKET_NAME: ${{ env.BUCKET_NAME || 'dev.docs.cfa.codes' }} PREFIX: ${{ env.PREFIX || 'document-transfer-service' }} From f4783145b3fafeebd81c2355a7842dfda7c19cb5 Mon Sep 17 00:00:00 2001 From: James Armes Date: Fri, 6 Sep 2024 12:44:55 -0400 Subject: [PATCH 08/14] Added document deploy. 
--- .github/workflows/docs.yaml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index ce6e458..09139be 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -14,10 +14,7 @@ on: jobs: deploy: name: Deploy Documentation - environment: ${{ input.environment || 'dev' }} - env: - BUCKET_NAME: ${{ env.BUCKET_NAME || 'dev.docs.cfa.codes' }} - PREFIX: ${{ env.PREFIX || 'document-transfer-service' }} + environment: 'dev' runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -39,4 +36,4 @@ jobs: mkdocs-material- - run: pip install mkdocs-material pymdown-extensions - run: mkdocs build - - run: aws s3 sync ./site "s3://${BUCKET_NAME}/${PREFIX}" --recursive + - run: aws s3 sync ./site "s3://${{ env.BUCKET_NAME || 'dev.docs.cfa.codes' }}/${{ env.PREFIX || 'document-transfer-service' }}" --recursive From 8287a11b757001cda559197143dfff4956f4b949 Mon Sep 17 00:00:00 2001 From: James Armes Date: Fri, 6 Sep 2024 12:48:06 -0400 Subject: [PATCH 09/14] Added document deploy. 
--- .github/workflows/docs.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 09139be..0274268 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -27,6 +27,8 @@ jobs: - uses: actions/setup-python@v5 with: python-version: 3.x + - name: Install plantuml + uses: Timmy/plantuml-action@v1 - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV - uses: actions/cache@v4 with: @@ -34,6 +36,6 @@ jobs: path: .cache restore-keys: | mkdocs-material- - - run: pip install mkdocs-material pymdown-extensions + - run: pip install mkdocs-material pymdown-extensions plantuml_markdown - run: mkdocs build - run: aws s3 sync ./site "s3://${{ env.BUCKET_NAME || 'dev.docs.cfa.codes' }}/${{ env.PREFIX || 'document-transfer-service' }}" --recursive From 08f015ac2824c4927998f2e1b2f6a3503953eef8 Mon Sep 17 00:00:00 2001 From: James Armes Date: Fri, 6 Sep 2024 12:53:48 -0400 Subject: [PATCH 10/14] Added document deploy. 
--- .github/workflows/docs.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 0274268..187721a 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -28,7 +28,7 @@ jobs: with: python-version: 3.x - name: Install plantuml - uses: Timmy/plantuml-action@v1 + run: sudo apt-get install -y plantuml - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV - uses: actions/cache@v4 with: @@ -38,4 +38,4 @@ jobs: mkdocs-material- - run: pip install mkdocs-material pymdown-extensions plantuml_markdown - run: mkdocs build - - run: aws s3 sync ./site "s3://${{ env.BUCKET_NAME || 'dev.docs.cfa.codes' }}/${{ env.PREFIX || 'document-transfer-service' }}" --recursive + - run: aws s3 sync ./site "s3://${{ env.BUCKET_NAME || 'dev.docs.cfa.codes' }}/${{ env.PREFIX || 'document-transfer-service' }}" From 751128e08982332fa78dd8089113030dd474bad6 Mon Sep 17 00:00:00 2001 From: James Armes Date: Fri, 6 Sep 2024 13:03:07 -0400 Subject: [PATCH 11/14] Added document deploy. --- .github/workflows/docs.yaml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 187721a..b9aaf11 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -1,12 +1,6 @@ on: + # TODO: remove pull requests before merging. 
pull_request: - workflow_dispatch: - inputs: - environment: - description: 'Environment to deploy to' - default: 'dev' - required: true - type: environment push: branches: - main From f0b2b185c942e5417a772695cf008fabaf245e40 Mon Sep 17 00:00:00 2001 From: James Armes Date: Fri, 6 Sep 2024 14:18:17 -0400 Subject: [PATCH 12/14] Moving some diagrams --- doc/api.md | 51 --------------------------------------------- doc/architecture.md | 51 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/doc/api.md b/doc/api.md index 72d1d9d..cbbf4e4 100644 --- a/doc/api.md +++ b/doc/api.md @@ -1,56 +1,5 @@ # Document Transfer Service API -```mermaid -sequenceDiagram - critical Request a document transfer - activate Consumer - activate API - activate Database - Consumer->>API: Request transfer - API->>Database: Record transfer - API->>Database: Enqueue background job - API->>Consumer: Return transfer id - deactivate Consumer - end - - critical Background job processing - activate Worker - loop until success or max attempts - Worker->>Database: Reserve job - activate Source - Worker->>Source: Fetch document - deactivate Source - activate Destination - Worker->>Destination: Transfer document - deactivate Destination - alt Transfer succeeded - Worker->>Database: Update request status - Worker->>Database: Mark job complete - else Transfer failed - alt Max attempts reached - rect rgb(191, 223, 255) - Worker->>Database: Update request status - Worker->>Database: Mark job failed - end - else Retry transfer - Worker->>Database: Mark job failed - end - end - end - deactivate Worker - end - - opt Check transfer status - activate Consumer - Consumer->>API: Check transfer status - API->>Database: Retrieve transfer status - API->>Consumer: Return transfer status - deactivate Database - deactivate API - deactivate Consumer - end -``` - Interacting with the Document Transfer Service is done through a RESTful API. 
All requests and responses should be in JSON format, unless otherwise indicated. diff --git a/doc/architecture.md b/doc/architecture.md index 7ff3818..6bffd15 100644 --- a/doc/architecture.md +++ b/doc/architecture.md @@ -120,6 +120,57 @@ length of time as jobs are deleted once they have been completed. See the [database] documentation for more details on how the database is configured. +```mermaid +sequenceDiagram + critical Request a document transfer + activate Consumer + activate API + activate Database + Consumer->>API: Request transfer + API->>Database: Record transfer + API->>Database: Enqueue background job + API->>Consumer: Return transfer id + deactivate Consumer + end + + critical Background job processing + activate Worker + loop until success or max attempts + Worker->>Database: Reserve job + activate Source + Worker->>Source: Fetch document + deactivate Source + activate Destination + Worker->>Destination: Transfer document + deactivate Destination + alt Transfer succeeded + Worker->>Database: Update request status + Worker->>Database: Mark job complete + else Transfer failed + alt Max attempts reached + rect rgb(191, 223, 255) + Worker->>Database: Update request status + Worker->>Database: Mark job failed + end + else Retry transfer + Worker->>Database: Mark job failed + end + end + end + deactivate Worker + end + + opt Check transfer status + activate Consumer + Consumer->>API: Check transfer status + API->>Database: Retrieve transfer status + API->>Consumer: Return transfer status + deactivate Database + deactivate API + deactivate Consumer + end +``` + [api]: architecture/api.md [c4-containers]: https://c4model.com/#ContainerDiagram [database]: database.md From 066df66176257b720c8f5693fa729bd9d62da325 Mon Sep 17 00:00:00 2001 From: James Armes Date: Tue, 10 Sep 2024 12:21:37 -0400 Subject: [PATCH 13/14] Added documentation for viewing the documentation. 
--- README.md | 18 ++++++++++++++++++ docker-compose.yaml | 10 ++++++++++ docs.dockerfile | 6 ++++++ 3 files changed, 34 insertions(+) create mode 100644 docs.dockerfile diff --git a/README.md b/README.md index 67776c3..7af91a9 100644 --- a/README.md +++ b/README.md @@ -132,6 +132,23 @@ bundle exec rake db:drop See the [API documentation][api] for information on how to interact with the service. +## Documentation + +Necessary documentation to operate, use, maintain, and contribute to the service +is included in this repository. The majority of these documents are written in +Markdown and can be rendered directly in GitHub or your favorite IDE. However, +the documentation as a whole is meant to be converted to a static site using +[MkDocs]. + +In order to view the documentation in its intended form locally, you can use the +included docker container. Simply run the following: + +```bash +docker compose --profile docs up -d +``` + +The documentation should then be available at http://localhost:8000. + [.env]: https://github.com/codeforamerica/document-transfer-service/blob/main/sample.env [api]: ./api.md [create-key]: ./runbooks/create_auth_key.md @@ -140,6 +157,7 @@ service. 
[Dockerfile]: https://github.com/codeforamerica/document-transfer-service/blob/main/Dockerfile [docker compose]: https://github.com/codeforamerica/document-transfer-service/blob/main/docker-compose.yaml [Docker Desktop]: https://docs.docker.com/desktop/ +[mkdocs]: https://www.mkdocs.org/ [omz]: https://ohmyz.sh/ [ruby-version]: https://github.com/codeforamerica/document-transfer-service/blob/main/.ruby-version [rvm]: https://rvm.io/ diff --git a/docker-compose.yaml b/docker-compose.yaml index f97df50..e1d1eb6 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -46,5 +46,15 @@ services: <<: *service-defaults command: ./script/worker run + docs: + profiles: + - docs + build: + dockerfile: docs.dockerfile + ports: + - "8000:8000" + volumes: + - .:/docs + volumes: postgres: diff --git a/docs.dockerfile b/docs.dockerfile new file mode 100644 index 0000000..8c4c4a3 --- /dev/null +++ b/docs.dockerfile @@ -0,0 +1,6 @@ +FROM squidfunk/mkdocs-material:latest + +# Install PlantUML so we can render UML diagrams. +RUN pip install plantuml_markdown +RUN apk add --no-cache plantuml --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community \ + && rm -rf /var/cache/apk/* From 5c70f3c24d1973b5e8be8801ddf4ed5a5f859c53 Mon Sep 17 00:00:00 2001 From: James Armes Date: Tue, 10 Sep 2024 12:27:09 -0400 Subject: [PATCH 14/14] Removed pull request document deployment. --- .github/workflows/docs.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index b9aaf11..ccf5357 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -1,6 +1,4 @@ on: - # TODO: remove pull requests before merging. - pull_request: push: branches: - main