Skip to content

Commit

Permalink
Initial
Browse files Browse the repository at this point in the history
  • Loading branch information
isaacseymour committed Jan 12, 2018
0 parents commit 19f092c
Show file tree
Hide file tree
Showing 18 changed files with 594 additions and 0 deletions.
76 changes: 76 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
ruby: &ruby
image: carwow/ruby-ci:2.4.2

elasticsearch_container: &elasticsearch_container
image: carwow/elasticsearch-ci:5.5.1

version: 2
jobs:
bundle:
working_directory: ~/es_index
docker:
- *ruby
steps:
- checkout
- restore_cache:
keys:
- bundle-{{ checksum "Gemfile.lock" }}
- bundle-
- run: |
bundle config --local path vendor/bundle &&
bundle check || bundle install --jobs=4 --retry=3
- save_cache:
key: bundle-{{ checksum "Gemfile.lock" }}
paths: [~/es_index/vendor/bundle]
- persist_to_workspace:
root: '~'
paths: [es_index]

rubocop:
working_directory: ~/es_index
docker:
- *ruby
steps:
- attach_workspace:
at: '~'
- run: bundle exec rubocop

tests:
working_directory: ~/es_index
docker:
- *ruby
- *elasticsearch_container
steps:
- attach_workspace:
at: '~'
- run: |
bundle exec rspec --pattern "**/*_spec.rb" --format "progress"
publish:
working_directory: ~/es_index
docker:
- *ruby
steps:
- attach_workspace:
at: '~'
# Prepare SSH and git identity, then cut a release via Bundler's `rake release`.
- run: |
    # ~/.ssh must be private to the user (0700); known_hosts is a plain data
    # file and should not be executable (0600).
    mkdir -p ~/.ssh
    chmod 0700 ~/.ssh
    ssh-keyscan github.com >> ~/.ssh/known_hosts
    chmod 0600 ~/.ssh/known_hosts
    git config user.name 'CircleCI'
    git config user.email 'developers@carwow.co.uk'
    bundle exec rake release
workflows:
version: 2
build-and-deploy:
jobs:
- bundle
- rubocop:
requires: [bundle]
- tests:
requires: [bundle]
# - publish:
# requires: [rubocop, tests]
# filters:
# branches:
# only: [master]
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/.bundle/
/.yardoc
/_yardoc/
/coverage/
/doc/
/pkg/
/spec/reports/
/tmp/

# rspec failure tracking
.rspec_status
3 changes: 3 additions & 0 deletions .rspec
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
--format documentation
--color
--require spec_helper
4 changes: 4 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
source "https://rubygems.org"

# Specify your gem's dependencies in es_index.gemspec
gemspec
70 changes: 70 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
PATH
remote: .
specs:
es_index (0.1.0)
activesupport

GEM
remote: https://rubygems.org/
specs:
activesupport (5.1.4)
concurrent-ruby (~> 1.0, >= 1.0.2)
i18n (~> 0.7)
minitest (~> 5.1)
tzinfo (~> 1.1)
ast (2.3.0)
coderay (1.1.2)
concurrent-ruby (1.0.5)
diff-lcs (1.3)
i18n (0.9.1)
concurrent-ruby (~> 1.0)
method_source (0.9.0)
minitest (5.11.1)
parallel (1.12.1)
parser (2.4.0.2)
ast (~> 2.3)
powerpack (0.1.1)
pry (0.11.3)
coderay (~> 1.1.0)
method_source (~> 0.9.0)
rainbow (3.0.0)
rake (12.3.0)
rspec (3.7.0)
rspec-core (~> 3.7.0)
rspec-expectations (~> 3.7.0)
rspec-mocks (~> 3.7.0)
rspec-core (3.7.1)
rspec-support (~> 3.7.0)
rspec-expectations (3.7.0)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.7.0)
rspec-mocks (3.7.0)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.7.0)
rspec-support (3.7.0)
rubocop (0.52.1)
parallel (~> 1.10)
parser (>= 2.4.0.2, < 3.0)
powerpack (~> 0.1)
rainbow (>= 2.2.2, < 4.0)
ruby-progressbar (~> 1.7)
unicode-display_width (~> 1.0, >= 1.0.1)
ruby-progressbar (1.9.0)
thread_safe (0.3.6)
tzinfo (1.2.4)
thread_safe (~> 0.1)
unicode-display_width (1.3.0)

PLATFORMS
ruby

DEPENDENCIES
bundler
es_index!
pry
rake
rspec
rubocop

BUNDLED WITH
1.16.1
21 changes: 21 additions & 0 deletions LICENSE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2018 Isaac Seymour

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
103 changes: 103 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Zero-downtime indexing from ActiveRecord->Elasticsearch

## Installation

Add this line to your application's Gemfile:

```ruby
gem 'es_index'
```

And then execute:

$ bundle

Or install it yourself as:

$ gem install es_index

## Usage
### Setup

For each ActiveRecord scope you want to index, you'll need a configuration:
```ruby
class MyModel < ApplicationRecord
...
end

MyModelIndex = EsIndex.new(
client: Elasticsearch::Client.new(...),
index_definition: {
...
},
data_source: MyModel.some_scope,
read_alias: 'my_models', # this is the default - the table name
write_alias: 'my_models_write', # this is the default - <read_alias>_write
type: 'my_model' # default - singularized table name
) do |my_model|
# this block transforms an instance of MyModel into the hash which goes into Elasticsearch
{
attr_1: my_model.attr_1,
attr_2: my_model.attr_2,
attr_3: my_model.attr_3
}
end
```

### Normal usage

You'll need to make sure the following gets run whenever an instance of MyModel is updated:

```ruby
indexer = EsIndex::Indexer.new(MyModelIndex)
indexer.index_record(my_model)
```

And when an instance of MyModel gets deleted:
```ruby
indexer = EsIndex::Indexer.new(MyModelIndex)
indexer.delete_by_id(my_model.id)
```

There are also some bulk-change methods which may be useful:
```ruby
indexer = EsIndex::Indexer.new(MyModelIndex)
indexer.index_batch(MyModel.where(id: [...]))
indexer.delete_by_ids([1, 2, 3])
indexer.delete_by_query(elasticsearch_query)
```

### Re-indexing

Sometimes you'll need to do a full reindex - maybe because of a bug which left the index in a bad
state, or because of a new index definition, or...anything else.

We use index aliases to make it easy to do zero-downtime reindexing. The actual indexes are
`<read_alias>_<random>`. The `read_alias` points to the single "current" index.
The `write_alias` usually points at the same index as the read alias, except during re-indexing,
when it points at both the old and new indices, so both receive writes. The following steps run a
full reindex:

1. `new_index_name = SecureRandom.hex(3)`
2. `index_manager = EsIndex::IndexManager.new(MyModelIndex)`
3. `index_manager.create_index(new_index_name)`
4. `index_manager.populate_index(new_index_name, batch_size: 3000)`
5. Check that the new index is looking alrightish
6. `index_manager.switch_read_index(new_index_name)`
7. Probably do some more checks, then
8. `index_manager.stop_dual_writes`
9. `index_manager.cleanup_old_indices`

## Development

After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.

To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).

## Contributing

Bug reports and pull requests are welcome on GitHub at https://github.com/carwow/es_index.

## License

The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
6 changes: 6 additions & 0 deletions Rakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
require "bundler/gem_tasks"
require "rspec/core/rake_task"

# Register the `spec` task, which runs the RSpec suite.
RSpec::Core::RakeTask.new(:spec)

# A bare `rake` invocation runs the specs.
task default: :spec
11 changes: 11 additions & 0 deletions bin/console
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env ruby

require "bundler/setup"
require "es_index"

# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.

# (If you use this, don't forget to add pry to your Gemfile!)
require "pry"
Pry.start
8 changes: 8 additions & 0 deletions bin/setup
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Development bootstrap script: installs the gem's dependencies with Bundler.

# Abort on any failing command, on use of an unset variable, and when any
# stage of a pipeline fails.
set -euo pipefail
# Split words only on newlines and tabs, never on spaces.
IFS=$'\n\t'
# From here on, echo each line as it is read (-v) and each command as it is
# executed (-x), so the setup output shows exactly what ran.
set -vx

bundle install

# Do any other automated setup that you need to do here
40 changes: 40 additions & 0 deletions es_index.gemspec
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@

# Put the gem's own lib/ directory on the load path so the version constant
# can be required before the gem is installed.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "es_index/version"

Gem::Specification.new do |spec|
  spec.name        = "es_index"
  spec.version     = EsIndex::VERSION
  spec.authors     = ["Isaac Seymour"]
  spec.email       = ["i.seymour@oxon.org"]

  spec.summary     = "Zero-downtime (re-)indexing of ActiveRecord models into Elasticsearch."
  spec.description = "An index manager for Elasticsearch and ActiveRecord"
  spec.homepage    = "https://github.com/carwow/es_index"
  spec.license     = "MIT"

  # Restrict which host this gem may be pushed to. Without metadata support
  # (RubyGems < 2.0) that restriction cannot be expressed, so refuse to continue.
  unless spec.respond_to?(:metadata)
    raise "RubyGems 2.0 or newer is required to protect against " \
          "public gem pushes."
  end
  # NOTE(review): this is still the generator's placeholder value; it presumably
  # needs replacing with a real host (or removing) before `rake release` can
  # push anywhere - confirm the intended push target.
  spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"

  # Package every git-tracked file except tests, specs and features.
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Runtime dependency.
  spec.add_dependency "activesupport"

  # Development-only dependencies.
  spec.add_development_dependency "bundler"
  spec.add_development_dependency "pry"
  spec.add_development_dependency "rubocop"
  spec.add_development_dependency "rspec"
  spec.add_development_dependency "rake"
end
17 changes: 17 additions & 0 deletions lib/config.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Bundles the settings needed to index one data source into Elasticsearch:
# the source itself, the alias and type names, the index definition, the
# client, and the block that turns a record into the document to be indexed.
class Config
  attr_reader :data_source, :read_alias, :write_alias, :type, :index_definition, :client

  # @param data_source the collection to index; must respond to +table_name+
  #   when +read_alias+ is not given
  # @param read_alias [String, nil] alias name used for reads; defaults to
  #   +data_source.table_name+
  # @param write_alias [String, nil] alias name used for writes; defaults to
  #   "<read_alias>_write"
  # @param type [String, nil] document type name; defaults to
  #   +read_alias.singularize+ (+singularize+ is not core Ruby - presumably
  #   supplied by ActiveSupport, which the caller must have loaded)
  # @param index_definition the index definition, stored as given
  # @param client the Elasticsearch client, stored as given
  # @yieldparam model a record from the data source
  # @yieldreturn the document to index for that record
  def initialize(data_source:, read_alias: nil, write_alias: nil, type: nil, index_definition:, client:, &index_data)
    @data_source = data_source
    @read_alias = read_alias || data_source.table_name
    @write_alias = write_alias || [@read_alias, 'write'].join('_')
    # `type` used to be read here without being a parameter, which raised
    # NameError on every construction; it is now an optional keyword.
    @type = type || @read_alias.singularize
    @index_definition = index_definition
    @client = client
    @index_data = index_data
  end

  # Runs the block given at construction against +model+ and returns its result.
  def index_data(model)
    @index_data.call(model)
  end
end
4 changes: 4 additions & 0 deletions lib/es_index.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
require 'es_index/version'
require 'es_index/config'
require 'es_index/indexer'
require 'es_index/index_manager'
Loading

0 comments on commit 19f092c

Please sign in to comment.