Skip to content

Commit 7721c93

Browse files
committed
Merge branch 'searching_by_species_country_combinations'
2 parents bd5c93e + 0ad29ea commit 7721c93

13 files changed

+545
-42
lines changed

app/models/document_citation.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,8 @@ def stringy_taxon_concept_ids=(ids)
3030
self.taxon_concept_ids = ids.split(',')
3131
end
3232

33+
# When a citation is destroyed, touch the document it belonged to so the
# document's updated_at reflects the change.
after_destroy { |citation| citation.document.touch }
36+
3337
end

app/models/document_citation_geo_entity.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,8 @@ class DocumentCitationGeoEntity < ActiveRecord::Base
1616
attr_accessible :created_by_id, :document_citation_id, :geo_entity_id, :updated_by_id
1717
belongs_to :geo_entity
1818
belongs_to :document_citation, touch: true
19+
20+
# When a citation/geo-entity link is destroyed, touch the owning citation so
# its updated_at reflects the change.
after_destroy { |link| link.document_citation.touch }
1923
end

app/models/document_citation_taxon_concept.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,8 @@ class DocumentCitationTaxonConcept < ActiveRecord::Base
1616
attr_accessible :created_by_id, :document_citation_id, :taxon_concept_id, :updated_by_id
1717
belongs_to :taxon_concept
1818
belongs_to :document_citation, touch: true
19+
20+
# When a citation/taxon-concept link is destroyed, touch the owning citation
# so its updated_at reflects the change.
after_destroy { |link| link.document_citation.touch }
1923
end

app/models/document_search.rb

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -103,19 +103,56 @@ def add_conditions_for_document
103103
end
104104

105105
# Applies the optional search filters.
#
# Taxon concepts and geo entities are filtered together (as combinations)
# when both are given, otherwise by whichever one is present. The document
# tags filter is applied independently of the other two.
def add_extra_conditions
  by_taxon_concepts = @taxon_concepts_ids.present?
  by_geo_entities = @geo_entities_ids.present?

  if by_taxon_concepts && by_geo_entities
    add_citations_condition
  elsif by_taxon_concepts
    add_taxon_concepts_condition
  elsif by_geo_entities
    add_geo_entities_condition
  end

  add_document_tags_condition if @document_tags_ids.present?
end
110115

116+
# Restricts the query to documents that have a single citation row matching
# one of the requested taxon concepts AND one of the requested geo entities.
#
# Matching any pair from the full cartesian product of the two id lists is
# logically the same as "taxon_concept_id IN (taxa) AND geo_entity_id IN
# (geo entities)" evaluated on the same row. Using the IN form keeps the SQL
# and the number of bind values linear in the input size, instead of
# emitting one OR-ed clause per combination.
#
# Reads @taxon_concepts_ids and @geo_entities_ids; the caller only invokes
# this when both are present.
def add_citations_condition
  filter_by_citations(
    'taxon_concept_id IN (?) AND geo_entity_id IN (?)',
    [@taxon_concepts_ids, @geo_entities_ids]
  )
end
128+
111129
# Restricts the query to documents cited for any of @taxon_concepts_ids.
# The ids are passed as a single bind value for the IN (?) placeholder.
def add_taxon_concepts_condition
  filter_by_citations('taxon_concept_id IN (?)', [@taxon_concepts_ids])
end
116135

117136
# Restricts the query to documents cited for any of @geo_entities_ids.
# The ids are passed as a single bind value for the IN (?) placeholder.
def add_geo_entities_condition
  filter_by_citations('geo_entity_id IN (?)', [@geo_entities_ids])
end
142+
143+
# Narrows @query to documents that have at least one row in
# document_citations_mview satisfying the given condition.
#
# condition_string - SQL fragment with `?` placeholders; it is interpolated
#                    after WHERE in the subquery, so it must come from code,
#                    never from user input.
# condition_values - Array of bind values, one per placeholder.
#
# Reassigns @query with the extra JOIN applied.
def filter_by_citations(condition_string, condition_values)
  citations_join = "JOIN (
    SELECT DISTINCT document_id
    FROM document_citations_mview
    WHERE #{condition_string}
  ) t ON t.document_id = documents.id"

  # Invoked through `send`, presumably because sanitize_sql_array is not
  # public on ActiveRecord::Base in this Rails version.
  sanitized_join = ActiveRecord::Base.send(
    :sanitize_sql_array,
    [citations_join, *condition_values]
  )

  @query = @query.joins(sanitized_join)
end
120157

121158
def add_document_tags_condition
@@ -160,16 +197,27 @@ def select_and_group_query
160197

161198
REFRESH_INTERVAL = 5
162199

163-
def self.needs_refreshing?
200+
# True when api_documents_mview looks stale: either some document was
# updated within the last REFRESH_INTERVAL minutes, or the documents table
# holds fewer rows than the materialized view.
def self.documents_need_refreshing?
  recently_updated =
    Document.where('updated_at > ?', REFRESH_INTERVAL.minutes.ago).limit(1).count > 0
  return true if recently_updated

  Document.count < Document.from('api_documents_mview documents').count
end
167204

168-
def self.refresh
205+
# True when document_citations_mview looks stale: either some citation was
# updated within the last REFRESH_INTERVAL minutes, or the citations table
# holds fewer rows than there are distinct citation ids in the materialized
# view.
def self.citations_need_refreshing?
  recently_updated =
    DocumentCitation.where('updated_at > ?', REFRESH_INTERVAL.minutes.ago).limit(1).count > 0
  return true if recently_updated

  citation_ids_in_mview = DocumentCitation.select('DISTINCT id').
    from('document_citations_mview citations')
  DocumentCitation.count < citation_ids_in_mview.count
end
210+
211+
# Refreshes api_documents_mview and then bumps the document search cache
# iterator.
def self.refresh_documents
  connection = ActiveRecord::Base.connection
  connection.execute('REFRESH MATERIALIZED VIEW api_documents_mview')
  DocumentSearch.increment_cache_iterator
end
172215

216+
# Refreshes document_citations_mview first and then the documents view via
# refresh_documents. The citations view goes first because the
# api_documents_view definition joins document_citations_mview.
def self.refresh_citations_and_documents
  connection = ActiveRecord::Base.connection
  connection.execute('REFRESH MATERIALIZED VIEW document_citations_mview')
  refresh_documents
end
220+
173221
def self.clear_cache
174222
RefreshDocumentsWorker.perform_async
175223
DownloadsCacheCleanupWorker.perform_async(:documents)

app/workers/refresh_documents_worker.rb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@ class RefreshDocumentsWorker
33
sidekiq_options queue: :admin, backtrace: 50, unique: :until_and_while_executing
44

55
# Refreshes the search materialized views: citations plus documents when
# the citations view is stale, otherwise documents only.
#
# NOTE(review): DocumentSearch.refresh_documents already calls
# increment_cache_iterator, so the iterator appears to be bumped twice per
# run - confirm that this is intended.
def perform
  refresh_method =
    if DocumentSearch.citations_need_refreshing?
      :refresh_citations_and_documents
    else
      :refresh_documents
    end

  DocumentSearch.public_send(refresh_method)
  DocumentSearch.increment_cache_iterator
end
913
end
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Introduces document_citations_view / document_citations_mview and rebuilds
# api_documents_view / api_documents_mview from the 20160623105336 view
# definitions. Rolling back restores the 20160503073026 documents view and
# its GIN indexes on the id array columns.
class CreateDocumentCitationsMview < ActiveRecord::Migration
  def up
    drop_api_documents_views

    execute "CREATE VIEW document_citations_view AS #{view_sql('20160623105336', 'document_citations_view')}"
    execute "CREATE MATERIALIZED VIEW document_citations_mview AS SELECT * FROM document_citations_view"

    add_index :document_citations_mview,
      [:document_id, :taxon_concept_id, :geo_entity_id, :id],
      name: :index_combinations_mview_on_document_id_tc_id_ge_id,
      unique: true

    create_api_documents_views('20160623105336')
    add_event_and_date_indexes
    add_title_search_index
  end

  def down
    # CASCADE is used on the materialized view drop; the documents views are
    # still dropped explicitly (IF EXISTS) just below.
    execute "DROP MATERIALIZED VIEW IF EXISTS document_citations_mview CASCADE"
    execute "DROP VIEW IF EXISTS document_citations_view"

    drop_api_documents_views
    create_api_documents_views('20160503073026')
    add_event_and_date_indexes
    add_gin_index('index_documents_mview_on_taxon_concepts_ids', 'taxon_concept_ids')
    add_gin_index('index_documents_mview_on_geo_entity_ids', 'geo_entity_ids')
    add_title_search_index
  end

  private

  # Drops the documents materialized view and the view it selects from.
  def drop_api_documents_views
    execute "DROP MATERIALIZED VIEW IF EXISTS api_documents_mview"
    execute "DROP VIEW IF EXISTS api_documents_view"
  end

  # Creates api_documents_view from the definition identified by the given
  # timestamp, then materializes it.
  def create_api_documents_views(timestamp)
    execute "CREATE VIEW api_documents_view AS #{view_sql(timestamp, 'api_documents_view')}"
    execute "CREATE MATERIALIZED VIEW api_documents_mview AS SELECT * FROM api_documents_view"
  end

  # Indexes shared by both versions of api_documents_mview.
  def add_event_and_date_indexes
    add_index :api_documents_mview, [:event_id],
      name: 'index_documents_mview_on_event_id'
    add_index :api_documents_mview, [:date_raw],
      name: 'index_documents_mview_on_date_raw'
  end

  # GIN index over a single column of api_documents_mview.
  def add_gin_index(index_name, column)
    execute <<-SQL
      CREATE INDEX #{index_name}
      ON api_documents_mview
      USING GIN (#{column})
    SQL
  end

  # GIN index over the 'simple' text-search vector of the title.
  def add_title_search_index
    execute <<-SQL
      CREATE INDEX index_documents_mview_on_title_to_ts_vector
      ON api_documents_mview
      USING gin
      (to_tsvector('simple'::regconfig, COALESCE(title, ''::text)));
    SQL
  end
end
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
-- Definition of api_documents_view: documents with their designation, event,
-- language, proposal/review details, creator/updater names and citation names.
--
-- date / date_raw: the event's published_at when set, otherwise the
--   document's own date (date is formatted as DD/MM/YYYY, date_raw is not).
-- extension: upper-cased part of the filename after its last '.'.
-- language: the language's iso_code1, defaulting to 'EN' when there is none.
-- primary_document_id: primary_language_document_id, falling back to d.id.
-- taxon_names / geo_entity_names: distinct, sorted names aggregated from the
--   document's rows in document_citations_mview.
-- document_tags_ids: proposal outcome, review phase and process stage ids
--   concatenated into one array.
--
-- SQUISH_NULL and ARRAY_AGG_NOTNULL are not defined here; presumably they are
-- project-defined database functions - verify.
-- NOTE(review): the tag-id aggregates are not DISTINCT, so joins that fan out
-- (e.g. several citation rows per document) may repeat ids - confirm.
SELECT d.id,
2+
d.designation_id,
3+
designations.name AS designation_name,
4+
d.event_id,
5+
e.name AS event_name,
6+
CASE
7+
WHEN e.published_at IS NOT NULL THEN to_char(e.published_at, 'DD/MM/YYYY'::text)
8+
ELSE to_char(d.date::timestamp with time zone, 'DD/MM/YYYY'::text)
9+
END AS date,
10+
CASE
11+
WHEN e.published_at IS NOT NULL THEN e.published_at
12+
ELSE d.date::timestamp with time zone
13+
END AS date_raw,
14+
e.type AS event_type,
15+
d.title,
16+
upper("substring"(d.filename, length(d.filename) - "position"(reverse(d.filename), '.'::text) + 2)) AS extension,
17+
d.is_public,
18+
d.type AS document_type,
19+
d.sort_index,
20+
CASE
21+
WHEN l.iso_code1 IS NULL THEN 'EN'::character varying(255)
22+
ELSE l.iso_code1
23+
END AS language,
24+
CASE
25+
WHEN d.primary_language_document_id IS NULL THEN d.id
26+
ELSE d.primary_language_document_id
27+
END AS primary_document_id,
28+
SQUISH_NULL(pd.proposal_number) AS proposal_number,
29+
po.name AS proposal_outcome,
30+
rp.name AS review_phase,
31+
ARRAY_AGG_NOTNULL(pd.proposal_outcome_id) || ARRAY_AGG_NOTNULL(rd.review_phase_id) || ARRAY_AGG_NOTNULL(rd.process_stage_id) AS document_tags_ids,
32+
array_agg_notnull(DISTINCT dc.full_name ORDER BY dc.full_name) AS taxon_names,
33+
array_agg_notnull(DISTINCT dc.name_en ORDER BY dc.name_en) AS geo_entity_names,
34+
d.created_at,
35+
d.updated_at,
36+
d.created_by_id,
37+
uc.name AS created_by,
38+
d.updated_by_id,
39+
uu.name AS updated_by
40+
FROM documents d
41+
LEFT JOIN designations ON designations.id = d.designation_id
42+
LEFT JOIN events e ON e.id = d.event_id
43+
LEFT JOIN document_citations_mview dc ON dc.document_id = d.id
44+
LEFT JOIN languages l ON d.language_id = l.id
45+
LEFT JOIN proposal_details pd ON pd.document_id = d.id
46+
LEFT JOIN document_tags po ON pd.proposal_outcome_id = po.id
47+
LEFT JOIN review_details rd ON rd.document_id = d.id
48+
LEFT JOIN document_tags rp ON rd.review_phase_id = rp.id
49+
LEFT JOIN users as uc ON d.created_by_id = uc.id
50+
LEFT JOIN users as uu ON d.updated_by_id = uu.id
51+
GROUP BY d.id, designations.name, e.name, e.published_at, e.type, d.title, l.iso_code1, pd.proposal_number, po.name, rp.name,
52+
uc.name, uu.name;
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
-- Definition of document_citations_view: flattens each document citation
-- into rows of (citation id, document id, taxon concept, geo entity),
-- together with the taxon's full_name and the geo entity's name_en.
--
-- All joins are LEFT JOINs, so a citation with no taxon concepts or no geo
-- entities still yields a row, with NULLs in the missing columns.
-- The GROUP BY lists every selected column, which collapses duplicate rows.
SELECT
2+
dc.id,
3+
dc.document_id,
4+
dctc.taxon_concept_id,
5+
tc.full_name,
6+
dcge.geo_entity_id,
7+
ge.name_en
8+
FROM document_citations dc
9+
LEFT JOIN document_citation_taxon_concepts dctc
10+
ON dctc.document_citation_id = dc.id
11+
LEFT JOIN taxon_concepts tc
12+
ON tc.id = dctc.taxon_concept_id
13+
LEFT JOIN document_citation_geo_entities dcge
14+
ON dcge.document_citation_id = dc.id
15+
LEFT JOIN geo_entities ge
16+
ON ge.id = dcge.geo_entity_id
17+
GROUP BY
18+
dc.id,
19+
document_id,
20+
taxon_concept_id,
21+
full_name,
22+
geo_entity_id,
23+
name_en;
Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
namespace :elibrary do
  # Refreshes the document search materialized views when they look stale
  # and prints how long the refresh took. Citations are checked first; when
  # they are stale both views are refreshed, otherwise only the documents
  # view is refreshed (and only if it is stale itself).
  task refresh_document_search: :environment do
    if DocumentSearch.citations_need_refreshing?
      elapsed_time = Benchmark.realtime { DocumentSearch.refresh_citations_and_documents }
      puts "#{Time.now} Citations & documents refreshed in #{elapsed_time}s"
    elsif DocumentSearch.documents_need_refreshing?
      elapsed_time = Benchmark.realtime { DocumentSearch.refresh_documents }
      puts "#{Time.now} Documents refreshed in #{elapsed_time}s"
    end
  end
end

spec/controllers/admin/documents_controller_spec.rb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
describe "GET index" do
2222
before(:each) do
2323
@document3 = create(:document, :title => 'CC no event!', date: DateTime.new(2014,01,01))
24-
DocumentSearch.refresh
24+
DocumentSearch.refresh_citations_and_documents
2525
end
2626

2727
it "assigns @documents sorted by time of creation" do
@@ -66,7 +66,7 @@
6666
before(:each) do
6767
@document3 = create(:proposal, event: create_cites_cop(published_at: DateTime.new(2014,01,01)))
6868
create(:proposal_details, document_id: @document3.id, proposal_outcome_id: proposal_outcome.id)
69-
DocumentSearch.refresh
69+
DocumentSearch.refresh_citations_and_documents
7070
end
7171
it "retrieves documents for tag" do
7272
get :index, "document_tags_ids" => [proposal_outcome.id]
@@ -77,7 +77,7 @@
7777
before(:each) do
7878
@document3 = create(:review_of_significant_trade, event: create_ec_srg(published_at: DateTime.new(2014,01,01)))
7979
create(:review_details, document_id: @document3.id, review_phase_id: review_phase.id, process_stage_id: process_stage.id)
80-
DocumentSearch.refresh
80+
DocumentSearch.refresh_citations_and_documents
8181
end
8282
it "retrieves documents for review_phase tag" do
8383
get :index, "document_tags_ids" => [review_phase.id]
@@ -104,7 +104,7 @@
104104
end
105105
it "assigns @documents for event, sorted by title" do
106106
@document3 = create(:document, title: 'CC hello world', event: event2)
107-
DocumentSearch.refresh
107+
DocumentSearch.refresh_citations_and_documents
108108
get :index, events_ids: [event.id, event2.id]
109109
assigns(:documents).should eq([@document3, @document2, @document1])
110110
end

spec/controllers/admin/event_documents_controller_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
before(:each) do
99
@document1 = create(:document, event: event, sort_index: 2)
1010
@document2 = create(:document, event: event, sort_index: 1)
11-
DocumentSearch.refresh
11+
DocumentSearch.refresh_citations_and_documents
1212
end
1313

1414
describe "GET show_order" do

spec/controllers/api/documents_controller_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
citation3 = create(:document_citation, document_id: @document3.id)
2323
create(:document_citation_taxon_concept, document_citation_id: citation3.id,
2424
taxon_concept_id: @taxon_concept.id)
25-
DocumentSearch.refresh
25+
DocumentSearch.refresh_citations_and_documents
2626
end
2727

2828
context "GET index returns all documents" do

0 commit comments

Comments
 (0)