Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
alexskr committed Jan 12, 2024
2 parents a208272 + 7066b67 commit b01a904
Show file tree
Hide file tree
Showing 15 changed files with 499 additions and 265 deletions.
4 changes: 4 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ COPY Gemfile* *.gemspec /srv/ontoportal/ncbo_cron/

WORKDIR /srv/ontoportal/ncbo_cron

# Pin RubyGems and Bundler to the last versions that support Ruby 2.7.
# Remove the version pins after the Ruby 3 upgrade.
RUN gem update --system '3.4.22'
RUN gem install bundler -v '2.4.22'
RUN gem update --system
RUN gem install bundler
ENV BUNDLE_PATH=/srv/ontoportal/bundle
Expand Down
6 changes: 6 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ source 'https://rubygems.org'
gemspec

gem 'ffi'

# This is needed temporarily to pull the Google Universal Analytics (UA)
# data and store it in a file (see bin/generate_ua_analytics_file.rb).
# The ability to pull this data from Google will cease on July 1, 2024.
gem "google-apis-analytics_v3"

gem 'google-analytics-data'
gem 'mail', '2.6.6'
gem 'multi_json'
Expand Down
111 changes: 66 additions & 45 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
GIT
remote: https://github.com/ncbo/goo.git
revision: 657149d6b33813253fa7440252f69c04e0631190
revision: 75436fe8e387febc53e34ee31ff0e6dd837a9d3f
branch: master
specs:
goo (0.0.2)
Expand All @@ -15,7 +15,7 @@ GIT

GIT
remote: https://github.com/ncbo/ncbo_annotator.git
revision: 4f4361e2c181143bba3876326ecda407a587207e
revision: 1170a94d266d3e469bfb034a3aa3c4852bd0de82
branch: master
specs:
ncbo_annotator (0.0.1)
Expand All @@ -26,7 +26,7 @@ GIT

GIT
remote: https://github.com/ncbo/ontologies_linked_data.git
revision: 7783784f9d2ceada9be706cf6c084d272ae653e8
revision: ee0013f0ee23876076bff9d9258b46371ec3b248
branch: master
specs:
ontologies_linked_data (0.0.1)
Expand Down Expand Up @@ -74,103 +74,119 @@ GEM
activesupport (3.2.22.5)
i18n (~> 0.6, >= 0.6.4)
multi_json (~> 1.0)
addressable (2.8.5)
addressable (2.8.6)
public_suffix (>= 2.0.2, < 6.0)
base64 (0.2.0)
bcrypt (3.1.19)
bcrypt (3.1.20)
bigdecimal (3.1.5)
builder (3.2.4)
coderay (1.1.3)
concurrent-ruby (1.2.2)
connection_pool (2.4.1)
cube-ruby (0.0.3)
dante (0.2.0)
declarative (0.0.20)
docile (1.4.0)
domain_name (0.5.20190701)
unf (>= 0.0.5, < 1.0.0)
domain_name (0.6.20240107)
email_spec (2.1.1)
htmlentities (~> 4.3.3)
launchy (~> 2.1)
mail (~> 2.6)
faraday (2.7.11)
faraday (2.8.1)
base64
faraday-net_http (>= 2.0, < 3.1)
ruby2_keywords (>= 0.0.4)
faraday-net_http (3.0.2)
faraday-retry (2.2.0)
faraday (~> 2.0)
ffi (1.16.3)
gapic-common (0.20.0)
gapic-common (0.21.1)
faraday (>= 1.9, < 3.a)
faraday-retry (>= 1.0, < 3.a)
google-protobuf (~> 3.14)
googleapis-common-protos (>= 1.3.12, < 2.a)
googleapis-common-protos-types (>= 1.3.1, < 2.a)
googleauth (~> 1.0)
grpc (~> 1.36)
google-protobuf (~> 3.18)
googleapis-common-protos (>= 1.4.0, < 2.a)
googleapis-common-protos-types (>= 1.11.0, < 2.a)
googleauth (~> 1.9)
grpc (~> 1.59)
google-analytics-data (0.4.0)
google-analytics-data-v1beta (>= 0.7, < 2.a)
google-cloud-core (~> 1.6)
google-analytics-data-v1beta (0.9.0)
gapic-common (>= 0.20.0, < 2.a)
google-analytics-data-v1beta (0.11.1)
gapic-common (>= 0.21.1, < 2.a)
google-cloud-errors (~> 1.0)
google-cloud-core (1.6.0)
google-cloud-env (~> 1.0)
google-apis-analytics_v3 (0.13.0)
google-apis-core (>= 0.11.0, < 2.a)
google-apis-core (0.11.2)
addressable (~> 2.5, >= 2.5.1)
googleauth (>= 0.16.2, < 2.a)
httpclient (>= 2.8.1, < 3.a)
mini_mime (~> 1.0)
representable (~> 3.0)
retriable (>= 2.0, < 4.a)
rexml
webrick
google-cloud-core (1.6.1)
google-cloud-env (>= 1.0, < 3.a)
google-cloud-errors (~> 1.0)
google-cloud-env (1.6.0)
faraday (>= 0.17.3, < 3.0)
google-cloud-env (2.1.0)
faraday (>= 1.0, < 3.a)
google-cloud-errors (1.3.1)
google-protobuf (3.25.0)
google-protobuf (3.25.0-x86_64-darwin)
google-protobuf (3.25.0-x86_64-linux)
google-protobuf (3.25.2)
google-protobuf (3.25.2-x86_64-darwin)
google-protobuf (3.25.2-x86_64-linux)
googleapis-common-protos (1.4.0)
google-protobuf (~> 3.14)
googleapis-common-protos-types (~> 1.2)
grpc (~> 1.27)
googleapis-common-protos-types (1.10.0)
googleapis-common-protos-types (1.11.0)
google-protobuf (~> 3.18)
googleauth (1.8.1)
faraday (>= 0.17.3, < 3.a)
googleauth (1.9.1)
faraday (>= 1.0, < 3.a)
google-cloud-env (~> 2.1)
jwt (>= 1.4, < 3.0)
multi_json (~> 1.11)
os (>= 0.9, < 2.0)
signet (>= 0.16, < 2.a)
grpc (1.59.2)
google-protobuf (~> 3.24)
grpc (1.60.0)
google-protobuf (~> 3.25)
googleapis-common-protos-types (~> 1.0)
grpc (1.59.2-x86_64-darwin)
google-protobuf (~> 3.24)
grpc (1.60.0-x86_64-darwin)
google-protobuf (~> 3.25)
googleapis-common-protos-types (~> 1.0)
grpc (1.59.2-x86_64-linux)
google-protobuf (~> 3.24)
grpc (1.60.0-x86_64-linux)
google-protobuf (~> 3.25)
googleapis-common-protos-types (~> 1.0)
htmlentities (4.3.4)
http-accept (1.7.0)
http-cookie (1.0.5)
domain_name (~> 0.5)
httpclient (2.8.3)
i18n (0.9.5)
concurrent-ruby (~> 1.0)
json (2.6.3)
json_pure (2.6.3)
json (2.7.1)
json_pure (2.7.1)
jwt (2.7.1)
launchy (2.5.2)
addressable (~> 2.8)
libxml-ruby (4.1.2)
libxml-ruby (5.0.2)
logger (1.6.0)
macaddr (1.7.2)
systemu (~> 2.6.5)
mail (2.6.6)
mime-types (>= 1.16, < 4)
method_source (1.0.0)
mime-types (3.5.1)
mime-types (3.5.2)
mime-types-data (~> 3.2015)
mime-types-data (3.2023.1003)
mime-types-data (3.2023.1205)
mini_mime (1.1.5)
minitest (4.7.5)
mlanett-redis-lock (0.2.7)
redis
multi_json (1.15.0)
net-http-persistent (2.9.4)
netrc (0.11.0)
oj (3.16.1)
oj (3.16.3)
bigdecimal (>= 3.0)
omni_logger (0.1.4)
logger
os (1.1.4)
Expand All @@ -180,7 +196,7 @@ GEM
pry (0.14.2)
coderay (~> 1.1)
method_source (~> 1.0)
public_suffix (5.0.3)
public_suffix (5.0.4)
rack (3.0.8)
rack-test (2.1.0)
rack (>= 1.3)
Expand All @@ -189,13 +205,18 @@ GEM
addressable (>= 2.2)
redis (5.0.8)
redis-client (>= 0.17.0)
redis-client (0.18.0)
redis-client (0.19.1)
connection_pool
representable (3.2.0)
declarative (< 0.1.0)
trailblazer-option (>= 0.1.1, < 0.2.0)
uber (< 0.2.0)
rest-client (2.1.0)
http-accept (>= 1.7.0, < 2.0)
http-cookie (>= 1.0.2, < 2.0)
mime-types (>= 1.16, < 4.0)
netrc (~> 0.8)
retriable (3.1.2)
rexml (3.2.6)
rsolr (2.5.0)
builder (>= 2.1.2)
Expand Down Expand Up @@ -224,17 +245,16 @@ GEM
systemu (2.6.5)
test-unit-minitest (0.9.1)
minitest (~> 4.7)
trailblazer-option (0.1.2)
tzinfo (2.0.6)
concurrent-ruby (~> 1.0)
unf (0.1.4)
unf_ext
unf_ext (0.0.8.2)
uber (0.1.0)
uuid (2.3.9)
macaddr (~> 1.0)
webrick (1.8.1)

PLATFORMS
ruby
x86_64-darwin-18
x86_64-darwin-21
x86_64-linux

Expand All @@ -244,6 +264,7 @@ DEPENDENCIES
ffi
goo!
google-analytics-data
google-apis-analytics_v3
mail (= 2.6.6)
minitest (< 5.0)
multi_json
Expand All @@ -264,4 +285,4 @@ DEPENDENCIES
test-unit-minitest

BUNDLED WITH
2.3.15
2.4.22
126 changes: 126 additions & 0 deletions bin/generate_ua_analytics_file.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
require 'logger'
require 'google/apis/analytics_v3'
require 'google/api_client/auth/key_utils'

module NcboCron
  module Models

    # Pulls per-ontology page-view counts from Google Universal Analytics (UA)
    # through the Analytics v3 API and writes them, as JSON, to the file named
    # by NcboCron.settings.analytics_path_to_ua_data_file.
    #
    # The generated structure is { acronym => { year => { month => pageviews } } }.
    class OntologyAnalyticsUA
      # Page size requested from the API; a page shorter than this ends the
      # pagination for an acronym.
      MAX_RESULTS = 10_000

      # Year used when NcboCron.settings.analytics_start_date is blank.
      DEFAULT_START_YEAR = 2013

      # @param logger [#info] receives progress messages; it is flushed after
      #   each message when it responds to #flush
      def initialize(logger)
        @logger = logger
      end

      # Fetches the analytics for every ontology and writes them as JSON to
      # NcboCron.settings.analytics_path_to_ua_data_file (file is overwritten).
      #
      # @return [Integer] number of bytes written
      def run
        ontology_analytics = fetch_ontology_analytics
        File.open(NcboCron.settings.analytics_path_to_ua_data_file, 'w') do |f|
          f.write(ontology_analytics.to_json)
        end
      end

      # Queries the API once per ontology acronym (paging through the results)
      # and sums page views per year and month.
      #
      # Every year from the start year through the current year, and every
      # month 1..12 of each year present, is guaranteed to have an entry
      # (0 when no data was returned).
      #
      # @return [Hash{String => Hash{Integer => Hash{Integer => Integer}}}]
      def fetch_ontology_analytics
        google_client = authenticate_google
        aggregated_results = {}
        start_date = analytics_start_date
        start_year = Date.parse(start_date).year
        ont_acronyms = LinkedData::Models::Ontology.where.include(:acronym).all.map(&:acronym)
        extra_filter = NcboCron.settings.analytics_filter_str
        filter_str = extra_filter.nil? || extra_filter.empty? ? "" : ";#{extra_filter}"

        ont_acronyms.each do |acronym|
          by_year = (aggregated_results[acronym] ||= {})
          start_index = 1

          loop do
            # Options are passed as real keyword arguments: a braced Hash in the
            # last positional slot is rejected by Ruby 3 for keyword-taking methods.
            results = google_client.get_ga_data(
              NcboCron.settings.analytics_profile_id,
              start_date,
              Date.today.to_s,
              'ga:pageviews',
              dimensions: 'ga:pagePath,ga:year,ga:month',
              filters: "ga:pagePath=~^(\\/ontologies\\/#{acronym})(\\/?\\?{0}|\\/?\\?{1}.*)$#{filter_str}",
              start_index: start_index,
              max_results: MAX_RESULTS
            )
            rows = results.rows || []
            start_index += MAX_RESULTS
            log_progress "Acronym: #{acronym}, Results: #{rows.length}, Start Index: #{start_index}"
            accumulate_rows(by_year, rows)

            # a short (or empty) page means there is nothing left to fetch
            break if rows.length < MAX_RESULTS
          end

          zero_fill(by_year, start_year)
        end

        log_progress "Completed Universal Analytics pull..."
        aggregated_results
      end

      # Builds an Analytics v3 service client authorized with the service
      # account configured in NcboCron.settings (PKCS#12 key file + e-mail).
      #
      # Note: this also sets process-wide Google::Apis default client/request
      # options (application name/version and retry count).
      #
      # @return [Google::Apis::AnalyticsV3::AnalyticsService]
      def authenticate_google
        Google::Apis::ClientOptions.default.application_name = NcboCron.settings.analytics_app_name
        Google::Apis::ClientOptions.default.application_version = NcboCron.settings.analytics_app_version
        # enable google api call retries in order to mitigate analytics
        # processing failures due to occasional google api timeouts and other outages
        Google::Apis::RequestOptions.default.retries = 5
        # uncomment to enable logging for debugging purposes
        # Google::Apis.logger.level = Logger::DEBUG
        # Google::Apis.logger = @logger
        client = Google::Apis::AnalyticsV3::AnalyticsService.new
        key = Google::APIClient::KeyUtils.load_from_pkcs12(NcboCron.settings.analytics_path_to_ua_key_file, 'notasecret')
        client.authorization = Signet::OAuth2::Client.new(
          token_credential_uri: 'https://accounts.google.com/o/oauth2/token',
          audience: 'https://accounts.google.com/o/oauth2/token',
          scope: 'https://www.googleapis.com/auth/analytics.readonly',
          issuer: NcboCron.settings.analytics_service_account_email_address,
          signing_key: key
        ).tap { |auth| auth.fetch_access_token! }
        client
      end

      private

      # Start date sent to the API. Falls back to January 1st of
      # DEFAULT_START_YEAR when the setting is blank, so the fallback actually
      # takes effect instead of Date.parse raising on nil.
      def analytics_start_date
        configured = NcboCron.settings.analytics_start_date
        configured.to_s.strip.empty? ? "#{DEFAULT_START_YEAR}-01-01" : configured
      end

      # Adds each row's page views into by_year[year][month]. Rows are read as
      # [pagePath, year, month, pageviews], i.e. the requested dimensions
      # followed by the requested metric.
      def accumulate_rows(by_year, rows)
        rows.each do |row|
          year = row[1].to_i
          month = row[2].to_i
          by_month = (by_year[year] ||= {})
          by_month[month] = by_month.fetch(month, 0) + row[3].to_i
        end
      end

      # Ensures every year from start_year through the current year exists and
      # that each year present has all twelve months (missing ones become 0).
      def zero_fill(by_year, start_year)
        (start_year..Date.today.year).each { |year| by_year[year] ||= {} }
        by_year.each_value do |by_month|
          (1..12).each { |month| by_month[month] = 0 unless by_month.key?(month) }
        end
      end

      # Logs an info message and flushes the logger when it supports #flush.
      def log_progress(message)
        @logger.info message
        @logger.flush if @logger.respond_to?(:flush)
      end
    end
  end
end

require 'ontologies_linked_data'
require 'goo'
require 'ncbo_annotator'
require 'ncbo_cron/config'
require_relative '../config/config'
# Run the Universal Analytics pull, logging progress to
# logs/ontology-analytics-ua.log (path is relative to the working directory).
ua_log_file = File.join("logs", "ontology-analytics-ua.log")
NcboCron::Models::OntologyAnalyticsUA.new(Logger.new(ua_log_file)).run
Loading

0 comments on commit b01a904

Please sign in to comment.