diff --git a/CHANGELOG.md b/CHANGELOG.md index 447ba73..58ee700 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ Unreleased - Fixed notifications - Fixed OAR_AUTO_RESUBMIT events (resubmit id could be missed) - Fixed gridstat -f +- Fixed gridstat -j version 3.2.0 ------------- diff --git a/bin/gridstat.rb b/bin/gridstat.rb index ac0d274..3987274 100755 --- a/bin/gridstat.rb +++ b/bin/gridstat.rb @@ -181,6 +181,7 @@ elsif cinfos puts response.body else + j = JSON.parse(response.body) Cigri::Client.print_job(j) end elsif jdl diff --git a/lib/cigri-colombolib.rb b/lib/cigri-colombolib.rb index 439b700..ee6f855 100755 --- a/lib/cigri-colombolib.rb +++ b/lib/cigri-colombolib.rb @@ -91,7 +91,7 @@ def check_clusters if event.props[:class]=="cluster" COLOMBOLIBLOGGER.debug("Checking event #{event.props[:code]}") case event.props[:code] - when "REQUEST_TOO_LARGE","POST_TIMEOUT","TIMEOUT", "CONNECTION_RESET", "SOCKET_ERROR", "CONNECTION_REFUSED", "HOST_UNREACHABLE", "SSL_ERROR", "GET_JOBS", "GET_JOB", "GET_MEDIA","GET_STRESS_FACTOR", "FILL_JOBS_CACHE", "RUNNER_GET_JOB_CHUNK_ERROR" + when "REQUEST_TOO_LARGE","POST_TIMEOUT","TIMEOUT", "CONNECTION_RESET", "SOCKET_ERROR", "CONNECTION_REFUSED", "HOST_UNREACHABLE", "SSL_ERROR", "GET_JOBS", "GET_JOB", "GET_MEDIA","GET_STRESS_FACTOR", "FILL_JOBS_CACHE", "RUNNER_GET_JOB_CHUNK_ERROR", "TOO_MANY_RESUBMIT" blacklist_cluster(event.id,event.props[:cluster_id],event.props[:campaign_id]) event.checked when "CLUSTER_MANUALLY_DISABLED" diff --git a/lib/cigri-iolib.rb b/lib/cigri-iolib.rb index 7cae676..775fb8e 100644 --- a/lib/cigri-iolib.rb +++ b/lib/cigri-iolib.rb @@ -848,7 +848,6 @@ def get_campaign_tasks(dbh, id, limit, offset) res << row.to_h end sth.finish - IOLIBLOGGER.debug(res.inspect) return res end diff --git a/modules/updator.rb b/modules/updator.rb index 0333b15..12806a8 100755 --- a/modules/updator.rb +++ b/modules/updator.rb @@ -63,6 +63,24 @@ def notify_judas end end + ## + # Check for campaigns with too bad 
resubmit_rate + ## + # TODO: do this for each cluster, because blacklisting is done per cluster! + #campaigns=Cigri::Campaignset.new + #campaigns.get_running + #campaigns.each do |campaign| + # if campaign.resubmit_rate > 0.6 + # logger.info("campaign #{campaign.id} has a lot of resubmits!") + # if campaign.tasks(100,0).length() > 10 + # event=Cigri::Event.new(:class => 'campaign', :state => 'open', :campaign_id => campaign.id, + # :code => "TOO_MANY_RESUBMIT", :message => "Your campaign #{campaign.id} has too many resubmit jobs. Please, check the duration of your jobs and walltime, then kill and restart your campaign.") + # notify_judas + # Cigri::Colombo.new(event).check_clusters + # end + # end + #end + ## # Autofix clusters ##