From 354d1b7e4e9351c5d98a81bb396becb251c5889c Mon Sep 17 00:00:00 2001 From: Nicolo Magini Date: Fri, 26 Feb 2016 20:04:59 +0100 Subject: [PATCH 1/7] Add new MViews with submissiontool filter --- .../collector/config/db/CreateMVDSStat1.sql | 26 ++++++++++++ .../config/db/CreateMVDSStat1Aggr1.sql | 41 +++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1.sql create mode 100644 DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1Aggr1.sql diff --git a/DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1.sql b/DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1.sql new file mode 100644 index 0000000..d6bee4a --- /dev/null +++ b/DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1.sql @@ -0,0 +1,26 @@ +----------------------------------------------------------------------- +--- Unify Stat on DS +----------------------------------------------------------------------- + +--- This MV allows analysis aggregation along time +--- per DS and dataTier (RECO, AOD, etc), siteName, users +--- Popularity is in number of accesses and totCPU +--- Selection is performed only on fileexitflag=1 +--- FIXME: CURRENTLY DIFFERENT NAME FOR TESTING, SHOULD REPLACE +--- MV_DS_stat0 EVENTUALLY + +CREATE MATERIALIZED VIEW MV_DS_stat1 +COMPRESS +PCTFREE 0 +BUILD IMMEDIATE +REFRESH FAST +ENABLE QUERY REWRITE +AS +select trunc(finishedtimestamp) as TDay , siteName, userid, inputcollection as collName, isRemote, + submissiontool, + count(*) as numAccesses, sum(CPUTIME) as totCPU +from raw_file +where fileexitflag=1 +GROUP BY trunc(finishedtimestamp), siteName, userid, inputcollection, isRemote, submissiontool +; +commit; diff --git a/DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1Aggr1.sql b/DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1Aggr1.sql new file mode 100644 index 0000000..2faf305 --- /dev/null +++ b/DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1Aggr1.sql @@ -0,0 +1,41 @@ +--- aggregation: day, sitename, dataset +--- exclude production +--- count distinct users per day, aggregating later more days will +--- double count the same users that submitted in several days +--- FIXME: CURRENTLY QUERYING MV_DS_STAT1 FOR TESTING +--- SHOULD QUERY MV_DS_STAT0 EVENTUALLY + +CREATE MATERIALIZED VIEW MV_DS_stat1_aggr1 +COMPRESS +PCTFREE 0 +BUILD IMMEDIATE +ENABLE QUERY REWRITE +AS +select TDay , siteName, collName, +sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count(distinct userid) as numUsers +from MV_DS_stat1 +where submissiontool!='wmagent' +GROUP BY TDay, siteName, collName +; + +--- aggregation: day, dataset +--- exclude production +--- count distinct users per day, aggregating later more days will +--- double count the same users that submitted in several days +--- FIXME: CURRENTLY QUERYING MV_DS_STAT1 FOR TESTING +--- SHOULD QUERY MV_DS_STAT0 EVENTUALLY + +CREATE MATERIALIZED VIEW MV_DS_stat1_aggr1_summ +COMPRESS +PCTFREE 0 +BUILD IMMEDIATE +ENABLE QUERY REWRITE +AS +select TDay , collName, +sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count(distinct userid) as numUsers +from MV_DS_stat1 +where submissiontool!='wmagent' +GROUP BY TDay, collName +; + +commit; \ No newline at end of file From 93827239901629e69c5c8cbe1fb8e01315400b67 Mon Sep 17 00:00:00 2001 From: Nicolo Magini Date: Mon, 2 May 2016 19:08:03 +0200 Subject: [PATCH 2/7] Add argument includewmagent (default n) for dataset popdb APIs --- .../popdb.web/lib/Apps/popularity/database/popDB.py | 10 ++++++++-- .../Apps/popularity/template/popularity/apidoc.html | 5 ++++- .../lib/Apps/popularity/utils/PopularityParams.py | 10 ++++++++-- .../lib/Apps/popularity/views/data_collection.py | 5 ++++- 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/DataPopularity/popdb.web/lib/Apps/popularity/database/popDB.py b/DataPopularity/popdb.web/lib/Apps/popularity/database/popDB.py index 069510e..9d1fba0 100644 --- a/DataPopularity/popdb.web/lib/Apps/popularity/database/popDB.py +++ b/DataPopularity/popdb.web/lib/Apps/popularity/database/popDB.py @@ -15,7 +15,10 @@ def DSStatInTimeWindow(params, MView): if MView == 'DataTier': table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR2") elif MView == 'DS': - table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR1") + if params.includeWMAgent == 'y': + table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR1") + elif params.includeWMAgent == 'n': + table = "%s.%s" % (DBUSER, "MV_DS_STAT1_AGGR1") elif MView == 'DSName': table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR4") @@ -76,7 +79,10 @@ def MostPopDSStat(params, MView, collName): if MView == 'DataTier': table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR2") elif MView == 'DS': - table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR1") + if params.includeWMAgent == 'y': + table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR1") + elif params.includeWMAgent == 'n': + table = "%s.%s" % (DBUSER, "MV_DS_STAT1_AGGR1") elif MView == 'DSName': table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR4") diff --git a/DataPopularity/popdb.web/lib/Apps/popularity/template/popularity/apidoc.html b/DataPopularity/popdb.web/lib/Apps/popularity/template/popularity/apidoc.html index 3d5e828..f0e989e 100644 --- a/DataPopularity/popdb.web/lib/Apps/popularity/template/popularity/apidoc.html +++ b/DataPopularity/popdb.web/lib/Apps/popularity/template/popularity/apidoc.html @@ -48,6 +48,7 @@

Popularity API Documentation


- tstop (Default: today)
- aggr (Default: day. Accepted values: day, week, month, quarter, year )
- n (define how many elements to return, e.g. '5' for 5 first most popular) +
- includewmagent (include accesses by WMAgent in stats. Default: n. Accepted values: y, n)
- orderby (define the popularity metric. Default: totcpu. Accepted values: totcpu, naccess, nusers) @@ -146,6 +147,7 @@

Popularity API Documentation


Takes the following arguments:
- sitename (e.g. T2_IT_Pisa. Default: 'summary' for the overall view)
- aggr (Default: day. Accepted values: day, week, month, quarter, year ) +
- includewmagent (include accesses by WMAgent in stats. Default: n. Accepted values: y, n)
- orderby (define the popularity metric. Default: totcpu. Accepted values: totcpu, naccess, nusers) @@ -225,6 +227,7 @@

Popularity API Documentation


- sitename (e.g. T2_IT_Pisa. Default: 'summary' for the overall view)
- tstart (Default: one week ago)
- tstop (Default: today) +
- includewmagent (include accesses by WMAgent in stats. Default: n. Accepted values: y, n) @@ -252,7 +255,7 @@

Popularity API Documentation


- sitename (e.g. T2_IT_Pisa. Default: 'summary' for the overall view)
- tstart (Default: one week ago)
- tstop (Default: today) - + diff --git a/DataPopularity/popdb.web/lib/Apps/popularity/utils/PopularityParams.py b/DataPopularity/popdb.web/lib/Apps/popularity/utils/PopularityParams.py index 448421e..a04095e 100644 --- a/DataPopularity/popdb.web/lib/Apps/popularity/utils/PopularityParams.py +++ b/DataPopularity/popdb.web/lib/Apps/popularity/utils/PopularityParams.py @@ -88,7 +88,10 @@ def validaten(self, n): return True raise Paramvalidationexception('n', 'param must be a positive int') - + def validateincludewmagent(self, includewmagent): + if includewmagent in ['y', 'n']: + return True + raise Paramvalidationexception('includewmagent', 'includewmagent must be y or n') def setSiteName(self, sitename): @@ -140,7 +143,10 @@ def setOrder(self, order): self.orderVar = order #else: # raise Paramvalidationexception('orderby', 'param must be a string in %s' % (self.ordervalues)) - + + def setIncludeWMAgent(self, includewmagent): + if self.validateincludewmagent(includewmagent): + self.includeWMAgent = includewmagent class Userstatparams(Popularityparams): diff --git a/DataPopularity/popdb.web/lib/Apps/popularity/views/data_collection.py b/DataPopularity/popdb.web/lib/Apps/popularity/views/data_collection.py index bb5bbb3..66199c9 100644 --- a/DataPopularity/popdb.web/lib/Apps/popularity/views/data_collection.py +++ b/DataPopularity/popdb.web/lib/Apps/popularity/views/data_collection.py @@ -73,6 +73,8 @@ def getDSStatInTimeWindow(request,MView=''): par.setOrder(request.GET.get('orderby', 'totcpu')) par.setSiteName(request.GET.get('sitename', 'summary')) + par.setIncludeWMAgent(request.GET.get('includewmagent', 'n')) + jdata = _getDSStatInTimeWindowJSON(par, MView) except Paramvalidationexception as e: @@ -250,7 +252,7 @@ def getTimeEvolutionPlotData(request,MView=''): par.setSiteName(request.GET.get('sitename', 'summary')) if (par.FirstN == 0): return HttpResponseBadRequest("Given n not valid (param must be > 0)") - + par.setIncludeWMAgent(request.GET.get('includewmagent', 'n')) """ data indexing start from 0 @@ -304,6 +306,7 @@ def getDataSetStat(request): par.setN('1') par.setOrder(request.GET.get('orderby', 'totcpu')) par.setSiteName(request.GET.get('sitename', 'summary')) + par.setIncludeWMAgent(request.GET.get('includewmagent', 'n')) """ data indexing start from 0 """ From 484bf367baa9f0a26ab491714b8189911b8340cb Mon Sep 17 00:00:00 2001 From: Nicolo Magini Date: Wed, 4 May 2016 18:04:36 +0200 Subject: [PATCH 3/7] Enable includewmagent argument also in datatier/processeddatasets popdb APIs --- .../lib/Apps/popularity/database/popDB.py | 33 +++++++++++-------- .../template/popularity/apidoc.html | 6 ++++ .../Apps/popularity/views/data_collection.py | 2 ++ 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/DataPopularity/popdb.web/lib/Apps/popularity/database/popDB.py b/DataPopularity/popdb.web/lib/Apps/popularity/database/popDB.py index 9d1fba0..01a84aa 100644 --- a/DataPopularity/popdb.web/lib/Apps/popularity/database/popDB.py +++ b/DataPopularity/popdb.web/lib/Apps/popularity/database/popDB.py @@ -12,17 +12,20 @@ def DSStatInTimeWindow(params, MView): #cursor = connection.cursor() + if params.includeWMAgent == 'y': + baseMV = "STAT0" + elif params.includeWMAgent == 'n': + baseMV = "STAT1" + if MView == 'DataTier': - table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR2") + aggrMV = "AGGR2" elif MView == 'DS': - if params.includeWMAgent == 'y': - table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR1") - elif params.includeWMAgent == 'n': - table = "%s.%s" % (DBUSER, "MV_DS_STAT1_AGGR1") + aggrMV = "AGGR1" elif MView == 'DSName': - table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR4") - + aggrMV = 'AGGR4' + table = "%s.MV_DS_%s_%s" % (DBUSER, baseMV, aggrMV) + vars = '''collName , sum(numAccesses) as nAcc, round(sum(totCPU)/3600,0) as totCPU, sum(numUsers) as nUsers''' whereCondition = '''TDay >= to_date('%s','YYYY-MM-DD') @@ -76,16 +79,20 @@ def MostPopDSStat(params, MView, collName): #cursor = connection.cursor() + if params.includeWMAgent == 'y': + baseMV = "STAT0" + elif params.includeWMAgent == 'n': + baseMV = "STAT1" + if MView == 'DataTier': - table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR2") + aggrMV = "AGGR2" elif MView == 'DS': - if params.includeWMAgent == 'y': - table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR1") - elif params.includeWMAgent == 'n': - table = "%s.%s" % (DBUSER, "MV_DS_STAT1_AGGR1") + aggrMV = "AGGR1" elif MView == 'DSName': - table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR4") + aggrMV = 'AGGR4' + table = "%s.MV_DS_%s_%s" % (DBUSER, baseMV, aggrMV) + #TimeFormats: timeformat acts to the displayed date, timeformatTrunc acts to the truncation of the input dates, and should be keept with the format of the aggregation timeformat = 'YYYY/MM/DD' diff --git a/DataPopularity/popdb.web/lib/Apps/popularity/template/popularity/apidoc.html b/DataPopularity/popdb.web/lib/Apps/popularity/template/popularity/apidoc.html index f0e989e..48a7103 100644 --- a/DataPopularity/popdb.web/lib/Apps/popularity/template/popularity/apidoc.html +++ b/DataPopularity/popdb.web/lib/Apps/popularity/template/popularity/apidoc.html @@ -83,6 +83,7 @@

Popularity API Documentation


- tstop (Default: today)
- aggr (Default: day. Accepted values: day, week, month, quarter, year )
- n (define how many elements to return, e.g. '5' for 5 first most popular) +
- includewmagent (include accesses by WMAgent in stats. Default: n. Accepted values: y, n)
- orderby (define the popularity metric. Default: totcpu. Accepted values: totcpu, naccess, nusers) @@ -117,6 +118,7 @@

Popularity API Documentation


- tstop (Default: today)
- aggr (Default: day. Accepted values: day, week, month, quarter, year )
- n (define how many elements to return, e.g. '5' for 5 first most popular) +
- includewmagent (include accesses by WMAgent in stats. Default: n. Accepted values: y, n)
- orderby (define the popularity metric. Default: totcpu. Accepted values: totcpu, naccess, nusers) @@ -174,6 +176,7 @@

Popularity API Documentation


Takes the following arguments:
- sitename (e.g. T2_IT_Pisa. Default: 'summary' for the overall view)
- aggr (Default: day. Accepted values: day, week, month, quarter, year ) +
- includewmagent (include accesses by WMAgent in stats. Default: n. Accepted values: y, n)
- orderby (define the popularity metric. Default: totcpu. Accepted values: totcpu, naccess, nusers) @@ -200,6 +203,7 @@

Popularity API Documentation


Takes the following arguments:
- sitename (e.g. T2_IT_Pisa. Default: 'summary' for the overall view)
- aggr (Default: day. Accepted values: day, week, month, quarter, year ) +
- includewmagent (include accesses by WMAgent in stats. Default: n. Accepted values: y, n)
- orderby (define the popularity metric. Default: totcpu. Accepted values: totcpu, naccess, nusers) @@ -255,6 +259,7 @@

Popularity API Documentation


- sitename (e.g. T2_IT_Pisa. Default: 'summary' for the overall view)
- tstart (Default: one week ago)
- tstop (Default: today) +
- includewmagent (include accesses by WMAgent in stats. Default: n. Accepted values: y, n) @@ -282,6 +287,7 @@

Popularity API Documentation


- sitename (e.g. T2_IT_Pisa. Default: 'summary' for the overall view)
- tstart (Default: one week ago)
- tstop (Default: today) +
- includewmagent (include accesses by WMAgent in stats. Default: n. Accepted values: y, n) diff --git a/DataPopularity/popdb.web/lib/Apps/popularity/views/data_collection.py b/DataPopularity/popdb.web/lib/Apps/popularity/views/data_collection.py index 66199c9..f09b8d7 100644 --- a/DataPopularity/popdb.web/lib/Apps/popularity/views/data_collection.py +++ b/DataPopularity/popdb.web/lib/Apps/popularity/views/data_collection.py @@ -335,6 +335,7 @@ def getDataTierStat(request): par.setN('1') par.setOrder(request.GET.get('orderby', 'totcpu')) par.setSiteName(request.GET.get('sitename', 'summary')) + par.setIncludeWMAgent(request.GET.get('includewmagent', 'n')) """ data indexing start from 0 """ @@ -363,6 +364,7 @@ def getProcessedDataSetStat(request): par.setN('1') par.setOrder(request.GET.get('orderby', 'totcpu')) par.setSiteName(request.GET.get('sitename', 'summary')) + par.setIncludeWMAgent(request.GET.get('includewmagent', 'n')) """ data indexing start from 0 """ From a2256af51b5b1bd3e0aa0be2851fd676e40018e1 Mon Sep 17 00:00:00 2001 From: Nicolo Magini Date: Wed, 4 May 2016 18:20:30 +0200 Subject: [PATCH 4/7] Add privilege grants to new MV SQLs, and consolidate the sqls --- .../collector/config/db/CreateMVDSStat1.sql | 140 ++++++++++++++++++ .../config/db/CreateMVDSStat1Aggr1.sql | 41 ----- 2 files changed, 140 insertions(+), 41 deletions(-) delete mode 100644 DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1Aggr1.sql diff --git a/DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1.sql b/DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1.sql index d6bee4a..732cd57 100644 --- a/DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1.sql +++ b/DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1.sql @@ -24,3 +24,143 @@ where fileexitflag=1 GROUP BY trunc(finishedtimestamp), siteName, userid, inputcollection, isRemote, submissiontool ; commit; +--- aggregation: day, sitename, dataset +--- exclude production +--- count distinct users per day, aggregating later more days will +--- double count the same users that submitted in several days +--- FIXME: CURRENTLY QUERYING MV_DS_STAT1 FOR TESTING +--- SHOULD QUERY MV_DS_STAT0 EVENTUALLY + +CREATE MATERIALIZED VIEW MV_DS_stat1_aggr1 +COMPRESS +PCTFREE 0 +BUILD IMMEDIATE +ENABLE QUERY REWRITE +AS +select TDay , siteName, collName, +sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count(distinct userid) as numUsers +from MV_DS_stat1 +where submissiontool!='wmagent' +GROUP BY TDay, siteName, collName +; + +--- aggregation: day, dataset +--- exclude production +--- count distinct users per day, aggregating later more days will +--- double count the same users that submitted in several days +--- FIXME: CURRENTLY QUERYING MV_DS_STAT1 FOR TESTING +--- SHOULD QUERY MV_DS_STAT0 EVENTUALLY + +CREATE MATERIALIZED VIEW MV_DS_stat1_aggr1_summ +COMPRESS +PCTFREE 0 +BUILD IMMEDIATE +ENABLE QUERY REWRITE +AS +select TDay , collName, +sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count(distinct userid) as numUsers +from MV_DS_stat1 +where submissiontool!='wmagent' +GROUP BY TDay, collName +; + +commit; +--- aggregation: day, sitename, dataTier +--- count distinct users per day, aggregating later more days will +--- double count the same users that submitted in several days +--- excludes wmagent jobs + +CREATE MATERIALIZED VIEW MV_DS_stat1_aggr2 +COMPRESS +PCTFREE 0 +BUILD IMMEDIATE +ENABLE QUERY REWRITE +AS +select TDay , siteName, substr(collName,INSTR(collName, '/',-1)+1) as collName, +sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers +from MV_DS_stat1 +where submissiontool!='wmagent' +GROUP BY TDay, siteName, substr(collName,INSTR(collName, '/',-1)+1) +; + + +--- aggregation: day, datatier +--- count distinct users per day, aggregating later more days will +--- double count the same users that submitted in several days +--- excludes wmagent jobs + +CREATE MATERIALIZED VIEW MV_DS_stat1_aggr2_summ +COMPRESS +PCTFREE 0 +BUILD IMMEDIATE +ENABLE QUERY REWRITE +AS +select TDay , substr(collName,INSTR(collName, '/',-1)+1) as collName, +sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers +from MV_DS_stat1 +where submissiontool!='wmagent' +GROUP BY TDay, substr(collName,INSTR(collName, '/',-1)+1) +; + + +--- First aggregation: day, sitename, dataset namespace (ReReco, Summer2011, etc) +--- count distinct users per day, aggregating later more days will +--- double count the same users that submitted in several days +--- excludes wmagent jobs + +CREATE MATERIALIZED VIEW MV_DS_stat1_aggr4 +COMPRESS +PCTFREE 0 +BUILD IMMEDIATE +ENABLE QUERY REWRITE +AS +select TDay , siteName, +substr(collname,INSTR(collname, '/',-1,2)+1,INSTR(collname, '/',-1,1)-INSTR(collname, '/',-1,2)-1) as collName, +sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers +from MV_DS_stat1 +where +substr(collname,INSTR(collname, '/',-1,1)+1) not like 'USER' +and +collname not like 'unknown' +and submissiontool!='wmagent' +GROUP BY TDay, siteName, substr(collname,INSTR(collname, '/',-1,2)+1,INSTR(collname, '/',-1,1)-INSTR(collname, '/',-1,2)-1) +; + + +--- Second aggregation: day, dataset namespace (ReReco, Summer2011, etc) +--- count distinct users per day, aggregating later more days will +--- double count the same users that submitted in several days +--- excludes wmagent jobs + +CREATE MATERIALIZED VIEW MV_DS_stat1_aggr4_summ +COMPRESS +PCTFREE 0 +BUILD IMMEDIATE +ENABLE QUERY REWRITE +AS +select TDay , +substr(collname,INSTR(collname, '/',-1,2)+1,INSTR(collname, '/',-1,1)-INSTR(collname, '/',-1,2)-1) as collName, +sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers +from MV_DS_stat1 +where +substr(collname,INSTR(collname, '/',-1,1)+1) not like 'USER' +and +collname not like 'unknown' +and submissiontool!='wmagent' +GROUP BY TDay, substr(collname,INSTR(collname, '/',-1,2)+1,INSTR(collname, '/',-1,1)-INSTR(collname, '/',-1,2)-1) +; + +grant select on MV_DS_STAT1_AGGR1 to CMS_POPULARITY_SYSTEM_R; +grant select on MV_DS_STAT1_AGGR1_SUMM to CMS_POPULARITY_SYSTEM_R; +grant select on MV_DS_STAT1_AGGR2 to CMS_POPULARITY_SYSTEM_R; +grant select on MV_DS_STAT1_AGGR2_SUMM to CMS_POPULARITY_SYSTEM_R; +grant select on MV_DS_STAT1_AGGR4 to CMS_POPULARITY_SYSTEM_R; +grant select on MV_DS_STAT1_AGGR4_SUMM to CMS_POPULARITY_SYSTEM_R; + +grant select, insert, update, delete on MV_DS_STAT1 to CMS_POPULARITY_SYSTEM_W; +grant select, insert, update, delete on MV_DS_STAT1_AGGR1 to CMS_POPULARITY_SYSTEM_W; +grant select, insert, update, delete on MV_DS_STAT1_AGGR1_SUMM to CMS_POPULARITY_SYSTEM_W; +grant select, insert, update, delete on MV_DS_STAT1_AGGR2 to CMS_POPULARITY_SYSTEM_W; +grant select, insert, update, delete on MV_DS_STAT1_AGGR2_SUMM to CMS_POPULARITY_SYSTEM_W; +grant select, insert, update, delete on MV_DS_STAT1_AGGR4 to CMS_POPULARITY_SYSTEM_W; +grant select, insert, update, delete on MV_DS_STAT1_AGGR4_SUMM to CMS_POPULARITY_SYSTEM_W; diff --git a/DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1Aggr1.sql b/DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1Aggr1.sql deleted file mode 100644 index 2faf305..0000000 --- a/DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1Aggr1.sql +++ /dev/null @@ -1,41 +0,0 @@ ---- aggregation: day, sitename, dataset ---- exclude production ---- count distinct users per day, aggregating later more days will ---- double count the same users that submitted in several days ---- FIXME: CURRENTLY QUERYING MV_DS_STAT1 FOR TESTING ---- SHOULD QUERY MV_DS_STAT0 EVENTUALLY - -CREATE MATERIALIZED VIEW MV_DS_stat1_aggr1 -COMPRESS -PCTFREE 0 -BUILD IMMEDIATE -ENABLE QUERY REWRITE -AS -select TDay , siteName, collName, -sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count(distinct userid) as numUsers -from MV_DS_stat1 -where submissiontool!='wmagent' -GROUP BY TDay, siteName, collName -; - ---- aggregation: day, dataset ---- exclude production ---- count distinct users per day, aggregating later more days will ---- double count the same users that submitted in several days ---- FIXME: CURRENTLY QUERYING MV_DS_STAT1 FOR TESTING ---- SHOULD QUERY MV_DS_STAT0 EVENTUALLY - -CREATE MATERIALIZED VIEW MV_DS_stat1_aggr1_summ -COMPRESS -PCTFREE 0 -BUILD IMMEDIATE -ENABLE QUERY REWRITE -AS -select TDay , collName, -sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count(distinct userid) as numUsers -from MV_DS_stat1 -where submissiontool!='wmagent' -GROUP BY TDay, collName -; - -commit; \ No newline at end of file From c68d7d7aeec1d541de019b8fe472f19f25b1f118 Mon Sep 17 00:00:00 2001 From: Nicolo Magini Date: Tue, 24 May 2016 16:24:16 +0200 Subject: [PATCH 5/7] 1) Remove testing tables from schema deployment scripts 2) Add migration script used during May 2016 migration --- .../collector/config/db/CreateMVDSStat1.sql | 44 +------ .../collector/config/db/CreateMVTable.sql | 5 +- .../collector/config/db/MigratePopDB03.sql | 124 ++++++++++++++++++ 3 files changed, 133 insertions(+), 40 deletions(-) create mode 100644 DataPopularity/popdb.crab/collector/config/db/MigratePopDB03.sql diff --git a/DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1.sql b/DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1.sql index 732cd57..7434c48 100644 --- a/DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1.sql +++ b/DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1.sql @@ -1,35 +1,7 @@ ------------------------------------------------------------------------ ---- Unify Stat on DS ------------------------------------------------------------------------ - ---- This MV allows analysis aggregation along time ---- per DS and dataTier (RECO, AOD, etc), siteName, users ---- Popularity is in number of accesses and totCPU ---- Selection is performed only on fileexitflag=1 ---- FIXME: CURRENTLY DIFFERENT NAME FOR TESTING, SHOULD REPLACE ---- MV_DS_stat0 EVENTUALLY - -CREATE MATERIALIZED VIEW MV_DS_stat1 -COMPRESS -PCTFREE 0 -BUILD IMMEDIATE -REFRESH FAST -ENABLE QUERY REWRITE -AS -select trunc(finishedtimestamp) as TDay , siteName, userid, inputcollection as collName, isRemote, - submissiontool, - count(*) as numAccesses, sum(CPUTIME) as totCPU -from raw_file -where fileexitflag=1 -GROUP BY trunc(finishedtimestamp), siteName, userid, inputcollection, isRemote, submissiontool -; -commit; --- aggregation: day, sitename, dataset --- exclude production --- count distinct users per day, aggregating later more days will --- double count the same users that submitted in several days ---- FIXME: CURRENTLY QUERYING MV_DS_STAT1 FOR TESTING ---- SHOULD QUERY MV_DS_STAT0 EVENTUALLY CREATE MATERIALIZED VIEW MV_DS_stat1_aggr1 COMPRESS @@ -39,7 +11,7 @@ ENABLE QUERY REWRITE AS select TDay , siteName, collName, sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count(distinct userid) as numUsers -from MV_DS_stat1 +from MV_DS_stat0 where submissiontool!='wmagent' GROUP BY TDay, siteName, collName ; @@ -48,8 +20,6 @@ GROUP BY TDay, siteName, collName --- exclude production --- count distinct users per day, aggregating later more days will --- double count the same users that submitted in several days ---- FIXME: CURRENTLY QUERYING MV_DS_STAT1 FOR TESTING ---- SHOULD QUERY MV_DS_STAT0 EVENTUALLY CREATE MATERIALIZED VIEW MV_DS_stat1_aggr1_summ COMPRESS @@ -59,12 +29,11 @@ ENABLE QUERY REWRITE AS select TDay , collName, sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count(distinct userid) as numUsers -from MV_DS_stat1 +from MV_DS_stat0 where submissiontool!='wmagent' GROUP BY TDay, collName ; -commit; --- aggregation: day, sitename, dataTier --- count distinct users per day, aggregating later more days will --- double count the same users that submitted in several days @@ -78,7 +47,7 @@ ENABLE QUERY REWRITE AS select TDay , siteName, substr(collName,INSTR(collName, '/',-1)+1) as collName, sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers -from MV_DS_stat1 +from MV_DS_stat0 where submissiontool!='wmagent' GROUP BY TDay, siteName, substr(collName,INSTR(collName, '/',-1)+1) ; @@ -97,7 +66,7 @@ ENABLE QUERY REWRITE AS select TDay , substr(collName,INSTR(collName, '/',-1)+1) as collName, sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers -from MV_DS_stat1 +from MV_DS_stat0 where submissiontool!='wmagent' GROUP BY TDay, substr(collName,INSTR(collName, '/',-1)+1) ; @@ -117,7 +86,7 @@ AS select TDay , siteName, substr(collname,INSTR(collname, '/',-1,2)+1,INSTR(collname, '/',-1,1)-INSTR(collname, '/',-1,2)-1) as collName, sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers -from MV_DS_stat1 +from MV_DS_stat0 where substr(collname,INSTR(collname, '/',-1,1)+1) not like 'USER' and @@ -141,7 +110,7 @@ AS select TDay , substr(collname,INSTR(collname, '/',-1,2)+1,INSTR(collname, '/',-1,1)-INSTR(collname, '/',-1,2)-1) as collName, sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers -from MV_DS_stat1 +from MV_DS_stat0 where substr(collname,INSTR(collname, '/',-1,1)+1) not like 'USER' and @@ -157,7 +126,6 @@ grant select on MV_DS_STAT1_AGGR2_SUMM to CMS_POPULARITY_SYSTEM_R; grant select on MV_DS_STAT1_AGGR4 to CMS_POPULARITY_SYSTEM_R; grant select on MV_DS_STAT1_AGGR4_SUMM to CMS_POPULARITY_SYSTEM_R; -grant select, insert, update, delete on MV_DS_STAT1 to CMS_POPULARITY_SYSTEM_W; grant select, insert, update, delete on MV_DS_STAT1_AGGR1 to CMS_POPULARITY_SYSTEM_W; grant select, insert, update, delete on MV_DS_STAT1_AGGR1_SUMM to CMS_POPULARITY_SYSTEM_W; grant select, insert, update, delete on MV_DS_STAT1_AGGR2 to CMS_POPULARITY_SYSTEM_W; diff --git a/DataPopularity/popdb.crab/collector/config/db/CreateMVTable.sql b/DataPopularity/popdb.crab/collector/config/db/CreateMVTable.sql index 606f531..0429a13 100644 --- a/DataPopularity/popdb.crab/collector/config/db/CreateMVTable.sql +++ b/DataPopularity/popdb.crab/collector/config/db/CreateMVTable.sql @@ -237,11 +237,12 @@ BUILD IMMEDIATE REFRESH FAST ENABLE QUERY REWRITE AS -select trunc(finishedtimestamp) as TDay , siteName, userid, inputcollection as collName, isRemote, +select trunc(finishedtimestamp) as TDay , siteName, userid, inputcollection as collName, isRemote, + submissiontool, count(*) as numAccesses, sum(CPUTIME) as totCPU from raw_file where fileexitflag=1 -GROUP BY trunc(finishedtimestamp), siteName, userid, inputcollection, isRemote +GROUP BY trunc(finishedtimestamp), siteName, userid, inputcollection, isRemote, submissiontool ; commit; diff --git a/DataPopularity/popdb.crab/collector/config/db/MigratePopDB03.sql b/DataPopularity/popdb.crab/collector/config/db/MigratePopDB03.sql new file mode 100644 index 0000000..552c82b --- /dev/null +++ b/DataPopularity/popdb.crab/collector/config/db/MigratePopDB03.sql @@ -0,0 +1,124 @@ +-- SQL USED FOR POPDB SCHEMA MIGRATION ON 2015-05-24 +-- NOT TO BE USED FOR A FRESH SCHEMA DEPLOYMENT +-- TO DEPLOY SCHEMA FROM SCRATCH, RUN CreateMVTable +-- and CreateMVDSStat1 + +DROP MATERIALIZED VIEW MV_DS_stat0; + +CREATE MATERIALIZED VIEW MV_DS_stat0 +COMPRESS +PCTFREE 0 +BUILD IMMEDIATE +REFRESH FAST +ENABLE QUERY REWRITE +AS +select trunc(finishedtimestamp) as TDay , siteName, userid, inputcollection as collName, isRemote, + submissiontool, + count(*) as numAccesses, sum(CPUTIME) as totCPU +from raw_file +where fileexitflag=1 +GROUP BY trunc(finishedtimestamp), siteName, userid, inputcollection, isRemote, submissiontool +; + +CREATE MATERIALIZED VIEW MV_DS_stat1_aggr1 +COMPRESS +PCTFREE 0 +BUILD IMMEDIATE +ENABLE QUERY REWRITE +AS +select TDay , siteName, collName, +sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count(distinct userid) as numUsers +from MV_DS_stat0 +where submissiontool!='wmagent' +GROUP BY TDay, siteName, collName +; + +CREATE MATERIALIZED VIEW MV_DS_stat1_aggr1_summ +COMPRESS +PCTFREE 0 +BUILD IMMEDIATE +ENABLE QUERY REWRITE +AS +select TDay , collName, +sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count(distinct userid) as numUsers +from MV_DS_stat0 +where submissiontool!='wmagent' +GROUP BY TDay, collName +; + +CREATE MATERIALIZED VIEW MV_DS_stat1_aggr2 +COMPRESS +PCTFREE 0 +BUILD IMMEDIATE +ENABLE QUERY REWRITE +AS +select TDay , siteName, substr(collName,INSTR(collName, '/',-1)+1) as collName, +sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers +from MV_DS_stat0 +where submissiontool!='wmagent' +GROUP BY TDay, siteName, substr(collName,INSTR(collName, '/',-1)+1) +; + +CREATE MATERIALIZED VIEW MV_DS_stat1_aggr2_summ +COMPRESS +PCTFREE 0 +BUILD IMMEDIATE +ENABLE QUERY REWRITE +AS +select TDay , substr(collName,INSTR(collName, '/',-1)+1) as collName, +sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers +from MV_DS_stat0 +where submissiontool!='wmagent' +GROUP BY TDay, substr(collName,INSTR(collName, '/',-1)+1) +; + +CREATE MATERIALIZED VIEW MV_DS_stat1_aggr4 +COMPRESS +PCTFREE 0 +BUILD IMMEDIATE +ENABLE QUERY REWRITE +AS +select TDay , siteName, +substr(collname,INSTR(collname, '/',-1,2)+1,INSTR(collname, '/',-1,1)-INSTR(collname, '/',-1,2)-1) as collName, +sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers +from MV_DS_stat0 +where +substr(collname,INSTR(collname, '/',-1,1)+1) not like 'USER' +and +collname not like 'unknown' +and submissiontool!='wmagent' +GROUP BY TDay, siteName, substr(collname,INSTR(collname, '/',-1,2)+1,INSTR(collname, '/',-1,1)-INSTR(collname, '/',-1,2)-1) +; + +CREATE MATERIALIZED VIEW MV_DS_stat1_aggr4_summ +COMPRESS +PCTFREE 0 +BUILD IMMEDIATE +ENABLE QUERY REWRITE +AS +select TDay , +substr(collname,INSTR(collname, '/',-1,2)+1,INSTR(collname, '/',-1,1)-INSTR(collname, '/',-1,2)-1) as collName, +sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers +from MV_DS_stat0 +where +substr(collname,INSTR(collname, '/',-1,1)+1) not like 'USER' +and +collname not like 'unknown' +and submissiontool!='wmagent' +GROUP BY TDay, substr(collname,INSTR(collname, '/',-1,2)+1,INSTR(collname, '/',-1,1)-INSTR(collname, '/',-1,2)-1) +; + +grant select on MV_DS_STAT1_AGGR1 to CMS_POPULARITY_SYSTEM_R; +grant select on MV_DS_STAT1_AGGR1_SUMM to CMS_POPULARITY_SYSTEM_R; +grant select on MV_DS_STAT1_AGGR2 to CMS_POPULARITY_SYSTEM_R; +grant select on MV_DS_STAT1_AGGR2_SUMM to CMS_POPULARITY_SYSTEM_R; +grant select on MV_DS_STAT1_AGGR4 to CMS_POPULARITY_SYSTEM_R; +grant select on MV_DS_STAT1_AGGR4_SUMM to CMS_POPULARITY_SYSTEM_R; + +grant select, insert, update, delete on MV_DS_STAT0 to CMS_POPULARITY_SYSTEM_W; +grant select, insert, update, delete on MV_DS_STAT1_AGGR1 to CMS_POPULARITY_SYSTEM_W; +grant select, insert, update, delete on MV_DS_STAT1_AGGR1_SUMM to CMS_POPULARITY_SYSTEM_W; +grant select, insert, update, delete on MV_DS_STAT1_AGGR2 to CMS_POPULARITY_SYSTEM_W; +grant select, insert, update, delete on MV_DS_STAT1_AGGR2_SUMM to CMS_POPULARITY_SYSTEM_W; +grant select, insert, update, delete on MV_DS_STAT1_AGGR4 to CMS_POPULARITY_SYSTEM_W; +grant select, insert, update, delete on MV_DS_STAT1_AGGR4_SUMM to CMS_POPULARITY_SYSTEM_W; From ed7fce945fc30f4b86189022fba061631f7cb9cf Mon Sep 17 00:00:00 2001 From: Nicolo Magini Date: Tue, 24 May 2016 16:49:07 +0200 Subject: [PATCH 6/7] Add new MViews to MView refresh script --- DataPopularity/popdb.crab/collector/lib/popdbRefresh.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/DataPopularity/popdb.crab/collector/lib/popdbRefresh.py b/DataPopularity/popdb.crab/collector/lib/popdbRefresh.py index 4323601..5ba7124 100644 --- a/DataPopularity/popdb.crab/collector/lib/popdbRefresh.py +++ b/DataPopularity/popdb.crab/collector/lib/popdbRefresh.py @@ -311,8 +311,10 @@ def _refresh_MV(self): _refresh_with_alter('MV_DS_STAT0', 'F') - mvPool = Pool(10) - table_input=['MV_DS_STAT0_AGGR1', 'MV_DS_STAT0_AGGR2', 'MV_DS_STAT0_AGGR1_SUMM', 'MV_DS_STAT0_AGGR2_SUMM', 'MV_DS_STAT0_AGGR3', 'MV_DS_STAT0_AGGR4', 'MV_DS_STAT0_AGGR4_SUMM', 'MV_DS_CountFiles', 'MV_block_stat0_aggr_5_weeks', 'MV_DS_stat0_remote'] + mvPool = Pool(16) + table_input=['MV_DS_STAT0_AGGR1', 'MV_DS_STAT0_AGGR2', 'MV_DS_STAT0_AGGR1_SUMM', 'MV_DS_STAT0_AGGR2_SUMM', 'MV_DS_STAT0_AGGR3', 'MV_DS_STAT0_AGGR4', 'MV_DS_STAT0_AGGR4_SUMM', + 'MV_DS_STAT1_AGGR1', 'MV_DS_STAT1_AGGR2', 'MV_DS_STAT1_AGGR1_SUMM', 'MV_DS_STAT1_AGGR2_SUMM', 'MV_DS_STAT1_AGGR4', 'MV_DS_STAT1_AGGR4_SUMM', + 'MV_DS_CountFiles', 'MV_block_stat0_aggr_5_weeks', 'MV_DS_stat0_remote'] map_input = [ (x, 'C') for x in table_input ] mvPool.map(_refresh_SingleMV_Wrapper, map_input) mvPool.close() From e12bebec861e2d276d0f7b3a8718162603d25e69 Mon Sep 17 00:00:00 2001 From: Nicolo Magini Date: Tue, 24 May 2016 16:50:56 +0200 Subject: [PATCH 7/7] Increase CRAB popdb collector version to 0.3.0 --- DataPopularity/popdb.crab/collector/module.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DataPopularity/popdb.crab/collector/module.cfg b/DataPopularity/popdb.crab/collector/module.cfg index 2810ee8..3b01ccc 100644 --- a/DataPopularity/popdb.crab/collector/module.cfg +++ b/DataPopularity/popdb.crab/collector/module.cfg @@ -3,7 +3,7 @@ name = popularity-crab-cron author = CMS Data Popularity Team (formerly in CERN IT-SDC, IT-ES) author_email = cms-popdb-admins@cern.ch url = '' -version = 0.2.3 +version = 0.3.0 long_description = CMS Data Popularity Collection service to collect the usage of CMS official data from CRAB reports description = CMS Data Popularity Collection service for CRAB reports