diff --git a/.drone.yml b/.drone.yml index 5ab8e1a8..7f6a42b4 100644 --- a/.drone.yml +++ b/.drone.yml @@ -37,6 +37,7 @@ steps: - apt-get install --assume-yes libxml2 - apt-get install --assume-yes libxml2-dev - apt-get install --assume-yes libglpk-dev + - apt-get install --assume-yes libfontconfig1-dev - echo 'options(repos = c(CRAN = "https://cloud.r-project.org"))' >>"/usr/local/lib/R/etc/Rprofile.site" # package installation - Rscript install.R @@ -44,6 +45,12 @@ steps: - Rscript tests.R depends_on: [clone] +- name: R-4.2 + pull: if-not-exists + image: rocker/r-ver:4.2.1 + commands: *runTests + depends_on: [clone] + - name: R-4.1 pull: if-not-exists image: rocker/r-ver:4.1.3 @@ -96,6 +103,7 @@ steps: - apt-get install --assume-yes libxml2 - apt-get install --assume-yes libxml2-dev - apt-get install --assume-yes libglpk-dev + - apt-get install --assume-yes libfontconfig1-dev - echo 'options(repos = c(CRAN = "https://cloud.r-project.org"))' >>"/usr/local/lib/R/etc/Rprofile.site" # package installation - Rscript install.R @@ -103,6 +111,12 @@ steps: - Rscript showcase.R depends_on: [clone] +- name: R-4.2 + pull: if-not-exists + image: rocker/r-ver:4.2.1 + commands: *runShowcase + depends_on: [clone] + - name: R-4.1 pull: if-not-exists image: rocker/r-ver:4.1.3 diff --git a/NEWS.md b/NEWS.md index 72dc4b23..19b7739a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,8 +8,46 @@ - Incorporate custom event timestamps, i.e., add a configuration entry to the project configuration that allows specifying a file from which timestamps can be read, as well as an entry that allows locking this data; add corresponding functions `get.custom.event.timestamps`, `set.custom.event.timestamps` and `clear.custom.event.timestamps` (PR #227, 0aa342430ad3b354b9cf954dbe0838b056cf328a, 0f237d03913d2c940a008ea8fe84ba44817e77ea, c1803982357a3272b108f60cb1c976e3c2d9b1e5, 54e089db0ceea07db94914d02655a7f1f67d3117, 54673f8f88ca276ba06396116d802425093544d4, c5f5403430d55ceff6b6d5acbbca1ae9c5c231e2) - Add 
function `split.data.time.based.by.timestamps` to allow using custom event timestamps for splitting. Alternatively, timestamps can be specified manually (PR #227, 5b8515f97da4a24f971be453589595d259ab1fa1, 43f23a83bc66e485fea371f958bbb2ce3ddbd8d0) +- Add the following vertex attributes for artifact vertices and corresponding helper functions. +(PR #229, 20728071ca25e1d20cfa05bc15feb3ecc0a1c434, 51b5478ae15598ed3e6115b22e440929f8084660, 56ed57a21cc8004262ebba88429d0649cb238a52, 9b060361b1d1352b5a431df3990e468df7cab572, 52d40ba3657e3c806516653626afd81018a14863, e91161c79b53be7ba8ce3bec65de01ea6be1c575) + - `add.vertex.attribute.artifact.last.edited` + - `add.vertex.attribute.mail.thread.contributor.count`, `get.mail.thread.contributor.count` + - `add.vertex.attribute.mail.thread.message.count`, `get.mail.thread.message.count` + - `add.vertex.attribute.mail.thread.start.date`, `get.mail.thread.start.date` + - `add.vertex.attribute.mail.thread.end.date`, `get.mail.thread.end.date` + - `add.vertex.attribute.mail.thread.originating.mailing.list`, `get.mail.thread.originating.mailing.list` + - `add.vertex.attribute.issue.contributor.count`, `get.issue.contributor.count` + - `add.vertex.attribute.issue.event.count`, `get.issue.event.count` + - `add.vertex.attribute.issue.comment.event.count`, `get.issue.comment.count` + - `add.vertex.attribute.issue.opened.date`, `get.issue.opened.date` + - `add.vertex.attribute.issue.closed.date`, `get.issue.closed.date` + - `add.vertex.attribute.issue.last.activity.date`, `get.issue.last.activity.date` + - `add.vertex.attribute.issue.title`, `get.issue.title` + - `add.vertex.attribute.pr.open.merged.or.closed`, `get.pr.open.merged.or.closed` + - `add.vertex.attribute.issue.is.pull.request`, `get.issue.is.pull.request` + ### Changed/Improved +- Rename existing vertex attributes for author vertices to be distinguishable from attributes for artifact vertices. 
+ With this change, the first word after `add.vertex.attribute.` now signifies the type of vertex the attribute applies to. (PR #229, 75e8514d1d2f6222d2093679f4418e9171d3abf2) + - `add.vertex.attribute.commit.count.author` -> `add.vertex.attribute.author.commit.count` + - `add.vertex.attribute.commit.count.author.not.committer` -> `add.vertex.attribute.author.commit.count.not.committer` + - `add.vertex.attribute.commit.count.committer` -> `add.vertex.attribute.author.commit.count.committer` + - `add.vertex.attribute.commit.count.committer.not.author` -> `add.vertex.attribute.author.commit.count.committer.not.author` + - `add.vertex.attribute.commit.count.committer.and.author` -> `add.vertex.attribute.author.commit.count.committer.and.author` + - `add.vertex.attribute.commit.count.committer.or.author` -> `add.vertex.attribute.author.commit.count.committer.or.author` + - `add.vertex.attribute.artifact.count` -> `add.vertex.attribute.author.artifact.count` + - `add.vertex.attribute.mail.count` -> `add.vertex.attribute.author.mail.count` + - `add.vertex.attribute.mail.thread.count` -> `add.vertex.attribute.author.mail.thread.count` + - `add.vertex.attribute.issue.count` -> `add.vertex.attribute.author.issue.count` + - `add.vertex.attribute.issues.commented.count` -> `add.vertex.attribute.author.issues.commented.count` + - `add.vertex.attribute.issue.creation.count` -> `add.vertex.attribute.author.issue.creation.count` + - `add.vertex.attribute.issue.comment.count` -> `add.vertex.attribute.author.issue.comment.count` + - `add.vertex.attribute.first.activity` -> `add.vertex.attribute.author.first.activity` + - `add.vertex.attribute.active.ranges` -> `add.vertex.attribute.author.active.ranges` +- Add parameter `use.unfiltered.data` to `add.vertex.attribute.issue.*`. This allows selecting whether the filtered or unfiltered issue data is used + for calculating the attribute. 
(PR #229, b77601dfa1372af5f58fb552cdb015401a344df7, 922258cb743614e0eeffcf38028acfc0a42a0332) +- Improve handling of issue type in vertex attribute name for `add.vertex.attribute.issue.*`. The default attribute name still adjusts to the issue type, but this no longer happens if the same name is specified manually. (PR #229, fe5dc61546b81c7779643c3b2b37c101a55217f8) ### Fixed diff --git a/showcase.R b/showcase.R index e52dc1bf..30dfc91c 100644 --- a/showcase.R +++ b/showcase.R @@ -221,16 +221,16 @@ my.networks = lapply(cf.data, function(range.data) { return (y$get.author.network()) }) ## add commit-count vertex attributes -sample = add.vertex.attribute.commit.count.author(my.networks, x.data, aggregation.level = "range") -sample.cumulative = add.vertex.attribute.commit.count.author(my.networks, x.data, aggregation.level = "cumulative") +sample = add.vertex.attribute.author.commit.count(my.networks, x.data, aggregation.level = "range") +sample.cumulative = add.vertex.attribute.author.commit.count(my.networks, x.data, aggregation.level = "cumulative") ## add email-address vertex attribute sample.mail = add.vertex.attribute.author.email(my.networks, x.data, "author.email") -sample.mail.thread = add.vertex.attribute.mail.thread.count(my.networks, x.data) -sample.issues.created = add.vertex.attribute.issue.creation.count(my.networks, x.data) -sample.pull.requests = add.vertex.attribute.issue.count(my.networks, x.data, issue.type = "pull.requests") +sample.mail.thread = add.vertex.attribute.author.mail.thread.count(my.networks, x.data) +sample.issues.created = add.vertex.attribute.author.issue.creation.count(my.networks, x.data) +sample.pull.requests = add.vertex.attribute.author.issue.count(my.networks, x.data, issue.type = "pull.requests") ## add vertex attributes for the project-level network x.net.as.list = list("1970-01-01 00:00:00-2030-01-01 00:00:00" = x$get.author.network()) -sample.entire = add.vertex.attribute.commit.count.author(x.net.as.list, x.data, 
aggregation.level = "complete") +sample.entire = add.vertex.attribute.author.commit.count(x.net.as.list, x.data, aggregation.level = "complete") ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / @@ -443,7 +443,7 @@ get.author.class.by.type(network = empty.network, type = "network.eigen") get.author.class.by.type(proj.data = empty.range.data, type = "commit.count") get.author.class.by.type(proj.data = empty.range.data, type = "loc.count") -## test function for mutliple ranges (evolution) +## test function for multiple ranges (evolution) author.class.overview = get.author.class.overview(network.list = network.list, type = "network.degree") get.author.class.overview(network.list = network.list, type = "network.eigen") get.author.class.overview(range.data.list = range.list, type = "commit.count") @@ -462,3 +462,4 @@ calculate.cohens.kappa(author.classification.list = author.class.overview, get.class.turnover.overview(author.class.overview = author.class.overview) get.unstable.authors.overview(author.class.overview = author.class.overview, saturation = 2) + diff --git a/tests/codeface-data/configurations/testing/test_feature.conf b/tests/codeface-data/configurations/testing/test_feature.conf index bade450e..971289b5 100644 --- a/tests/codeface-data/configurations/testing/test_feature.conf +++ b/tests/codeface-data/configurations/testing/test_feature.conf @@ -8,6 +8,9 @@ mailinglists: - name: test type: dev source: gmane + - name: test2 + type: dev + source: gmane # date of first release: # 2009-03-05 diff --git a/tests/codeface-data/configurations/testing/test_proximity.conf b/tests/codeface-data/configurations/testing/test_proximity.conf index d6e7c1d6..f9e29177 100644 --- a/tests/codeface-data/configurations/testing/test_proximity.conf +++ b/tests/codeface-data/configurations/testing/test_proximity.conf @@ -8,6 +8,9 @@ mailinglists: - name: test type: dev source: gmane + - name: test2 + type: dev + source: gmane # date of first release: # 
2009-03-05 diff --git a/tests/codeface-data/results/testing/test_feature/feature/emails.list b/tests/codeface-data/results/testing/test_feature/feature/emails.list index 6bf6234f..37300429 100644 --- a/tests/codeface-data/results/testing/test_feature/feature/emails.list +++ b/tests/codeface-data/results/testing/test_feature/feature/emails.list @@ -1,17 +1,17 @@ -"Björn";"bjoern@example.org";"";"2004-10-09 18:38:13";200;"Re: Fw: busybox 202 with tab";1 -"Björn";"bjoern@example.org";"<1107974989.17910.6.camel@jmcmullan>";"2005-02-09 18:49:49";-500;"Doubled date";2 -"udo";"udo@example.org";"";"2010-07-12 10:05:36";200;"Only mail address";3 -"Fritz fritz@example.org";"asd@sample.org";"";"2010-07-12 11:05:35";200;"name is mail address";4 -"georg";"heinz@example.org";"";"2010-07-12 12:05:34";200;"name is mail address";5 -"Hans";"hans1@example.org";"";"2010-07-12 12:05:40";200;"name is mail address";6 -"Hans";"hans1@example.org";"";"2010-07-12 12:05:41";200;"name is mail address";6 -"Hans";"hans1@example.org";"";"2010-07-12 12:05:42";200;"name is mail address";6 -"Hans";"hans1@example.org";"";"2010-07-12 12:05:43";200;"name is mail address";6 -"Hans";"hans1@example.org";"";"2010-07-12 12:05:44";200;"name is mail address";6 -"Hans";"hans1@example.org";"";"2010-07-12 12:05:45";200;"name is mail address";6 -"Hans";"hans1@example.org";"";"2010-07-12 12:05:46";200;"name is mail address";7 -"Thomas";"thomas@example.org";"";"";0;"=?KOI8-R?Q?=EF=D4=D7=C5=D4:_Some_patches?= 2";8 -"Björn";"bjoern@example.org";"<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>";"2016-07-12 15:58:40";0;"Re: busybox 1";8 -"Olaf";"olaf@example.org";"<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>";"2016-07-12 15:58:50";-400;"=?KOI8-R?Q?=EF=D4=D7=C5=D4:_Some_patches?= tab";8 -"Thomas";"thomas@example.org";"<65a1sf31sagd684dfv31@mail.gmail.com>";"2016-07-12 16:04:40";100;"Re: Fw: busybox 2 tab";9 
-"Olaf";"olaf@example.org";"<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>";"2016-07-12 16:05:37";200;"Re: Fw: busybox 10";9 +"Björn";"bjoern@example.org";"";"2004-10-09 18:38:13";200;"Re: Fw: busybox 202 with tab";"13#1" +"Björn";"bjoern@example.org";"<1107974989.17910.6.camel@jmcmullan>";"2005-02-09 18:49:49";-500;"Doubled date";"42#2" +"udo";"udo@example.org";"";"2010-07-12 10:05:36";200;"Only mail address";"13#3" +"Fritz fritz@example.org";"asd@sample.org";"";"2010-07-12 11:05:35";200;"name is mail address";"42#4" +"georg";"heinz@example.org";"";"2010-07-12 12:05:34";200;"name is mail address";"42#5" +"Hans";"hans1@example.org";"";"2010-07-12 12:05:40";200;"name is mail address";"42#6" +"Hans";"hans1@example.org";"";"2010-07-12 12:05:41";200;"name is mail address";"42#6" +"Hans";"hans1@example.org";"";"2010-07-12 12:05:42";200;"name is mail address";"42#6" +"Hans";"hans1@example.org";"";"2010-07-12 12:05:43";200;"name is mail address";"42#6" +"Hans";"hans1@example.org";"";"2010-07-12 12:05:44";200;"name is mail address";"42#6" +"Hans";"hans1@example.org";"";"2010-07-12 12:05:45";200;"name is mail address";"42#6" +"Hans";"hans1@example.org";"";"2010-07-12 12:05:46";200;"name is mail address";"42#7" +"Thomas";"thomas@example.org";"";"";0;"=?KOI8-R?Q?=EF=D4=D7=C5=D4:_Some_patches?= 2";"13#8" +"Björn";"bjoern@example.org";"<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>";"2016-07-12 15:58:40";0;"Re: busybox 1";"13#8" +"Olaf";"olaf@example.org";"<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>";"2016-07-12 15:58:50";-400;"=?KOI8-R?Q?=EF=D4=D7=C5=D4:_Some_patches?= tab";"13#8" +"Thomas";"thomas@example.org";"<65a1sf31sagd684dfv31@mail.gmail.com>";"2016-07-12 16:04:40";100;"Re: Fw: busybox 2 tab";"13#9" +"Olaf";"olaf@example.org";"<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>";"2016-07-12 16:05:37";200;"Re: Fw: busybox 10";"13#9" diff --git 
a/tests/codeface-data/results/testing/test_proximity/proximity/emails.list b/tests/codeface-data/results/testing/test_proximity/proximity/emails.list index 6bf6234f..37300429 100644 --- a/tests/codeface-data/results/testing/test_proximity/proximity/emails.list +++ b/tests/codeface-data/results/testing/test_proximity/proximity/emails.list @@ -1,17 +1,17 @@ -"Björn";"bjoern@example.org";"";"2004-10-09 18:38:13";200;"Re: Fw: busybox 202 with tab";1 -"Björn";"bjoern@example.org";"<1107974989.17910.6.camel@jmcmullan>";"2005-02-09 18:49:49";-500;"Doubled date";2 -"udo";"udo@example.org";"";"2010-07-12 10:05:36";200;"Only mail address";3 -"Fritz fritz@example.org";"asd@sample.org";"";"2010-07-12 11:05:35";200;"name is mail address";4 -"georg";"heinz@example.org";"";"2010-07-12 12:05:34";200;"name is mail address";5 -"Hans";"hans1@example.org";"";"2010-07-12 12:05:40";200;"name is mail address";6 -"Hans";"hans1@example.org";"";"2010-07-12 12:05:41";200;"name is mail address";6 -"Hans";"hans1@example.org";"";"2010-07-12 12:05:42";200;"name is mail address";6 -"Hans";"hans1@example.org";"";"2010-07-12 12:05:43";200;"name is mail address";6 -"Hans";"hans1@example.org";"";"2010-07-12 12:05:44";200;"name is mail address";6 -"Hans";"hans1@example.org";"";"2010-07-12 12:05:45";200;"name is mail address";6 -"Hans";"hans1@example.org";"";"2010-07-12 12:05:46";200;"name is mail address";7 -"Thomas";"thomas@example.org";"";"";0;"=?KOI8-R?Q?=EF=D4=D7=C5=D4:_Some_patches?= 2";8 -"Björn";"bjoern@example.org";"<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>";"2016-07-12 15:58:40";0;"Re: busybox 1";8 -"Olaf";"olaf@example.org";"<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>";"2016-07-12 15:58:50";-400;"=?KOI8-R?Q?=EF=D4=D7=C5=D4:_Some_patches?= tab";8 -"Thomas";"thomas@example.org";"<65a1sf31sagd684dfv31@mail.gmail.com>";"2016-07-12 16:04:40";100;"Re: Fw: busybox 2 tab";9 
-"Olaf";"olaf@example.org";"<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>";"2016-07-12 16:05:37";200;"Re: Fw: busybox 10";9 +"Björn";"bjoern@example.org";"";"2004-10-09 18:38:13";200;"Re: Fw: busybox 202 with tab";"13#1" +"Björn";"bjoern@example.org";"<1107974989.17910.6.camel@jmcmullan>";"2005-02-09 18:49:49";-500;"Doubled date";"42#2" +"udo";"udo@example.org";"";"2010-07-12 10:05:36";200;"Only mail address";"13#3" +"Fritz fritz@example.org";"asd@sample.org";"";"2010-07-12 11:05:35";200;"name is mail address";"42#4" +"georg";"heinz@example.org";"";"2010-07-12 12:05:34";200;"name is mail address";"42#5" +"Hans";"hans1@example.org";"";"2010-07-12 12:05:40";200;"name is mail address";"42#6" +"Hans";"hans1@example.org";"";"2010-07-12 12:05:41";200;"name is mail address";"42#6" +"Hans";"hans1@example.org";"";"2010-07-12 12:05:42";200;"name is mail address";"42#6" +"Hans";"hans1@example.org";"";"2010-07-12 12:05:43";200;"name is mail address";"42#6" +"Hans";"hans1@example.org";"";"2010-07-12 12:05:44";200;"name is mail address";"42#6" +"Hans";"hans1@example.org";"";"2010-07-12 12:05:45";200;"name is mail address";"42#6" +"Hans";"hans1@example.org";"";"2010-07-12 12:05:46";200;"name is mail address";"42#7" +"Thomas";"thomas@example.org";"";"";0;"=?KOI8-R?Q?=EF=D4=D7=C5=D4:_Some_patches?= 2";"13#8" +"Björn";"bjoern@example.org";"<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>";"2016-07-12 15:58:40";0;"Re: busybox 1";"13#8" +"Olaf";"olaf@example.org";"<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>";"2016-07-12 15:58:50";-400;"=?KOI8-R?Q?=EF=D4=D7=C5=D4:_Some_patches?= tab";"13#8" +"Thomas";"thomas@example.org";"<65a1sf31sagd684dfv31@mail.gmail.com>";"2016-07-12 16:04:40";100;"Re: Fw: busybox 2 tab";"13#9" +"Olaf";"olaf@example.org";"<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>";"2016-07-12 16:05:37";200;"Re: Fw: busybox 10";"13#9" diff --git a/tests/test-data-cut.R b/tests/test-data-cut.R index 
fcd8bec3..d1f3ef2a 100644 --- a/tests/test-data-cut.R +++ b/tests/test-data-cut.R @@ -69,7 +69,7 @@ test_that("Cut commit and mail data to same date range.", { date = get.date.from.string(c("2016-07-12 16:04:40", "2016-07-12 16:05:37")), date.offset = as.integer(c(100, 200)), subject = c("Re: Fw: busybox 2 tab", "Re: Fw: busybox 10"), - thread = sprintf("<thread-%s>", c(9, 9)), + thread = sprintf("<thread-%s>", c("13#9", "13#9")), artifact.type = c("Mail", "Mail")) commit.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.commits.unfiltered() diff --git a/tests/test-networks-covariates.R b/tests/test-networks-covariates.R index 613a5220..5ebaf02e 100644 --- a/tests/test-networks-covariates.R +++ b/tests/test-networks-covariates.R @@ -20,6 +20,7 @@ ## Copyright 2018-2019 by Jakob Kronawitter ## Copyright 2021 by Johannes Hostert ## Copyright 2021-2022 by Niklas Schneider +## Copyright 2022 by Jonathan Baumann ## All Rights Reserved. @@ -59,9 +60,12 @@ myranges.since.2010 = construct.ranges(mybins.since.2010, sliding.window = FALSE #' @return Tuple containing project data and list of networks get.network.covariates.test.networks = function(network.type = c("author", "artifact"), issues = FALSE, author.relation = c("cochange", "issue", "mail"), - bins = mybins) { + artifact.relation = c("cochange", "issue", "mail"), + bins = mybins, + issues.only.comments = FALSE) { author.relation = match.arg(author.relation) + artifact.relation = match.arg(artifact.relation) network.type.function = switch(match.arg(network.type), "author" = "get.author.network", "artifact" = "get.artifact.network") @@ -70,9 +74,10 @@ get.network.covariates.test.networks = function(network.type = c("author", "arti proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) proj.conf$update.value("commits.filter.base.artifact", FALSE) proj.conf$update.value("commits.filter.untracked.files", TRUE) - proj.conf$update.value("issues.only.comments", FALSE) + 
proj.conf$update.value("issues.only.comments", issues.only.comments) net.conf = NetworkConf$new() - net.conf$update.values(list(author.relation = author.relation, simplify = FALSE)) + net.conf$update.values(list(author.relation = author.relation, artifact.relation = artifact.relation, + simplify = FALSE)) ## retrieve project data project.data = ProjectData$new(proj.conf) @@ -309,7 +314,7 @@ get.expected.first.activity = function() { return(expected.attributes) } -#' Helper for tests of the function add.vertex.attribute.active.ranges: Returns the expected active ranges per range, +#' Helper for tests of the function add.vertex.attribute.author.active.ranges: Returns the expected active ranges per range, #' author and data source as a nested list. #' #' @return A list with elements that represent the range (the test data is split to build one network per range), each @@ -434,8 +439,8 @@ test_that("Test split.and.add.vertex.attribute", { }) }) -#' Test the add.vertex.attribute.commit.count.author method -test_that("Test add.vertex.attribute.commit.count.author", { +#' Test the add.vertex.attribute.author.commit.count method +test_that("Test add.vertex.attribute.author.commit.count", { ## Test setup networks.and.data = get.network.covariates.test.networks() @@ -451,7 +456,7 @@ test_that("Test add.vertex.attribute.commit.count.author", { ## Test lapply(AGGREGATION.LEVELS, function(level) { - networks.with.attr = add.vertex.attribute.commit.count.author( + networks.with.attr = add.vertex.attribute.author.commit.count( networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level ) @@ -461,8 +466,8 @@ test_that("Test add.vertex.attribute.commit.count.author", { }) }) -#' Test the add.vertex.attribute.commit.count.committer.and.author method -test_that("Test add.vertex.attribute.commit.count.committer.and.author", { +#' Test the add.vertex.attribute.author.commit.count.committer.and.author method +test_that("Test 
add.vertex.attribute.author.commit.count.committer.and.author", { ## Test setup networks.and.data = get.network.covariates.test.networks() @@ -479,7 +484,7 @@ test_that("Test add.vertex.attribute.commit.count.committer.and.author", { ## Test lapply(AGGREGATION.LEVELS, function(level) { - networks.with.attr = add.vertex.attribute.commit.count.committer.and.author( + networks.with.attr = add.vertex.attribute.author.commit.count.committer.and.author( networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level ) @@ -489,8 +494,8 @@ test_that("Test add.vertex.attribute.commit.count.committer.and.author", { }) }) -#' Test the add.vertex.attribute.commit.count.committer.or.author method -test_that("Test add.vertex.attribute.commit.count.committer.or.author", { +#' Test the add.vertex.attribute.author.commit.count.committer.or.author method +test_that("Test add.vertex.attribute.author.commit.count.committer.or.author", { ## Test setup networks.and.data = get.network.covariates.test.networks() @@ -507,7 +512,7 @@ test_that("Test add.vertex.attribute.commit.count.committer.or.author", { ## Test lapply(AGGREGATION.LEVELS, function(level) { - networks.with.attr = add.vertex.attribute.commit.count.committer.or.author( + networks.with.attr = add.vertex.attribute.author.commit.count.committer.or.author( networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level ) @@ -517,8 +522,8 @@ test_that("Test add.vertex.attribute.commit.count.committer.or.author", { }) }) -#' Test the add.vertex.attribute.mail.count method -test_that("Test add.vertex.attribute.mail.count", { +#' Test the add.vertex.attribute.author.mail.count method +test_that("Test add.vertex.attribute.author.mail.count", { ## Test setup networks.and.data = get.network.covariates.test.networks(author.relation = "mail", bins = mybins.since.2010) @@ -534,7 +539,7 @@ test_that("Test add.vertex.attribute.mail.count", { ## Test lapply(AGGREGATION.LEVELS, 
function(level) { - networks.with.attr = add.vertex.attribute.mail.count( + networks.with.attr = add.vertex.attribute.author.mail.count( networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level ) @@ -544,8 +549,8 @@ test_that("Test add.vertex.attribute.mail.count", { }) }) -#' Test the add.vertex.attribute.mail.count method -test_that("Test add.vertex.attribute.mail.thread.count", { +#' Test the add.vertex.attribute.author.mail.thread.count method +test_that("Test add.vertex.attribute.author.mail.thread.count", { ## Test setup networks.and.data = get.network.covariates.test.networks(author.relation = "mail", bins = mybins.since.2010) @@ -561,7 +566,7 @@ test_that("Test add.vertex.attribute.mail.thread.count", { ## Test lapply(AGGREGATION.LEVELS, function(level) { - networks.with.attr = add.vertex.attribute.mail.thread.count( + networks.with.attr = add.vertex.attribute.author.mail.thread.count( networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level ) @@ -586,8 +591,8 @@ sum.expected.attributes = function(expected.attributes.issues.only, expected.att return(result) } -#' Test the add.vertex.attribute.issue.count method -test_that("Test add.vertex.attribute.issue.count", { +#' Test the add.vertex.attribute.author.issue.count method +test_that("Test add.vertex.attribute.author.issue.count", { ## Test setup networks.and.data = get.network.covariates.test.networks(issues=TRUE, author.relation = "issue") @@ -614,7 +619,7 @@ test_that("Test add.vertex.attribute.issue.count", { ## Test issues only lapply(AGGREGATION.LEVELS, function(level) { - networks.with.attr = add.vertex.attribute.issue.count( + networks.with.attr = add.vertex.attribute.author.issue.count( networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "issues" ) @@ -626,7 +631,7 @@ test_that("Test add.vertex.attribute.issue.count", { # Test PRs only lapply(AGGREGATION.LEVELS, 
function(level) { - networks.with.attr = add.vertex.attribute.issue.count( + networks.with.attr = add.vertex.attribute.author.issue.count( networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "pull.requests", name = "pull.request.count" ) @@ -639,7 +644,7 @@ test_that("Test add.vertex.attribute.issue.count", { # Test both lapply(AGGREGATION.LEVELS, function(level) { - networks.with.attr = add.vertex.attribute.issue.count( + networks.with.attr = add.vertex.attribute.author.issue.count( networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "all" ) @@ -650,8 +655,8 @@ test_that("Test add.vertex.attribute.issue.count", { }) -#' Test the add.vertex.attribute.issues.commented.count method -test_that("Test add.vertex.attribute.issues.commented.count", { +#' Test the add.vertex.attribute.author.issues.commented.count method +test_that("Test add.vertex.attribute.author.issues.commented.count", { ## Test setup networks.and.data = get.network.covariates.test.networks(issues = TRUE, author.relation = "issue") @@ -678,7 +683,7 @@ test_that("Test add.vertex.attribute.issues.commented.count", { ## Test issues only lapply(AGGREGATION.LEVELS, function(level) { - networks.with.attr = add.vertex.attribute.issues.commented.count( + networks.with.attr = add.vertex.attribute.author.issues.commented.count( networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "issues" ) @@ -690,7 +695,7 @@ test_that("Test add.vertex.attribute.issues.commented.count", { # Test PRs only lapply(AGGREGATION.LEVELS, function(level) { - networks.with.attr = add.vertex.attribute.issues.commented.count( + networks.with.attr = add.vertex.attribute.author.issues.commented.count( networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "pull.requests", name = "pull.requests.commented.count" ) @@ 
-703,7 +708,7 @@ test_that("Test add.vertex.attribute.issues.commented.count", { # Test both lapply(AGGREGATION.LEVELS, function(level) { - networks.with.attr = add.vertex.attribute.issues.commented.count( + networks.with.attr = add.vertex.attribute.author.issues.commented.count( networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "all" ) @@ -713,8 +718,8 @@ test_that("Test add.vertex.attribute.issues.commented.count", { }) }) -#' Test the add.vertex.attribute.issue.creation.count method -test_that("Test add.vertex.attribute.issue.creation.count", { +#' Test the add.vertex.attribute.author.issue.creation.count method +test_that("Test add.vertex.attribute.author.issue.creation.count", { ## Test setup networks.and.data = get.network.covariates.test.networks(issues = TRUE, author.relation = "issue") @@ -741,7 +746,7 @@ test_that("Test add.vertex.attribute.issue.creation.count", { ## Test issues only lapply(AGGREGATION.LEVELS, function(level) { - networks.with.attr = add.vertex.attribute.issue.creation.count( + networks.with.attr = add.vertex.attribute.author.issue.creation.count( networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "issues" ) @@ -753,7 +758,7 @@ test_that("Test add.vertex.attribute.issue.creation.count", { # Test PRs only lapply(AGGREGATION.LEVELS, function(level) { - networks.with.attr = add.vertex.attribute.issue.creation.count( + networks.with.attr = add.vertex.attribute.author.issue.creation.count( networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "pull.requests", name = "pull.request.creation.count" ) @@ -766,7 +771,7 @@ test_that("Test add.vertex.attribute.issue.creation.count", { # Test both lapply(AGGREGATION.LEVELS, function(level) { - networks.with.attr = add.vertex.attribute.issue.creation.count( + networks.with.attr = add.vertex.attribute.author.issue.creation.count( 
networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "all" ) @@ -776,8 +781,8 @@ test_that("Test add.vertex.attribute.issue.creation.count", { }) }) -#' Test the add.vertex.attribute.issue.comment.count method -test_that("Test add.vertex.attribute.issue.comment.count", { +#' Test the add.vertex.attribute.author.issue.comment.count method +test_that("Test add.vertex.attribute.author.issue.comment.count", { ## Test setup networks.and.data = get.network.covariates.test.networks(issues = TRUE, author.relation = "issue") @@ -804,7 +809,7 @@ test_that("Test add.vertex.attribute.issue.comment.count", { ## Test issues only lapply(AGGREGATION.LEVELS, function(level) { - networks.with.attr = add.vertex.attribute.issue.comment.count( + networks.with.attr = add.vertex.attribute.author.issue.comment.count( networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "issues" ) @@ -816,7 +821,7 @@ test_that("Test add.vertex.attribute.issue.comment.count", { # Test PRs only lapply(AGGREGATION.LEVELS, function(level) { - networks.with.attr = add.vertex.attribute.issue.comment.count( + networks.with.attr = add.vertex.attribute.author.issue.comment.count( networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "pull.requests", name = "pull.request.comment.count" ) @@ -829,7 +834,7 @@ test_that("Test add.vertex.attribute.issue.comment.count", { # Test both lapply(AGGREGATION.LEVELS, function(level) { - networks.with.attr = add.vertex.attribute.issue.comment.count( + networks.with.attr = add.vertex.attribute.author.issue.comment.count( networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "all" ) @@ -863,8 +868,8 @@ test_that("Test add.vertex.attribute.author.email", { expect_identical(expected.attributes, actual.attributes) }) -#' Test the 
add.vertex.attribute.artifact.count method -test_that("Test add.vertex.attribute.artifact.count", { +#' Test the add.vertex.attribute.author.artifact.count method +test_that("Test add.vertex.attribute.author.artifact.count", { ## Test setup @@ -882,7 +887,7 @@ test_that("Test add.vertex.attribute.artifact.count", { ## Test lapply(AGGREGATION.LEVELS, function(level) { - networks.with.attr = add.vertex.attribute.artifact.count( + networks.with.attr = add.vertex.attribute.author.artifact.count( networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level ) @@ -892,8 +897,8 @@ test_that("Test add.vertex.attribute.artifact.count", { }) }) -#' Test the add.vertex.attribute.first.activity method with computation over all types. -test_that("Test add.vertex.attribute.first.activity with multiple types and computation over all types", { +#' Test the add.vertex.attribute.author.first.activity method with computation over all types. +test_that("Test add.vertex.attribute.author.first.activity with multiple types and computation over all types", { ## Test setup @@ -968,7 +973,7 @@ test_that("Test add.vertex.attribute.first.activity with multiple types and comp lapply(AGGREGATION.LEVELS, function(level) { - networks.with.attributes = add.vertex.attribute.first.activity( + networks.with.attributes = add.vertex.attribute.author.first.activity( list.of.networks = networks.and.data[["networks"]], project.data = networks.and.data[["project.data"]], activity.types = c("mails", "commits", "issues"), name = "first.activity", aggregation.level = level, default.value = NA, combine.activity.types = TRUE @@ -979,8 +984,8 @@ test_that("Test add.vertex.attribute.first.activity with multiple types and comp }) }) -#' Test the add.vertex.attribute.first.activity method with multiple activity types and computation per type. 
-test_that("Test add.vertex.attribute.first.activity with multiple types and computation per type", { +#' Test the add.vertex.attribute.author.first.activity method with multiple activity types and computation per type. +test_that("Test add.vertex.attribute.author.first.activity with multiple types and computation per type", { ## Test setup @@ -994,7 +999,7 @@ test_that("Test add.vertex.attribute.first.activity with multiple types and comp lapply(AGGREGATION.LEVELS, function(level) { - networks.with.attributes = add.vertex.attribute.first.activity( + networks.with.attributes = add.vertex.attribute.author.first.activity( list.of.networks = networks.and.data[["networks"]], project.data = networks.and.data[["project.data"]], activity.types = c("mails", "commits", "issues"), name = "first.activity", aggregation.level = level, default.value = NA, combine.activity.types = FALSE @@ -1005,8 +1010,8 @@ test_that("Test add.vertex.attribute.first.activity with multiple types and comp }) }) -#' Test the add.vertex.attribute.first.activity method with one activity type and computation per type. -test_that("Test add.vertex.attribute.first.activity with one type and computation per type", { +#' Test the add.vertex.attribute.author.first.activity method with one activity type and computation per type. 
+test_that("Test add.vertex.attribute.author.first.activity with one type and computation per type", { ## Test setup @@ -1026,7 +1031,7 @@ test_that("Test add.vertex.attribute.first.activity with one type and computatio lapply(AGGREGATION.LEVELS, function(level) { - networks.with.attributes = add.vertex.attribute.first.activity( + networks.with.attributes = add.vertex.attribute.author.first.activity( list.of.networks = networks.and.data[["networks"]], project.data = networks.and.data[["project.data"]], activity.types = c("mails"), name = "first.activity", aggregation.level = level, default.value = NA, combine.activity.types = FALSE @@ -1037,8 +1042,8 @@ test_that("Test add.vertex.attribute.first.activity with one type and computatio }) }) -#' Test the add.vertex.attribute.active.ranges method with computation over all types -test_that("Test add.vertex.attribute.active.ranges with computation over all types", { +#' Test the add.vertex.attribute.author.active.ranges method with computation over all types +test_that("Test add.vertex.attribute.author.active.ranges with computation over all types", { ## Test setup networks.and.data = get.network.covariates.test.networks() @@ -1047,7 +1052,7 @@ test_that("Test add.vertex.attribute.active.ranges with computation over all typ networks.and.data$project.data$set.project.conf.entry("issues.locked", TRUE) ## Test - networks.with.attr = add.vertex.attribute.active.ranges( + networks.with.attr = add.vertex.attribute.author.active.ranges( networks.and.data[["networks"]], networks.and.data[["project.data"]], combine.activity.types = TRUE ) @@ -1064,8 +1069,8 @@ test_that("Test add.vertex.attribute.active.ranges with computation over all typ expect_identical(expected.attributes, actual.attributes) }) -#' Test default values for the add.vertex.attribute.active.ranges method -test_that("Test default values of add.vertex.attribute.active.ranges", { +#' Test default values for the add.vertex.attribute.author.active.ranges method 
+test_that("Test default values of add.vertex.attribute.author.active.ranges", { ## Test setup networks.and.data = get.network.covariates.test.networks() @@ -1078,7 +1083,7 @@ test_that("Test default values of add.vertex.attribute.active.ranges", { test.data = networks.and.data[["project.data"]] test.activity.types = c("mails", "issues") test.default.value = "test.default.value" - networks.with.attr = add.vertex.attribute.active.ranges(test.networks, test.data, + networks.with.attr = add.vertex.attribute.author.active.ranges(test.networks, test.data, activity.types = test.activity.types, default.value = test.default.value) actual.attributes = lapply(networks.with.attr, igraph:: get.vertex.attribute, name = "active.ranges") @@ -1311,6 +1316,64 @@ test_that("Test add.vertex.attribute.artifact.first.occurrence", { }) }) +#' Test the add.vertex.attribute.artifact.last.edited method +test_that("Test add.vertex.attribute.artifact.last.edited", { + + ## Test setup + + networks.and.data = get.network.covariates.test.networks("artifact") + + expected.attributes = list( + range = network.covariates.test.build.expected( + c("2016-07-12 15:58:59 UTC"), c("2016-07-12 16:00:45 UTC"), + c("2016-07-12 16:06:32 UTC", "2016-07-12 16:06:32 UTC") + ), + cumulative = network.covariates.test.build.expected( + c("2016-07-12 15:58:59 UTC"), c("2016-07-12 16:00:45 UTC"), + c("2016-07-12 16:06:32 UTC", "2016-07-12 16:06:32 UTC") + ), + all.ranges = network.covariates.test.build.expected( + c("2016-07-12 16:00:45 UTC"), c("2016-07-12 16:00:45 UTC"), + c("2016-07-12 16:06:32 UTC", "2016-07-12 16:06:32 UTC") + ), + project.cumulative = network.covariates.test.build.expected( + c("2016-07-12 15:58:59 UTC"), c("2016-07-12 16:00:45 UTC"), + c("2016-07-12 16:06:32 UTC", "2016-07-12 16:06:32 UTC") + ), + project.all.ranges = network.covariates.test.build.expected( + c("2016-07-12 16:00:45 UTC"), c("2016-07-12 16:00:45 UTC"), + c("2016-07-12 16:06:32 UTC", "2016-07-12 16:06:32 UTC") + ), + complete 
= network.covariates.test.build.expected( + c("2016-07-12 16:00:45 UTC"), c("2016-07-12 16:00:45 UTC"), + c("2016-07-12 16:06:32 UTC", "2016-07-12 16:06:32 UTC") + ) + ) + + ## convert date strings to POSIXct + expected.attributes = lapply(expected.attributes, function(times) { + lapply(times, function(date.vector) { + get.date.from.string(date.vector) + }) + }) + + ## Test + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.artifact.last.edited( + networks.and.data[["networks"]], networks.and.data[["project.data"]], + aggregation.level = level + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "last.edited") + + ## convert UNIX timestamps to POSIXct + actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) + + expect_equal(expected.attributes[[level]], actual.attributes) + }) +}) + #' Test the add.vertex.attribute.artifact.change.count method test_that("Test add.vertex.attribute.artifact.change.count", { @@ -1347,6 +1410,1037 @@ test_that("Test add.vertex.attribute.artifact.change.count", { }) }) +## Unit tests for mail artifact networks + +#' mail thread contributor count +test_that("Test add.vertex.attribute.mail.thread.contributor.count", { + + ## Test setup + + networks.and.data = get.network.covariates.test.networks("artifact", artifact.relation = "mail") + + expected.attributes = list( + range = network.covariates.test.build.expected( + c(2), c(1), c(1) + ), + cumulative = network.covariates.test.build.expected( + c(2), c(1), c(2) + ), + all.ranges = network.covariates.test.build.expected( + c(2), c(2), c(2) + ), + project.cumulative = network.covariates.test.build.expected( + c(2), c(1), c(2) + ), + project.all.ranges = network.covariates.test.build.expected( + c(2), c(2), c(2) + ), + complete = network.covariates.test.build.expected( + c(2), c(2), c(2) + ) + ) + + ## Test + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = 
add.vertex.attribute.mail.thread.contributor.count( + networks.and.data[["networks"]], networks.and.data[["project.data"]], + aggregation.level = level + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "thread.contributor.count") + + expect_equal(expected.attributes[[level]], actual.attributes) + }) +}) + +#' mail thread message count +test_that("Test add.vertex.attribute.mail.thread.message.count", { + + ## Test setup + + networks.and.data = get.network.covariates.test.networks("artifact", artifact.relation = "mail") + + expected.attributes = list( + range = network.covariates.test.build.expected( + c(2), c(1), c(1) + ), + cumulative = network.covariates.test.build.expected( + c(2), c(1), c(2) + ), + all.ranges = network.covariates.test.build.expected( + c(2), c(2), c(2) + ), + project.cumulative = network.covariates.test.build.expected( + c(2), c(1), c(2) + ), + project.all.ranges = network.covariates.test.build.expected( + c(2), c(2), c(2) + ), + complete = network.covariates.test.build.expected( + c(2), c(2), c(2) + ) + ) + + ## Test + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.mail.thread.message.count( + networks.and.data[["networks"]], networks.and.data[["project.data"]], + aggregation.level = level + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "thread.message.count") + + expect_equal(expected.attributes[[level]], actual.attributes) + }) +}) + +#' mail thread start date +test_that("Test add.vertex.attribute.mail.thread.start.date", { + + ## Test setup + + networks.and.data = get.network.covariates.test.networks("artifact", artifact.relation = "mail") + + expected.attributes = list( + range = network.covariates.test.build.expected( + c("2016-07-12 15:58:40 UTC"), c("2016-07-12 16:04:40 UTC"), c("2016-07-12 16:05:37 UTC") + ), + cumulative = network.covariates.test.build.expected( + c("2016-07-12 15:58:40 UTC"), c("2016-07-12 
16:04:40 UTC"), c("2016-07-12 16:04:40 UTC") + ), + all.ranges = network.covariates.test.build.expected( + c("2016-07-12 15:58:40 UTC"), c("2016-07-12 16:04:40 UTC"), c("2016-07-12 16:04:40 UTC") + ), + project.cumulative = network.covariates.test.build.expected( + c("2016-07-12 15:58:40 UTC"), c("2016-07-12 16:04:40 UTC"), c("2016-07-12 16:04:40 UTC") + ), + project.all.ranges = network.covariates.test.build.expected( + c("2016-07-12 15:58:40 UTC"), c("2016-07-12 16:04:40 UTC"), c("2016-07-12 16:04:40 UTC") + ), + complete = network.covariates.test.build.expected( + c("2016-07-12 15:58:40 UTC"), c("2016-07-12 16:04:40 UTC"), c("2016-07-12 16:04:40 UTC") + ) + ) + + ## convert date strings to POSIXct + expected.attributes = lapply(expected.attributes, function(times) { + lapply(times, function(date.vector) { + get.date.from.string(date.vector) + }) + }) + + ## Test + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.mail.thread.start.date( + networks.and.data[["networks"]], networks.and.data[["project.data"]], + aggregation.level = level + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "thread.start.date") + + ## convert UNIX timestamps to POSIXct + actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) + + expect_equal(expected.attributes[[level]], actual.attributes) + }) +}) + +#' mail thread end date +test_that("Test add.vertex.attribute.mail.thread.end.date", { + + ## Test setup + + networks.and.data = get.network.covariates.test.networks("artifact", artifact.relation = "mail") + + expected.attributes = list( + range = network.covariates.test.build.expected( + c("2016-07-12 15:58:50 UTC"), c("2016-07-12 16:04:40 UTC"), c("2016-07-12 16:05:37 UTC") + ), + cumulative = network.covariates.test.build.expected( + c("2016-07-12 15:58:50 UTC"), c("2016-07-12 16:04:40 UTC"), c("2016-07-12 16:05:37 UTC") + ), + all.ranges = network.covariates.test.build.expected( + 
c("2016-07-12 15:58:50 UTC"), c("2016-07-12 16:05:37 UTC"), c("2016-07-12 16:05:37 UTC") + ), + project.cumulative = network.covariates.test.build.expected( + c("2016-07-12 15:58:50 UTC"), c("2016-07-12 16:04:40 UTC"), c("2016-07-12 16:05:37 UTC") + ), + project.all.ranges = network.covariates.test.build.expected( + c("2016-07-12 15:58:50 UTC"), c("2016-07-12 16:05:37 UTC"), c("2016-07-12 16:05:37 UTC") + ), + complete = network.covariates.test.build.expected( + c("2016-07-12 15:58:50 UTC"), c("2016-07-12 16:05:37 UTC"), c("2016-07-12 16:05:37 UTC") + ) + ) + + ## convert date strings to POSIXct + expected.attributes = lapply(expected.attributes, function(times) { + lapply(times, function(date.vector) { + get.date.from.string(date.vector) + }) + }) + + ## Test + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.mail.thread.end.date( + networks.and.data[["networks"]], networks.and.data[["project.data"]], + aggregation.level = level + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "thread.end.date") + + ## convert UNIX timestamps to POSIXct + actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) + + expect_equal(expected.attributes[[level]], actual.attributes) + }) +}) + +#' mail thread originating mailing list +test_that("Test add.vertex.attribute.mail.thread.originating.mailing.list", { + + ## Test setup + + networks.and.data = get.network.covariates.test.networks("artifact", artifact.relation = "mail", bins = mybins.since.2010) + + expected.attributes = network.covariates.test.build.expected.since.2010( + c("42", "42", "42"), c("13"), c("13"), c("13") + ) + + ## Test + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.mail.thread.originating.mailing.list( + networks.and.data[["networks"]], networks.and.data[["project.data"]], + aggregation.level = level + ) + + actual.attributes = lapply(networks.with.attr, 
igraph::get.vertex.attribute, + name = "thread.originating.mailing.list") + + expect_equal(expected.attributes, actual.attributes) + }) +}) + +## Unit tests for issue artifact networks + +#' Helper function for add.vertex.attribute.issue.* tests +#' +#' Merges expected data for issue networks, where attributes may be considered for issues only, PRs only, or both. +#' This function returns the data for both, given the data for issues only and PRs only, where \code{NA} is used for +#' vertices of the other type. +#' The data is given as lists of lists of vectors, and merged by replacing the \code{NA}-values of the first list +#' with the non-\code{NA} values of the second. Therefore, it only works if all vertices have a non-\code{NA} value in +#' exactly one of the two lists. +#' +#' @param expected.attributes.issues.only a list of lists of vectors, containing some value for each issue vertex and +#' \code{NA} for PR vertices +#' @param expected.attributes.prs.only a list of lists of vectors, containing some value for each PR vertex and +#' \code{NA} for issue vertices +#' +#' @return a list of lists of vectors, containing values for issue and PR vertices +merge.expected.attributes = function(expected.attributes.issues.only, expected.attributes.prs.only) { + result = lapply(names(expected.attributes.issues.only), function(n) { + issue.attr = expected.attributes.issues.only[[n]] + pr.attr = expected.attributes.prs.only[[n]] + sum.attr = lapply(names(issue.attr), function (n2) { + a = issue.attr[[n2]] + b = pr.attr[[n2]] + ## assign the non-NA values of b to the previously-NA values of a. + ## this only works properly if, at each index, exactly one of the vectors is NA. 
+ a[is.na(a)] = b[!is.na(b)] + return(a) + }) + names(sum.attr) = names(issue.attr) + return(sum.attr) + }) + names(result) = names(expected.attributes.issues.only) + return(result) +} + +#' issue contributor count +test_that("Test add.vertex.attribute.issue.contributor.count", { + ## Test setup + networks.and.data = get.network.covariates.test.networks("artifact", issues = TRUE, artifact.relation = "issue") + + expected.attributes.issues.only = list( + range = network.covariates.test.build.expected(c(1L, 2L, NA), c(NA, 1L, NA, 1L, NA), c(NA, 2L, 2L, 1L)), + cumulative = network.covariates.test.build.expected(c(1L, 2L, NA), c(NA, 1L, NA, 2L, NA), c(NA, 2L, 2L, 3L)), + all.ranges = network.covariates.test.build.expected(c(3L, 2L, NA), c(NA, 2L, NA, 3L, NA), c(NA, 2L, 2L, 3L)), + project.cumulative = network.covariates.test.build.expected(c(1L, 2L, NA), c(NA, 1L, NA, 2L, NA), c(NA, 2L, 2L, 3L)), + project.all.ranges = network.covariates.test.build.expected(c(3L, 2L, NA), c(NA, 2L, NA, 3L, NA), c(NA, 2L, 2L, 3L)), + complete = network.covariates.test.build.expected(c(4L, 3L, NA), c(NA, 2L, NA, 4L, NA), c(NA, 3L, 2L, 4L)) + ) + + expected.attributes.prs.only = list( + range = network.covariates.test.build.expected(c(NA, NA, 1L), c(1L, NA, 2L, NA, 1L), c(2L, NA, NA, NA)), + cumulative = network.covariates.test.build.expected(c(NA, NA, 1L), c(2L, NA, 2L, NA, 1L), c(3L, NA, NA, NA)), + all.ranges = network.covariates.test.build.expected(c(NA, NA, 3L), c(3L, NA, 2L, NA, 1L), c(3L, NA, NA, NA)), + project.cumulative = network.covariates.test.build.expected(c(NA, NA, 1L), c(2L, NA, 2L, NA, 2L), c(3L, NA, NA, NA)), + project.all.ranges = network.covariates.test.build.expected(c(NA, NA, 3L), c(3L, NA, 2L, NA, 2L), c(3L, NA, NA, NA)), + complete = network.covariates.test.build.expected(c(NA, NA, 3L), c(3L, NA, 2L, NA, 2L), c(3L, NA, NA, NA)) + ) + + expected.attributes.both = merge.expected.attributes(expected.attributes.issues.only, expected.attributes.prs.only) + + ## Test 
issues only + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.contributor.count( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "issues" + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.contributor.count") + + expect_identical(expected.attributes.issues.only[[level]], actual.attributes) + }) + + # Test PRs only + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.contributor.count( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, + type = "pull.requests") + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pr.contributor.count") + + expect_identical(expected.attributes.prs.only[[level]], actual.attributes) + }) + + # Test both + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.contributor.count( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "all" + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.contributor.count") + + expect_identical(expected.attributes.both[[level]], actual.attributes) + }) +}) + +test_that("Test add.vertex.attribute.issue.contributor.count with issues.only.comments", { + ## Test setup + networks.and.data = get.network.covariates.test.networks("artifact", issues = TRUE, artifact.relation = "issue", + issues.only.comments = TRUE) + + expected.attributes.issues.only = list( + range = network.covariates.test.build.expected(c(NA, 1L), c(NA, NA, 1L, 1L), c(NA, 2L)), + cumulative = network.covariates.test.build.expected(c(NA, 1L), c(NA, NA, 1L, 1L), c(NA, 2L)), + all.ranges = network.covariates.test.build.expected(c(NA, 1L), c(NA, NA, 2L, 1L), c(NA, 2L)), + project.cumulative = 
network.covariates.test.build.expected(c(NA, 1L), c(NA, NA, 1L, 1L), c(NA, 2L)), + project.all.ranges = network.covariates.test.build.expected(c(NA, 1L), c(NA, NA, 2L, 1L), c(NA, 2L)), + complete = network.covariates.test.build.expected(c(NA, 1L), c(NA, NA, 2L, 2L), c(NA, 2L)) + ) + + expected.attributes.prs.only = list( + range = network.covariates.test.build.expected(c(1L, NA), c(1L, 1L, NA, NA), c(1L, NA)), + cumulative = network.covariates.test.build.expected(c(1L, NA), c(2L, 1L, NA, NA), c(3L, NA)), + all.ranges = network.covariates.test.build.expected(c(3L, NA), c(3L, 1L, NA, NA), c(3L, NA)), + project.cumulative = network.covariates.test.build.expected(c(1L, NA), c(2L, 1L, NA, NA), c(3L, NA)), + project.all.ranges = network.covariates.test.build.expected(c(3L, NA), c(3L, 1L, NA, NA), c(3L, NA)), + complete = network.covariates.test.build.expected(c(3L, NA), c(3L, 1L, NA, NA), c(3L, NA)) + ) + + expected.attributes.both = merge.expected.attributes(expected.attributes.issues.only, expected.attributes.prs.only) + + ## Test issues only + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.contributor.count( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "issues" + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.contributor.count") + + expect_identical(expected.attributes.issues.only[[level]], actual.attributes) + }) + + # Test PRs only + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.contributor.count( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, + type = "pull.requests") + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pr.contributor.count") + + expect_identical(expected.attributes.prs.only[[level]], actual.attributes) + }) + + # Test both + + lapply(AGGREGATION.LEVELS, 
function(level) { + networks.with.attr = add.vertex.attribute.issue.contributor.count( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "all" + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.contributor.count") + + expect_identical(expected.attributes.both[[level]], actual.attributes) + }) +}) + +test_that("Test add.vertex.attribute.issue.contributor.count with issues.only.comments and use.unfiltered.data", { + ## Test setup + networks.and.data = get.network.covariates.test.networks("artifact", issues = TRUE, artifact.relation = "issue", + issues.only.comments = TRUE) + + expected.attributes.issues.only = list( + range = network.covariates.test.build.expected(c(NA, 2L), c(NA, NA, 1L, 1L), c(NA, 2L)), + cumulative = network.covariates.test.build.expected(c(NA, 2L), c(NA, NA, 1L, 2L), c(NA, 2L)), + all.ranges = network.covariates.test.build.expected(c(NA, 2L), c(NA, NA, 2L, 3L), c(NA, 2L)), + project.cumulative = network.covariates.test.build.expected(c(NA, 2L), c(NA, NA, 1L, 2L), c(NA, 2L)), + project.all.ranges = network.covariates.test.build.expected(c(NA, 2L), c(NA, NA, 2L, 3L), c(NA, 2L)), + complete = network.covariates.test.build.expected(c(NA, 3L), c(NA, NA, 2L, 4L), c(NA, 2L)) + ) + + expected.attributes.prs.only = list( + range = network.covariates.test.build.expected(c(1L, NA), c(1L, 2L, NA, NA), c(2L, NA)), + cumulative = network.covariates.test.build.expected(c(1L, NA), c(2L, 2L, NA, NA), c(3L, NA)), + all.ranges = network.covariates.test.build.expected(c(3L, NA), c(3L, 2L, NA, NA), c(3L, NA)), + project.cumulative = network.covariates.test.build.expected(c(1L, NA), c(2L, 2L, NA, NA), c(3L, NA)), + project.all.ranges = network.covariates.test.build.expected(c(3L, NA), c(3L, 2L, NA, NA), c(3L, NA)), + complete = network.covariates.test.build.expected(c(3L, NA), c(3L, 2L, NA, NA), c(3L, NA)) + ) + + expected.attributes.both = 
merge.expected.attributes(expected.attributes.issues.only, expected.attributes.prs.only) + + ## Test issues only + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.contributor.count( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, + type = "issues", use.unfiltered.data = TRUE + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.contributor.count") + + expect_identical(expected.attributes.issues.only[[level]], actual.attributes) + }) + + # Test PRs only + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.contributor.count( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, + type = "pull.requests", use.unfiltered.data = TRUE) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pr.contributor.count") + + expect_identical(expected.attributes.prs.only[[level]], actual.attributes) + }) + + # Test both + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.contributor.count( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, + type = "all", use.unfiltered.data = TRUE + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.contributor.count") + + expect_identical(expected.attributes.both[[level]], actual.attributes) + }) +}) + +#' issue event count +test_that("Test add.vertex.attribute.issue.event.count", { + ## Test setup + networks.and.data = get.network.covariates.test.networks("artifact", issues = TRUE, artifact.relation = "issue") + + expected.attributes.issues.only = list( + range = network.covariates.test.build.expected(c(1L, 3L, NA), c(NA, 2L, NA, 1L, NA), c(NA, 3L, 4L, 1L)), + cumulative = network.covariates.test.build.expected(c(1L, 3L, NA), c(NA, 2L, NA, 
2L, NA), c(NA, 6L, 6L, 3L)), + all.ranges = network.covariates.test.build.expected(c(3L, 6L, NA), c(NA, 6L, NA, 3L, NA), c(NA, 6L, 6L, 3L)), + project.cumulative = network.covariates.test.build.expected(c(1L, 3L, NA), c(NA, 2L, NA, 2L, NA), c(NA, 6L, 6L, 3L)), + project.all.ranges = network.covariates.test.build.expected(c(3L, 6L, NA), c(NA, 6L, NA, 3L, NA), c(NA, 6L, 6L, 3L)), + complete = network.covariates.test.build.expected(c(8L, 7L, NA), c(NA, 6L, NA, 8L, NA), c(NA, 7L, 6L, 8L)) + ) + + expected.attributes.prs.only = list( + range = network.covariates.test.build.expected(c(NA, NA, 2L), c(1L, NA, 2L, NA, 1L), c(2L, NA, NA, NA)), + cumulative = network.covariates.test.build.expected(c(NA, NA, 2L), c(3L, NA, 2L, NA, 1L), c(5L, NA, NA, NA)), + all.ranges = network.covariates.test.build.expected(c(NA, NA, 5L), c(5L, NA, 2L, NA, 1L), c(5L, NA, NA, NA)), + project.cumulative = network.covariates.test.build.expected(c(NA, NA, 2L), c(3L, NA, 2L, NA, 2L), c(5L, NA, NA, NA)), + project.all.ranges = network.covariates.test.build.expected(c(NA, NA, 5L), c(5L, NA, 2L, NA, 2L), c(5L, NA, NA, NA)), + complete = network.covariates.test.build.expected(c(NA, NA, 5L), c(5L, NA, 2L, NA, 2L), c(5L, NA, NA, NA)) + ) + + expected.attributes.both = merge.expected.attributes(expected.attributes.issues.only, expected.attributes.prs.only) + + ## Test issues only + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.event.count( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "issues" + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.event.count") + + expect_identical(expected.attributes.issues.only[[level]], actual.attributes) + }) + + # Test PRs only + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.event.count( + networks.and.data[["networks"]], networks.and.data[["project.data"]], 
aggregation.level = level, + type = "pull.requests") + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pr.event.count") + + expect_identical(expected.attributes.prs.only[[level]], actual.attributes) + }) + + # Test both + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.event.count( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "all" + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.event.count") + + expect_identical(expected.attributes.both[[level]], actual.attributes) + }) +}) + +#' issue comment count +test_that("Test add.vertex.attribute.issue.comment.count", { + ## Test setup + networks.and.data = get.network.covariates.test.networks("artifact", issues = TRUE, artifact.relation = "issue") + + expected.attributes.issues.only = list( + range = network.covariates.test.build.expected(c(0L, 1L, NA), c(NA, 1L, NA, 1L, NA), c(NA, 0L, 4L, 0L)), + cumulative = network.covariates.test.build.expected(c(0L, 1L, NA), c(NA, 1L, NA, 1L, NA), c(NA, 1L, 5L, 1L)), + all.ranges = network.covariates.test.build.expected(c(1L, 1L, NA), c(NA, 5L, NA, 1L, NA), c(NA, 1L, 5L, 1L)), + project.cumulative = network.covariates.test.build.expected(c(0L, 1L, NA), c(NA, 1L, NA, 1L, NA), c(NA, 1L, 5L, 1L)), + project.all.ranges = network.covariates.test.build.expected(c(1L, 1L, NA), c(NA, 5L, NA, 1L, NA), c(NA, 1L, 5L, 1L)), + complete = network.covariates.test.build.expected(c(2L, 1L, NA), c(NA, 5L, NA, 2L, NA), c(NA, 1L, 5L, 2L)) + ) + + expected.attributes.prs.only = list( + range = network.covariates.test.build.expected(c(NA, NA, 1L), c(1L, NA, 1L, NA, 0L), c(1L, NA, NA, NA)), + cumulative = network.covariates.test.build.expected(c(NA, NA, 1L), c(2L, NA, 1L, NA, 0L), c(3L, NA, NA, NA)), + all.ranges = network.covariates.test.build.expected(c(NA, NA, 3L), c(3L, NA, 1L, NA, 0L), c(3L, NA, NA, NA)), 
+ project.cumulative = network.covariates.test.build.expected(c(NA, NA, 1L), c(2L, NA, 1L, NA, 1L), c(3L, NA, NA, NA)), + project.all.ranges = network.covariates.test.build.expected(c(NA, NA, 3L), c(3L, NA, 1L, NA, 1L), c(3L, NA, NA, NA)), + complete = network.covariates.test.build.expected(c(NA, NA, 3L), c(3L, NA, 1L, NA, 1L), c(3L, NA, NA, NA)) + ) + + expected.attributes.both = merge.expected.attributes(expected.attributes.issues.only, expected.attributes.prs.only) + + ## Test issues only + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.comment.event.count( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "issues" + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.comment.event.count") + + expect_identical(expected.attributes.issues.only[[level]], actual.attributes) + }) + + # Test PRs only + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.comment.event.count( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, + type = "pull.requests") + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pr.comment.event.count") + + expect_identical(expected.attributes.prs.only[[level]], actual.attributes) + }) + + # Test both + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.comment.event.count( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "all" + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.comment.event.count") + + expect_identical(expected.attributes.both[[level]], actual.attributes) + }) +}) + +#' issue opened date +test_that("Test add.vertex.attribute.issue.opened.date", { + ## Test setup + networks.and.data = 
get.network.covariates.test.networks("artifact", issues = TRUE, artifact.relation = "issue") + + expected.attributes.issues.only = network.covariates.test.build.expected( + c("2016-07-12 14:30:13", + "2016-07-12 15:59:25", + NA), + c(NA, + "2016-07-12 16:01:30", + NA, + "2016-07-12 14:30:13", + NA), + c(NA, + "2016-07-12 15:59:25", + "2016-07-12 16:01:30", + "2016-07-12 14:30:13")) + + expected.attributes.prs.only = network.covariates.test.build.expected( + c(NA, + NA, + "2016-07-14 13:37:00"), + c("2016-07-14 13:37:00", + NA, + "2016-07-12 16:02:02", + NA, + "2016-07-12 14:59:25"), + c("2016-07-14 13:37:00", + NA, + NA, + NA)) + + expected.attributes.both = network.covariates.test.build.expected( + c("2016-07-12 14:30:13", + "2016-07-12 15:59:25", + "2016-07-14 13:37:00"), + c("2016-07-14 13:37:00", + "2016-07-12 16:01:30", + "2016-07-12 16:02:02", + "2016-07-12 14:30:13", + "2016-07-12 14:59:25"), + c("2016-07-14 13:37:00", + "2016-07-12 15:59:25", + "2016-07-12 16:01:30", + "2016-07-12 14:30:13")) + + ## convert date strings to POSIXct + expected.attributes.issues.only = lapply(expected.attributes.issues.only, function(date.vector) {get.date.from.string(date.vector)}) + + expected.attributes.prs.only = lapply(expected.attributes.prs.only, function(date.vector) {get.date.from.string(date.vector)}) + + expected.attributes.both = lapply(expected.attributes.both, function(date.vector) {get.date.from.string(date.vector)}) + + ## Test issues only + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.opened.date( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "issues" + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.opened.date") + ## convert UNIX timestamps to POSIXct + actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) + + expect_identical(expected.attributes.issues.only, actual.attributes) 
+ }) + + # Test PRs only + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.opened.date( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, + type = "pull.requests") + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pr.opened.date") + ## convert UNIX timestamps to POSIXct + actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) + + expect_identical(expected.attributes.prs.only, actual.attributes) + }) + + # Test both + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.opened.date( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "all" + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.opened.date") + ## convert UNIX timestamps to POSIXct + actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) + + expect_identical(expected.attributes.both, actual.attributes) + }) +}) + +#' issue closed date +test_that("Test add.vertex.attribute.issue.closed.date", { + ## Test setup + networks.and.data = get.network.covariates.test.networks("artifact", issues = TRUE, artifact.relation = "issue") + + expected.attributes.issues.only = network.covariates.test.build.expected( + c(NA, + "2016-07-12 16:06:30", + NA), + c(NA, + NA, + NA, + NA, + NA), + c(NA, + "2016-07-12 16:06:30", + NA, + NA)) + + expected.attributes.prs.only = network.covariates.test.build.expected( + c(NA, + NA, + NA), + c(NA, + NA, + NA, + NA, + "2016-07-12 16:04:59"), + c(NA, + NA, + NA, + NA)) + + expected.attributes.both = network.covariates.test.build.expected( + c(NA, + "2016-07-12 16:06:30", + NA), + c(NA, + NA, + NA, + NA, + "2016-07-12 16:04:59"), + c(NA, + "2016-07-12 16:06:30", + NA, + NA)) + + ## convert date strings to POSIXct + expected.attributes.issues.only = 
lapply(expected.attributes.issues.only, function(date.vector) {get.date.from.string(date.vector)}) + + expected.attributes.prs.only = lapply(expected.attributes.prs.only, function(date.vector) {get.date.from.string(date.vector)}) + + expected.attributes.both = lapply(expected.attributes.both, function(date.vector) {get.date.from.string(date.vector)}) + + ## Test issues only + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.closed.date( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "issues" + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.closed.date") + ## convert UNIX timestamps to POSIXct + actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) + + expect_identical(expected.attributes.issues.only, actual.attributes) + }) + + # Test PRs only + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.closed.date( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, + type = "pull.requests") + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pr.closed.date") + ## convert UNIX timestamps to POSIXct + actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) + + expect_identical(expected.attributes.prs.only, actual.attributes) + }) + + # Test both + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.closed.date( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "all" + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.closed.date") + ## convert UNIX timestamps to POSIXct + actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) + + expect_identical(expected.attributes.both, 
actual.attributes) + }) +}) + +#' issue last activity date +test_that("Test add.vertex.attribute.issue.last.activity.date", { + ## Test setup + networks.and.data = get.network.covariates.test.networks("artifact", issues = TRUE, artifact.relation = "issue") + + expected.attributes.issues.only = list( + range = network.covariates.test.build.expected( + c("2016-07-12 15:30:02", "2016-07-12 15:59:59", NA), + c(NA , "2016-07-12 16:02:30", NA , "2016-07-12 16:03:59", NA), + c(NA , "2016-08-31 16:45:09", "2016-07-28 06:27:52", "2016-08-31 15:30:02")), + cumulative = network.covariates.test.build.expected( + c("2016-07-12 15:30:02", "2016-07-12 15:59:59", NA), + c(NA , "2016-07-12 16:02:30", NA , "2016-07-12 16:03:59", NA), + c(NA , "2016-08-31 16:45:09", "2016-07-28 06:27:52", "2016-08-31 15:30:02")), + all.ranges = network.covariates.test.build.expected( + c("2016-08-31 15:30:02", "2016-08-31 16:45:09", NA), + c(NA , "2016-07-28 06:27:52", NA , "2016-08-31 15:30:02", NA), + c(NA , "2016-08-31 16:45:09", "2016-07-28 06:27:52", "2016-08-31 15:30:02")), + project.cumulative = network.covariates.test.build.expected( + c("2016-07-12 15:30:02", "2016-07-12 15:59:59", NA), + c(NA , "2016-07-12 16:02:30", NA , "2016-07-12 16:03:59", NA), + c(NA , "2016-08-31 16:45:09", "2016-07-28 06:27:52", "2016-08-31 15:30:02")), + project.all.ranges = network.covariates.test.build.expected( + c("2016-08-31 15:30:02", "2016-08-31 16:45:09", NA), + c(NA , "2016-07-28 06:27:52", NA , "2016-08-31 15:30:02", NA), + c(NA , "2016-08-31 16:45:09", "2016-07-28 06:27:52", "2016-08-31 15:30:02")), + complete = network.covariates.test.build.expected( + c("2017-05-23 12:32:39", "2016-10-05 16:45:09", NA), + c(NA , "2016-07-28 06:27:52", NA , "2017-05-23 12:32:39", NA), + c(NA , "2016-10-05 16:45:09", "2016-07-28 06:27:52", "2017-05-23 12:32:39"))) + + expected.attributes.prs.only = list( + range = network.covariates.test.build.expected( + c(NA , NA , "2016-07-12 15:59:59"), + c("2016-07-12 16:01:01", NA 
, "2016-07-12 16:02:02", NA , "2016-07-12 16:04:59"), + c("2016-07-14 13:37:00", NA , NA , NA)), + cumulative = network.covariates.test.build.expected( + c(NA , NA , "2016-07-12 15:59:59"), + c("2016-07-12 16:01:01", NA , "2016-07-12 16:02:02", NA , "2016-07-12 16:04:59"), + c("2016-07-14 13:37:00", NA , NA , NA)), + all.ranges = network.covariates.test.build.expected( + c(NA , NA , "2016-07-14 13:37:00"), + c("2016-07-14 13:37:00", NA , "2016-07-12 16:02:02", NA , "2016-07-12 16:04:59"), + c("2016-07-14 13:37:00", NA , NA , NA)), + project.cumulative = network.covariates.test.build.expected( + c(NA , NA , "2016-07-12 15:59:59"), + c("2016-07-12 16:01:01", NA , "2016-07-12 16:02:02", NA , "2016-07-12 16:04:59"), + c("2016-07-14 13:37:00", NA , NA , NA)), + project.all.ranges = network.covariates.test.build.expected( + c(NA , NA , "2016-07-14 13:37:00"), + c("2016-07-14 13:37:00", NA , "2016-07-12 16:02:02", NA , "2016-07-12 16:04:59"), + c("2016-07-14 13:37:00", NA , NA , NA)), + complete = network.covariates.test.build.expected( + c(NA , NA , "2016-07-14 13:37:00"), + c("2016-07-14 13:37:00", NA , "2016-07-12 16:02:02", NA , "2016-07-12 16:04:59"), + c("2016-07-14 13:37:00", NA , NA , NA))) + + expected.attributes.both = list( + range = network.covariates.test.build.expected( + c("2016-07-12 15:30:02", "2016-07-12 15:59:59", "2016-07-12 15:59:59"), + c("2016-07-12 16:01:01", "2016-07-12 16:02:30", "2016-07-12 16:02:02", "2016-07-12 16:03:59", "2016-07-12 16:04:59"), + c("2016-07-14 13:37:00", "2016-08-31 16:45:09", "2016-07-28 06:27:52", "2016-08-31 15:30:02")), + cumulative = network.covariates.test.build.expected( + c("2016-07-12 15:30:02", "2016-07-12 15:59:59", "2016-07-12 15:59:59"), + c("2016-07-12 16:01:01", "2016-07-12 16:02:30", "2016-07-12 16:02:02", "2016-07-12 16:03:59", "2016-07-12 16:04:59"), + c("2016-07-14 13:37:00", "2016-08-31 16:45:09", "2016-07-28 06:27:52", "2016-08-31 15:30:02")), + all.ranges = network.covariates.test.build.expected( + 
c("2016-08-31 15:30:02", "2016-08-31 16:45:09", "2016-07-14 13:37:00"), + c("2016-07-14 13:37:00", "2016-07-28 06:27:52", "2016-07-12 16:02:02", "2016-08-31 15:30:02", "2016-07-12 16:04:59"), + c("2016-07-14 13:37:00", "2016-08-31 16:45:09", "2016-07-28 06:27:52", "2016-08-31 15:30:02")), + project.cumulative = network.covariates.test.build.expected( + c("2016-07-12 15:30:02", "2016-07-12 15:59:59", "2016-07-12 15:59:59"), + c("2016-07-12 16:01:01", "2016-07-12 16:02:30", "2016-07-12 16:02:02", "2016-07-12 16:03:59", "2016-07-12 16:04:59"), + c("2016-07-14 13:37:00", "2016-08-31 16:45:09", "2016-07-28 06:27:52", "2016-08-31 15:30:02")), + project.all.ranges = network.covariates.test.build.expected( + c("2016-08-31 15:30:02", "2016-08-31 16:45:09", "2016-07-14 13:37:00"), + c("2016-07-14 13:37:00", "2016-07-28 06:27:52", "2016-07-12 16:02:02", "2016-08-31 15:30:02", "2016-07-12 16:04:59"), + c("2016-07-14 13:37:00", "2016-08-31 16:45:09", "2016-07-28 06:27:52", "2016-08-31 15:30:02")), + complete = network.covariates.test.build.expected( + c("2017-05-23 12:32:39", "2016-10-05 16:45:09", "2016-07-14 13:37:00"), + c("2016-07-14 13:37:00", "2016-07-28 06:27:52", "2016-07-12 16:02:02", "2017-05-23 12:32:39", "2016-07-12 16:04:59"), + c("2016-07-14 13:37:00", "2016-10-05 16:45:09", "2016-07-28 06:27:52", "2017-05-23 12:32:39"))) + + ## convert date strings to POSIXct + expected.attributes.issues.only = lapply(expected.attributes.issues.only, function(times) { + lapply(times, function(date.vector) { + get.date.from.string(date.vector) + }) + }) + expected.attributes.prs.only = lapply(expected.attributes.prs.only, function(times) { + lapply(times, function(date.vector) { + get.date.from.string(date.vector) + }) + }) + expected.attributes.both = lapply(expected.attributes.both, function(times) { + lapply(times, function(date.vector) { + get.date.from.string(date.vector) + }) + }) + + ## Test issues only + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = 
add.vertex.attribute.issue.last.activity.date( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "issues" + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.last.activity") + ## convert UNIX timestamps to POSIXct + actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) + + expect_identical(expected.attributes.issues.only[[level]], actual.attributes) + }) + + # Test PRs only + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.last.activity.date( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, + type = "pull.requests") + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pr.last.activity") + ## convert UNIX timestamps to POSIXct + actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) + + expect_identical(expected.attributes.prs.only[[level]], actual.attributes) + }) + + # Test both + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.last.activity.date( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "all" + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.last.activity") + ## convert UNIX timestamps to POSIXct + actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) + + expect_identical(expected.attributes.both[[level]], actual.attributes) + }) +}) + +#' issue title +test_that("Test add.vertex.attribute.issue.title", { + ## Test setup + networks.and.data = get.network.covariates.test.networks("artifact", issues = TRUE, artifact.relation = "issue") + + expected.attributes.issues.only = network.covariates.test.build.expected( + c("Distinguish directedness of networks and edge-construction algorithm", + "Error in 
construct.networks.from.list for openssl function networks", + NA), + c(NA, + "[ZEPPELIN-332] CNFE when running SQL query against Cassandra temp table", + NA, + "Distinguish directedness of networks and edge-construction algorithm", + NA), + c(NA, + "Error in construct.networks.from.list for openssl function networks", + "[ZEPPELIN-332] CNFE when running SQL query against Cassandra temp table", + "Distinguish directedness of networks and edge-construction algorithm")) + + expected.attributes.prs.only = network.covariates.test.build.expected( + c(NA, + NA, + "Example pull request 1"), + c("Example pull request 1", + NA, + "Example pull request 4", + NA, + "Example pull request 2"), + c("Example pull request 1", + NA, + NA, + NA)) + + expected.attributes.both = network.covariates.test.build.expected( + c("Distinguish directedness of networks and edge-construction algorithm", + "Error in construct.networks.from.list for openssl function networks", + "Example pull request 1"), + c("Example pull request 1", + "[ZEPPELIN-332] CNFE when running SQL query against Cassandra temp table", + "Example pull request 4", + "Distinguish directedness of networks and edge-construction algorithm", + "Example pull request 2"), + c("Example pull request 1", + "Error in construct.networks.from.list for openssl function networks", + "[ZEPPELIN-332] CNFE when running SQL query against Cassandra temp table", + "Distinguish directedness of networks and edge-construction algorithm")) + + ## Test issues only + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.title( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "issues" + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.title") + + expect_identical(expected.attributes.issues.only, actual.attributes) + }) + + # Test PRs only + + lapply(AGGREGATION.LEVELS, function(level) { + 
networks.with.attr = add.vertex.attribute.issue.title( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, + type = "pull.requests") + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pr.title") + + expect_identical(expected.attributes.prs.only, actual.attributes) + }) + + # Test both + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.title( + networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "all" + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.title") + + expect_identical(expected.attributes.both, actual.attributes) + }) +}) + +#' pull request state +test_that("Test add.vertex.attribute.pr.open.merged.or.closed", { + + ## Test setup + + networks.and.data = get.network.covariates.test.networks("artifact", artifact.relation = "issue") + + expected.attributes = network.covariates.test.build.expected( + c(NA, NA, "open"), c("open", NA, "open", NA, "merged"), c("open", NA, NA, NA) + ) + + ## Test + + networks.with.attr = add.vertex.attribute.pr.open.merged.or.closed( + networks.and.data[["networks"]], networks.and.data[["project.data"]]) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pull.request.state") + + expect_equal(expected.attributes, actual.attributes) +}) + +#' issue is pull request +test_that("Test add.vertex.attribute.issue.is.pull.request", { + + ## Test setup + + networks.and.data = get.network.covariates.test.networks("artifact", artifact.relation = "issue") + + expected.attributes = network.covariates.test.build.expected( + c(FALSE, FALSE, TRUE), c(TRUE, FALSE, TRUE, FALSE, TRUE), c(TRUE, FALSE, FALSE, FALSE) + ) + + ## Test + + lapply(AGGREGATION.LEVELS, function(level) { + networks.with.attr = add.vertex.attribute.issue.is.pull.request( + networks.and.data[["networks"]], 
networks.and.data[["project.data"]], + aggregation.level = level + ) + + actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.is.pull.request") + + expect_equal(expected.attributes, actual.attributes) + }) +}) + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Unit tests for empty attribute data ------------------------------------- @@ -1367,7 +2461,7 @@ test_that("Test addition of attributes despite of empty data", { names(networks) = range ## add commit-count attribute - net.commit.count = add.vertex.attribute.commit.count.author(networks, proj.data.empty, default = 0L)[[1]] + net.commit.count = add.vertex.attribute.author.commit.count(networks, proj.data.empty, default = 0L)[[1]] expect_true("commit.count" %in% igraph::list.vertex.attributes(net.commit.count)) ## add author-role attribute: @@ -1399,7 +2493,7 @@ test_that("Test addition of attributes despite of non-captured vertices", { names(networks) = range ## add commit-count attribute - net.commit.count = add.vertex.attribute.commit.count.committer.and.author(networks, proj.data.empty, default = 0L)[[1]] + net.commit.count = add.vertex.attribute.author.commit.count.committer.and.author(networks, proj.data.empty, default = 0L)[[1]] ## check existence and proper value expect_true("commit.count.committer.and.author" %in% igraph::list.vertex.attributes(net.commit.count)) diff --git a/tests/test-networks-cut.R b/tests/test-networks-cut.R index 8baf7421..c959b097 100644 --- a/tests/test-networks-cut.R +++ b/tests/test-networks-cut.R @@ -69,7 +69,7 @@ test_that("Cut commit and mail data to same date range.", { date = get.date.from.string(c("2016-07-12 16:04:40", "2016-07-12 16:05:37")), date.offset = as.integer(c(100, 200)), subject = c("Re: Fw: busybox 2 tab", "Re: Fw: busybox 10"), - thread = sprintf("", c(9, 9)), + thread = sprintf("", c("13#9", "13#9")), artifact.type = c("Mail", "Mail")) commit.data = 
x$get.project.data()$get.commits.unfiltered() diff --git a/tests/test-networks-equal-constructions.R b/tests/test-networks-equal-constructions.R index 7ff5c073..feb3f7d2 100644 --- a/tests/test-networks-equal-constructions.R +++ b/tests/test-networks-equal-constructions.R @@ -247,12 +247,12 @@ test_that("Compare networks after adding vertex attributes in different order", networks = split.network.time.based(author.network, number.windows = 2) ## add commit count or email attribute - networks.commit.count = add.vertex.attribute.commit.count.author(networks, proj.data, aggregation.level = "range") + networks.commit.count = add.vertex.attribute.author.commit.count(networks, proj.data, aggregation.level = "range") networks.email = add.vertex.attribute.author.email(networks, proj.data) ## add the other attribute networks.both.1 = add.vertex.attribute.author.email(networks.commit.count, proj.data) - networks.both.2 = add.vertex.attribute.commit.count.author(networks.email, proj.data, aggregation.level = "range") + networks.both.2 = add.vertex.attribute.author.commit.count(networks.email, proj.data, aggregation.level = "range") ## Order of attributes is now different, while the content is the same. ## The resulting networks are therefore not equal. 
diff --git a/tests/test-networks-multi-relation.R b/tests/test-networks-multi-relation.R index 9ff1f26a..b264ec70 100644 --- a/tests/test-networks-multi-relation.R +++ b/tests/test-networks-multi-relation.R @@ -86,7 +86,7 @@ test_that("Network construction of the undirected author network with relation = "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", "<65a1sf31sagd684dfv31@mail.gmail.com>", "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>"), thread = c(NA, NA, NA, NA, NA, NA, NA, NA, - "", "", "", "") + "", "", "", "") ) ## build expected network @@ -132,8 +132,8 @@ test_that("Construction of the bipartite network for the feature artifact with a type = TYPE.ARTIFACT ) threads = data.frame( - name = c("", "", "", - "", "", "", "", "", ""), + name = c("", "", "", + "", "", "", "", "", ""), kind = "MailThread", type = TYPE.ARTIFACT ) @@ -150,9 +150,9 @@ test_that("Construction of the bipartite network for the feature artifact with a "", "", "", "", "", "", "", "", "", "", "", "", "", - "", "", "", "", "", "", "", # mail - "", "", "", "", "", "", "", - "", ""), + "", "", "", "", "", "", "", # mail + "", "", "", "", "", "", "", + "", ""), date = get.date.from.string(c("2013-05-05 21:46:30", "2013-05-05 21:49:21", "2013-05-05 21:49:34", # issue "2013-05-06 01:04:34", "2013-05-25 03:48:41", "2013-05-25 04:08:07", "2016-07-12 14:59:25", "2016-07-12 16:02:30", "2016-07-12 16:06:01", @@ -178,9 +178,9 @@ test_that("Construction of the bipartite network for the feature artifact with a "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", "<65a1sf31sagd684dfv31@mail.gmail.com>", ""), thread = c(rep(NA, 24), - "", "", "", "", "", "", - "", "", "", "", "", "", - "", "", "", ""), + "", "", "", "", "", "", + "", "", "", "", "", "", + "", "", "", ""), issue.id = c("", "", "", "", # issue "", "", "", "", "", "", "", @@ -296,7 +296,7 @@ test_that("Construction of the multi network for the feature artifact with autho 
"<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", rep(NA, 29)), thread = c(rep(NA, 8), - "", "", "", "", + "", "", "", "", rep(NA, 29)), issue.id = c(rep(NA, 20), "", "", "", "", # bipartite issue @@ -434,29 +434,29 @@ test_that("Construction of the multi-artifact bipartite network with artifact re vertices = data.frame(name = c("Björn", "Karl", "Olaf", "Thomas", "A", "Base_Feature", "foo", "Fritz fritz@example.org","georg", - "Hans", "udo", "", - "", "", "", - "", "", "", - "", ""), + "Hans", "udo", "", + "", "", "", + "", "", "", + "", ""), kind = c(rep(TYPE.AUTHOR, 4), rep("Feature", 3), rep(TYPE.AUTHOR, 4), rep("MailThread", 9)), type = c(rep(TYPE.AUTHOR, 4), rep(TYPE.ARTIFACT, 3), rep(TYPE.AUTHOR, 4), rep(TYPE.ARTIFACT, 9)) ) row.names(vertices) = c("Björn", "Karl", "Olaf", "Thomas", "A", "Base_Feature", "foo", "Fritz fritz@example.org","georg", - "Hans", "udo", "", - "", "", "", - "", "", "", - "", "") + "Hans", "udo", "", + "", "", "", + "", "", "", + "", "") edges = data.frame( from = c("Björn", "Karl", "Olaf", "Olaf", "Thomas", "Thomas", "Björn", "Björn", "Björn", "Fritz fritz@example.org", "georg", "Hans", "Hans", "Hans", "Hans", "Hans", "Hans", "Hans", "Olaf", "Olaf", "Thomas", "udo"), - to = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo", "", - "", "", "", "", "", - "", "", "", "", "", - "", "", "", "", ""), + to = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo", "", + "", "", "", "", "", + "", "", "", "", "", + "", "", "", "", ""), date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:06:10", "2016-07-12 16:00:45", "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:32", "2004-10-09 18:38:13", "2005-02-09 18:49:49", "2016-07-12 15:58:40", @@ -484,10 +484,10 @@ test_that("Construction of the multi-artifact bipartite network with artifact re "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", "<65a1sf31sagd684dfv31@mail.gmail.com>", "" ), - thread = c(rep(NA, 6), 
"", "", "", "", - "", "", "", "", "", - "", "", "", "", "", - "", "") + thread = c(rep(NA, 6), "", "", "", "", + "", "", "", "", "", + "", "", "", "", "", + "", "") ) net.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) @@ -521,18 +521,18 @@ test_that("Construction of the multi-artifact bipartite network with artifact re vertices = data.frame(name = c("Björn", "Karl", "Max", "Olaf", "Thomas", "", "", "", "", "", "", "", - "Fritz fritz@example.org", "georg", "Hans", "udo", "", - "", "", "", "", "", - "", "", ""), + "Fritz fritz@example.org", "georg", "Hans", "udo", "", + "", "", "", "", "", + "", "", ""), kind = c(rep("Author", 5), rep("Issue", 7), rep("Author", 4), rep("MailThread", 9)), type = c(rep("Author", 5), rep("Artifact", 7), rep("Author", 4), rep("Artifact", 9)) ) row.names(vertices) = c("Björn", "Karl", "Max", "Olaf", "Thomas", "", "", "", "", "", "", "", - "Fritz fritz@example.org", "georg", "Hans", "udo", "", - "", "", "", "", "", - "", "", "") + "Fritz fritz@example.org", "georg", "Hans", "udo", "", + "", "", "", "", "", + "", "", "") edges = data.frame( from = c("Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Björn", @@ -547,9 +547,9 @@ test_that("Construction of the multi-artifact bipartite network with artifact re "", "", "", "", "", "", "", "", "", - "", "", "", "", "", - "", "", "", "", "", "", - "", "", "", "", "", ""), + "", "", "", "", "", + "", "", "", "", "", "", + "", "", "", "", "", ""), date = get.date.from.string(c("2013-05-05 21:46:30", "2013-05-05 21:49:21", "2013-05-05 21:49:34", "2013-05-06 01:04:34", "2013-05-25 03:48:41", "2013-05-25 04:08:07", "2016-07-12 14:59:25", "2016-07-12 16:02:30", "2016-07-12 16:06:01", @@ -587,9 +587,9 @@ test_that("Construction of the multi-artifact bipartite network with artifact re "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", "<65a1sf31sagd684dfv31@mail.gmail.com>", "" ), - thread = c(rep(NA, 24), "", "", "", "", "", - "", "", 
"", "", "", "", - "", "", "", "", "") + thread = c(rep(NA, 24), "", "", "", "", "", + "", "", "", "", "", "", + "", "", "", "", "") ) net.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) @@ -628,9 +628,9 @@ test_that("Construction of the multi-artifact bipartite network with artifact re "Max", "", "", "", "", "", "", "", "Fritz fritz@example.org", - "georg", "Hans", "udo", "", "", "", - "", "", "", "", "", - ""), + "georg", "Hans", "udo", "", "", "", + "", "", "", "", "", + ""), kind = c(rep("Author", 4), rep("Feature", 3), "Author", rep("Issue", 7), rep("Author", 4), rep("MailThread", 9)), type = c(rep("Author", 4), rep("Artifact", 3), "Author", @@ -639,9 +639,9 @@ test_that("Construction of the multi-artifact bipartite network with artifact re "Max", "", "", "", "", "", "", "", "Fritz fritz@example.org", - "georg", "Hans", "udo", "", "", "", - "", "", "", "", "", - "") + "georg", "Hans", "udo", "", "", "", + "", "", "", "", "", + "") edges = data.frame( from = c("Björn", "Karl", "Olaf", "Olaf", "Thomas", "Thomas", "Björn", "Björn", @@ -658,10 +658,10 @@ test_that("Construction of the multi-artifact bipartite network with artifact re "", "", "", "", "", "", "", "", "", - "", "", "", "", - "", "", "", "", "", - "", "", "", "", "", - "", "", "", ""), + "", "", "", "", + "", "", "", "", "", + "", "", "", "", "", + "", "", "", ""), date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:06:10", "2016-07-12 16:00:45", "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:32", "2013-05-05 21:46:30", "2013-05-05 21:49:21", "2013-05-05 21:49:34", @@ -706,10 +706,10 @@ test_that("Construction of the multi-artifact bipartite network with artifact re "", "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", "<65a1sf31sagd684dfv31@mail.gmail.com>", ""), - thread = c(rep(NA, 30), "", "", "", "", - "", "", "", "", "", - "", "", "", "", "", - "", "") 
+ thread = c(rep(NA, 30), "", "", "", "", + "", "", "", "", "", + "", "", "", "", "", + "", "") ) net.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) diff --git a/tests/test-read.R b/tests/test-read.R index ee3fdb84..48cae572 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -217,7 +217,8 @@ test_that("Read the mail data.", { "=?KOI8-R?Q?=EF=D4=D7=C5=D4:_Some_patches?= 2", "Re: busybox 1", "=?KOI8-R?Q?=EF=D4=D7=C5=D4:_Some_patches?= tab", "Re: Fw: busybox 2 tab", "Re: Fw: busybox 10"), - thread = sprintf("", c(1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 7, 8, 8, 8, 9, 9)), + thread = sprintf("", c("13#1", "42#2", "13#3", "42#4", "42#5", "42#6", "42#6", "42#6", + "42#6", "42#6", "42#6", "42#7", "13#8", "13#8", "13#8", "13#9", "13#9")), artifact.type = "Mail" ) ## delete the line with the empty date diff --git a/util-data-misc.R b/util-data-misc.R index a9cec28b..6d0d1803 100644 --- a/util-data-misc.R +++ b/util-data-misc.R @@ -19,6 +19,7 @@ ## Copyright 2019 by Jakob Kronawitter ## Copyright 2021 by Johannes Hostert ## Copyright 2021 by Christian Hechtl +## Copyright 2022 by Jonathan Baumann ## All Rights Reserved. ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / @@ -77,12 +78,14 @@ mask.pull.requests = function(issue.data) { #' @param type which issue type to consider. #' One of \code{"issues"}, \code{"pull.requests"} or \code{"all"} #' [default: "all"] +#' @param use.unfiltered.data whether to use the unfiltered issue data, i.e. \code{proj.data$get.issues.unfiltered()} +#' instead of \code{proj.data$get.issues()} [default: FALSE] #' #' @return a filtered sub-data frame of the unfiltered issue data from \code{proj.data}. 
preprocess.issue.data = function(proj.data, retained.cols = c("author.name", "issue.id", "event.name"), - type = c("all", "pull.requests", "issues")) { + type = c("all", "pull.requests", "issues"), use.unfiltered.data = FALSE) { type = match.arg(type) - df = proj.data$get.issues.unfiltered() + df = if (use.unfiltered.data) proj.data$get.issues.unfiltered() else proj.data$get.issues() ## forall vectors k, if nrow(df) == 0, then df[k, ..] fails ## so we abort beforehand @@ -359,13 +362,14 @@ get.author.mail.thread.count = function(proj.data) { #' @param type which issue type to consider (see \code{preprocess.issue.data}). #' One of \code{"issues"}, \code{"pull.requests"} or \code{"all"} #' [default: "all"] +#' @param use.unfiltered.data whether to use unfiltered issue data (see \code{preprocess.issue.data}) [default: FALSE] #' #' @return a dataframe consisting of two columns, the first of which holding the authors' names and the second holding #' their respective issue counts -get.author.issue.count = function(proj.data, type = c("all", "issues", "pull.requests")) { +get.author.issue.count = function(proj.data, type = c("all", "issues", "pull.requests"), use.unfiltered.data = FALSE) { type = match.arg(type) logging::logdebug("get.author.issue.count: starting.") - df = preprocess.issue.data(proj.data, type = type) + df = preprocess.issue.data(proj.data, type = type, use.unfiltered.data = use.unfiltered.data) ## count distinct since an author may appear in the same issue multiple times stmt = "SELECT `author.name`, COUNT( DISTINCT `issue.id`) as `freq` FROM `df` GROUP BY `author.name` ORDER BY `freq` DESC, `author.name` ASC" @@ -383,13 +387,17 @@ get.author.issue.count = function(proj.data, type = c("all", "issues", "pull.req #' @param type which issue type to consider (see \code{preprocess.issue.data}). 
#' One of \code{"issues"}, \code{"pull.requests"} or \code{"all"} #' [default: "all"] +#' @param use.unfiltered.data whether to use unfiltered issue data (see \code{preprocess.issue.data}). Note that the +#' filtered data may not contain issue created events. +#' [default: TRUE] #' #' @return a dataframe consisting of two columns, the first of which holding the authors' names and the second holding #' their respective issue counts -get.author.issues.created.count = function(proj.data, type = c("all", "issues", "pull.requests")) { +get.author.issues.created.count = function(proj.data, type = c("all", "issues", "pull.requests"), + use.unfiltered.data = TRUE) { type = match.arg(type) logging::logdebug("get.author.issues.created.count: starting.") - df = preprocess.issue.data(proj.data, type = type) + df = preprocess.issue.data(proj.data, type = type, use.unfiltered.data = use.unfiltered.data) ## count distinct since an author may appear in the same issue multiple times stmt = "SELECT `author.name`, COUNT( DISTINCT `issue.id`) as `freq` FROM `df` WHERE `event.name` = 'created' @@ -408,13 +416,15 @@ get.author.issues.created.count = function(proj.data, type = c("all", "issues", #' @param type which issue type to consider (see \code{preprocess.issue.data}). 
#' One of \code{"issues"}, \code{"pull.requests"} or \code{"all"} #' [default: "all"] +#' @param use.unfiltered.data whether to use unfiltered issue data (see \code{preprocess.issue.data}) [default: FALSE] #' #' @return a dataframe consisting of two columns, the first of which holding the authors' names and the second holding #' their respective issue counts -get.author.issues.commented.in.count = function(proj.data, type = c("all", "issues", "pull.requests")) { +get.author.issues.commented.in.count = function(proj.data, type = c("all", "issues", "pull.requests"), + use.unfiltered.data = FALSE) { type = match.arg(type) logging::logdebug("get.author.issues.commented.in.count: starting.") - df = preprocess.issue.data(proj.data, type = type) + df = preprocess.issue.data(proj.data, type = type, use.unfiltered.data = use.unfiltered.data) ## count distinct since an author may appear in the same issue multiple times stmt = "SELECT `author.name`, COUNT( DISTINCT `issue.id`) as `freq` FROM `df` WHERE `event.name` = 'commented' @@ -433,13 +443,15 @@ get.author.issues.commented.in.count = function(proj.data, type = c("all", "issu #' @param type which issue type to consider (see \code{preprocess.issue.data}). 
#' One of \code{"issues"}, \code{"pull.requests"} or \code{"all"} #' [default: "all"] +#' @param use.unfiltered.data whether to use unfiltered issue data (see \code{preprocess.issue.data}) [default: FALSE] #' #' @return a dataframe consisting of two columns, the first of which holding the authors' names and the second holding #' their respective comment counts -get.author.issue.comment.count = function(proj.data, type = c("all", "issues", "pull.requests")) { +get.author.issue.comment.count = function(proj.data, type = c("all", "issues", "pull.requests"), + use.unfiltered.data = FALSE) { type = match.arg(type) logging::logdebug("get.author.issue.comment.count: starting.") - df = preprocess.issue.data(proj.data, type = type) + df = preprocess.issue.data(proj.data, type = type, use.unfiltered.data = use.unfiltered.data) stmt = "SELECT `author.name`, COUNT(*) as `freq` FROM `df` WHERE `event.name` = 'commented' GROUP BY `author.name` ORDER BY `freq` DESC, `author.name` ASC" @@ -447,3 +459,311 @@ get.author.issue.comment.count = function(proj.data, type = c("all", "issues", " logging::logdebug("get.author.issue.comment.count: finished") return(res) } + +## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / +## Mail Thread Statistics -------------------------------------------------- + +#' Get the number of contributors to each mail thread based on the mail data contained in the +#' specified \code{ProjectData}. +#' +#' @param proj.data the \code{ProjectData} containing the mail data +#' +#' @return a named list of contributor counts, where the name is the thread. 
get.mail.thread.contributor.count = function(proj.data) {
    logging::logdebug("get.mail.thread.contributor.count: starting.")
    ## group author e-mail addresses by thread; 'get.key.to.value.from.df' duplicates the
    ## value column ("author.email") as "data.vertices" in each per-thread data frame
    thread.to.mails = get.key.to.value.from.df(proj.data$get.mails(), "thread", "author.email")
    ## count each contributor only once per thread, even if they sent several mails
    thread.to.contributor.count = lapply(thread.to.mails, function(df) {
        length(unique(df[["data.vertices"]]))
    })
    logging::logdebug("get.mail.thread.contributor.count: finished")
    return(thread.to.contributor.count)
}

#' Get the number of messages in each mail thread based on the mail data contained in the
#' specified \code{ProjectData}.
#'
#' @param proj.data the \code{ProjectData} containing the mail data
#'
#' @return a named list of message counts, where the name is the thread.
get.mail.thread.mail.count = function(proj.data) {
    logging::logdebug("get.mail.thread.mail.count: starting.")
    thread.to.mails = get.key.to.value.from.df(proj.data$get.mails(), "thread", "author.email")
    ## every row of the per-thread data frame corresponds to one mail, so the column length
    ## is the number of messages in the thread
    thread.to.mail.count = lapply(thread.to.mails, function(df) {
        length(df[["data.vertices"]])
    })
    logging::logdebug("get.mail.thread.mail.count: finished")
    return(thread.to.mail.count)
}

#' Get the date of the first message in each mail thread based on the mail data contained
#' in the specified \code{ProjectData}.
#'
#' @param proj.data the \code{ProjectData} containing the mail data
#'
#' @return a named list of start dates, where the name is the thread.
get.mail.thread.start.date = function(proj.data) {
    logging::logdebug("get.mail.thread.start.date: starting.")
    thread.to.dates = get.key.to.value.from.df(proj.data$get.mails(), "thread", "date")
    ## the earliest mail date marks the start of the thread
    thread.to.start.date = lapply(thread.to.dates, function(df) {
        min(df[["data.vertices"]])
    })
    logging::logdebug("get.mail.thread.start.date: finished")
    return(thread.to.start.date)
}

#' Get the date of the last message in each mail thread based on the mail data contained
#' in the specified \code{ProjectData}
#'
#' @param proj.data the \code{ProjectData} containing the mail data
#'
#' @return a named list of end dates, where the name is the thread.
get.mail.thread.end.date = function(proj.data) {
    logging::logdebug("get.mail.thread.end.date: starting.")
    thread.to.dates = get.key.to.value.from.df(proj.data$get.mails(), "thread", "date")
    ## the latest mail date marks the (current) end of the thread
    thread.to.end.date = lapply(thread.to.dates, function(df) {
        max(df[["data.vertices"]])
    })
    logging::logdebug("get.mail.thread.end.date: finished")
    return(thread.to.end.date)
}

#' Get the identifier of the mailing list from which a thread originates.
#' This identifier is part of the thread ID as produced by codeface, e.g., if the thread ID is "13#37", then 13 is the
#' ID of the mailing list.
#'
#' Older versions of codeface did not include this identifier. If the identifier is not included in the data used, a
#' warning is produced and the list will contain \code{NA} for each thread.
#'
#' @param proj.data the \code{ProjectData} containing the mail data
#'
#' @return a named list of mailing list identifiers, where the name is the thread.
get.mail.thread.originating.mailing.list = function(proj.data) {
    logging::logdebug("get.mail.thread.originating.mailing.list: starting.")
    thread.ids = unique(proj.data$get.mails()[["thread"]])
    thread.to.list = lapply(thread.ids, function(thread.name) {
        ## drop the first 8 characters and the trailing character of the raw thread name
        ## (presumably the '<thread-' prefix and '>' suffix produced by codeface -- TODO confirm)
        thread.id = substr(thread.name, 9, nchar(thread.name) - 1)
        if (grepl("#", thread.id, fixed = TRUE)) { # make sure that our data has the shape we expect
            mailing.list = strsplit(thread.id, "#")[[1]][1] # split at '#' and keep only first part
            return(mailing.list)
        } else {
            logging::logwarn("get.mail.thread.originating.mailing.list called on incompatible data")
            return(NA)
        }
    })
    names(thread.to.list) = thread.ids
    logging::logdebug("get.mail.thread.originating.mailing.list: finished")
    return(thread.to.list)
}

## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / /
## Issue Statistics --------------------------------------------------------

#' Get the number of contributors to each issue based on the issue data contained
#' in the specified \code{ProjectData}.
#'
#' The type argument specifies whether we count PRs alone, issues alone, or both (\code{"all"}).
#'
#' @param proj.data the \code{ProjectData} containing the issue data
#' @param type which issue type to consider (see \code{preprocess.issue.data}).
#'             One of \code{"issues"}, \code{"pull.requests"} or \code{"all"}
#'             [default: "all"]
#' @param use.unfiltered.data whether to use unfiltered issue data (see \code{preprocess.issue.data}) [default: FALSE]
#'
#' @return a named list of contributor counts, where the name is the issue ID.
get.issue.contributor.count = function(proj.data, type = c("all", "issues", "pull.requests"),
                                       use.unfiltered.data = FALSE) {
    type = match.arg(type)
    logging::logdebug("get.issue.contributor.count: starting.")
    df = preprocess.issue.data(proj.data, type = type, retained.cols = c("issue.id", "author.email"),
                               use.unfiltered.data = use.unfiltered.data)
    issue.id.to.events = get.key.to.value.from.df(df, "issue.id", "author.email")
    ## count each contributor only once per issue, even if they caused several events
    issue.id.to.contributor.count = lapply(issue.id.to.events, function(df) {
        length(unique(df[["data.vertices"]]))
    })
    logging::logdebug("get.issue.contributor.count: finished")
    return(issue.id.to.contributor.count)
}

#' Get the number of events for each issue based on the issue data contained
#' in the specified \code{ProjectData}.
#'
#' The type argument specifies whether we count PRs alone, issues alone, or both (\code{"all"}).
#'
#' @param proj.data the \code{ProjectData} containing the issue data
#' @param type which issue type to consider (see \code{preprocess.issue.data}).
#'             One of \code{"issues"}, \code{"pull.requests"} or \code{"all"}
#'             [default: "all"]
#' @param use.unfiltered.data whether to use unfiltered issue data (see \code{preprocess.issue.data}) [default: FALSE]
#'
#' @return a named list of event counts, where the name is the issue ID.
get.issue.event.count = function(proj.data, type = c("all", "issues", "pull.requests"), use.unfiltered.data = FALSE) {
    type = match.arg(type)
    logging::logdebug("get.issue.event.count: starting.")
    df = preprocess.issue.data(proj.data, type = type, retained.cols = c("issue.id", "event.id"),
                               use.unfiltered.data = use.unfiltered.data)
    issue.id.to.events = get.key.to.value.from.df(df, "issue.id", "event.id")
    issue.id.to.event.count = lapply(issue.id.to.events, function(df) {
        ## one event might show up multiple times (i.e. 'mentioned' also triggers 'subscribed'),
        ## so we count the number of distinct event IDs
        length(unique(df[["data.vertices"]]))
    })
    logging::logdebug("get.issue.event.count: finished")
    return(issue.id.to.event.count)
}

#' Get the number of 'commented' events for each issue based on the issue data contained
#' in the specified \code{ProjectData}.
#'
#' The type argument specifies whether we count PRs alone, issues alone, or both (\code{"all"}).
#'
#' @param proj.data the \code{ProjectData} containing the issue data
#' @param type which issue type to consider (see \code{preprocess.issue.data}).
#'             One of \code{"issues"}, \code{"pull.requests"} or \code{"all"}
#'             [default: "all"]
#'
#' @return a named list of comment counts, where the name is the issue ID.
get.issue.comment.count = function(proj.data, type = c("all", "issues", "pull.requests")) {
    type = match.arg(type)
    logging::logdebug("get.issue.comment.count: starting.")
    df = preprocess.issue.data(proj.data, type = type, retained.cols = c("issue.id", "event.name"))
    issue.id.to.events = get.key.to.value.from.df(df, "issue.id", "event.name")
    ## count only events whose name is exactly 'commented'
    issue.id.to.comment.count = lapply(issue.id.to.events, function(df) {
        event.names = df[["data.vertices"]]
        return(length(event.names[event.names == "commented"]))
    })
    logging::logdebug("get.issue.comment.count: finished")
    return(issue.id.to.comment.count)
}

#' Get the date each issue was opened, based on the issue data contained
#' in the specified \code{ProjectData}.
#'
#' The type argument specifies whether we count PRs alone, issues alone, or both (\code{"all"}).
#'
#' @param proj.data the \code{ProjectData} containing the issue data
#' @param type which issue type to consider (see \code{preprocess.issue.data}).
#'             One of \code{"issues"}, \code{"pull.requests"} or \code{"all"}
#'             [default: "all"]
#'
#' @return a named list of dates, where the name is the issue ID.
get.issue.opened.date = function(proj.data, type = c("all", "issues", "pull.requests")) {
    type = match.arg(type)
    logging::logdebug("get.issue.opened.date: starting.")
    df = preprocess.issue.data(proj.data, type = type, retained.cols = c("issue.id", "creation.date"))
    issue.id.to.dates = get.key.to.value.from.df(df, "issue.id", "creation.date")
    issue.id.to.start.date = lapply(issue.id.to.dates, function(df) {
        min(df[["data.vertices"]]) # values should all be the same
    })
    logging::logdebug("get.issue.opened.date: finished")
    return(issue.id.to.start.date)
}

#' Get the date each issue was closed, based on the issue data contained
#' in the specified \code{ProjectData}, or \code{NA} if the issue is still open.
#'
#' The type argument specifies whether we count PRs alone, issues alone, or both (\code{"all"}).
#'
#' @param proj.data the \code{ProjectData} containing the issue data
#' @param type which issue type to consider (see \code{preprocess.issue.data}).
#'             One of \code{"issues"}, \code{"pull.requests"} or \code{"all"}
#'             [default: "all"]
#'
#' @return a named list of dates, where the name is the issue ID.
get.issue.closed.date = function(proj.data, type = c("all", "issues", "pull.requests")) {
    type = match.arg(type)
    logging::logdebug("get.issue.closed.date: starting.")
    df = preprocess.issue.data(proj.data, type = type, retained.cols = c("issue.id", "closing.date"))
    issue.id.to.dates = get.key.to.value.from.df(df, "issue.id", "closing.date")
    issue.id.to.closed.date = lapply(issue.id.to.dates, function(df) {
        min(df[["data.vertices"]]) # values should all be the same
    })
    logging::logdebug("get.issue.closed.date: finished")
    return(issue.id.to.closed.date)
}

#' Get the date of the last activity in each issue based on the issue data contained
#' in the specified \code{ProjectData}.
#'
#' The type argument specifies whether we count PRs alone, issues alone, or both (\code{"all"}).
#'
#' @param proj.data the \code{ProjectData} containing the issue data
#' @param type which issue type to consider (see \code{preprocess.issue.data}).
#'             One of \code{"issues"}, \code{"pull.requests"} or \code{"all"}
#'             [default: "all"]
#' @param use.unfiltered.data whether to use unfiltered issue data (see \code{preprocess.issue.data}) [default: FALSE]
#'
#' @return a named list of dates, where the name is the issue ID.
get.issue.last.activity.date = function(proj.data, type = c("all", "issues", "pull.requests"),
                                        use.unfiltered.data = FALSE) {
    type = match.arg(type)
    logging::logdebug("get.issue.last.activity.date: starting.")
    df = preprocess.issue.data(proj.data, type = type, retained.cols = c("issue.id", "date"),
                               use.unfiltered.data = use.unfiltered.data)
    issue.id.to.dates = get.key.to.value.from.df(df, "issue.id", "date")
    ## the latest event date marks the last activity in the issue
    issue.id.to.end.date = lapply(issue.id.to.dates, function(df) {
        max(df[["data.vertices"]])
    })
    logging::logdebug("get.issue.last.activity.date: finished")
    return(issue.id.to.end.date)
}

#' Get the title of each issue based on the issue data contained
#' in the specified \code{ProjectData}.
#'
#' The type argument specifies whether we count PRs alone, issues alone, or both (\code{"all"}).
#'
#' @param proj.data the \code{ProjectData} containing the issue data
#' @param type which issue type to consider (see \code{preprocess.issue.data}).
#'             One of \code{"issues"}, \code{"pull.requests"} or \code{"all"}
#'             [default: "all"]
#'
#' @return a named list of titles, where the name is the issue ID.
get.issue.title = function(proj.data, type = c("all", "issues", "pull.requests")) {
    type = match.arg(type)
    logging::logdebug("get.issue.title: starting.")
    df = preprocess.issue.data(proj.data, type = type, retained.cols = c("issue.id", "issue.title"))
    issue.id.to.title = get.key.to.value.from.df(df, "issue.id", "issue.title")
    issue.id.to.title.only = lapply(issue.id.to.title, function(df) {
        ## as a result of get.key.to.value.from.df, the "issue.title" column should be duplicated as "data.vertices".
        ## The title should be the same in every row, so we can just use the first row.
        df[[1, "data.vertices"]] # data frames resulting from get.key.to.value.from.df always have at least one row
    })
    logging::logdebug("get.issue.title: finished")
    return(issue.id.to.title.only)
}

#' Get whether a PR is open, has been merged, or has been closed without merging.
#'
#' @param proj.data the \code{ProjectData} containing the issue data
#' @param use.unfiltered.data whether to use unfiltered issue data (see \code{preprocess.issue.data}) [default: TRUE]
#'
#' @return a named list of states (\code{"open"}, \code{"merged"}, or \code{"closed"}),
#'         where the name is the issue ID.
get.pr.open.merged.or.closed = function(proj.data, use.unfiltered.data = TRUE) {
    logging::logdebug("get.pr.open.merged.or.closed: starting.")
    df = preprocess.issue.data(proj.data, type = "pull.requests", use.unfiltered.data = use.unfiltered.data,
                               retained.cols = c("issue.id", "issue.state", "event.name"))
    issue.id.to.events = get.key.to.value.from.df(df, "issue.id", "event.name")
    issue.id.to.state = lapply(issue.id.to.events, function(df) {
        ## a PR that is (re)opened counts as open regardless of any merge events
        if ("open" %in% df[["issue.state"]] || "reopened" %in% df[["issue.state"]]) {
            return("open")
        }
        ## otherwise, a 'merged' event means the PR was merged before being closed
        if ("merged" %in% df[["event.name"]]) {
            return("merged")
        }
        return("closed")
    })
    logging::logdebug("get.pr.open.merged.or.closed: finished")
    return(issue.id.to.state)
}

#' Get whether each issue is a pull request, based on the issue data contained in the specified
#' \code{ProjectData}.
+#' +#' @param proj.data the \code{ProjectData} containing the issue data +#' +#' @return a named list of logical values, where the name is the issue ID. +get.issue.is.pull.request = function(proj.data) { + logging::logdebug("get.issue.is.pull.request: starting.") + issue.data = proj.data$get.issues() + issue.id.to.is.pr = as.list(mask.pull.requests(issue.data)) + names(issue.id.to.is.pr) = issue.data[["issue.id"]] + logging::logdebug("get.issue.is.pull.request: finished") + return(issue.id.to.is.pr) +} diff --git a/util-networks-covariates.R b/util-networks-covariates.R index a550663e..31dd7134 100644 --- a/util-networks-covariates.R +++ b/util-networks-covariates.R @@ -20,6 +20,7 @@ ## Copyright 2020 by Christian Hechtl ## Copyright 2021 by Johannes Hostert ## Copyright 2022 by Niklas Schneider +## Copyright 2022 by Jonathan Baumann ## All Rights Reserved. ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / @@ -121,7 +122,7 @@ add.vertex.attribute = function(net.to.range.list, attr.name, default.value, com function(x) get.or.default(x, attrs.by.vertex.name, default.value)) ## simplify the list of attributes to a vector if all its elements are just vectors (not lists) - if (length(attributes) > 0 && !any(lapply(attributes, is.list))) { + if (length(attributes) > 0 && !any(sapply(attributes, is.list))) { attributes = unlist(attributes) } ## otherwise, the list of attributes contains lists, so we can only remove the outermost list @@ -203,7 +204,7 @@ add.vertex.attribute.count.helper = function(list.of.networks, project.data, nam #' @param default.value The default value to add if a vertex has no matching value [default: 0L] #' #' @return A list of networks with the added attribute -add.vertex.attribute.commit.count.author = function(list.of.networks, project.data, name = "commit.count", +add.vertex.attribute.author.commit.count = function(list.of.networks, project.data, name = "commit.count", aggregation.level = c("range", "cumulative", 
"all.ranges", "project.cumulative", "project.all.ranges", "complete"), @@ -229,10 +230,12 @@ add.vertex.attribute.commit.count.author = function(list.of.networks, project.da #' @param default.value The default value to add if a vertex has no matching value [default: 0L] #' #' @return A list of networks with the added attribute -add.vertex.attribute.commit.count.author.not.committer = function(list.of.networks, project.data, +add.vertex.attribute.author.commit.count.not.committer = function(list.of.networks, project.data, name = "commit.count.author.not.committer", - aggregation.level = c("range", "cumulative", "all.ranges", - "project.cumulative", "project.all.ranges", + aggregation.level = c("range", "cumulative", + "all.ranges", + "project.cumulative", + "project.all.ranges", "complete"), default.value = 0L) { nets.with.attr = add.vertex.attribute.count.helper( @@ -256,11 +259,13 @@ add.vertex.attribute.commit.count.author.not.committer = function(list.of.networ #' @param default.value The default value to add if a vertex has no matching value [default: 0L] #' #' @return A list of networks with the added attribute -add.vertex.attribute.commit.count.committer = function(list.of.networks, project.data, name = "commit.count.committer", - aggregation.level = c("range", "cumulative", "all.ranges", - "project.cumulative", "project.all.ranges", - "complete"), - default.value = 0L) { +add.vertex.attribute.author.commit.count.committer = function(list.of.networks, project.data, + name = "commit.count.committer", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", + "project.all.ranges", + "complete"), + default.value = 0L) { nets.with.attr = add.vertex.attribute.count.helper( list.of.networks, project.data, name, aggregation.level, default.value, get.committer.commit.count, "committer.name" @@ -282,10 +287,12 @@ add.vertex.attribute.commit.count.committer = function(list.of.networks, project #' @param default.value The default value to add 
if a vertex has no matching value [default: 0L] #' #' @return A list of networks with the added attribute -add.vertex.attribute.commit.count.committer.not.author = function(list.of.networks, project.data, +add.vertex.attribute.author.commit.count.committer.not.author = function(list.of.networks, project.data, name = "commit.count.committer.not.author", - aggregation.level = c("range", "cumulative", "all.ranges", - "project.cumulative", "project.all.ranges", + aggregation.level = c("range", "cumulative", + "all.ranges", + "project.cumulative", + "project.all.ranges", "complete"), default.value = 0L) { nets.with.attr = add.vertex.attribute.count.helper( @@ -309,10 +316,12 @@ add.vertex.attribute.commit.count.committer.not.author = function(list.of.networ #' @param default.value The default value to add if a vertex has no matching value [default: 0L] #' #' @return A list of networks with the added attribute -add.vertex.attribute.commit.count.committer.and.author = function(list.of.networks, project.data, +add.vertex.attribute.author.commit.count.committer.and.author = function(list.of.networks, project.data, name = "commit.count.committer.and.author", - aggregation.level = c("range", "cumulative", "all.ranges", - "project.cumulative", "project.all.ranges", + aggregation.level = c("range", "cumulative", + "all.ranges", + "project.cumulative", + "project.all.ranges", "complete"), default.value = 0L) { nets.with.attr = add.vertex.attribute.count.helper( @@ -337,10 +346,12 @@ add.vertex.attribute.commit.count.committer.and.author = function(list.of.networ #' @param default.value The default value to add if a vertex has no matching value [default: 0L] #' #' @return A list of networks with the added attribute -add.vertex.attribute.commit.count.committer.or.author = function(list.of.networks, project.data, +add.vertex.attribute.author.commit.count.committer.or.author = function(list.of.networks, project.data, name = "commit.count.committer.or.author", - aggregation.level = 
c("range", "cumulative", "all.ranges", - "project.cumulative", "project.all.ranges", + aggregation.level = c("range", "cumulative", + "all.ranges", + "project.cumulative", + "project.all.ranges", "complete"), default.value = 0L) { nets.with.attr = add.vertex.attribute.count.helper( @@ -365,7 +376,7 @@ add.vertex.attribute.commit.count.committer.or.author = function(list.of.network #' @param default.value The default value to add if a vertex has no matching value [default: 0L] #' #' @return A list of networks with the added attribute -add.vertex.attribute.artifact.count = function(list.of.networks, project.data, name = "artifact.count", +add.vertex.attribute.author.artifact.count = function(list.of.networks, project.data, name = "artifact.count", aggregation.level = c("range", "cumulative", "all.ranges", "project.cumulative", "project.all.ranges", "complete"), @@ -403,7 +414,7 @@ add.vertex.attribute.artifact.count = function(list.of.networks, project.data, n #' @param default.value The default value to add if a vertex has no matching value [default: 0L] #' #' @return A list of networks with the added attribute -add.vertex.attribute.mail.count = function(list.of.networks, project.data, +add.vertex.attribute.author.mail.count = function(list.of.networks, project.data, name = "mail.count", aggregation.level = c("range", "cumulative", "all.ranges", "project.cumulative", "project.all.ranges", @@ -430,7 +441,7 @@ add.vertex.attribute.mail.count = function(list.of.networks, project.data, #' @param default.value The default value to add if a vertex has no matching value [default: 0L] #' #' @return A list of networks with the added attribute -add.vertex.attribute.mail.thread.count = function(list.of.networks, project.data, +add.vertex.attribute.author.mail.thread.count = function(list.of.networks, project.data, name = "mail.thread.count", aggregation.level = c("range", "cumulative", "all.ranges", "project.cumulative", "project.all.ranges", @@ -459,22 +470,27 @@ 
add.vertex.attribute.mail.thread.count = function(list.of.networks, project.data #' more details. [default: "range"] #' @param default.value The default value to add if a vertex has no matching value [default: 0L] #' @param issue.type The issue kind,see \code{preprocess.issue.data} [default: "all"] +#' @param use.unfiltered.data whether to use unfiltered issue data (see \code{preprocess.issue.data}) [default: FALSE] #' #' @return A list of networks with the added attribute -add.vertex.attribute.issue.count = function(list.of.networks, project.data, +add.vertex.attribute.author.issue.count = function(list.of.networks, project.data, name = "issue.count", aggregation.level = c("range", "cumulative", "all.ranges", "project.cumulative", "project.all.ranges", "complete"), - default.value = 0L, issue.type = c("all", "pull.requests", "issues")) { + default.value = 0L, issue.type = c("all", "pull.requests", "issues"), + use.unfiltered.data = FALSE) { issue.type = match.arg(issue.type) - if (name == "issue.count" && identical(issue.type, "pull.requests")) { + if (missing(name) && identical(issue.type, "pull.requests")) { name = "pull.request.count" } nets.with.attr = add.vertex.attribute.count.helper( - list.of.networks, project.data, name, aggregation.level, - default.value, function(data) {return(get.author.issue.count(data, type = issue.type))}, "author.name" + list.of.networks, project.data, name, aggregation.level, default.value, + function(data) { + return(get.author.issue.count(data, type = issue.type,use.unfiltered.data = use.unfiltered.data)) + }, + "author.name" ) return(nets.with.attr) @@ -493,22 +509,30 @@ add.vertex.attribute.issue.count = function(list.of.networks, project.data, #' more details. 
[default: "range"] #' @param default.value The default value to add if a vertex has no matching value [default: 0L] #' @param issue.type The issue kind,see \code{preprocess.issue.data} [default: "all"] +#' @param use.unfiltered.data whether to use unfiltered issue data (see \code{preprocess.issue.data}) [default: FALSE] #' #' @return A list of networks with the added attribute -add.vertex.attribute.issues.commented.count = function(list.of.networks, project.data, +add.vertex.attribute.author.issues.commented.count = function(list.of.networks, project.data, name = "issues.commented.count", aggregation.level = c("range", "cumulative", "all.ranges", - "project.cumulative", "project.all.ranges", + "project.cumulative", + "project.all.ranges", "complete"), - default.value = 0L, issue.type = c("all", "pull.requests", "issues")) { + default.value = 0L, issue.type = c("all", "pull.requests", + "issues"), + use.unfiltered.data = FALSE) { issue.type = match.arg(issue.type) - if (name == "issues.commented.count" && identical(issue.type, "pull.requests")) { + if (missing(name) && identical(issue.type, "pull.requests")) { name = "pull.requests.commented.count" } nets.with.attr = add.vertex.attribute.count.helper( list.of.networks, project.data, name, aggregation.level, - default.value, function(data) {return(get.author.issues.commented.in.count(data, type = issue.type))}, "author.name" + default.value, function(data) { + return(get.author.issues.commented.in.count(data, type = issue.type, + use.unfiltered.data = use.unfiltered.data)) + }, + "author.name" ) return(nets.with.attr) @@ -527,28 +551,37 @@ add.vertex.attribute.issues.commented.count = function(list.of.networks, project #' more details. 
[default: "range"] #' @param default.value The default value to add if a vertex has no matching value [default: 0L] #' @param issue.type The issue kind,see \code{preprocess.issue.data} [default: "all"] +#' @param use.unfiltered.data whether to use unfiltered issue data (see \code{preprocess.issue.data}). +#' Note that filtered data may not contain issue creation events. +#' [default: TRUE] #' #' @return A list of networks with the added attribute -add.vertex.attribute.issue.creation.count = function(list.of.networks, project.data, +add.vertex.attribute.author.issue.creation.count = function(list.of.networks, project.data, name = "issue.creation.count", aggregation.level = c("range", "cumulative", "all.ranges", - "project.cumulative", "project.all.ranges", + "project.cumulative", + "project.all.ranges", "complete"), - default.value = 0L, issue.type = c("all", "pull.requests", "issues")) { + default.value = 0L, issue.type = c("all", "pull.requests", + "issues"), + use.unfiltered.data = TRUE) { issue.type = match.arg(issue.type) - if (name == "issue.creation.count" && identical(issue.type, "pull.requests")) { + if (missing(name) && identical(issue.type, "pull.requests")) { name = "pull.request.creation.count" } nets.with.attr = add.vertex.attribute.count.helper( list.of.networks, project.data, name, aggregation.level, - default.value, function(data) {return(get.author.issues.created.count(data, type = issue.type))}, "author.name" + default.value, function(data) { + return(get.author.issues.created.count(data, type = issue.type, use.unfiltered.data = use.unfiltered.data)) + }, "author.name" ) return(nets.with.attr) } -#' Add issue-comments-count attribute based on the number of comments in issues, where the person represented by the vertex is the author. +#' Add issue-comments-count attribute based on the number of comments in issues, where the person represented by the +#' vertex is the author. 
#' #' @param list.of.networks The network list #' @param project.data The project data @@ -560,22 +593,28 @@ add.vertex.attribute.issue.creation.count = function(list.of.networks, project.d #' more details. [default: "range"] #' @param default.value The default value to add if a vertex has no matching value [default: 0L] #' @param issue.type The issue kind,see \code{preprocess.issue.data} [default: "all"] +#' @param use.unfiltered.data whether to use unfiltered issue data (see \code{preprocess.issue.data}) [default: FALSE] #' #' @return A list of networks with the added attribute -add.vertex.attribute.issue.comment.count = function(list.of.networks, project.data, +add.vertex.attribute.author.issue.comment.count = function(list.of.networks, project.data, name = "issue.comment.count", aggregation.level = c("range", "cumulative", "all.ranges", - "project.cumulative", "project.all.ranges", + "project.cumulative", + "project.all.ranges", "complete"), - default.value = 0L, issue.type = c("all", "pull.requests", "issues")) { + default.value = 0L, issue.type = c("all", "pull.requests", + "issues"), + use.unfiltered.data = FALSE) { issue.type = match.arg(issue.type) - if (name == "issue.comment.count" && identical(issue.type, "pull.requests")) { + if (missing(name) && identical(issue.type, "pull.requests")) { name = "pull.request.comment.count" } nets.with.attr = add.vertex.attribute.count.helper( list.of.networks, project.data, name, aggregation.level, - default.value, function(data) {return(get.author.issue.comment.count(data, type = issue.type))}, "author.name" + default.value, function(data) { + return(get.author.issue.comment.count(data, type = issue.type, use.unfiltered.data = use.unfiltered.data)) + }, "author.name" ) return(nets.with.attr) @@ -591,7 +630,8 @@ add.vertex.attribute.issue.comment.count = function(list.of.networks, project.da #' @param default.value The default value to add if a vertex has no matching value [default: NA] #' #' @return A list of networks 
with the added attribute -add.vertex.attribute.author.email = function(list.of.networks, project.data, name = "author.email", default.value = NA) { +add.vertex.attribute.author.email = function(list.of.networks, project.data, name = "author.email", + default.value = NA) { nets.with.attr = split.and.add.vertex.attribute( list.of.networks, project.data, name, "complete", default.value, function(range, range.data, net) { @@ -627,7 +667,7 @@ add.vertex.attribute.author.email = function(list.of.networks, project.data, nam #' [default: FALSE] #' #' @return A list of networks with the added attribute. -add.vertex.attribute.first.activity = function(list.of.networks, project.data, +add.vertex.attribute.author.first.activity = function(list.of.networks, project.data, activity.types = c("mails", "commits", "issues"), name = "first.activity", aggregation.level = c("range", "cumulative", "all.ranges", @@ -669,8 +709,8 @@ add.vertex.attribute.first.activity = function(list.of.networks, project.data, return(data) } - nets.with.attr = split.and.add.vertex.attribute(list.of.networks, project.data, name, aggregation.level, vertex.default, - compute.attr, list.attributes = TRUE) + nets.with.attr = split.and.add.vertex.attribute(list.of.networks, project.data, name, aggregation.level, + vertex.default, compute.attr, list.attributes = TRUE) return(nets.with.attr) } @@ -690,7 +730,7 @@ add.vertex.attribute.first.activity = function(list.of.networks, project.data, #' [default: FALSE] #' #' @return A list of networks with the added attribute -add.vertex.attribute.active.ranges = function(list.of.networks, project.data, name = "active.ranges", +add.vertex.attribute.author.active.ranges = function(list.of.networks, project.data, name = "active.ranges", activity.types = c("mails", "commits", "issues"), default.value = list(), combine.activity.types = FALSE) { @@ -860,7 +900,7 @@ add.vertex.attribute.author.role = function(list.of.networks, classification.res ## / / / / / / / / / / / / / / / 
/ / / / / / / / / / / / / / / / / / / / / / ## Artifact network functions ---------------------------------------------- -## * Change count ---------------------------------------------------------- +## * Commit-based metrics -------------------------------------------------- #' Add the count of unique editors (i.e., authors) that worked on an artifact #' @@ -889,7 +929,8 @@ add.vertex.attribute.artifact.editor.count = function(list.of.networks, project. if (missing(editor.definition)) { editor.definition = "author" } else { - editor.definition = match.arg.or.default(editor.definition, choices = c("author", "committer"), several.ok = TRUE) + editor.definition = match.arg.or.default(editor.definition, choices = c("author", "committer"), + several.ok = TRUE) } editor.definition = paste0(editor.definition, ".name") @@ -944,8 +985,6 @@ add.vertex.attribute.artifact.change.count = function(list.of.networks, project. return(nets.with.attr) } -## * Activity -------------------------------------------------------------- - #' Add the first occurrence of the artifact #' #' @param list.of.networks The network list @@ -961,7 +1000,8 @@ add.vertex.attribute.artifact.change.count = function(list.of.networks, project. 
#' @return A list of networks with the added attribute add.vertex.attribute.artifact.first.occurrence = function(list.of.networks, project.data, name = "first.occurrence", aggregation.level = c("range", "cumulative", "all.ranges", - "project.cumulative", "project.all.ranges", + "project.cumulative", + "project.all.ranges", "complete"), default.value = NA) { aggregation.level = match.arg.or.default(aggregation.level, default = "complete") @@ -982,6 +1022,525 @@ add.vertex.attribute.artifact.first.occurrence = function(list.of.networks, proj return(nets.with.attr) } +#' Add the date of the last edit of the artifact +#' +#' @param list.of.networks The network list +#' @param project.data The project data +#' @param name The attribute name to add [default: "last.edited"] +#' @param aggregation.level Determines the data to use for the attribute calculation. +#' One of \code{"range"}, \code{"cumulative"}, \code{"all.ranges"}, +#' \code{"project.cumulative"}, \code{"project.all.ranges"}, and +#' \code{"complete"}. See \code{split.data.by.networks} for +#' more details. 
[default: "complete"] +#' @param default.value The default value to add if a vertex has no matching value [default: NA] +#' +#' @return A list of networks with the added attribute +add.vertex.attribute.artifact.last.edited = function(list.of.networks, project.data, name = "last.edited", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", + "project.all.ranges", + "complete"), + default.value = NA) { + aggregation.level = match.arg.or.default(aggregation.level, default = "complete") + + ## make sure that the default value contains a tzone attribute (even if the default value is NA) + default.value = get.date.from.string(default.value) + + nets.with.attr = split.and.add.vertex.attribute( + list.of.networks, project.data, name, aggregation.level, default.value, + function(range, range.data, net) { + artifact.to.dates = get.key.to.value.from.df(range.data$get.commits(), "artifact", "date") + artifact.to.last = lapply(artifact.to.dates, function(a) { + max(a[["date"]]) + }) + return(artifact.to.last) + } + ) + return(nets.with.attr) +} + +## * Mail thread metrics --------------------------------------------------- + +#' Add the number of contributors to each mail thread +#' +#' @param list.of.networks The network list +#' @param project.data The project data +#' @param name The attribute name to add [default: "thread.contributor.count"] +#' @param aggregation.level Determines the data to use for the attribute calculation. +#' One of \code{"range"}, \code{"cumulative"}, \code{"all.ranges"}, +#' \code{"project.cumulative"}, \code{"project.all.ranges"}, and +#' \code{"complete"}. See \code{split.data.by.networks} for +#' more details. 
[default: "complete"] +#' @param default.value The default value to add if a vertex has no matching value [default: NA] +#' +#' @return A list of networks with the added attribute +add.vertex.attribute.mail.thread.contributor.count = function(list.of.networks, project.data, + name = "thread.contributor.count", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", + "project.all.ranges", "complete"), + default.value = NA) { + aggregation.level = match.arg.or.default(aggregation.level, default = "complete") + + nets.with.attr = split.and.add.vertex.attribute( + list.of.networks, project.data, name, aggregation.level, default.value, + function(range, range.data, net) { + return(get.mail.thread.contributor.count(range.data)) + } + ) + return(nets.with.attr) +} + +#' Add the number of messages in each mail thread +#' +#' @param list.of.networks The network list +#' @param project.data The project data +#' @param name The attribute name to add [default: "thread.message.count"] +#' @param aggregation.level Determines the data to use for the attribute calculation. +#' One of \code{"range"}, \code{"cumulative"}, \code{"all.ranges"}, +#' \code{"project.cumulative"}, \code{"project.all.ranges"}, and +#' \code{"complete"}. See \code{split.data.by.networks} for +#' more details. 
[default: "complete"] +#' @param default.value The default value to add if a vertex has no matching value [default: NA] +#' +#' @return A list of networks with the added attribute +add.vertex.attribute.mail.thread.message.count = function(list.of.networks, project.data, name = "thread.message.count", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", + "project.all.ranges", + "complete"), + default.value = NA) { + aggregation.level = match.arg.or.default(aggregation.level, default = "complete") + + nets.with.attr = split.and.add.vertex.attribute( + list.of.networks, project.data, name, aggregation.level, default.value, + function(range, range.data, net) { + return(get.mail.thread.mail.count(range.data)) + } + ) + return(nets.with.attr) +} + +#' Add the date of the first message in each mail thread +#' +#' @param list.of.networks The network list +#' @param project.data The project data +#' @param name The attribute name to add [default: "thread.start.date"] +#' @param aggregation.level Determines the data to use for the attribute calculation. +#' One of \code{"range"}, \code{"cumulative"}, \code{"all.ranges"}, +#' \code{"project.cumulative"}, \code{"project.all.ranges"}, and +#' \code{"complete"}. See \code{split.data.by.networks} for +#' more details. 
[default: "complete"] +#' @param default.value The default value to add if a vertex has no matching value [default: NA] +#' +#' @return A list of networks with the added attribute +add.vertex.attribute.mail.thread.start.date = function(list.of.networks, project.data, name = "thread.start.date", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", + "project.all.ranges", + "complete"), + default.value = NA) { + aggregation.level = match.arg.or.default(aggregation.level, default = "complete") + + ## make sure that the default value contains a tzone attribute (even if the default value is NA) + default.value = get.date.from.string(default.value) + + nets.with.attr = split.and.add.vertex.attribute( + list.of.networks, project.data, name, aggregation.level, default.value, + function(range, range.data, net) { + return(get.mail.thread.start.date(range.data)) + } + ) + return(nets.with.attr) +} + +#' Add the date of the last message in each mail thread +#' +#' @param list.of.networks The network list +#' @param project.data The project data +#' @param name The attribute name to add [default: "thread.end.date"] +#' @param aggregation.level Determines the data to use for the attribute calculation. +#' One of \code{"range"}, \code{"cumulative"}, \code{"all.ranges"}, +#' \code{"project.cumulative"}, \code{"project.all.ranges"}, and +#' \code{"complete"}. See \code{split.data.by.networks} for +#' more details. 
[default: "complete"] +#' @param default.value The default value to add if a vertex has no matching value [default: NA] +#' +#' @return A list of networks with the added attribute +add.vertex.attribute.mail.thread.end.date = function(list.of.networks, project.data, name = "thread.end.date", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", "project.all.ranges", + "complete"), + default.value = NA) { + aggregation.level = match.arg.or.default(aggregation.level, default = "complete") + + ## make sure that the default value contains a tzone attribute (even if the default value is NA) + default.value = get.date.from.string(default.value) + + nets.with.attr = split.and.add.vertex.attribute( + list.of.networks, project.data, name, aggregation.level, default.value, + function(range, range.data, net) { + return(get.mail.thread.end.date(range.data)) + } + ) + return(nets.with.attr) +} + +#' Add the identifier of the mailing list where a mail thread originates. +#' See \code{get.mail.thread.originating.mailing.list} for more details. +#' +#' @param list.of.networks The network list +#' @param project.data The project data +#' @param name The attribute name to add [default: "thread.originating.mailing.list"] +#' @param aggregation.level Determines the data to use for the attribute calculation. +#' One of \code{"range"}, \code{"cumulative"}, \code{"all.ranges"}, +#' \code{"project.cumulative"}, \code{"project.all.ranges"}, and +#' \code{"complete"}. See \code{split.data.by.networks} for +#' more details. 
[default: "complete"] +#' @param default.value The default value to add if a vertex has no matching value [default: NA] +#' +#' @return A list of networks with the added attribute +add.vertex.attribute.mail.thread.originating.mailing.list = function(list.of.networks, project.data, + name = "thread.originating.mailing.list", + aggregation.level = c("range", "cumulative", + "all.ranges", + "project.cumulative", + "project.all.ranges", + "complete"), + default.value = NA) { + aggregation.level = match.arg.or.default(aggregation.level, default = "complete") + nets.with.attr = split.and.add.vertex.attribute( + list.of.networks, project.data, name, aggregation.level, default.value, + function(range, range.data, net) { + return(get.mail.thread.originating.mailing.list(range.data)) + } + ) + return(nets.with.attr) +} + +## * Issue metrics --------------------------------------------------------- + +#' Add the number of contributors to each issue or PR +#' +#' @param list.of.networks The network list +#' @param project.data The project data +#' @param name The attribute name to add [default: "issue.contributor.count"] +#' @param aggregation.level Determines the data to use for the attribute calculation. +#' One of \code{"range"}, \code{"cumulative"}, \code{"all.ranges"}, +#' \code{"project.cumulative"}, \code{"project.all.ranges"}, and +#' \code{"complete"}. See \code{split.data.by.networks} for +#' more details. [default: "complete"] +#' @param type which issue type to consider (see \code{preprocess.issue.data}). 
+#' One of \code{"issues"}, \code{"pull.requests"} or \code{"all"} +#' [default: "all"] +#' @param default.value The default value to add if a vertex has no matching value [default: NA] +#' @param use.unfiltered.data whether to use unfiltered issue data (see \code{preprocess.issue.data}) [default: FALSE] +#' +#' @return A list of networks with the added attribute +add.vertex.attribute.issue.contributor.count = function(list.of.networks, project.data, + name = "issue.contributor.count", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", + "project.all.ranges", "complete"), + type = c("all", "issues", "pull.requests"), + default.value = NA, use.unfiltered.data = FALSE) { + type = match.arg(type) + aggregation.level = match.arg.or.default(aggregation.level, default = "complete") + if (missing(name) && identical(type, "pull.requests")) { + name = "pr.contributor.count" + } + + nets.with.attr = split.and.add.vertex.attribute( + list.of.networks, project.data, name, aggregation.level, default.value, + function(range, range.data, net) { + return(get.issue.contributor.count(range.data, type = type, use.unfiltered.data = use.unfiltered.data)) + } + ) + return(nets.with.attr) +} + +#' Add the number of events for each issue or PR +#' +#' @param list.of.networks The network list +#' @param project.data The project data +#' @param name The attribute name to add [default: "issue.event.count"] +#' @param aggregation.level Determines the data to use for the attribute calculation. +#' One of \code{"range"}, \code{"cumulative"}, \code{"all.ranges"}, +#' \code{"project.cumulative"}, \code{"project.all.ranges"}, and +#' \code{"complete"}. See \code{split.data.by.networks} for +#' more details. [default: "complete"] +#' @param type which issue type to consider (see \code{preprocess.issue.data}). 
+#' One of \code{"issues"}, \code{"pull.requests"} or \code{"all"} +#' [default: "all"] +#' @param default.value The default value to add if a vertex has no matching value [default: NA] +#' @param use.unfiltered.data whether to use unfiltered issue data (see \code{preprocess.issue.data}) [default: FALSE] +#' +#' @return A list of networks with the added attribute +add.vertex.attribute.issue.event.count = function(list.of.networks, project.data, name = "issue.event.count", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", "project.all.ranges", + "complete"), + type = c("all", "issues", "pull.requests"), default.value = NA, + use.unfiltered.data = FALSE) { + type = match.arg(type) + aggregation.level = match.arg.or.default(aggregation.level, default = "complete") + if (missing(name) && identical(type, "pull.requests")) { + name = "pr.event.count" + } + + nets.with.attr = split.and.add.vertex.attribute( + list.of.networks, project.data, name, aggregation.level, default.value, + function(range, range.data, net) { + return(get.issue.event.count(range.data, type = type, use.unfiltered.data = use.unfiltered.data)) + } + ) + return(nets.with.attr) +} + +#' Add the number of comment events for each issue or PR +#' +#' @param list.of.networks The network list +#' @param project.data The project data +#' @param name The attribute name to add [default: "issue.comment.event.count"] +#' @param aggregation.level Determines the data to use for the attribute calculation. +#' One of \code{"range"}, \code{"cumulative"}, \code{"all.ranges"}, +#' \code{"project.cumulative"}, \code{"project.all.ranges"}, and +#' \code{"complete"}. See \code{split.data.by.networks} for +#' more details. [default: "complete"] +#' @param type which issue type to consider (see \code{preprocess.issue.data}). 
+#' One of \code{"issues"}, \code{"pull.requests"} or \code{"all"} +#' [default: "all"] +#' @param default.value The default value to add if a vertex has no matching value [default: NA] +#' +#' @return A list of networks with the added attribute +add.vertex.attribute.issue.comment.event.count = function(list.of.networks, project.data, + name = "issue.comment.event.count", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", + "project.all.ranges", + "complete"), + type = c("all", "issues", "pull.requests"), + default.value = NA) { + type = match.arg(type) + aggregation.level = match.arg.or.default(aggregation.level, default = "complete") + if (missing(name) && identical(type, "pull.requests")) { + name = "pr.comment.event.count" + } + + nets.with.attr = split.and.add.vertex.attribute( + list.of.networks, project.data, name, aggregation.level, default.value, + function(range, range.data, net) { + return(get.issue.comment.count(range.data, type = type)) + } + ) + return(nets.with.attr) +} + +#' Add the date each issue or PR was opened +#' +#' @param list.of.networks The network list +#' @param project.data The project data +#' @param name The attribute name to add [default: "issue.opened.date"] +#' @param aggregation.level Determines the data to use for the attribute calculation. +#' One of \code{"range"}, \code{"cumulative"}, \code{"all.ranges"}, +#' \code{"project.cumulative"}, \code{"project.all.ranges"}, and +#' \code{"complete"}. See \code{split.data.by.networks} for +#' more details. [default: "complete"] +#' @param type which issue type to consider (see \code{preprocess.issue.data}). 
+#' One of \code{"issues"}, \code{"pull.requests"} or \code{"all"} +#' [default: "all"] +#' @param default.value The default value to add if a vertex has no matching value [default: NA] +#' +#' @return A list of networks with the added attribute +add.vertex.attribute.issue.opened.date = function(list.of.networks, project.data, name = "issue.opened.date", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", "project.all.ranges", + "complete"), + type = c("all", "issues", "pull.requests"), default.value = NA) { + type = match.arg(type) + aggregation.level = match.arg.or.default(aggregation.level, default = "complete") + if (missing(name) && identical(type, "pull.requests")) { + name = "pr.opened.date" + } + + ## make sure that the default value contains a tzone attribute (even if the default value is NA) + default.value = get.date.from.string(default.value) + + nets.with.attr = split.and.add.vertex.attribute( + list.of.networks, project.data, name, aggregation.level, default.value, + function(range, range.data, net) { + return(get.issue.opened.date(range.data, type = type)) + } + ) + return(nets.with.attr) +} + +#' Add the date each issue or PR was closed, or NA if it was not yet closed. +#' +#' @param list.of.networks The network list +#' @param project.data The project data +#' @param name The attribute name to add [default: "issue.closed.date"] +#' @param aggregation.level Determines the data to use for the attribute calculation. +#' One of \code{"range"}, \code{"cumulative"}, \code{"all.ranges"}, +#' \code{"project.cumulative"}, \code{"project.all.ranges"}, and +#' \code{"complete"}. See \code{split.data.by.networks} for +#' more details. [default: "complete"] +#' @param type which issue type to consider (see \code{preprocess.issue.data}). 
+#' One of \code{"issues"}, \code{"pull.requests"} or \code{"all"} +#' [default: "all"] +#' @param default.value The default value to add if a vertex has no matching value [default: NA] +#' +#' @return A list of networks with the added attribute +add.vertex.attribute.issue.closed.date = function(list.of.networks, project.data, name = "issue.closed.date", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", "project.all.ranges", + "complete"), + type = c("all", "issues", "pull.requests"), default.value = NA) { + type = match.arg(type) + aggregation.level = match.arg.or.default(aggregation.level, default = "complete") + if (missing(name) && identical(type, "pull.requests")) { + name = "pr.closed.date" + } + + ## make sure that the default value contains a tzone attribute (even if the default value is NA) + default.value = get.date.from.string(default.value) + + nets.with.attr = split.and.add.vertex.attribute( + list.of.networks, project.data, name, aggregation.level, default.value, + function(range, range.data, net) { + return(get.issue.closed.date(range.data, type = type)) + } + ) + return(nets.with.attr) +} + +#' Add the date of the last activity in each issue or PR +#' +#' @param list.of.networks The network list +#' @param project.data The project data +#' @param name The attribute name to add [default: "issue.last.activity"] +#' @param aggregation.level Determines the data to use for the attribute calculation. +#' One of \code{"range"}, \code{"cumulative"}, \code{"all.ranges"}, +#' \code{"project.cumulative"}, \code{"project.all.ranges"}, and +#' \code{"complete"}. See \code{split.data.by.networks} for +#' more details. [default: "complete"] +#' @param type which issue type to consider (see \code{preprocess.issue.data}). 
+#' One of \code{"issues"}, \code{"pull.requests"} or \code{"all"} +#' [default: "all"] +#' @param default.value The default value to add if a vertex has no matching value [default: NA] +#' @param use.unfiltered.data whether to use unfiltered issue data (see \code{preprocess.issue.data}) [default: FALSE] +#' +#' @return A list of networks with the added attribute +add.vertex.attribute.issue.last.activity.date = function(list.of.networks, project.data, name = "issue.last.activity", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", + "project.all.ranges", "complete"), + type = c("all", "issues", "pull.requests"), + default.value = NA, use.unfiltered.data = FALSE) { + type = match.arg(type) + aggregation.level = match.arg.or.default(aggregation.level, default = "complete") + if (missing(name) && identical(type, "pull.requests")) { + name = "pr.last.activity" + } + + ## make sure that the default value contains a tzone attribute (even if the default value is NA) + default.value = get.date.from.string(default.value) + + nets.with.attr = split.and.add.vertex.attribute( + list.of.networks, project.data, name, aggregation.level, default.value, + function(range, range.data, net) { + return(get.issue.last.activity.date(range.data, type = type, use.unfiltered.data = use.unfiltered.data)) + } + ) + return(nets.with.attr) +} + +#' Add the title of each issue or PR +#' +#' @param list.of.networks The network list +#' @param project.data The project data +#' @param name The attribute name to add [default: "issue.title"] +#' @param aggregation.level Determines the data to use for the attribute calculation. +#' One of \code{"range"}, \code{"cumulative"}, \code{"all.ranges"}, +#' \code{"project.cumulative"}, \code{"project.all.ranges"}, and +#' \code{"complete"}. See \code{split.data.by.networks} for +#' more details. [default: "complete"] +#' @param type which issue type to consider (see \code{preprocess.issue.data}). 
+#' One of \code{"issues"}, \code{"pull.requests"} or \code{"all"} +#' [default: "all"] +#' @param default.value The default value to add if a vertex has no matching value [default: NA] +#' +#' @return A list of networks with the added attribute +add.vertex.attribute.issue.title = function(list.of.networks, project.data, name = "issue.title", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", "project.all.ranges", + "complete"), + type = c("all", "issues", "pull.requests"), default.value = NA) { + type = match.arg(type) + aggregation.level = match.arg.or.default(aggregation.level, default = "complete") + if (missing(name) && identical(type, "pull.requests")) { + name = "pr.title" + } + + nets.with.attr = split.and.add.vertex.attribute( + list.of.networks, project.data, name, aggregation.level, default.value, + function(range, range.data, net) { + return(get.issue.title(range.data, type = type)) + } + ) + return(nets.with.attr) +} + +#' Add whether each PR is open, has been merged, or was closed without merging. 
+#' +#' @param list.of.networks The network list +#' @param project.data The project data +#' @param name The attribute name to add [default: "pull.request.state"] +#' @param default.value The default value to add if a vertex has no matching value [default: NA] +#' +#' @return A list of networks with the added attribute +add.vertex.attribute.pr.open.merged.or.closed = function(list.of.networks, project.data, name = "pull.request.state", + default.value = NA) { + + nets.with.attr = split.and.add.vertex.attribute( + list.of.networks, project.data, name, aggregation.level = "complete", default.value, + function(range, range.data, net) { + return(get.pr.open.merged.or.closed(range.data)) + } + ) + return(nets.with.attr) +} + +#' Add whether an issue is a pull request +#' +#' @param list.of.networks The network list +#' @param project.data The project data +#' @param name The attribute name to add [default: "issue.is.pull.request"] +#' @param aggregation.level Determines the data to use for the attribute calculation. +#' One of \code{"range"}, \code{"cumulative"}, \code{"all.ranges"}, +#' \code{"project.cumulative"}, \code{"project.all.ranges"}, and +#' \code{"complete"}. See \code{split.data.by.networks} for +#' more details. 
[default: "complete"] +#' @param default.value The default value to add if a vertex has no matching value [default: NA] +#' +#' @return A list of networks with the added attribute +add.vertex.attribute.issue.is.pull.request = function(list.of.networks, project.data, name = "issue.is.pull.request", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", + "project.all.ranges", "complete"), + default.value = NA) { + aggregation.level = match.arg.or.default(aggregation.level, default = "complete") + + nets.with.attr = split.and.add.vertex.attribute( + list.of.networks, project.data, name, aggregation.level, default.value, + function(range, range.data, net) { + return(get.issue.is.pull.request(range.data)) + } + ) + return(nets.with.attr) +} + + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Helper ------------------------------------------------------------------ @@ -989,7 +1548,8 @@ add.vertex.attribute.artifact.first.occurrence = function(list.of.networks, proj #' #' @param activity.types The activity types to compute information for. [default: c("mails", "commits", "issues")] #' @param range.data The data to base the computation on. -#' @param default.value The default value to add if no information is available per author and activity type. [default: NA] +#' @param default.value The default value to add if no information is available per author and activity type. +#' [default: NA] #' #' @return A list containing per author a list of first activity values named with the corresponding activity type. #' Empty list if there are no activities in \code{range.data} at all or none corresponding to the configured @@ -1106,11 +1666,13 @@ get.first.activity.data = function(range.data, activity.types = c("commits", "ma #' #' @param activity.types The activity types to compute information for. 
[default: c("mails", "commits", "issues")] #' @param net.to.range.list The data to base the computation on, split by networks. -#' @param default.value The default value to add if no information is available per author and activity type. [default: list()] +#' @param default.value The default value to add if no information is available per author and activity type. +#' [default: list()] #' #' @return A list with elements representing the authors, each containing a list of elements representing the activity #' types, each containing a list of active ranges. -get.active.ranges.data = function(activity.types = c("mails", "commits", "issues"), net.to.range.list, default.value = list()) { +get.active.ranges.data = function(activity.types = c("mails", "commits", "issues"), net.to.range.list, + default.value = list()) { ## a list with elements representing the parsed activity types, each containing a list of elements ## representing the ranges the data was split by, each containing a list of authors who were active @@ -1151,8 +1713,8 @@ get.active.ranges.data = function(activity.types = c("mails", "commits", "issues #' This function takes a nested list and switches the order of the nesting levels: the innermost level is moved to the -#' outside. This is done by reproducing the given list structure for every element occuring in one of the innermost lists -#' and then deleting every sublist in which the element does not occur. For example, on input +#' outside. This is done by reproducing the given list structure for every element occurring in one of the innermost +#' lists and then deleting every sublist in which the element does not occur. For example, on input #' #' type.range.author = list( #' "type1" = list( @@ -1186,7 +1748,8 @@ get.active.ranges.data = function(activity.types = c("mails", "commits", "issues #' ) #' #' @param nested.list A list nested AT LEAST ONCE, that means: the elements of the outermost list are also lists. 
The -#' nesting depth of all inner lists must be the same and the lists must be named at every nesting level. +#' nesting depth of all inner lists must be the same and the lists must be named at every nesting +#' level. #' #' @return The nested list with the innermost level as new outermost level. transpose.nested.list.by.innermost.values = function(nested.list) { @@ -1200,7 +1763,8 @@ transpose.nested.list.by.innermost.values = function(nested.list) { if (length(structure) == 0) { return(list()) - ## Base case 2: if the structure isn't nested itself, it is only returned, if it contains the given innerst.element. + ## Base case 2: if the structure isn't nested itself, it is only returned, if it contains the given + ## innerst.element. ## Otherwise, NA is returned. } else if (!is.list(structure[[1]])) { if (innerst.element %in% structure) {