/**
 * Exposes the recent history of [repository] as [CommitInfo] objects.
 *
 * Each call to [iterator] starts a fresh walk from `HEAD` that is limited to the commits whose commit time
 * lies between the `HEAD` commit time minus [dayGap] days and the `HEAD` commit time itself.
 */
class CommitsProvider(
    private val repository: Repository,
    private val dayGap: Long = BusFactorConstants.DAYS_GAP,
) : Iterable<CommitInfo> {

    override fun iterator(): Iterator<CommitInfo> = RepoIterator(repository, dayGap)

    /**
     * Iterator over the commits of [repository] that converts each JGit commit into a [CommitInfo].
     *
     * The underlying JGit resources are released automatically once [hasNext] reports `false`.
     * A caller that stops iterating earlier should call [close] itself.
     */
    class RepoIterator(
        private val repository: Repository,
        private val dayGap: Long,
    ) : Iterator<CommitInfo>, AutoCloseable {

        private val revWalk = RevWalk(repository)
        private val reader: ObjectReader = repository.newObjectReader()
        private val iterator: Iterator<RevCommit>

        // Guards close() so repeated hasNext() calls after exhaustion do not release resources twice.
        private var closed = false

        init {
            iterator = try {
                val lastCommit = revWalk.parseCommit(repository.resolve(Constants.HEAD))
                revWalk.markStart(lastCommit)
                revWalk.revFilter = CommitTimeRevFilter.between(afterDate(lastCommit), lastCommit.commitDate())
                revWalk.iterator()
            } catch (e: Throwable) {
                // Construction failed (e.g. HEAD could not be resolved): release what was opened, then rethrow.
                close()
                throw e
            }
        }

        /** Returns whether another commit is available; closes the iterator when the walk is exhausted. */
        override fun hasNext(): Boolean {
            val hasNext = iterator.hasNext()
            if (!hasNext) {
                close()
            }
            return hasNext
        }

        /** Returns the next commit of the walk converted to a [CommitInfo]. */
        override fun next(): CommitInfo = convertJgitCommit(iterator.next())

        /**
         * Lists the paths changed by [commit] relative to its first parent, or relative to the empty tree
         * when the commit has no parents. Only the change entries are produced; no patch text is generated.
         */
        private fun getDiffsWithoutText(commit: RevCommit): List<org.eclipse.jgit.diff.DiffEntry> {
            val oldTreeIter = if (commit.parents.isNotEmpty()) {
                CanonicalTreeParser().apply { reset(reader, commit.parents[0].tree) }
            } else {
                EmptyTreeIterator()
            }
            val newTreeIter = CanonicalTreeParser().apply { reset(reader, commit.tree) }

            // The formatter is closed as soon as the scan result has been returned.
            return newDiffFormatter().use { formatter -> formatter.scan(oldTreeIter, newTreeIter) }
        }

        /** Copies the identity, timestamps, changed paths, parent count, message and hash of [commit]. */
        private fun convertJgitCommit(commit: RevCommit): CommitInfo {
            val author = commit.authorIdent
            val committer = commit.committerIdent

            val diffEntries = getDiffsWithoutText(commit).map { entry ->
                DiffEntry(
                    entry.oldPath,
                    entry.newPath,
                    jgitToLibChangeType(entry.changeType),
                )
            }

            return CommitInfo(
                UserInfo(author.name, author.emailAddress),
                UserInfo(committer.name, committer.emailAddress),
                author.`when`.time,
                committer.`when`.time,
                diffEntries,
                commit.parents.size,
                commit.fullMessage,
                commit.name,
            )
        }

        /** Creates a rename-detecting formatter bound to [repository] that discards any textual output. */
        private fun newDiffFormatter(): DiffFormatter =
            DiffFormatter(NullOutputStream.INSTANCE).apply {
                setRepository(repository)
                isDetectRenames = true
            }

        /** Commit time of the receiver; `commitTime` is in seconds, hence the conversion to milliseconds. */
        private fun RevCommit.commitDate() = Date(this.commitTime * 1000L)

        /** Lower bound of the walk: the commit time of [lastCommit] minus [dayGap] days. */
        private fun afterDate(lastCommit: RevCommit) =
            Date.from(lastCommit.commitDate().toInstant().minus(Duration.ofDays(dayGap)))

        /** Releases the revision walk and the object reader; calling it more than once has no further effect. */
        override fun close() {
            if (closed) return
            closed = true
            try {
                revWalk.close()
            } finally {
                reader.close()
            }
        }

        companion object {
            /** Maps a JGit change type onto the equivalent change type of the bus factor library. */
            fun jgitToLibChangeType(changeType: ChangeType): DiffEntry.ChangeType = when (changeType) {
                ChangeType.ADD -> DiffEntry.ChangeType.ADD
                ChangeType.RENAME -> DiffEntry.ChangeType.RENAME
                ChangeType.MODIFY -> DiffEntry.ChangeType.MODIFY
                ChangeType.COPY -> DiffEntry.ChangeType.COPY
                ChangeType.DELETE -> DiffEntry.ChangeType.DELETE
            }
        }
    }
}
/**
 * Exposes the files of the `HEAD` commit of [repository] as [FileInfo] objects (path plus size in bytes).
 */
class FileInfoProvider(private val repository: Repository) : Iterable<FileInfo> {

    override fun iterator(): Iterator<FileInfo> = FilePathToSizeRepoIterator(repository)

    /**
     * Walks the tree of the `HEAD` commit and yields one [FileInfo] per non-directory entry.
     *
     * The next element is always computed one step ahead and kept in a buffer. The underlying JGit
     * resources are released automatically once [hasNext] reports `false`; a caller that stops iterating
     * earlier should call [close] itself.
     */
    class FilePathToSizeRepoIterator(repository: Repository) : Iterator<FileInfo>, AutoCloseable {
        private val treeWalk = TreeWalk(repository)
        private val reader = repository.newObjectReader()

        // Guards close() so repeated hasNext() calls after exhaustion do not release resources twice.
        private var closed = false

        // Look-ahead buffer: the element the next call to next() returns, or null when the walk is finished.
        private var value: FileInfo? = null

        init {
            try {
                val lastCommit = RevWalk(repository).use { walk ->
                    walk.parseCommit(repository.resolve(Constants.HEAD))
                }
                treeWalk.addTree(lastCommit.tree)
                // Directories are entered explicitly in lookForValue().
                treeWalk.isRecursive = false

                value = lookForValue()
            } catch (e: Throwable) {
                // Construction failed (e.g. HEAD could not be resolved): release what was opened, then rethrow.
                close()
                throw e
            }
        }

        /** Returns whether another file is available; closes the iterator when the walk is exhausted. */
        override fun hasNext(): Boolean {
            val hasNext = value != null
            if (!hasNext) {
                close()
            }
            return hasNext
        }

        /**
         * Returns the buffered file and advances the walk to the following one.
         *
         * @throws NoSuchElementException when the walk has no more files.
         */
        override fun next(): FileInfo {
            val current = value ?: throw NoSuchElementException("No value found")
            value = lookForValue()
            return current
        }

        /**
         * Advances the tree walk to the next non-directory entry and returns it, or `null` at the end.
         *
         * Directories are entered rather than reported. An entry whose object is missing from the
         * repository is logged and reported with a size of 0.
         */
        private fun lookForValue(): FileInfo? {
            while (treeWalk.next()) {
                val filePath = treeWalk.pathString
                var bytes = 0L
                try {
                    // Decide on directories first, so no blob size is requested for a tree object.
                    if (treeWalk.isSubtree) {
                        treeWalk.enterSubtree()
                        continue
                    }
                    bytes = reader.getObjectSize(treeWalk.getObjectId(0), Constants.OBJ_BLOB)
                } catch (e: MissingObjectException) {
                    BusFactor.log.warn("Missing blob : $filePath : ${e.message} ")
                }
                return FileInfo(filePath, bytes)
            }

            return null
        }

        /** Releases the tree walk and the object reader; calling it more than once has no further effect. */
        override fun close() {
            if (closed) return
            closed = true
            try {
                treeWalk.close()
            } finally {
                reader.close()
            }
        }
    }
}
/**
 * Collects the distinct users that appear in the commits provided by [CommitsProvider] for [repository].
 *
 * For every commit this takes the author, the committer and the users that
 * [CommitProcessor.getCoAuthorsFromMSG] returns for the full commit message.
 */
private fun getUsers(repository: Repository): Set<UserInfo> {
    val users = mutableSetOf<UserInfo>()
    for (commit in CommitsProvider(repository)) {
        users.add(commit.authorUserInfo)
        users.add(commit.committerUserInfo)
        CommitProcessor.getCoAuthorsFromMSG(commit.fullMessage).forEach { coAuthor ->
            users.add(coAuthor)
        }
    }
    return users
}