diff --git a/packages/analysis-engine/src/constant.ts b/packages/analysis-engine/src/constant.ts new file mode 100644 index 00000000..07a83dad --- /dev/null +++ b/packages/analysis-engine/src/constant.ts @@ -0,0 +1,2 @@ +export const COMMIT_SEPARATOR = "4itc2s8hH-oA64s08h19"; +export const GIT_LOG_SEPARATOR = "I9M-0XOzvHlYPegVPpzb"; diff --git a/packages/analysis-engine/src/index.ts b/packages/analysis-engine/src/index.ts index f6de204b..1549803d 100644 --- a/packages/analysis-engine/src/index.ts +++ b/packages/analysis-engine/src/index.ts @@ -92,3 +92,4 @@ export class AnalysisEngine { } export default AnalysisEngine; +export { COMMIT_SEPARATOR, GIT_LOG_SEPARATOR } from "./constant"; diff --git a/packages/analysis-engine/src/parser.spec.ts b/packages/analysis-engine/src/parser.spec.ts index 50feb085..0ae714e2 100644 --- a/packages/analysis-engine/src/parser.spec.ts +++ b/packages/analysis-engine/src/parser.spec.ts @@ -1,5 +1,6 @@ import { getCommitMessageType } from "./commit.util"; -import getCommitRaws from "./parser"; +import { COMMIT_SEPARATOR, GIT_LOG_SEPARATOR } from "./constant"; +import getCommitRaws from "./parser"; import type { CommitRaw, DifferenceStatistic } from "./types"; describe("commit message type", () => { @@ -34,113 +35,114 @@ describe("commit message type", () => { }); }); -describe('getCommitRaws', () => { - const testCommitLines = [ - "commit a b (HEAD)", - "commit a b (HEAD -> main, origin/main, origin/HEAD)", - "commit a b (HEAD, tag: v1.0.0)", - "commit a b (HEAD -> main, origin/main, origin/HEAD, tag: v2.0.0)", - "commit a b (HEAD, tag: v2.0.0, tag: v1.4)" +describe("getCommitRaws", () => { + const testAuthorCommitter = [ + "John Park", + "mail@gmail.com", + "Sun Sep 4 20:17:59 2022 +0900", + "John Park 2", + "mail2@gmail.com", + "Sun Sep 5 20:17:59 2022 +0900", ]; - const expectedBranches = [ - ['HEAD'], - ['HEAD', 'main', 'origin/main', 'origin/HEAD'], - ['HEAD'], - ['HEAD', 'main', 'origin/main', 'origin/HEAD'], - ['HEAD'] + const testRefs = [ + "HEAD", + "HEAD -> main, origin/main, origin/HEAD", + "HEAD, tag: v1.0.0", + "HEAD -> main, origin/main, origin/HEAD, tag: v2.0.0", + "HEAD, tag: v2.0.0, tag: v1.4", ]; - const expectedTags = [ - [], - [], - ['v1.0.0'], - ['v2.0.0'], - ['v2.0.0', 'v1.4'] + const testCommitHash = ["a", "b"]; + + const testCommitMessage = "commit message"; + + const testCommitLines = testRefs.map((ref) => + [...testCommitHash, ref, ...testAuthorCommitter, testCommitMessage].join(GIT_LOG_SEPARATOR) + ); + + const expectedBranches = [ + ["HEAD"], + ["HEAD", "main", "origin/main", "origin/HEAD"], + ["HEAD"], + ["HEAD", "main", "origin/main", "origin/HEAD"], + ["HEAD"], ]; + const expectedTags = [[], [], ["v1.0.0"], ["v2.0.0"], ["v2.0.0", "v1.4"]]; + const testCommitFileChanges = [ "10\t0\ta.ts\n1\t0\tREADME.md", "3\t3\ta.ts", "4\t0\ta.ts", - "0\t6\ta.ts\n2\t0\tb.ts\n3\t3\tc.ts" + "0\t6\ta.ts\n2\t0\tb.ts\n3\t3\tc.ts", ]; - const expectedFileChanged:DifferenceStatistic[] = [ + const expectedFileChanged: DifferenceStatistic[] = [ { totalInsertionCount: 11, totalDeletionCount: 0, fileDictionary: { - 'a.ts': { insertionCount: 10, deletionCount: 0 }, - 'README.md': { insertionCount: 1, deletionCount: 0 }, - } + "a.ts": { insertionCount: 10, deletionCount: 0 }, + "README.md": { insertionCount: 1, deletionCount: 0 }, + }, }, { totalInsertionCount: 3, totalDeletionCount: 3, - fileDictionary: { 'a.ts': { insertionCount: 3, deletionCount: 3 } } + fileDictionary: { "a.ts": { insertionCount: 3, deletionCount: 3 } }, }, { totalInsertionCount: 4, totalDeletionCount: 0, - fileDictionary: { 'a.ts': { insertionCount: 4, deletionCount: 0 } } + fileDictionary: { "a.ts": { insertionCount: 4, deletionCount: 0 } }, }, { totalInsertionCount: 5, totalDeletionCount: 9, fileDictionary: { - 'a.ts': { insertionCount: 0, deletionCount: 6 }, - 'b.ts': { insertionCount: 2, deletionCount: 0 }, - 'c.ts': { insertionCount: 3, deletionCount: 3 }, - } - } + "a.ts": { insertionCount: 0, deletionCount: 6 }, + "b.ts": { insertionCount: 2, deletionCount: 0 }, + "c.ts": { insertionCount: 3, deletionCount: 3 }, + }, + }, ]; - const commonExpectatedResult: CommitRaw={ + const commonExpectatedResult: CommitRaw = { sequence: 0, - id: 'a', - parents: ['b'], - branches: ['HEAD'], + id: "a", + parents: ["b"], + branches: ["HEAD"], tags: [], - author: { name: 'John Park', email: 'mail@gmail.com' }, - authorDate: new Date('Sun Sep 4 20:17:59 2022 +0900'), - committer: { name: 'John Park', email: 'mail@gmail.com' }, - committerDate: new Date('Sun Sep 4 20:17:59 2022 +0900'), - message: 'commit message', + author: { name: testAuthorCommitter[0], email: testAuthorCommitter[1] }, + authorDate: new Date(testAuthorCommitter[2]), + committer: { name: testAuthorCommitter[3], email: testAuthorCommitter[4] }, + committerDate: new Date(testAuthorCommitter[5]), + message: testCommitMessage, differenceStatistic: { totalInsertionCount: 0, totalDeletionCount: 0, fileDictionary: {}, }, - commitMessageType: "" + commitMessageType: "", }; testCommitLines.forEach((mockLog, index) => { it(`should parse gitlog to commitRaw(branch, tag)`, () => { - const mock = `${mockLog} -Author: John Park -AuthorDate: Sun Sep 4 20:17:59 2022 +0900 -Commit: John Park -CommitDate: Sun Sep 4 20:17:59 2022 +0900 -\n\tcommit message -`; - const result = getCommitRaws(mock); - const expectedResult = { ...commonExpectatedResult, branches: expectedBranches[index], tags: expectedTags[index] }; - + const result = getCommitRaws(COMMIT_SEPARATOR + mockLog); + const expectedResult = { + ...commonExpectatedResult, + branches: expectedBranches[index], + tags: expectedTags[index], + }; + expect(result).toEqual([expectedResult]); }); }); testCommitFileChanges.forEach((mockLog, index) => { it(`should parse gitlog to commitRaw(file changed)`, () => { - const mock = `commit a b (HEAD) -Author: John Park -AuthorDate: Sun Sep 4 20:17:59 2022 +0900 -Commit: John Park -CommitDate: Sun Sep 4 20:17:59 2022 +0900 -\n\tcommit message -\n${mockLog} -`; + const mock = `${COMMIT_SEPARATOR}${testCommitLines[0]}\n${mockLog}`; const result = getCommitRaws(mock); const expectedResult = { ...commonExpectatedResult, differenceStatistic: expectedFileChanged[index] }; diff --git a/packages/analysis-engine/src/parser.ts b/packages/analysis-engine/src/parser.ts index d2f3ef1f..243c7127 100644 --- a/packages/analysis-engine/src/parser.ts +++ b/packages/analysis-engine/src/parser.ts @@ -1,121 +1,74 @@ import { getCommitMessageType } from "./commit.util"; -import type { CommitMessageType, CommitRaw, DifferenceStatistic, GitUser } from "./types"; - -function getNameAndEmail(category: GitUser[], preParsedInfo: string) { - category.push({ - name: preParsedInfo.split(": ")[1].split("<")[0].trim(), - email: preParsedInfo.split(": ")[1].split("<")[1].split(">")[0].trim(), - }); -} +import { COMMIT_SEPARATOR, GIT_LOG_SEPARATOR } from "./constant"; +import type { CommitRaw } from "./types"; export default function getCommitRaws(log: string) { if (!log) return []; + const EOL_REGEX = /\r?\n/; - // line 별로 분리하기 - const splitByNewLine = log.split(/\r?\n/); - - // 분리한 것들을 쭉 돌면서 각 카테고리별로 담을 예정 - type Refs = string[]; - - const ids: string[] = []; - const parentsMatrix: string[][] = []; - const branchesMatrix: Refs[] = []; - const tagsMatrix: Refs[] = []; - const authors: GitUser[] = []; - const authorDates: Date[] = []; - const committers: GitUser[] = []; - const commitDates: Date[] = []; - const messages: string[] = []; - const commitTypes: CommitMessageType[] = []; - const differenceStatistics: DifferenceStatistic[] = []; - - // commit별 fileChanged를 분리시키기 위한 임시 index - let commitIdx = -1; + // step 0: Split log into commits + const commits = log.split(COMMIT_SEPARATOR); + const commitRaws: CommitRaw[] = []; + // skip the first empty element + for (let commitIdx = 1; commitIdx < commits.length; commitIdx += 1) { + const commitLines = commits[commitIdx].split(EOL_REGEX); - if (splitByNewLine) { - splitByNewLine.forEach((str, idx) => { - if (str.startsWith("commit")) { - commitIdx += 1; - tagsMatrix.push([]); - branchesMatrix.push([]); - differenceStatistics.push({ - totalInsertionCount: 0, - totalDeletionCount: 0, - fileDictionary: {}, - }); - const splitedCommitLine = str.split("("); - const commitInfos = splitedCommitLine[0] - .replace("commit ", "") - .split(" ") - .filter((e) => e); - ids.push(commitInfos[0]); - commitInfos.splice(0, 1); - parentsMatrix.push(commitInfos); - const branchAndTagInfos = splitedCommitLine[1]?.replace(")", "").replace(" -> ", ", ").split(", "); - if (branchAndTagInfos) { - branchAndTagInfos.forEach((branchAndTagInfo) => { - if (branchAndTagInfo.startsWith("tag:")) - return tagsMatrix[commitIdx].push(branchAndTagInfo.replace("tag: ", "")); - return branchesMatrix[commitIdx].push(branchAndTagInfo); - }); - } - return false; - } - if (str.startsWith("Author:")) return getNameAndEmail(authors, str); - if (str.startsWith("AuthorDate")) return authorDates.push(new Date(str.split(": ")[1].trim())); - if (str.startsWith("Commit:")) return getNameAndEmail(committers, str); - if (str.startsWith("CommitDate")) { - let indexCheckFileChanged = idx + 2; - let eachCommitMessage = ""; - while (splitByNewLine[indexCheckFileChanged] !== "") { - if (eachCommitMessage !== "") { - eachCommitMessage += "\n"; - } - eachCommitMessage += splitByNewLine[indexCheckFileChanged].trim(); - indexCheckFileChanged += 1; + // step 1: Extract commitData from the first line of the commit + const commitData = commitLines[0].split(GIT_LOG_SEPARATOR); + // Extract branch and tag data from commitData[2] + const refs = commitData[2].replace(" -> ", ", ").split(", "); + const [branches, tags]: string[][] = refs.reduce( + ([branches, tags], ref) => { + if (ref.startsWith("tag: ")) { + tags.push(ref.replace("tag: ", "")); + } else { + branches.push(ref); } - commitDates.push(new Date(str.split(": ")[1].trim())); - messages.push(eachCommitMessage); - commitTypes.push(getCommitMessageType(eachCommitMessage)); - } - if (/^\d/.test(str) || /^-/.test(str)) { - const [addition, deletion, path] = str - .split(" ") - .filter((e) => e)[0] - .split("\t"); + return [branches, tags]; + }, + [new Array(), new Array()] + ); - const numberedAddition = addition === "-" ? 0 : Number(addition); - const numberedDeletion = deletion === "-" ? 0 : Number(deletion); - differenceStatistics[commitIdx].totalInsertionCount += numberedAddition; - differenceStatistics[commitIdx].totalDeletionCount += numberedDeletion; - differenceStatistics[commitIdx].fileDictionary[path] = { - insertionCount: numberedAddition, - deletionCount: numberedDeletion, - }; - } - return false; - }); - } - - // 각 카테고리로 담은 다음 다시 JSON으로 변환하기 위함 - const commitRaws: CommitRaw[] = []; + // make base commitRaw object + const commitRaw: CommitRaw = { + sequence: commitIdx - 1, + id: commitData[0], + parents: commitData[1].split(" "), + branches, // commitData[2] is already split into branches and tags + tags, + author: { + name: commitData[3], + email: commitData[4], + }, + authorDate: new Date(commitData[5]), + committer: { + name: commitData[6], + email: commitData[7], + }, + committerDate: new Date(commitData[8]), + message: commitData[9], + commitMessageType: getCommitMessageType(commitData[9]), + differenceStatistic: { + totalInsertionCount: 0, + totalDeletionCount: 0, + fileDictionary: {}, + }, + }; - // 카테고리 별로 담은 것을 JSON화 시키기 - for (let i = 0; i < ids.length; i += 1) { - commitRaws.push({ - sequence: i, - id: ids[i], - parents: parentsMatrix[i], - branches: branchesMatrix[i], - tags: tagsMatrix[i], - author: authors[i], - authorDate: authorDates[i], - committer: committers[i], - committerDate: commitDates[i], - message: messages[i], - commitMessageType: commitTypes[i], - differenceStatistic: differenceStatistics[i], - }); + // step 2: Extract diffStats from the rest of the commit + for (let diffIdx = 1; diffIdx < commitLines.length; diffIdx += 1) { + if (commitLines[diffIdx] === "") continue; + const [insertions, deletions, path] = commitLines[diffIdx].split("\t"); + const numberedInsertions = insertions === "-" ? 0 : Number(insertions); + const numberedDeletions = deletions === "-" ? 0 : Number(deletions); + commitRaw.differenceStatistic.totalInsertionCount += numberedInsertions; + commitRaw.differenceStatistic.totalDeletionCount += numberedDeletions; + commitRaw.differenceStatistic.fileDictionary[path] = { + insertionCount: numberedInsertions, + deletionCount: numberedDeletions, + }; + } + commitRaws.push(commitRaw); } return commitRaws; diff --git a/packages/vscode/src/utils/git.util.ts b/packages/vscode/src/utils/git.util.ts index 25a4ab22..6a1a2f21 100644 --- a/packages/vscode/src/utils/git.util.ts +++ b/packages/vscode/src/utils/git.util.ts @@ -1,3 +1,4 @@ +import { COMMIT_SEPARATOR, GIT_LOG_SEPARATOR } from "@githru-vscode-ext/analysis-engine"; import * as cp from "child_process"; import * as fs from "fs"; import * as path from "path"; @@ -154,6 +155,20 @@ export async function getGitExecutableFromPaths(paths: string[]): Promise { return new Promise((resolve, reject) => { + const gitLogFormat = + COMMIT_SEPARATOR + + [ + "%H", // commit hash (id) + "%P", // parent hashes + "%D", // ref names (branches, tags) + "%an", // author name + "%ae", // author email + "%ad", // author date + "%cn", + "%ce", + "%cd", // committer name, committer email and committer date + "%s", // subject (commit message) + ].join(GIT_LOG_SEPARATOR); const args = [ "--no-pager", "log", @@ -161,7 +176,7 @@ export async function getGitLog(gitPath: string, currentWorkspacePath: string): "--parents", "--numstat", "--date-order", - "--pretty=fuller", + `--pretty=format:${gitLogFormat}`, "--decorate", "-c", ];