Skip to content

Commit

Permalink
Merge pull request #770 from yoouyeon/main
Browse files Browse the repository at this point in the history
[engine] git log 파싱에 사용되는 separator 개선
  • Loading branch information
yoouyeon authored Oct 22, 2024
2 parents aacf687 + 23cb127 commit 59d163c
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 79 deletions.
2 changes: 0 additions & 2 deletions packages/analysis-engine/src/constant.ts

This file was deleted.

3 changes: 1 addition & 2 deletions packages/analysis-engine/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ export class AnalysisEngine {
if (this.isDebugMode) console.log("stemDict: ", stemDict);
const csmDict = buildCSMDict(commitDict, stemDict, this.baseBranchName, pullRequests);
if (this.isDebugMode) console.log("csmDict: ", csmDict);
const nodes = stemDict.get(this.baseBranchName)?.nodes?.map(({commit}) => commit);
const nodes = stemDict.get(this.baseBranchName)?.nodes?.map(({ commit }) => commit);
const geminiCommitSummary = await getSummary(nodes ? nodes?.slice(-10) : []);
if (this.isDebugMode) console.log("GeminiCommitSummary: ", geminiCommitSummary);

Expand All @@ -92,4 +92,3 @@ export class AnalysisEngine {
}

export default AnalysisEngine;
export { COMMIT_SEPARATOR, GIT_LOG_SEPARATOR } from "./constant";
61 changes: 32 additions & 29 deletions packages/analysis-engine/src/parser.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import { getCommitMessageType } from "./commit.util";
import { COMMIT_SEPARATOR, GIT_LOG_SEPARATOR } from "./constant";
import getCommitRaws from "./parser";
import type { CommitRaw, DifferenceStatistic } from "./types";

Expand Down Expand Up @@ -36,11 +35,15 @@ describe("commit message type", () => {
});

describe("getCommitRaws", () => {
const fakeAuthorAndCommitter = `${GIT_LOG_SEPARATOR}John Park${GIT_LOG_SEPARATOR}mail@gmail.com${GIT_LOG_SEPARATOR}Sun Sep 4 20:17:59 2022 +0900${GIT_LOG_SEPARATOR}John Park 2${GIT_LOG_SEPARATOR}mail2@gmail.com${GIT_LOG_SEPARATOR}Sun Sep 5 20:17:59 2022 +0900`;
const fakeCommitMessage = `${GIT_LOG_SEPARATOR}commit message${GIT_LOG_SEPARATOR}`;
const fakeCommitMessageAndBody = `${GIT_LOG_SEPARATOR}commit message title\n\ncommit message body${GIT_LOG_SEPARATOR}`;
const fakeCommitHash = `a${GIT_LOG_SEPARATOR}b`;
const fakeCommitRef = `${GIT_LOG_SEPARATOR}HEAD`;
const FRONT_NEW_LINE = "\n\n";
const INDENTATION = " ";

const fakeAuthor = "John Park\nmail@gmail.com\nSun Sep 4 20:17:59 2022 +0900";
const fakeCommitter = `John Park 2\nmail2@gmail.com\nSun Sep 5 20:17:59 2022 +0900`;
const fakeCommitMessage = `commit message\n${INDENTATION}`;
const fakeCommitMessageAndBody = `commit message title\n${INDENTATION}\n${INDENTATION}commit message body`;
const fakeCommitHash = "a\nb";
const fakeCommitRef = "HEAD";
const fakeCommitFileChange = "10\t0\ta.ts\n1\t0\tREADME.md";

const commonExpectatedResult: CommitRaw = {
Expand Down Expand Up @@ -73,23 +76,23 @@ describe("getCommitRaws", () => {

it.each([
[
`${COMMIT_SEPARATOR}${`a${GIT_LOG_SEPARATOR}`}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}`,
`${FRONT_NEW_LINE}${"a\n"}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`,
{
...commonExpectatedResult,
id: "a",
parents: [""],
parents: [],
},
],
[
`${COMMIT_SEPARATOR}${`c${GIT_LOG_SEPARATOR}b`}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}`,
`${FRONT_NEW_LINE}${"c\nd"}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`,
{
...commonExpectatedResult,
id: "c",
parents: ["b"],
parents: ["d"],
},
],
[
`${COMMIT_SEPARATOR}${`d${GIT_LOG_SEPARATOR}e f`}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}`,
`${FRONT_NEW_LINE}${"d\ne f"}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`,
{
...commonExpectatedResult,
id: "d",
Expand All @@ -103,47 +106,47 @@ describe("getCommitRaws", () => {

it.each([
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${`${GIT_LOG_SEPARATOR}HEAD`}${fakeAuthorAndCommitter}${fakeCommitMessage}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${"HEAD"}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`,
{
...commonExpectatedResult,
branches: ["HEAD"],
tags: [],
},
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${`${GIT_LOG_SEPARATOR}HEAD -> main, origin/main, origin/HEAD`}${fakeAuthorAndCommitter}${fakeCommitMessage}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${"HEAD -> main, origin/main, origin/HEAD"}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`,
{
...commonExpectatedResult,
branches: ["HEAD", "main", "origin/main", "origin/HEAD"],
tags: [],
},
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${`${GIT_LOG_SEPARATOR}HEAD, tag: v1.0.0`}${fakeAuthorAndCommitter}${fakeCommitMessage}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${"HEAD, tag: v1.0.0"}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`,
{
...commonExpectatedResult,
branches: ["HEAD"],
tags: ["v1.0.0"],
},
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${`${GIT_LOG_SEPARATOR}HEAD -> main, origin/main, origin/HEAD, tag: v2.0.0`}${fakeAuthorAndCommitter}${fakeCommitMessage}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${"HEAD -> main, origin/main, origin/HEAD, tag: v2.0.0"}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`,
{
...commonExpectatedResult,
branches: ["HEAD", "main", "origin/main", "origin/HEAD"],
tags: ["v2.0.0"],
},
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${`${GIT_LOG_SEPARATOR}HEAD, tag: v2.0.0, tag: v1.4`}${fakeAuthorAndCommitter}${fakeCommitMessage}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${"HEAD, tag: v2.0.0, tag: v1.4"}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`,
{
...commonExpectatedResult,
branches: ["HEAD"],
tags: ["v2.0.0", "v1.4"],
},
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${GIT_LOG_SEPARATOR}${fakeAuthorAndCommitter}${fakeCommitMessage}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${""}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`,
{
...commonExpectatedResult,
branches: [],
Expand All @@ -157,7 +160,7 @@ describe("getCommitRaws", () => {

it.each([
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}\n${"10\t0\ta.ts\n1\t0\tREADME.md"}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}\n${"10\t0\ta.ts\n1\t0\tREADME.md"}`,
{
...commonExpectatedResult,
differenceStatistic: {
Expand All @@ -171,7 +174,7 @@ describe("getCommitRaws", () => {
},
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}\n${"3\t3\ta.ts"}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}\n${"3\t3\ta.ts"}`,
{
...commonExpectatedResult,
differenceStatistic: {
Expand All @@ -182,7 +185,7 @@ describe("getCommitRaws", () => {
},
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}\n${"4\t0\ta.ts"}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}\n${"4\t0\ta.ts"}`,
{
...commonExpectatedResult,
differenceStatistic: {
Expand All @@ -193,7 +196,7 @@ describe("getCommitRaws", () => {
},
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}\n${"0\t6\ta.ts\n2\t0\tb.ts\n3\t3\tc.ts"}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}\n${"0\t6\ta.ts\n2\t0\tb.ts\n3\t3\tc.ts"}`,
{
...commonExpectatedResult,
differenceStatistic: {
Expand All @@ -213,7 +216,7 @@ describe("getCommitRaws", () => {
});

it(`should parse gitlog to commitRaw(multiple commits)`, () => {
const mockLog = `${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}\n${fakeCommitFileChange}${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}`;
const mockLog = `${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}\n${fakeCommitFileChange}\n\n\n\n${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`;
const result = getCommitRaws(mockLog);
const expectedResult = [
{ ...commonExpectatedResult, differenceStatistic: expectedFileChange },
Expand All @@ -225,23 +228,23 @@ describe("getCommitRaws", () => {

it.each([
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${`${GIT_LOG_SEPARATOR}commit message title${GIT_LOG_SEPARATOR}`}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${"commit message title"}\n${INDENTATION}`,
{ ...commonExpectatedResult, message: "commit message title" },
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${`${GIT_LOG_SEPARATOR}commit message title\ncommit message${GIT_LOG_SEPARATOR}`}`,
{ ...commonExpectatedResult, message: "commit message title\ncommit message" },
`${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${"commit message title"}\n${INDENTATION}${"commit message body"}`,
{ ...commonExpectatedResult, message: "commit message title\ncommit message body" },
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${`${GIT_LOG_SEPARATOR}commit message title\n\ncommit message body${GIT_LOG_SEPARATOR}`}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${"commit message title"}\n${INDENTATION}\n${INDENTATION}${"commit message body"}`,
{ ...commonExpectatedResult, message: "commit message title\n\ncommit message body" },
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${`${GIT_LOG_SEPARATOR}commit message title\n\n\ncommit message body${GIT_LOG_SEPARATOR}`}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${"commit message title"}\n${INDENTATION}\n${INDENTATION}\n${INDENTATION}${"commit message body"}`,
{ ...commonExpectatedResult, message: "commit message title\n\n\ncommit message body" },
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${`${GIT_LOG_SEPARATOR}${GIT_LOG_SEPARATOR}`}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n\n${INDENTATION}`,
{ ...commonExpectatedResult, message: "" },
],
])("should parse gitlog to commitRaw(commit message)", (mockLog, expectedResult) => {
Expand All @@ -250,7 +253,7 @@ describe("getCommitRaws", () => {
});

it(`should parse gitlog to commitRaw(commit message body and file change)`, () => {
const mockLog = `${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessageAndBody}\n${fakeCommitFileChange}`;
const mockLog = `${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessageAndBody}\n${fakeCommitFileChange}`;
const result = getCommitRaws(mockLog);
const expectedResult = {
...commonExpectatedResult,
Expand Down
85 changes: 47 additions & 38 deletions packages/analysis-engine/src/parser.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
import { getCommitMessageType } from "./commit.util";
import { COMMIT_SEPARATOR, GIT_LOG_SEPARATOR } from "./constant";
import type { CommitRaw } from "./types";
import type { CommitRaw, DifferenceStatistic } from "./types";

export default function getCommitRaws(log: string) {
if (!log) return [];
const EOL_REGEX = /\r?\n/;
const COMMIT_SEPARATOR = new RegExp(`${EOL_REGEX.source}{4}`);
const INDENTATION = " ";

// step 0: Split log into commits
const commits = log.split(COMMIT_SEPARATOR);
const commits = log.substring(2).split(COMMIT_SEPARATOR);
const commitRaws: CommitRaw[] = [];
// skip the first empty element
for (let commitIdx = 1; commitIdx < commits.length; commitIdx += 1) {
// step 1: Extract commitData from the first line of the commit
const commitData = commits[commitIdx].split(GIT_LOG_SEPARATOR);
for (let commitIdx = 0; commitIdx < commits.length; commitIdx += 1) {
// step 1: Extract commitData
const commitData = commits[commitIdx].split(EOL_REGEX);
const [
id,
parents,
Expand All @@ -23,10 +23,9 @@ export default function getCommitRaws(log: string) {
committerName,
committerEmail,
committerDate,
message,
diffStats,
...messageAndDiffStats
] = commitData;
// Extract branch and tag data from refs
// step 2: Extract branch and tag data from refs
const refsArray = refs.replace(" -> ", ", ").split(", ");
const [branches, tags]: string[][] = refsArray.reduce(
([branches, tags], ref) => {
Expand All @@ -41,11 +40,45 @@ export default function getCommitRaws(log: string) {
[new Array<string>(), new Array<string>()]
);

// make base commitRaw object
// step 3: Extract message and diffStats
let messageSubject = "";
let messageBody = "";
const diffStats: DifferenceStatistic = {
totalInsertionCount: 0,
totalDeletionCount: 0,
fileDictionary: {},
};
for (let idx = 0; idx < messageAndDiffStats.length; idx++) {
const line = messageAndDiffStats[idx];
if (idx === 0)
// message subject
messageSubject = line;
else if (line.startsWith(INDENTATION)) {
// message body (add newline if not first line)
messageBody += idx === 1 ? line.trim() : `\n${line.trim()}`;
} else if (line === "")
// pass empty line
continue;
else {
// diffStats
const [insertions, deletions, path] = line.split("\t");
const numberedInsertions = insertions === "-" ? 0 : Number(insertions);
const numberedDeletions = deletions === "-" ? 0 : Number(deletions);
diffStats.totalInsertionCount += numberedInsertions;
diffStats.totalDeletionCount += numberedDeletions;
diffStats.fileDictionary[path] = {
insertionCount: numberedInsertions,
deletionCount: numberedDeletions,
};
}
}

const message = messageBody === "" ? messageSubject : `${messageSubject}\n${messageBody}`;
// step 4: Construct commitRaw
const commitRaw: CommitRaw = {
sequence: commitIdx - 1,
sequence: commitIdx,
id,
parents: parents.split(" "),
parents: parents.length === 0 ? [] : parents.split(" "),
branches,
tags,
author: {
Expand All @@ -60,32 +93,8 @@ export default function getCommitRaws(log: string) {
committerDate: new Date(committerDate),
message,
commitMessageType: getCommitMessageType(message),
differenceStatistic: {
totalInsertionCount: 0,
totalDeletionCount: 0,
fileDictionary: {},
},
differenceStatistic: diffStats,
};

// step 2: Extract diffStats from the rest of the commit
if (!diffStats) {
commitRaws.push(commitRaw);
continue;
}
const diffStatsArray = diffStats.split(EOL_REGEX);
// pass the first empty element
for (let diffIdx = 1; diffIdx < diffStatsArray.length; diffIdx += 1) {
if (diffStatsArray[diffIdx] === "") continue;
const [insertions, deletions, path] = diffStatsArray[diffIdx].split("\t");
const numberedInsertions = insertions === "-" ? 0 : Number(insertions);
const numberedDeletions = deletions === "-" ? 0 : Number(deletions);
commitRaw.differenceStatistic.totalInsertionCount += numberedInsertions;
commitRaw.differenceStatistic.totalDeletionCount += numberedDeletions;
commitRaw.differenceStatistic.fileDictionary[path] = {
insertionCount: numberedInsertions,
deletionCount: numberedDeletions,
};
}
commitRaws.push(commitRaw);
}

Expand Down
15 changes: 7 additions & 8 deletions packages/vscode/src/utils/git.util.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import { COMMIT_SEPARATOR, GIT_LOG_SEPARATOR } from "@githru-vscode-ext/analysis-engine";
import * as cp from "child_process";
import * as fs from "fs";
import * as path from "path";
Expand Down Expand Up @@ -156,20 +155,20 @@ export async function getGitExecutableFromPaths(paths: string[]): Promise<GitExe
export async function getGitLog(gitPath: string, currentWorkspacePath: string): Promise<string> {
return new Promise((resolve, reject) => {
const gitLogFormat =
COMMIT_SEPARATOR +
"%n%n" +
[
"%H", // commit hash (id)
"%P", // parent hashes
"%D", // ref names (branches, tags)
"%an", // author name
"%ae", // author email
"%ad", // author date
"%cn",
"%ce",
"%cd", // committer name, committer email and committer date
"%B", // commit message (subject and body)
].join(GIT_LOG_SEPARATOR) +
GIT_LOG_SEPARATOR;
"%cn", // committer name
"%ce", // committer email
"%cd", // committer date
"%w(0,0,4)%s", // commit message subject
"%b", // commit message body
].join("%n");
const args = [
"--no-pager",
"log",
Expand Down

0 comments on commit 59d163c

Please sign in to comment.