From 954bb31c7ce54cfc67b3101ccbbf5fef84185452 Mon Sep 17 00:00:00 2001 From: Aubin <60398825+aubin-tchoi@users.noreply.github.com> Date: Thu, 19 Dec 2024 17:38:12 +0100 Subject: [PATCH] implement missing cases for github (#9536) --- connectors/src/connectors/github/index.ts | 94 +++++++++++++++++-- connectors/src/connectors/github/lib/utils.ts | 42 +++++++++ 2 files changed, 128 insertions(+), 8 deletions(-) diff --git a/connectors/src/connectors/github/index.ts b/connectors/src/connectors/github/index.ts index 17a0c45563cb..27828f269be6 100644 --- a/connectors/src/connectors/github/index.ts +++ b/connectors/src/connectors/github/index.ts @@ -5,6 +5,7 @@ import type { Result, } from "@dust-tt/types"; import { assertNever, Err, Ok } from "@dust-tt/types"; +import { Op } from "sequelize"; import type { GithubRepo } from "@connectors/connectors/github/lib/github_api"; import { @@ -18,7 +19,11 @@ import { } from "@connectors/connectors/github/lib/hierarchy"; import { getCodeRootInternalId, + getDiscussionInternalId, getDiscussionsInternalId, + getGithubIdsFromDiscussionInternalId, + getGithubIdsFromIssueInternalId, + getIssueInternalId, getIssuesInternalId, getRepositoryInternalId, matchGithubInternalIdType, @@ -436,10 +441,12 @@ export class GithubConnectorManager extends BaseConnectorManager { return new Ok(nodes); } - // we should never be getting issues, discussions or code files as parent + // we should never be getting issues, discussions, code files, single issues or discussions as parent case "REPO_ISSUES": case "REPO_DISCUSSIONS": case "REPO_CODE_FILE": + case "REPO_DISCUSSION": + case "REPO_ISSUE": return new Err(new Error("Invalid parent ID.")); default: assertNever(type); @@ -471,6 +478,10 @@ export class GithubConnectorManager extends BaseConnectorManager { const allIssuesFromRepoIds: number[] = []; const allDiscussionsFromRepoIds: number[] = []; + // Single issues or discussions + const issueIds: { repoId: string; issueNumber: number }[] = []; + const discussionIds: { repoId: string; discussionNumber: number }[] = []; + // The full code, or a specific folder or file in the code const allCodeFromRepoIds: string[] = []; const codeDirectoryIds: string[] = []; @@ -500,6 +511,12 @@ export class GithubConnectorManager extends BaseConnectorManager { case "REPO_CODE_FILE": codeFileIds.push(internalId); break; + case "REPO_DISCUSSION": + discussionIds.push(getGithubIdsFromDiscussionInternalId(internalId)); + break; + case "REPO_ISSUE": + issueIds.push(getGithubIdsFromIssueInternalId(internalId)); + break; default: assertNever(type); } @@ -544,6 +561,22 @@ export class GithubConnectorManager extends BaseConnectorManager { }), ]); + // Issues and Discussions are also stored in the db + const [issues, discussions] = await Promise.all([ + GithubIssue.findAll({ + where: { + connectorId: c.id, + [Op.or]: issueIds, + }, + }), + GithubDiscussion.findAll({ + where: { + connectorId: c.id, + [Op.or]: discussionIds, + }, + }), + ]); + // Constructing Nodes for Full Repo fullRepoIds.forEach((repoId) => { const repo = uniqueRepos[repoId]; @@ -605,6 +638,46 @@ export class GithubConnectorManager extends BaseConnectorManager { }); }); + issues.forEach((issue) => { + const { repoId, issueNumber } = issue; + const repo = uniqueRepos[parseInt(repoId, 10)]; + if (!repo) { + return; + } + nodes.push({ + provider: c.type, + internalId: getIssueInternalId(repoId, issueNumber), + parentInternalId: getIssuesInternalId(repoId), + type: "file", + title: `Issue #${issueNumber}`, + sourceUrl: repo.url + `/issues/${issueNumber}`, + expandable: false, + permission: "read", + dustDocumentId: getIssueInternalId(repoId, issueNumber), + lastUpdatedAt: issue.updatedAt.getTime(), + }); + }); + + discussions.forEach((discussion) => { + const { repoId, discussionNumber } = discussion; + const repo = uniqueRepos[parseInt(repoId, 10)]; + if (!repo) { + return; + } + nodes.push({ + provider: c.type, + internalId: getDiscussionInternalId(repoId, discussionNumber), + parentInternalId: getDiscussionsInternalId(repoId), + type: "file", + title: `Discussion #${discussionNumber}`, + sourceUrl: repo.url + `/discussions/${discussionNumber}`, + expandable: false, + permission: "read", + dustDocumentId: getDiscussionInternalId(repoId, discussionNumber), + lastUpdatedAt: discussion.updatedAt.getTime(), + }); + }); + // Constructing Nodes for Code fullCodeInRepos.forEach((codeRepo) => { const repo = uniqueRepos[parseInt(codeRepo.repoId)]; @@ -679,13 +752,6 @@ export class GithubConnectorManager extends BaseConnectorManager { ); } - if (/^github-issue-\d+-\d+$/.test(internalId)) { - return new Ok([internalId]); // this is incorrect but matches the previous behavior, will fix in a follow-up PR - } - if (/^github-discussion-\d+-\d+$/.test(internalId)) { - return new Ok([internalId]); // this is incorrect but matches the previous behavior, will fix in a follow-up PR - } - const { type, repoId } = matchGithubInternalIdType(internalId); switch (type) { @@ -715,6 +781,18 @@ export class GithubConnectorManager extends BaseConnectorManager { ); return new Ok([internalId, ...parents]); } + case "REPO_ISSUE": + return new Ok([ + internalId, + getIssuesInternalId(repoId), + getRepositoryInternalId(repoId), + ]); + case "REPO_DISCUSSION": + return new Ok([ + internalId, + getDiscussionsInternalId(repoId), + getRepositoryInternalId(repoId), + ]); default: { assertNever(type); } diff --git a/connectors/src/connectors/github/lib/utils.ts b/connectors/src/connectors/github/lib/utils.ts index 8cb0faddd1e0..9c95bad9b1cb 100644 --- a/connectors/src/connectors/github/lib/utils.ts +++ b/connectors/src/connectors/github/lib/utils.ts @@ -7,6 +7,8 @@ export const GITHUB_CONTENT_NODE_TYPES = [ "REPO_CODE", "REPO_CODE_DIR", "REPO_CODE_FILE", + "REPO_ISSUE", + "REPO_DISCUSSION", ] as const; export type GithubContentNodeType = (typeof GITHUB_CONTENT_NODE_TYPES)[number]; @@ -19,6 +21,28 @@ export function isGithubCodeFileId(internalId: string): boolean { return /^github-code-\d+-file-[a-f0-9]+$/.test(internalId); } +export function getGithubIdsFromDiscussionInternalId(internalId: string): { + repoId: string; + discussionNumber: number; +} { + const pattern = /^github-discussion-(\d+)-(\d+)$/; + return { + repoId: parseInt(internalId.replace(pattern, "$1"), 10).toString(), + discussionNumber: parseInt(internalId.replace(pattern, "$2"), 10), + }; +} + +export function getGithubIdsFromIssueInternalId(internalId: string): { + repoId: string; + issueNumber: number; +} { + const pattern = /^github-issue-(\d+)-(\d+)$/; + return { + repoId: parseInt(internalId.replace(pattern, "$1"), 10).toString(), + issueNumber: parseInt(internalId.replace(pattern, "$2"), 10), + }; +} + /** * Gets the type of the Github content node from its internal id. */ @@ -74,6 +98,24 @@ export function matchGithubInternalIdType(internalId: string): { ), }; } + if (/^github-issue-\d+-\d+$/.test(internalId)) { + return { + type: "REPO_ISSUE", + repoId: parseInt( + internalId.replace(/^github-issue-(\d+)-\d+$/, "$1"), + 10 + ), + }; + } + if (/^github-discussion-\d+-\d+$/.test(internalId)) { + return { + type: "REPO_DISCUSSION", + repoId: parseInt( + internalId.replace(/^github-discussion-(\d+)-\d+$/, "$1"), + 10 + ), + }; + } throw new Error(`Invalid Github internal id: ${internalId}`); }