Skip to content

Commit

Permalink
Update discussion schema
Browse files Browse the repository at this point in the history
  • Loading branch information
dgparmar14 committed Jun 28, 2024
1 parent d66fae9 commit cac83cf
Show file tree
Hide file tree
Showing 8 changed files with 1,164 additions and 121 deletions.
1,091 changes: 1,091 additions & 0 deletions data/github/discussions/discussions.json

Large diffs are not rendered by default.

69 changes: 24 additions & 45 deletions schemas/discussion-data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,57 +3,36 @@ title: GitHub Discussions Schema
type: array

# Keys that must be present (JSON Schema `required` keyword).
required:
- id
- source
- title
- author
- url
- category
- createdAt
- isAnswered
- time

properties:
discussions:
type: array
items:
source:
type: string
title:
type: string
author:
type: string
url:
type: string
time:
type: string
category:
type: object
required:
["id", "title", "author", "url", "category", "createdAt", "isAnswered"]
- name
- emoji
properties:
id: string
title: string
author:
type: object
required: ["login", "avatarUrl"]
properties:
login: string
avatarUrl: string
url: string
category:
type: object
required: ["id", "name", "emoji"]
properties:
id: string
name: string
emoji: string
upvoteCount: integer
reactions:
type: object
required: ["totalCount"]
properties:
totalCount: integer
comments:
type: array
items:
type: object
required: ["author", "upvoteCount", "isAnswer"]
properties:
author:
type: object
required: ["login", "avatarUrl"]
properties:
login: string
avatarUrl: string
upvoteCount: integer
isAnswer: boolean
createdAt: string
isAnswered: boolean
name:
type: string
emoji:
type: string
participants:
type: array
items:
type: string

77 changes: 21 additions & 56 deletions scraper/src/github-scraper/discussion.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { octokit } from "./config.js";
import { Discussion, ParsedDiscussion } from "./types.js";
import { saveDiscussionData } from "./utils.js";

// Query to fetch discussions from GitHub
Expand All @@ -16,36 +17,25 @@ query($org: String!, $cursor: String) {
discussions(first: 100) {
edges {
node {
id
title
author {
login
avatarUrl
}
url
category{
id
name
emoji
}
upvoteCount
reactions {
totalCount
}
comments(first: 10) {
edges {
node {
author {
login
avatarUrl
}
upvoteCount
isAnswer
}
}
}
createdAt
isAnswered
}
}
}
Expand All @@ -61,64 +51,39 @@ async function fetchDiscussionsForOrg(org: string, cursor = null) {
const response = await octokit.graphql.paginate(query, variables);

type Edge = typeof response.organization.repositories.edges;
const discussions = response.organization.repositories.edges.map(
const discussions: Edge[] = response.organization.repositories.edges.map(
(edge: Edge) => edge.node.discussions.edges,
);

return discussions.flat();
}

// async function parseDiscussionData(allDiscussions: Discussion[]) {
// const authorList = allDiscussions
// .map((d: Discussion) =>
// d.node.comments.edges.map((c) => c.node.author.login),
// )
// .flat();
// authorList.push(...allDiscussions.map((d) => d.node.author.login));
// const uniqueAuthors = Array.from(new Set(authorList));
// const authorDiscussionList = uniqueAuthors.map((author) => {
// const discussions = allDiscussions.filter(
// (d) =>
// d.node.author.login === author ||
// d.node.comments.edges.some((c) => c.node.author.login === author),
// );
// const data = discussions.map((d) => {
// return {
// id: d.node.id,
// title: d.node.title,
// url: d.node.url,
// createdAt: d.node.createdAt,
// author: d.node.author,
// category: d.node.category,
// isAnswered: d.node.isAnswered,
// upvoteCount: d.node.upvoteCount,
// participants: [
// new Map(
// d.node.comments.edges.map((c) => [
// c.node.author.login,
// {
// login: c.node.author.login,
// avatarUrl: c.node.author.avatarUrl,
// isAnswer: c.node.isAnswer,
// upvoteCount: c.node.upvoteCount,
// },
// ]),
// ).values(),
// ],
// };
// });
// return { user: author, discussions: data };
// });
// return authorDiscussionList;
// }
/**
 * Flattens raw discussion edges into the `ParsedDiscussion` records that are
 * persisted by the scraper.
 *
 * GitHub's GraphQL API returns `author: null` for content whose account no
 * longer exists. Such commenters are left out of `participants`, and a
 * discussion without an author is attributed to the "ghost" login so that one
 * deleted account cannot abort the whole run with a TypeError.
 *
 * @param allDiscussions - Discussion edges as returned by the discussions query.
 * @returns One record per input discussion, in the same order.
 */
async function parseDiscussionData(
  allDiscussions: Discussion[],
): Promise<ParsedDiscussion[]> {
  // Login GitHub itself uses as the placeholder for deleted accounts.
  const deletedUserLogin = "ghost";

  const parsedDiscussions: ParsedDiscussion[] = allDiscussions.map((d) => {
    // Optional chaining because `author` may be null at runtime even though
    // the declared type says otherwise.
    const commenterLogins = d.node.comments.edges
      .map((c) => c.node.author?.login)
      .filter((login): login is string => typeof login === "string");

    return {
      source: "github",
      title: d.node.title,
      author: d.node.author?.login ?? deletedUserLogin,
      url: d.node.url,
      time: d.node.createdAt,
      category: d.node.category,
      // Set removes duplicates while keeping first-seen order.
      participants: Array.from(new Set(commenterLogins)),
    };
  });
  return parsedDiscussions;
}

export async function fetchAllDiscussionEventsByOrg(
organizationName: string,
dataDir: string,
) {
try {
const allDiscussions = await fetchDiscussionsForOrg(organizationName);
await saveDiscussionData(allDiscussions, dataDir);
const parsedDiscussions = await parseDiscussionData(allDiscussions);
await saveDiscussionData(parsedDiscussions, dataDir);
} catch (error: any) {
throw new Error(`Error fetching discussions: ${error.message}`);
}
Expand Down
4 changes: 2 additions & 2 deletions scraper/src/github-scraper/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ const main = async () => {
console.error("Invalid date value:", dateArg);
process.exit(1);
}
await scrapeGitHub(orgName, date, Number(numDays), orgName);
await merged_data(dataDir, processedData);
// await scrapeGitHub(orgName, date, Number(numDays), orgName);
// await merged_data(dataDir, processedData);
await fetchAllDiscussionEventsByOrg(orgName, dataDir);

console.log("Done");
Expand Down
1 change: 1 addition & 0 deletions scraper/src/github-scraper/parseEvents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ function appendEvent(user: string, event: Activity) {

const nameUserCache: { [key: string]: string } = {};
const emailUserCache: { [key: string]: string } = {};

async function addCollaborations(event: PullRequestEvent, eventTime: Date) {
const collaborators: Set<string> = new Set();

Expand Down
24 changes: 13 additions & 11 deletions scraper/src/github-scraper/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,35 +198,37 @@ export interface AuthoredIssueAndPr {

/**
 * A single discussion edge; the discussion's own fields live under `node`.
 */
export type Discussion = {
  node: {
    id: string;
    title: string;
    /** Account that opened the discussion. */
    author: { login: string; avatarUrl: string };
    url: string;
    category: { id: string; name: string; emoji: string };
    upvoteCount: number;
    reactions: { totalCount: number };
    /** Comment edges, each wrapping one comment under `node`. */
    comments: {
      edges: Array<{
        node: {
          author: { login: string; avatarUrl: string };
          upvoteCount: number;
          isAnswer: boolean;
        };
      }>;
    };
    createdAt: string;
    isAnswered: boolean;
  };
};

/**
 * Flat, serialisable summary of one discussion.
 */
export type ParsedDiscussion = {
  /** Name of the platform the record came from. */
  source: string;
  title: string;
  author: string;
  url: string;
  time: string;
  category: { name: string; emoji: string };
  participants: Array<string>;
};
18 changes: 12 additions & 6 deletions scraper/src/github-scraper/utils.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import path from "path";
import { octokit } from "./config.js";
import { Action, ActivityData, Discussion, PullRequestEvent } from "./types.js";
import {
Action,
ActivityData,
ParsedDiscussion,
PullRequestEvent,
} from "./types.js";
import { mkdir, readFile, writeFile } from "fs/promises";

export const parseISODate = (isoDate: Date) => {
Expand Down Expand Up @@ -97,10 +102,11 @@ export async function resolveAutonomyResponsibility(
event: Action,
user: string,
) {
if (event.event === "cross-referenced" && event.source.type === "issue") {
return event.source.issue.user.login === user;
}
return false;
return (
event.event === "cross-referenced" &&
event.source.type === "issue" &&
event.source.issue.user.login === user
);
}

export async function loadUserData(user: string, dataDir: string) {
Expand Down Expand Up @@ -140,7 +146,7 @@ export async function saveUserData(
}

export async function saveDiscussionData(
discussions: Discussion,
discussions: ParsedDiscussion[],
dataDir: string,
) {
const discussionDir = path.join(dataDir, "discussions");
Expand Down
1 change: 0 additions & 1 deletion tests/github-discussion-schema.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ use(chaiJsonSchema);
const filesInDir = fs
.readdirSync(GH_DATA)
.filter((file) => path.extname(file) === ".json");
console.log(filesInDir.length);

filesInDir.forEach((file) => {
const content = fs.readFileSync(join(GH_DATA, file)).toString();
Expand Down

0 comments on commit cac83cf

Please sign in to comment.