Skip to content

Commit

Permalink
github import
Browse files Browse the repository at this point in the history
  • Loading branch information
Trilarion committed Mar 5, 2024
1 parent b5a376e commit 32cb166
Show file tree
Hide file tree
Showing 1,133 changed files with 17,087 additions and 2,664 deletions.
67 changes: 32 additions & 35 deletions code/github_import.py → code/synchronization/github_import.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
"""
Uses the GitHub API to learn more about the GitHub projects.
Uses the Github API to learn more about the Github projects.
Updates, for example, the starring information.
"""

# TODO remove developers again?
# TODO try to identify main developers (number of commits or lines changed...)

import os
import json
from random import sample
from utils import constants as c, utils, osg, osg_parse, osg_github
Expand All @@ -20,7 +19,7 @@
'http://timpetricola.com': 'https://timpetricola.com',
'http:/code.schwitzer.ca': 'https://code.schwitzer.ca/',
'http:\\www.vampier.net': 'https://www.vampier.net/'}
ignored_blogs = ('https://uto.io',)
ignored_blogs = ('https://uto.io', r'https://¯\_(°_o)_/¯')

ignored_languages = ('CSS', 'HTML', 'CMake', 'XSLT', 'ShaderLab', 'Shell')
language_aliases = {'VBA': 'Visual Basic', 'Common Lisp': 'Lisp', 'Game Maker Language': 'Game Maker Script',
Expand All @@ -43,7 +42,7 @@

def collect_github_entries():
"""
Reads the entries of the database and collects all entries with a GitHub repository. Just for convenience to limit
Reads the entries of the database and collects all entries with a Github repository. Just for convenience to limit
the number of entries to iterate on later.
"""

Expand All @@ -52,35 +51,34 @@ def collect_github_entries():
print(f'{len(entries)} entries read')

# loop over entries
files = []
filenames = []
for entry in entries:
urls = [x for x in entry.get('Code repository', []) if x.startswith(prefix)]
if urls:
files.append(entry['File'])
filenames.append(entry['File'].name)

# write to file
print(f'{len(files)} entries with github repos')
utils.write_text(gh_entries_file, json.dumps(files, indent=1))
print(f'{len(filenames)} entries with github repos')
utils.write_text(gh_entries_file, json.dumps(filenames, indent=1))


def github_import():
"""
Import various information from GitHub repositories (like contributors) or stars for GitHub repos
Import various information from Github repositories (like contributors) or stars for Github repos
Github rate limiting limits us to 1000 queries an hour (also with a personal token?)
"""
private_properties = json.loads(utils.read_text(c.private_properties_file))
token = private_properties['github-token']

files = json.loads(utils.read_text(gh_entries_file))

# Github rate limiting limits us to 1000 queries an hour, currently let's limit it to 100.
if len(files) > 100:
files = sample(files, 100)


all_developers = osg.read_developers()
print(f' {len(all_developers)} developers read')

# loop over each entry
for index, file in enumerate(files):
try:
try:
for index, file in enumerate(files):

print(f' process {file} ({index})')

# read entry
Expand All @@ -93,10 +91,6 @@ def github_import():
repos = [x for x in repos if x not in ignored_repos]
for repo in repos:
print(f' GH repo {repo}')
token = os.environ["GITHUB_TOKEN"]
if not token:
private_properties = json.loads(utils.read_text(c.private_properties_file))
token = private_properties['github-token']

info = osg_github.retrieve_repo_info(repo, token=token)
if info is None:
Expand All @@ -121,11 +115,13 @@ def github_import():
new_comments.append(f"@forks {info['forks']}")

# update comment
for r in code_repositories:
for idx, r in enumerate(code_repositories):
if r.startswith(repo):
if not isinstance(r, osg_parse.Value): # if there was no comment yet, make one
r = osg_parse.Value(r)
code_repositories[idx] = r # need to store it, otherwise changes will be lost
break
if type(r) is not osg_parse.Value:
r = osg_parse.Value(r) # if there was no comment yet, make one

comments = r.comment
if comments:
comments = comments.split(',')
Expand Down Expand Up @@ -161,6 +157,7 @@ def github_import():
blog = blog_alias[blog] if blog in blog_alias else blog
if not blog.startswith('http'):
blog = 'https://' + blog
blog = blog.replace(r'\\', '//') # this was needed at least once
if blog in ignored_blogs:
blog = None

Expand All @@ -172,7 +169,7 @@ def github_import():
# look up author in developers database
if name in all_developers:
dev = all_developers[name]
if not nickname in dev.get('Contact', []):
if nickname not in dev.get('Contact', []):
print(f' existing dev "{name}" added nickname ({nickname}) to developer database')
# check that name has not already @GH contact
if any(x.endswith('@GH') for x in dev.get('Contact', [])):
Expand All @@ -190,15 +187,15 @@ def github_import():

entry['Code repository'] = code_repositories
osg.write_entry(entry)
except:
print(f"Error processing repo {file}")
pass # Keep going to other entries

# shorten file list
utils.write_text(gh_entries_file, json.dumps(files[index:], indent=1))
except RuntimeError as e:
print(f"Error processing repo {file}")
raise e
finally:
# shorten file list
utils.write_text(gh_entries_file, json.dumps(files[index:], indent=1))

osg.write_developers(all_developers)
print('developers database updated')
osg.write_developers(all_developers)
print('developers database updated')


def github_starring_synchronization():
Expand Down Expand Up @@ -226,7 +223,7 @@ def github_starring_synchronization():
all_repos = set(all_repos)
print(f'found {len(all_repos)} Github repos')

# get my GitHub user
# get my Github user
user = osg_github.get_user(private_properties['github-name'], token=private_properties['github-token'])

# get starred repos
Expand All @@ -243,10 +240,10 @@ def github_starring_synchronization():

if __name__ == "__main__":
# collect entries (run this only once)
collect_github_entries()
# collect_github_entries()

# import information from gh
github_import()

# which github repos haven't I starred
# which Github repos have I not starred
# github_starring_synchronization()
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ def gitlab_import():
# search for repository
for idx, r in enumerate(code_repositories):
if r.startswith(repo):
if not isinstance(r, osg_parse.Value):
if not isinstance(r, osg_parse.Value): # if there was no comment yet, make one
r = osg_parse.Value(r)
code_repositories[idx] = r
code_repositories[idx] = r # need to store it, otherwise changes will be lost
break

# update comment
Expand All @@ -95,7 +95,8 @@ def gitlab_import():
entry['Code repository'] = code_repositories
osg.write_entry(entry)
except RuntimeError as e:
raise(e)
print(f"Error processing repo {file}")
raise e
finally:
# shorten file list
utils.write_text(gl_entries_file, json.dumps(files[index:], indent=1))
Expand Down
6 changes: 3 additions & 3 deletions code/utils/osg_github.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Everything specific to the GitHub API (via PyGithub - https://pygithub.readthedocs.io/en/latest/index.html).
Everything specific to the Github API (via PyGithub - https://pygithub.readthedocs.io/en/latest/index.html).
"""

from github import Github, GithubException
Expand Down Expand Up @@ -29,7 +29,7 @@ def repo_get_contributors(repo):

def retrieve_repo_info(repos, token=None):
"""
For a list of GitHub repos, retrieves repo information.
For a list of Github repos, retrieves repo information.
Repos must have the style xxx/yyy example: "PyGithub/PyGithub"
"""
Expand All @@ -48,7 +48,7 @@ def retrieve_repo_info(repos, token=None):
r = g.get_repo(repo)
except GithubException as e:
if type(e) == UnknownObjectException:
print(f'repo "{repo}" does not exist on GitHub')
print(f'repo "{repo}" does not exist on Github')
return None
else:
raise RuntimeError(e)
Expand Down
Loading

0 comments on commit 32cb166

Please sign in to comment.