mirror of
https://codeberg.org/dribdat/dribdat.git
synced 2026-03-13 06:26:11 +00:00
Added Gitea support and tests
This commit is contained in:
parent
25e779adcc
commit
b53e5e493e
4 changed files with 203 additions and 23 deletions
|
|
@ -1,6 +1,5 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""Utilities for aggregating data
|
||||
"""
|
||||
"""Utilities for aggregating data."""
|
||||
|
||||
from dribdat.user.models import Activity, User, Project
|
||||
from dribdat.user import isUserActive
|
||||
|
|
@ -8,17 +7,18 @@ from dribdat.database import db
|
|||
from dribdat.apifetch import (
|
||||
FetchGitlabProject,
|
||||
FetchGithubProject,
|
||||
FetchGiteaProject,
|
||||
FetchBitbucketProject,
|
||||
FetchDataProject,
|
||||
FetchWebProject,
|
||||
)
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
|
||||
def GetProjectData(url):
|
||||
"""Parses the Readme URL to collect remote data."""
|
||||
"""Parse the Readme URL to collect remote data."""
|
||||
# TODO: find a better way to decide the kind of repo
|
||||
if url.find('//gitlab.com') > 0:
|
||||
apiurl = url
|
||||
apiurl = re.sub(r'(?i)-?/blob/[a-z]+/README.*', '', apiurl)
|
||||
|
|
@ -37,6 +37,16 @@ def GetProjectData(url):
|
|||
return {}
|
||||
return FetchGithubProject(apiurl)
|
||||
|
||||
elif url.find('//codeberg.org') > 0:
|
||||
apiurl = url
|
||||
apiurl = re.sub(r'(?i)/src/branch/[a-z]+/README.*', '', apiurl)
|
||||
apiurl = re.sub(r'https?://codeberg\.org/', '', apiurl).strip('/')
|
||||
if apiurl.endswith('.git'):
|
||||
apiurl = apiurl[:-4]
|
||||
if apiurl == url:
|
||||
return {}
|
||||
return FetchGiteaProject(apiurl)
|
||||
|
||||
elif url.find('//bitbucket.org') > 0:
|
||||
apiurl = url
|
||||
apiurl = re.sub(r'(?i)/src/[a-z]+/(README)?\.?[a-z]*', '', apiurl)
|
||||
|
|
|
|||
|
|
@ -1,12 +1,47 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
""" Collecting events from remote repositories """
|
||||
"""Collect events from remote repositories."""
|
||||
|
||||
import logging
|
||||
import requests
|
||||
from dateutil import parser
|
||||
|
||||
|
||||
def FetchGithubCommits(full_name, since=None, until=None):
|
||||
def fetch_commits_gitea(full_name, limit=10):
    """Parse data about Gitea commits.

    Queries the Codeberg (Gitea) API for up to ``limit`` commits of the
    repository ``full_name`` (in "owner/repo" form) and normalizes each
    entry into a dict with 'url', 'date', 'author' and 'message' keys.
    Returns an empty list on any API error.
    """
    apiurl = "https://codeberg.org/api/v1/repos/%s/commits?limit=%d" % (
        full_name, limit)
    data = requests.get(apiurl)
    if data.status_code != 200:
        # logging.warn is deprecated; use warning() with lazy %-args
        logging.warning("Could not sync Gitea commits on %s", full_name)
        return []
    payload = data.json()  # renamed to avoid shadowing the stdlib `json`
    if 'message' in payload:
        # The API signals errors with a top-level 'message' field.
        logging.warning("Could not sync Gitea commits on %s: %s",
                        full_name, payload['message'])
        return []
    commitlog = []
    for entry in payload:
        if 'commit' not in entry:
            continue
        url = entry['html_url']
        commit = entry['commit']
        datestamp = parser.parse(entry['created'])
        author = ''
        if 'committer' in commit and 'name' in commit['committer']:
            author = commit['committer']['name']
        elif 'author' in commit and 'name' in commit['author']:
            # Bug fix: the membership test previously checked `entry`
            # while reading from `commit`, which could raise KeyError.
            author = commit['author']['name']
        commitlog.append({
            'url': url,
            'date': datestamp,
            'author': author,
            'message': commit['message'][:256],
        })
    return commitlog
|
||||
|
||||
|
||||
def fetch_commits_github(full_name, since=None, until=None):
|
||||
"""Parse data about GitHub commits."""
|
||||
apiurl = "https://api.github.com/repos/%s/commits?per_page=50" % full_name
|
||||
if since is not None:
|
||||
apiurl += "&since=%s" % since.replace(microsecond=0).isoformat()
|
||||
|
|
@ -14,7 +49,6 @@ def FetchGithubCommits(full_name, since=None, until=None):
|
|||
apiurl += "&until=%s" % until.replace(microsecond=0).isoformat()
|
||||
data = requests.get(apiurl)
|
||||
if data.status_code != 200:
|
||||
print(data)
|
||||
logging.warn("Could not sync GitHub commits on %s" % full_name)
|
||||
return []
|
||||
json = data.json()
|
||||
|
|
@ -28,11 +62,12 @@ def FetchGithubCommits(full_name, since=None, until=None):
|
|||
continue
|
||||
commit = entry['commit']
|
||||
datestamp = parser.parse(commit['committer']['date'])
|
||||
author = ''
|
||||
if 'author' in entry and \
|
||||
entry['author'] is not None and \
|
||||
'login' in entry['author']:
|
||||
author = entry['author']['login']
|
||||
else:
|
||||
elif 'committer' in commit:
|
||||
author = commit['committer']['name'][:100]
|
||||
url = "https://github.com/%s" % full_name
|
||||
if 'html_url' in entry:
|
||||
|
|
@ -44,3 +79,37 @@ def FetchGithubCommits(full_name, since=None, until=None):
|
|||
'message': commit['message'][:256],
|
||||
})
|
||||
return commitlog
|
||||
|
||||
|
||||
def fetch_commits_gitlab(project_id: int, since=None, until=None):
    """Parse data about GitLab commits.

    Queries the GitLab API for commits of the project with numeric id
    ``project_id``, optionally bounded by the ``since``/``until``
    datetimes, and normalizes each commit into a dict with 'url',
    'date', 'author' and 'message' keys. Returns an empty list on
    any API error or non-JSON response.
    """
    apiurl = 'https://gitlab.com/api/v4/'
    apiurl = apiurl + "projects/%d/repository/commits?" % project_id
    if since is not None:
        apiurl += "&since=%s" % since.replace(microsecond=0).isoformat()
    if until is not None:
        apiurl += "&until=%s" % until.replace(microsecond=0).isoformat()
    # Collect basic data
    data = requests.get(apiurl)
    if data.text.find('{') < 0:
        # Response is not JSON (e.g. an HTML error page)
        return []
    payload = data.json()  # renamed to avoid shadowing the stdlib `json`
    if 'message' in payload:
        # Bug fix: logging.warn was given an extra positional argument
        # without a format placeholder, so the API message was lost;
        # also logging.warn is deprecated in favor of warning().
        logging.warning("Could not sync GitLab commits: %s",
                        payload['message'])
        return []
    commitlog = []
    for commit in payload:
        if 'message' not in commit:
            continue
        datestamp = parser.parse(commit['created_at'])
        author = ''
        if 'author_name' in commit and \
                commit['author_name'] is not None:
            author = commit['author_name']
        commitlog.append({
            'url': commit['web_url'],
            'date': datestamp,
            'author': author,
            'message': commit['message'][:256],
        })
    return commitlog
|
||||
|
|
|
|||
|
|
@ -1,31 +1,80 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""Collecting data from third party API repositories."""
|
||||
|
||||
from .apievents import FetchGithubCommits
|
||||
import re
|
||||
import requests
|
||||
import bleach
|
||||
import logging
|
||||
from flask import url_for
|
||||
from pyquery import PyQuery as pq # noqa: N813
|
||||
from base64 import b64decode
|
||||
from flask_misaka import markdown
|
||||
from bleach.sanitizer import ALLOWED_TAGS, ALLOWED_ATTRIBUTES
|
||||
from urllib.parse import quote_plus
|
||||
import re
|
||||
import requests
|
||||
import bleach
|
||||
from .apievents import (
|
||||
fetch_commits_github,
|
||||
fetch_commits_gitlab,
|
||||
fetch_commits_gitea,
|
||||
)
|
||||
from future.standard_library import install_aliases
|
||||
install_aliases()
|
||||
|
||||
|
||||
def FetchGiteaProject(project_url):
    """Download data from Codeberg, a large Gitea site.

    ``project_url`` is the "owner/repo" path of the repository. Returns
    a dict of normalized project fields (name, summary, description,
    source_url, image_url, contact_url, commits), or an empty dict when
    the repository cannot be fetched.
    """
    # Docs: https://codeberg.org/api/swagger
    site_root = "https://codeberg.org"
    url_q = quote_plus(project_url, '/')
    api_repos = site_root + "/api/v1/repos/%s" % url_q
    api_content = api_repos + "/contents"
    # Collect basic data
    data = requests.get(api_repos)
    if data.text.find('{') < 0:
        # Response is not JSON (e.g. an HTML error page)
        return {}
    json_repo = data.json()  # renamed to avoid shadowing the stdlib `json`
    if 'name' not in json_repo:
        return {}
    # Collect the README from the repository contents listing
    data = requests.get(api_content)
    readme = ""
    if not data.text.find('{') < 0:
        readmeurl = None
        for repo_file in data.json():
            if 'readme' in repo_file['name'].lower():
                readmeurl = repo_file['download_url']
                readmedata = requests.get(readmeurl)
                # Bug fix: the fetched README content was previously
                # discarded, leaving the description always empty.
                readme = readmedata.text or ""
                break
        if readmeurl is None:
            # Bug fix: the extra positional argument had no format
            # placeholder, so the repo name was never logged.
            logging.info("Could not find README: %s", url_q)
    issuesurl = ''
    if json_repo['has_issues']:
        issuesurl = json_repo['html_url'] + '/issues'
    return {
        'type': 'Gitea',
        'name': json_repo['name'],
        'summary': json_repo['description'],
        'description': readme,
        'source_url': json_repo['html_url'],
        'image_url': json_repo['avatar_url'] or json_repo['owner']['avatar_url'],
        'contact_url': issuesurl,
        'commits': fetch_commits_gitea(url_q)
    }
|
||||
|
||||
|
||||
def FetchGitlabProject(project_url):
|
||||
"""Download data from GitLab."""
|
||||
WEB_BASE = "https://gitlab.com/%s"
|
||||
API_BASE = "https://gitlab.com/api/v4/projects/%s"
|
||||
url_q = quote_plus(project_url)
|
||||
# Collect basic data
|
||||
data = requests.get(API_BASE % url_q)
|
||||
if data.text.find('{') < 0:
|
||||
return {}
|
||||
json = data.json()
|
||||
if 'name' not in json:
|
||||
return {}
|
||||
readmeurl = "%s/raw/master/README.md" % (WEB_BASE % project_url)
|
||||
# Collect the README
|
||||
readmeurl = json['readme_url'] + '?inline=false'
|
||||
readmedata = requests.get(readmeurl)
|
||||
readme = readmedata.text or ""
|
||||
return {
|
||||
|
|
@ -33,14 +82,15 @@ def FetchGitlabProject(project_url):
|
|||
'name': json['name'],
|
||||
'summary': json['description'],
|
||||
'description': readme,
|
||||
# 'homepage_url': "",
|
||||
'source_url': json['web_url'],
|
||||
'image_url': json['avatar_url'],
|
||||
'contact_url': json['web_url'] + '/issues',
|
||||
'commits': fetch_commits_gitlab(json['id'])
|
||||
}
|
||||
|
||||
|
||||
def FetchGitlabAvatar(email):
|
||||
"""Download a user avatar from GitLab."""
|
||||
apiurl = "https://gitlab.com/api/v4/avatar?email=%s&size=80"
|
||||
data = requests.get(apiurl % email)
|
||||
if data.text.find('{') < 0:
|
||||
|
|
@ -52,6 +102,7 @@ def FetchGitlabAvatar(email):
|
|||
|
||||
|
||||
def FetchGithubProject(project_url):
|
||||
"""Download data from GitHub."""
|
||||
API_BASE = "https://api.github.com/repos/%s"
|
||||
data = requests.get(API_BASE % project_url)
|
||||
if data.text.find('{') < 0:
|
||||
|
|
@ -93,11 +144,12 @@ def FetchGithubProject(project_url):
|
|||
'image_url': json['owner']['avatar_url'],
|
||||
'contact_url': json['html_url'] + '/issues',
|
||||
'download_url': json['html_url'] + '/releases',
|
||||
'commits': FetchGithubCommits(repo_full_name)
|
||||
'commits': fetch_commits_github(repo_full_name)
|
||||
}
|
||||
|
||||
|
||||
def FetchBitbucketProject(project_url):
|
||||
"""Download data from Bitbucket."""
|
||||
WEB_BASE = "https://bitbucket.org/%s"
|
||||
API_BASE = "https://api.bitbucket.org/2.0/repositories/%s"
|
||||
data = requests.get(API_BASE % project_url)
|
||||
|
|
@ -138,11 +190,8 @@ def FetchBitbucketProject(project_url):
|
|||
}
|
||||
|
||||
|
||||
DP_VIEWER_URL = 'http://data.okfn.org/tools/view?url=%s'
|
||||
|
||||
|
||||
def FetchDataProject(project_url):
|
||||
""" Tries to load a Data Package formatted JSON file """
|
||||
"""Try to load a Data Package formatted JSON file."""
|
||||
# TODO: use frictionlessdata library!
|
||||
data = requests.get(project_url)
|
||||
if data.text.find('{') < 0:
|
||||
|
|
@ -150,15 +199,15 @@ def FetchDataProject(project_url):
|
|||
json = data.json()
|
||||
if 'name' not in json or 'title' not in json:
|
||||
return {}
|
||||
text_content = project_url + '\n\n'
|
||||
if 'homepage' in json:
|
||||
readme_url = json['homepage']
|
||||
else:
|
||||
readme_url = project_url.replace('datapackage.json', 'README.md')
|
||||
text_content = ""
|
||||
if readme_url.startswith('http') and readme_url != project_url:
|
||||
text_content = requests.get(readme_url).text
|
||||
text_content = text_content + requests.get(readme_url).text
|
||||
if not text_content and 'description' in json:
|
||||
text_content = json['description']
|
||||
text_content = text_content + json['description']
|
||||
contact_url = ''
|
||||
if 'maintainers' in json and \
|
||||
len(json['maintainers']) > 0 and \
|
||||
|
|
@ -169,7 +218,6 @@ def FetchDataProject(project_url):
|
|||
'name': json['name'],
|
||||
'summary': json['title'],
|
||||
'description': text_content,
|
||||
# 'homepage_url': DP_VIEWER_URL % project_url,
|
||||
'source_url': project_url,
|
||||
'image_url': url_for('static', filename='img/datapackage_icon.png',
|
||||
_external=True),
|
||||
|
|
@ -195,6 +243,7 @@ ALLOWED_HTML_ATTR['font'] = ['color']
|
|||
|
||||
|
||||
def FetchWebProject(project_url):
|
||||
"""Parse a remote Document, wiki or website URL."""
|
||||
try:
|
||||
data = requests.get(project_url)
|
||||
except requests.exceptions.RequestException:
|
||||
|
|
@ -219,6 +268,7 @@ def FetchWebProject(project_url):
|
|||
|
||||
|
||||
def FetchWebGoogleDoc(text, url):
|
||||
"""Help extract data from a Google doc."""
|
||||
doc = pq(text)
|
||||
doc("style").remove()
|
||||
ptitle = doc("div#title") or doc("div#header")
|
||||
|
|
@ -249,6 +299,7 @@ def FetchWebGoogleDoc(text, url):
|
|||
|
||||
|
||||
def FetchWebCodiMD(text, url):
|
||||
"""Help extract data from CodiMD."""
|
||||
doc = pq(text)
|
||||
ptitle = doc("title")
|
||||
if len(ptitle) < 1:
|
||||
|
|
@ -267,6 +318,7 @@ def FetchWebCodiMD(text, url):
|
|||
|
||||
|
||||
def FetchWebDokuWiki(text, url):
|
||||
"""Help extract data from DokuWiki."""
|
||||
doc = pq(text)
|
||||
ptitle = doc("span.pageId")
|
||||
if len(ptitle) < 1:
|
||||
|
|
@ -288,6 +340,7 @@ def FetchWebDokuWiki(text, url):
|
|||
|
||||
|
||||
def FetchWebEtherpad(text, url):
|
||||
"""Help extract data from Etherpad Lite."""
|
||||
ptitle = url.split('/')[-1]
|
||||
if len(ptitle) < 1:
|
||||
return {}
|
||||
|
|
@ -303,6 +356,7 @@ def FetchWebEtherpad(text, url):
|
|||
|
||||
|
||||
def FetchWebInstructables(text, url):
|
||||
"""Help extract data from Instructables."""
|
||||
doc = pq(text)
|
||||
ptitle = doc(".header-title")
|
||||
if len(ptitle) < 1:
|
||||
|
|
|
|||
47
tests/test_aggregate.py
Normal file
47
tests/test_aggregate.py
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""Dribdat data aggregation tests."""
|
||||
|
||||
from dribdat.aggregation import GetProjectData
|
||||
|
||||
|
||||
class TestAggregate:
    """Here be tests."""

    def test_gitea(self):
        """Test parsing a Codeberg readme."""
        result = GetProjectData('https://codeberg.org/dribdat/dribdat')
        assert 'name' in result
        assert result['name'] == 'dribdat'
        assert result['type'] == 'Gitea'
        assert 'commits' in result
        assert len(result['commits']) > 5

    def test_github(self):
        """Test parsing a GitHub readme."""
        result = GetProjectData('https://github.com/dribdat/dribdat')
        assert 'name' in result
        assert result['name'] == 'dribdat'
        assert result['type'] == 'GitHub'
        assert 'commits' in result
        assert len(result['commits']) > 5

    def test_gitlab(self):
        """Test parsing a GitLab readme."""
        result = GetProjectData('https://gitlab.com/dribdat/dribdat')
        assert 'name' in result
        assert result['name'] == 'dribdat'
        assert result['type'] == 'GitLab'
        assert 'commits' in result
        assert len(result['commits']) > 5

    def test_bitbucket(self):
        """Test parsing a Bitbucket readme."""
        result = GetProjectData(
            'https://bitbucket.org/dribdat/dribdat/src/master/')
        assert 'name' in result
        assert result['name'] == 'dribdat'
        assert result['type'] == 'Bitbucket'
        # TODO: support for commits
|
||||
Loading…
Add table
Add a link
Reference in a new issue