mirror of https://codeberg.org/dribdat/dribdat.git
Tweak Git image parsing
This commit is contained in:
parent
dbd070aedf
commit
c1a7af0443
|
@ -15,7 +15,9 @@ from .apievents import (
|
|||
fetch_commits_gitlab,
|
||||
fetch_commits_gitea,
|
||||
)
|
||||
from .utils import sanitize_url, load_presets, load_yaml_presets
|
||||
from .utils import (
|
||||
sanitize_url, load_presets, load_yaml_presets, fix_relative_links
|
||||
)
|
||||
from future.standard_library import install_aliases
|
||||
install_aliases()
|
||||
|
||||
|
@ -154,17 +156,7 @@ def FetchGithubProject(project_url):
|
|||
readme = b64decode(readme['content']).decode('utf-8')
|
||||
# Fix relative links in text
|
||||
imgroot = "https://raw.githubusercontent.com"
|
||||
readme = re.sub(
|
||||
r"<img src=\"(?!http)",
|
||||
"<img src=\"%s/%s/%s/" % (imgroot, repo_full_name, default_branch),
|
||||
readme
|
||||
)
|
||||
readme = re.sub(
|
||||
r"\!\[(.*)\]\((?!http)",
|
||||
# TODO check why we are using \g escape here?
|
||||
r"![\g<1>](%s/%s/%s/" % (imgroot, repo_full_name, default_branch),
|
||||
readme
|
||||
)
|
||||
readme = fix_relative_links(readme, imgroot, repo_full_name, default_branch)
|
||||
return {
|
||||
'type': 'GitHub',
|
||||
'name': json['name'],
|
||||
|
|
|
@ -166,3 +166,19 @@ def load_yaml_presets(filename, by_col='name', filepath=None):
|
|||
with open(fn, mode='r') as file:
|
||||
config = load_presets(file, filename, by_col)
|
||||
return config
|
||||
|
||||
|
||||
def fix_relative_links(readme, imgroot, repo_full_name, default_branch):
    """Rewrite relative image references in a README to absolute URLs.

    Both HTML attributes of the form `` src="..."`` and Markdown images of
    the form ``![alt](...)`` are handled. Every relative target is prefixed
    with ``<imgroot>/<repo_full_name>/<default_branch>/``.

    Targets that start with ``http``, ``//`` (protocol-relative) or
    ``data:`` are left untouched. A leading ``./`` or ``/`` on a relative
    target is dropped so that the resulting URL has no doubled slash.

    :param readme: the README text (Markdown, possibly with inline HTML).
    :param imgroot: base URL of the raw file host, without trailing slash.
    :param repo_full_name: repository path, e.g. ``owner/name``.
    :param default_branch: branch name used to build the absolute URL.
    :returns: the text with relative image references made absolute.
    """
    base = "%s/%s/%s/" % (imgroot, repo_full_name, default_branch)
    # Anything matching this lookahead is not a relative path; an optional
    # leading "./" or "/" is consumed so it does not end up after the base.
    relative = r"(?!http|//|data:)(?:\./|/)?"

    # Replacement callables are used instead of template strings so that the
    # base URL is inserted literally, with no backslash-escape processing.
    readme = re.sub(
        r" src=\"" + relative,
        lambda match: " src=\"" + base,
        readme
    )
    # The alt text is matched as "anything but a closing bracket" rather than
    # a greedy ".*", so each image on a line is rewritten on its own and the
    # match cannot run across an absolute image into a later, unrelated link.
    readme = re.sub(
        r"!\[([^\]]*)\]\(" + relative,
        lambda match: "![%s](%s" % (match.group(1), base),
        readme
    )
    return readme
|
||||
|
|
|
@ -20,7 +20,6 @@ class TestRepository:
|
|||
assert 'dribdat' in test_obj['description']
|
||||
assert 'dribdat/dribdat' in test_obj['source_url']
|
||||
|
||||
|
||||
def test_datapackage_dribdat(self):
|
||||
"""Test parsing a dribdat Data Package."""
|
||||
test_url = 'https://raw.githubusercontent.com/dribdat/dribdat/main/tests/mock/datapackage.json'
|
||||
|
@ -70,6 +69,8 @@ class TestRepository:
|
|||
assert test_obj['type'] == 'GitHub'
|
||||
assert 'commits' in test_obj
|
||||
assert len(test_obj['commits']) > 5
|
||||
assert 'src="dribdat/static/img' not in test_obj['description']
|
||||
assert 'src="https://raw.githubusercontent.com/dribdat/dribdat/main/dribdat/static/img' in test_obj['description']
|
||||
|
||||
def test_github_other(self):
|
||||
"""Test parsing a GitHub Markdown file."""
|
||||
|
|
|
@ -8,6 +8,7 @@ from dribdat.aggregation import (
|
|||
FetchWebProject,
|
||||
ProjectActivity,
|
||||
)
|
||||
from .utils import fix_relative_links
|
||||
from .factories import ProjectFactory
|
||||
|
||||
from .mock.project_data import project_data
|
||||
|
@ -68,6 +69,7 @@ class TestSync:
|
|||
assert test_obj['source_url'] == test_url
|
||||
assert 'Guidelines' in test_obj['description']
|
||||
|
||||
|
||||
def test_googledoc(self):
|
||||
"""Test parsing a Google Document."""
|
||||
# Handbook to Hackathons with Dribdat
|
||||
|
@ -76,3 +78,13 @@ class TestSync:
|
|||
assert 'description' in test_obj
|
||||
assert 'Handbook' in test_obj['description']
|
||||
|
||||
|
||||
def test_fix_relative_links(self):
    """Test that relative image links are rewritten to absolute URLs."""
    imgroot = "https://raw.githubusercontent.com"
    repo_full_name = "dribdat/dribdat"
    default_branch = "main"
    base = "%s/%s/%s/" % (imgroot, repo_full_name, default_branch)
    readme = '![hello there](world.png) <img title="hello" src="again.jpg">'
    readme = fix_relative_links(
        readme, imgroot, repo_full_name, default_branch)
    assert imgroot in readme
    # The relative forms must be gone...
    assert '(world.png)' not in readme
    assert '"again.jpg"' not in readme
    # ...and replaced by the full path under the repository's branch.
    assert ('![hello there](%sworld.png)' % base) in readme
    assert ('src="%sagain.jpg"' % base) in readme
    # A link that is already absolute is returned unchanged.
    absolute = '![logo](https://example.com/logo.png)'
    assert fix_relative_links(
        absolute, imgroot, repo_full_name, default_branch) == absolute
|
||||
|
|
Loading…
Reference in New Issue