import re

# Matches a GitHub repository URL whose project component ends in ``.git``
# (optionally followed by a single trailing slash).  Written with
# ``re.VERBOSE`` so whitespace inside the pattern is insignificant.
_GITHUB_GIT_URL_RE = re.compile(
    r"""
    https?://(?:www\.)?github\.com
    / (?P<owner>[^/]+)
    / (?P<project>[^/]+?) (?:\.git)
    /? \Z
    """,
    flags=re.VERBOSE,
)


def normalize_url(url):
    """Normalize project home page URLs.

    URLs pointing at a GitHub project repo with a trailing ``.git`` (for
    example ``http://www.github.com/owner/project.git/``) are rewritten to
    the canonical ``https://github.com/owner/project`` form.  Any other URL
    is returned unchanged.

    The trailing ``.git`` is removed because links derived from the home
    page by appending a path break otherwise: while
    ``https://github.com/owner/project.git`` reaches the repo,
    ``https://github.com/owner/project.git/fork`` is a 404.

    :param url: the project home page URL (a string).
    :return: the normalized URL, or ``url`` itself when it is not a
        ``.git``-suffixed GitHub repo URL.
    """
    m = _GITHUB_GIT_URL_RE.match(url)
    if m:
        # The named groups supply the ``{owner}`` and ``{project}`` fields.
        return "https://github.com/{owner}/{project}".format(**m.groupdict())
    return url