Normalize URLs to GitHub project repos

In particular we want to remove any trailing `.git` from the project
URL, since this results in broken links when we construct the "Fork"
and "Open Issues" links in the plugin page.

(While `https://github.com/_owner_/_project_.git` will work to get to the
repo, `https://github.com/_owner_/_project_.git/fork` is a 404.)
This commit is contained in:
Jeff Dairiki 2022-07-27 12:44:05 -07:00
parent 850e52f598
commit fca0134876
1 changed files with 21 additions and 0 deletions

View File

@ -1,5 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import cgi import cgi
import re
import readme_renderer.markdown import readme_renderer.markdown
import readme_renderer.rst import readme_renderer.rst
@ -17,6 +18,24 @@ _RENDERERS = {
} }
def normalize_url(url):
    """Normalize project home page URLs.

    URLs that point at the root of a GitHub repository are rewritten to the
    canonical form ``https://github.com/<owner>/<project>``: the scheme is
    forced to ``https``, a leading ``www.`` is dropped, and any trailing
    ``.git`` suffix and/or trailing slash is removed.  This matters because
    sub-page links built by appending to the home page URL (e.g. ``/fork``)
    are broken when the base URL ends in ``.git``.

    Any other URL (non-GitHub hosts, or GitHub URLs that point below the
    repository root such as ``.../issues``) is returned unchanged.

    :param url: the project home page URL.
    :return: the normalized URL, or ``url`` itself if it is not a GitHub
        repository root URL.
    """
    # Normalize any URLs to GitHub project repos.
    m = re.match(
        r"""
        https?://(?:www\.)?github\.com
        / (?P<owner>[^/]+)
        # The project group is lazy so that an optional trailing ".git"
        # is consumed by the following group rather than by the name.
        / (?P<project>[^/]+?) (?:\.git)?
        /? \Z
        """,
        url,
        flags=re.VERBOSE,
    )
    if m:
        return "https://github.com/{owner}/{project}".format(**m.groupdict())
    return url
class ProjectDataPlugin(Plugin): class ProjectDataPlugin(Plugin):
name = 'Project Data' name = 'Project Data'
description = u'Retrieve project information from PyPI.' description = u'Retrieve project information from PyPI.'
@ -71,6 +90,8 @@ class ProjectDataPlugin(Plugin):
self.data['description'], self.data['description_content_type']) self.data['description'], self.data['description_content_type'])
if not self.data.get('home_page'): if not self.data.get('home_page'):
self.data['home_page'] = f'https://pypi.org/project/{name}/' self.data['home_page'] = f'https://pypi.org/project/{name}/'
else:
self.data['home_page'] = normalize_url(self.data['home_page'])
def github_data(self, owner=None, repo=None): def github_data(self, owner=None, repo=None):
url = 'https://api.github.com/repos/{}/{}'.format(owner, repo) url = 'https://api.github.com/repos/{}/{}'.format(owner, repo)