#!/usr/bin/env python3

"""
Given some markdown text of the general kind we use in Arti changelogs,
look for reference-style links to MRs, issues, and commits, and generate
the appropriate https URLs for them.

Takes input either from a file, or from stdin.

Example:
  ./gen_md_links < new_changelog
"""

import json
import subprocess


def links(s):
    """Extract unresolved markdown links from a string.

    Runs the helper program ``maint/extract-md-links`` (the path is given
    relative to the current working directory), writes `s` to its stdin,
    parses its stdout as JSON, and returns the value stored under the
    ``"used"`` key.

    Raises `subprocess.CalledProcessError` if the helper exits with a
    nonzero status.

    >>> list(links("Hello [world]. This [is a link]"))
    ['world', 'is a link']
    >>> list(links("This [link](is resolved)."))
    []
    """

    # It would have been better to import extract-md-links
    # as a Python module.  But see the comment in its main block.

    # Use check=True rather than an `assert` on the return code: asserts
    # are stripped under `python -O`, which would let a failed helper go
    # unnoticed until the JSON parse blows up (or worse, succeeds).
    result = subprocess.run(
        ["maint/extract-md-links"],
        input=s,
        stdout=subprocess.PIPE,
        encoding="utf-8",
        check=True,
    )
    return json.loads(result.stdout)["used"]


def is_commit(s):
    """Return true if `s` looks like a git commit.

    A string qualifies when it is at least 6 characters long and consists
    solely of ASCII hexadecimal digits (either case).

    >>> is_commit("a3bcD445")
    True
    >>> is_commit("xyzzy123")
    False
    >>> is_commit("0x1234ab")
    False
    >>> is_commit("1_000_00")
    False
    """

    # Check the characters directly instead of calling int(s, 16): int()
    # also accepts a "0x" prefix, a leading sign, surrounding whitespace,
    # "_" digit separators and non-ASCII digits, none of which can appear
    # in a commit id.
    hex_digits = "0123456789abcdefABCDEF"
    return len(s) >= 6 and all(ch in hex_digits for ch in s)


def lookup_git_commit(short):
    """Expand a git commit from its short version.

    Runs ``git rev-parse`` on `short` in the current working directory and
    returns what it prints, stripped of surrounding whitespace and decoded
    as ASCII.

    Raises `ValueError` if ``git rev-parse`` exits with a nonzero status.

    >>> lookup_git_commit("214c251e41")
    '214c251e41a7583397cc5939b9447b89752ee323'
    >>> lookup_git_commit("00000000000000")
    Traceback (most recent call last):
        ...
    ValueError: Unrecognized git commit 00000000000000
    """

    # subprocess.run() drains stdout while waiting for the child and closes
    # the pipe afterwards.  Calling Popen.wait() before reading a PIPE can
    # deadlock if the child fills the pipe buffer, and leaves the pipe open.
    result = subprocess.run(
        ["git", "rev-parse", short],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    if result.returncode != 0:
        raise ValueError(f"Unrecognized git commit {short}")
    return result.stdout.strip().decode("ascii")


class LinkType:
    """Integer tags for the kinds of link label that `Link` distinguishes.

    The numeric values are used as the first element of `Link.sort_key()`,
    so lower-valued kinds sort first.
    """

    MergeRequest, Issue, Commit, Other = 1, 2, 3, 4


class Link:
    """A single link label, classified by what it refers to.

    A label of the form ``!N`` (N decimal) is a merge request, ``#N`` is an
    issue, anything accepted by `is_commit` is a commit, and everything
    else is `LinkType.Other`.
    """

    def __init__(self, s):
        """Classify the label `s` and remember it verbatim."""
        self._s = s
        sigil, rest = s[:1], s[1:]
        if sigil == "!" and rest.isdecimal():
            kind, ident = LinkType.MergeRequest, int(rest)
        elif sigil == "#" and rest.isdecimal():
            kind, ident = LinkType.Issue, int(rest)
        elif is_commit(s):
            # Commit ids are stored lower-cased.
            kind, ident = LinkType.Commit, s.lower()
        else:
            kind, ident = LinkType.Other, s
        self._type = kind
        self._id = ident

    def sort_key(self):
        """Return a ``(type, id)`` tuple for ordering links."""
        return self._type, self._id

    def link(self):
        """Return the URL for this link, or "" for `LinkType.Other`.

        For a commit, the stored id is first expanded with
        `lookup_git_commit`, which raises `ValueError` on failure.
        """
        kind = self._type
        if kind == LinkType.Other:
            return ""
        if kind == LinkType.MergeRequest:
            return f"https://gitlab.torproject.org/tpo/core/arti/-/merge_requests/{self._id}"
        if kind == LinkType.Issue:
            return f"https://gitlab.torproject.org/tpo/core/arti/-/issues/{self._id}"
        if kind == LinkType.Commit:
            full_id = lookup_git_commit(self._id)
            return f"https://gitlab.torproject.org/tpo/core/arti/-/commit/{full_id}"
        return None

    def text(self):
        """Return the ``[label]: url`` definition line, newline-terminated."""
        url = self.link()
        return "[{}]: {}\n".format(self._s, url)


def process(s):
    """Build the link-definition lines for the changelog text `s`.

    Every distinct unresolved link label reported by `links` becomes one
    `Link`; the results are ordered by `Link.sort_key` and their
    definition lines are concatenated into a single string.

    >>> print(process("Hello [#123] [!456]"), end="")
    [!456]: https://gitlab.torproject.org/tpo/core/arti/-/merge_requests/456
    [#123]: https://gitlab.torproject.org/tpo/core/arti/-/issues/123
    """

    # Deduplicate on the raw label text before building Link objects.
    labels = set(links(s))
    entries = [Link(label) for label in labels]
    entries.sort(key=Link.sort_key)
    return "".join(entry.text() for entry in entries)


if __name__ == "__main__":
    import sys
    import argparse

    # One optional positional argument: the file to read.  "-" (the
    # default) means read from stdin.
    parser = argparse.ArgumentParser(prog="gen_md_links")
    parser.add_argument("filename", nargs="?", default="-")
    args = parser.parse_args()

    if args.filename == "-":
        text = sys.stdin.read()
    else:
        # Use a context manager so the file is closed once it has been read.
        with open(args.filename, "r") as in_file:
            text = in_file.read()

    print(process(text))
