#!/usr/bin/python3
#
# Copyright (C) 2010-2018, Benjamin Drung <bdrung@debian.org>
#               2010, Stefano Rivera <stefanor@ubuntu.com>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

# pylint: disable=invalid-name
# pylint: enable=invalid-name

import argparse
import glob
import operator
import os
import re
import sys

from devscripts.control import (
    HAS_FULL_RTS_FORMATTING,
    HAS_RTS_PARSER,
    Control,
    wrap_and_sort_formatter,
)

try:
    from debian._deb822_repro import LIST_COMMA_SEPARATED_INTERPRETATION
except ImportError:
    LIST_COMMA_SEPARATED_INTERPRETATION = object()


# Names of the control file fields whose values are handled as comma-separated
# lists: WrapAndSortControl.wrap_and_sort() wraps and sorts each of them when
# it is present in a paragraph.
CONTROL_LIST_FIELDS = (
    "Breaks",
    "Build-Conflicts",
    "Build-Conflicts-Arch",
    "Build-Conflicts-Indep",
    "Build-Depends",
    "Build-Depends-Arch",
    "Build-Depends-Indep",
    "Built-Using",
    "Conflicts",
    "Depends",
    "Enhances",
    "Features",
    "Pre-Depends",
    "Provides",
    "Recommends",
    "Replaces",
    "Restrictions",
    "Suggests",
    "Tests",
    "Xb-Npp-MimeType",
)

# Glob patterns, relative to the debian directory, of the files that
# get_files() collects when no file was given on the command line.
SUPPORTED_FILES = (
    "clean",
    "control",
    "control*.in",
    "copyright",
    "copyright.in",
    "dirs",
    "*.dirs",
    "docs",
    "*.docs",
    "examples",
    "*.examples",
    "info",
    "*.info",
    "install",
    "*.install",
    "links",
    "*.links",
    "maintscript",
    "*.maintscript",
    "manpages",
    "*.manpages",
    "tests/control",
)


def erase_and_write(file_ob, data):
    """Replace the complete content of an already opened file with data.

    Meant for file objects opened in a read/write mode such as "r+": the
    position is moved back to the start, data is written from there, and
    whatever is left of the previous content behind it is cut off.
    """
    file_ob.seek(0, os.SEEK_SET)
    file_ob.write(data)
    # Without an argument the file is truncated at the current position,
    # i.e. right after the data that was just written.
    file_ob.truncate()


class WrapAndSortControl(Control):
    """Control file with the wrap-and-sort operations added on top of Control.

    The parsed command line options are kept in ``self.args`` and decide how
    list fields are wrapped, indented and sorted.
    """

    def __init__(self, filename, args):
        """Load filename and, when fully supported, set up the RTS formatter.

        The options read from args are rts_parser, max_line_length,
        short_indent, trailing_comma and wrap_always.
        """
        # The Control module supports the RTS parser with python-debian 0.1.43.
        # However, the `wrap_and_sort_formatter` requires 0.1.44.  The command line
        # option check handles the compatibility check for now.
        super().__init__(filename, use_rts_parser=args.rts_parser)
        self.args = args
        self._formatter = None
        if args.rts_parser and HAS_FULL_RTS_FORMATTING:
            max_line_length = args.max_line_length
            self._formatter = wrap_and_sort_formatter(
                1 if args.short_indent else "FIELD_NAME_LENGTH",
                trailing_separator=args.trailing_comma,
                immediate_empty_line=args.short_indent,
                max_line_length_one_liner=0 if args.wrap_always else max_line_length,
            )

    def wrap_and_sort(self):
        """Wrap and sort the fields of all paragraphs.

        Every field listed in CONTROL_LIST_FIELDS is wrapped and sorted,
        Uploaders is wrapped but keeps its order, and the Architecture values
        are de-duplicated and sorted. With the sort_binary_packages option
        the paragraphs themselves are sorted by their Package field, except
        for "tests/control" files and files that had parse errors.
        """
        for paragraph in self.paragraphs:
            for field in CONTROL_LIST_FIELDS:
                if field in paragraph:
                    self._wrap_field(paragraph, field, True)
            if "Uploaders" in paragraph:
                self._wrap_field(paragraph, "Uploaders", False)
            if "Architecture" in paragraph:
                archs = set(paragraph["Architecture"].split())
                # Sort, with wildcard entries (such as linux-any) first:
                archs = sorted(archs, key=lambda x: ("any" not in x, x))
                paragraph["Architecture"] = " ".join(archs)

        if not self.args.sort_binary_packages or self.filename.endswith(
            "tests/control"
        ):
            return

        if self.had_parse_errors:
            if not self.args.dry_run:
                print(
                    f"Skipping sorting of binary packages in {self.filename}:"
                    " It had parse errors or used template language and the"
                    " sorting could risk changing the semantics the file."
                )
            return

        # The first paragraph never moves; with keep_first the paragraph
        # following it stays in place as well.
        fixed_count = 1 + int(self.args.keep_first)
        first = self.paragraphs[:fixed_count]
        sortable = self.paragraphs[fixed_count:]
        sort_key = operator.itemgetter("Package")
        self.paragraphs = first + sorted(sortable, key=sort_key)

    def _wrap_field(self, control, entry, sort):
        """Wrap (and optionally sort) field entry of paragraph control.

        Dispatches to the round-trip-safe implementation when
        ``self.is_roundtrip_safe`` is set and to the plain one otherwise.
        """
        if self.is_roundtrip_safe:
            self._wrap_field_rts(control, entry, sort)
        else:
            self._wrap_field_deb822(control, entry, sort)

    def _wrap_field_rts(self, control, entry, sort):
        """Normalise field entry through the comma-separated list view.

        Alternatives are rewritten as "a | b"; when sort is true, duplicates
        are removed and the values are sorted with _sort_packages_key. The
        formatter prepared in __init__ (if any) is applied afterwards.
        """
        view = control.as_interpreted_dict_view(LIST_COMMA_SEPARATED_INTERPRETATION)
        with view[entry] as field_content:
            seen = set()
            for package_ref in field_content.iter_value_references():
                value = package_ref.value
                new_value = " | ".join(x.strip() for x in value.split("|"))
                if not sort or new_value not in seen:
                    package_ref.value = new_value
                    seen.add(new_value)
                else:
                    # Duplicates are only dropped for fields that get sorted
                    package_ref.remove()
            if sort:
                field_content.sort(key=_sort_packages_key)
            if self._formatter:
                field_content.value_formatter(self._formatter)
            field_content.reformat_when_finished()

    def _wrap_field_deb822(self, control, entry, sort):
        """Rewrite field entry of paragraph control as a normalised list.

        The value is split at commas, empty elements are dropped and
        alternatives are rewritten as "a | b". When sort is true, duplicates
        are removed and the elements are sorted. The result is put on one
        line if it fits into max_line_length (and wrap_always is off) and
        on one line per element otherwise.
        """
        # An empty element is not explicitly disallowed by Policy but known to
        # break QA tools, so remove any
        packages = [x.strip() for x in control[entry].split(",") if x.strip()]

        # Sanitize alternative packages. E.g. "a|b  |c" -> "a | b | c"
        packages = [" | ".join(x.strip() for x in p.split("|")) for p in packages]

        if sort:
            # Remove duplicate entries
            packages = sort_list(set(packages))

        if not packages:
            # Nothing to wrap. In particular do not emit a lone trailing
            # comma or a bare newline, which would put back the kind of
            # empty element that was removed above.
            control[entry] = ""
            return

        trailing = "," if self.args.trailing_comma else ""

        # Length of "<entry>: <a>, <b>, ...": every element accounts for a
        # two character separator (": " for the first one, ", " for the
        # others); the trailing comma has to fit on the line as well.
        length = (
            len(entry)
            + sum(2 + len(package) for package in packages)
            + len(trailing)
        )
        if self.args.wrap_always or length > self.args.max_line_length:
            indentation = " "
            if not self.args.short_indent:
                # Align continuation lines with the first value
                indentation *= len(entry) + len(": ")
            wrapped = ",\n".join(indentation + x for x in packages) + trailing
            if self.args.short_indent:
                # All values, including the first one, go below the field name
                control[entry] = "\n" + wrapped
            else:
                control[entry] = wrapped.strip()
        else:
            control[entry] = ", ".join(packages) + trailing

    def check_changed(self):
        """Checks if the content has changed in the control file"""
        content = self.dump()
        with open(self.filename, "r", encoding="utf8") as control_file:
            return content != control_file.read()


class InstallContent:
    """A content line together with the comment lines that belong to it.

    Equality and ordering only look at ``content``, so sorting moves the
    comments along with their line without letting them influence the order.
    """

    __slots__ = ("content", "comments")

    # Instances are mutable and compare by content, so they stay unhashable
    # (this is also what defining __eq__ alone implies).
    __hash__ = None

    def __init__(self, content, comments=None):
        """Store the content line and its (optional) list of comment lines."""
        self.content = content
        self.comments = comments

    def __repr__(self):
        return f"{type(self).__name__}({self.content!r}, comments={self.comments!r})"

    def __str__(self):
        """Return the comment lines (if any) followed by the content line."""
        comments = "\n".join(self.comments) + "\n" if self.comments else ""
        return comments + self.content

    def __eq__(self, other):
        if not isinstance(other, InstallContent):
            # Let Python try the reflected operation / fall back to identity
            # instead of failing with an AttributeError.
            return NotImplemented
        return self.content == other.content

    def __lt__(self, other):
        if not isinstance(other, InstallContent):
            return NotImplemented
        return self.content < other.content


class Install:
    """Line based debhelper file whose content lines can be sorted.

    Comments are kept: a comment block is attached to the content line that
    follows it, comment blocks at the top of the file that are followed by an
    empty line stay at the top, and comments before the end of the file stay
    at the end.
    """

    def __init__(self, filename, args):
        """Read filename; args provides the dry_run option used by save()."""
        self.content = None
        self.filename = None
        self.args = args
        self.leading_comments = None
        self.trailing_comments = None
        self.open(filename)

    def open(self, filename):
        """Parse filename into leading comments, content and trailing comments."""
        assert os.path.isfile(filename), f"{filename} does not exist."
        self.filename = filename
        # Start from scratch so that opening another file does not inherit
        # the leading comments of the previous one.
        self.leading_comments = None
        comments = []
        content = []
        with open(filename, encoding="utf8") as f:
            # When reading a debhelper file, we want to preserve blocks of comments.
            #
            # For the purpose of wrap-and-sort, we generally associate comments with
            # a line of (non-whitespace) content. Though as special cases, we also
            # preserve a file starting with a comment (with an empty line before the
            # first content to distinguish it from a comment to that comment line) as
            # well as trailing comments (i.e. comments before EOF).
            for line in f:
                line = line.strip()
                if not line:
                    if comments and not content:
                        self._add_leading_comments(comments)
                        comments = []
                    continue
                if line.startswith("#"):
                    comments.append(line)
                    continue
                content.append(InstallContent(line, comments=comments or None))
                comments = []
        self.trailing_comments = comments
        self.content = content

    def _add_leading_comments(self, comments):
        """Append a comment block to the comments kept at the top of the file."""
        if self.leading_comments is None:
            self.leading_comments = InstallContent("", comments=comments)
        else:
            # Several comment blocks can precede the first content line. Keep
            # all of them (instead of only the last one); the empty string
            # retains the blank line that separated the blocks.
            self.leading_comments.comments.extend(["", *comments])

    def save(self):
        """Write the file back if its serialized form differs.

        Returns True if the file was modified, or would be modified when the
        dry_run option is set, and False otherwise.
        """
        to_write = self._serialize_content()

        with open(self.filename, "r+", encoding="utf8") as install_file:
            content = install_file.read()
            if not content:
                # An empty file has nothing to sort; do not turn it into a
                # file consisting of a single newline.
                return False
            if to_write != content:
                if not self.args.dry_run:
                    erase_and_write(install_file, to_write)
                return True
        return False

    def _serialize_content(self):
        """Return the file content as one newline-terminated string."""
        elements = []
        if self.leading_comments:
            # str() of the leading comments ends with a newline, which gives
            # the blank line that separates them from what follows.
            elements.append(str(self.leading_comments))
        elements.extend(str(x) for x in self.content)
        if self.trailing_comments:
            trailing = "\n".join(self.trailing_comments)
            if self.content:
                # Add a space between the last content and the trailing
                # comments for readability
                trailing = "\n" + trailing
            # Without content there is nothing to separate the comments
            # from, so no blank line is inserted in front of them.
            elements.append(trailing)
        return "\n".join(elements) + "\n"

    def sort(self):
        """Sort the content lines; attached comments move with their line."""
        self.content = sorted(self.content)


def remove_trailing_whitespaces(filename, args):
    """Strip surplus whitespace from filename.

    Whitespace at the end of every line and at the start and end of the file
    is removed and the file is terminated with exactly one newline. The file
    is handled as bytes, so its encoding does not matter.

    Returns True if the file was modified, or would be modified when
    args.dry_run is set, and False otherwise. An empty file is left alone.
    """
    assert os.path.isfile(filename), f"{filename} does not exist."
    with open(filename, "br+") as file_object:
        content = file_object.read()
        if not content:
            # Nothing to clean up and nothing is written, so the file must
            # not be reported as modified.
            return False
        stripped_lines = (line.rstrip() for line in content.strip().split(b"\n"))
        new_content = b"\n".join(stripped_lines) + b"\n"
        if new_content != content:
            if not args.dry_run:
                erase_and_write(file_object, new_content)
            return True
    return False


def sort_list(unsorted_list):
    """Return the entries of unsorted_list as a new, sorted list.

    The order is the one defined by _sort_packages_key.
    """
    ordered = list(unsorted_list)
    ordered.sort(key=_sort_packages_key)
    return ordered


def _sort_packages_key(package):
    # Sort dependencies starting with a "real" package name before ones starting
    # with a substvar
    return 0 if re.match("[a-z0-9]", package) else 1, package


def wrap_and_sort(args):
    """Process all files in args.files and return the modified ones.

    Three passes are made over args.files: control files are wrapped and
    sorted, copyright files get their trailing whitespace removed, and the
    line based debhelper files are sorted. With args.verbose every processed
    file name is printed. The returned list contains the files that were
    modified (or would be, with args.dry_run) in processing order.
    """
    changed = []

    def announce(path):
        # Print the file that is about to be processed in verbose mode
        if args.verbose:
            print(path)

    for path in args.files:
        if re.search("/control[^/]*$", path) is None:
            continue
        announce(path)
        try:
            control = WrapAndSortControl(path, args)
        except ValueError as e:
            print(
                f"W: Could not parse {path} as a Deb822 file: {str(e.args[0])}",
                file=sys.stderr,
            )
            continue
        if args.cleanup:
            control.strip_trailing_whitespace_on_save = True
        control.wrap_and_sort()
        if not control.check_changed():
            continue
        if not args.dry_run:
            control.save()
        changed.append(path)

    for path in args.files:
        if re.search("/copyright[^/]*$", path) is None:
            continue
        announce(path)
        if remove_trailing_whitespaces(path, args):
            changed.append(path)

    pattern = "(dirs|docs|examples|info|install|links|maintscript|manpages)$"
    for path in sorted(f for f in args.files if re.search(pattern, f)):
        announce(path)
        install = Install(path, args)
        install.sort()
        if install.save():
            changed.append(path)

    return changed


def get_files(debian_directory):
    """Returns a list of files that should be wrapped and sorted.

    Every pattern of SUPPORTED_FILES is expanded below debian_directory;
    matches that are executable are left out.
    """
    found = []
    for name_pattern in SUPPORTED_FILES:
        full_pattern = os.path.join(debian_directory, name_pattern)
        for candidate in glob.glob(full_pattern):
            if os.access(candidate, os.X_OK):
                continue
            found.append(candidate)
    return found


def main():
    """Command line entry point.

    Parses the options, decides whether the round-trip-safe parser is used,
    determines the files to process (the ones given with --file or else all
    supported files of the debian directory), processes them and, in verbose
    or dry-run mode, prints a summary of the modified files.
    """
    toggle = argparse.BooleanOptionalAction

    # Remember to keep doc/wrap-and-sort.1 updated!
    options = (
        (
            ("-a", "--wrap-always"),
            {
                "action": toggle,
                "default": False,
                "help": "wrap lists even if they do not exceed the line length limit",
            },
        ),
        (
            ("-s", "--short-indent"),
            {
                "dest": "short_indent",
                "action": toggle,
                "default": False,
                "help": "only indent wrapped lines by one space",
            },
        ),
        (
            ("-b", "--sort-binary-packages"),
            {
                "dest": "sort_binary_packages",
                "action": toggle,
                "default": False,
                "help": "sort binary package paragraphs by name",
            },
        ),
        (
            ("-k", "--keep-first"),
            {
                "dest": "keep_first",
                "action": toggle,
                "default": True,
                "help": "when sorting binary packages, leave the first one at the top",
            },
        ),
        (
            ("-n", "--cleanup"),
            {
                "dest": "cleanup",
                "action": toggle,
                "default": True,
                "help": "remove trailing whitespaces",
            },
        ),
        (
            ("-t", "--trailing-comma"),
            {
                "dest": "trailing_comma",
                "action": toggle,
                "default": False,
                "help": "add trailing comma",
            },
        ),
        (
            ("-d", "--debian-directory"),
            {
                "dest": "debian_directory",
                "metavar": "PATH",
                "default": "debian",
                "help": "location of the 'debian' directory",
            },
        ),
        (
            ("-f", "--file"),
            {
                "dest": "files",
                "metavar": "FILE",
                "action": "append",
                "default": [],
                "help": "wrap and sort only the specified file",
            },
        ),
        (
            ("-v", "--verbose"),
            {
                "dest": "verbose",
                "action": "store_true",
                "default": False,
                "help": "print all files that are touched",
            },
        ),
        (
            ("--max-line-length",),
            {
                "type": int,
                "default": 79,
                "help": "set maximum allowed line length before wrapping",
            },
        ),
        (
            ("-N", "--dry-run"),
            {
                "dest": "dry_run",
                "action": "store_true",
                "default": False,
                "help": "do not modify any file, instead only print the files"
                " that would be modified",
            },
        ),
        (
            ("--experimental-rts-parser",),
            {
                "dest": "rts_parser",
                "action": "store_true",
                "default": False,
                "help": argparse.SUPPRESS,
            },
        ),
    )

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        epilog=f"See {os.path.basename(sys.argv[0])}(1) for more info.",
    )
    for flags, settings in options:
        parser.add_argument(*flags, **settings)

    args = parser.parse_args()

    # The value given on the command line for rts_parser is replaced by what
    # the available library support allows.
    rts_usable = HAS_RTS_PARSER
    if rts_usable and not HAS_FULL_RTS_FORMATTING:
        # Cases where we might have some but not full support.
        only_basic_formatting = (
            args.wrap_always and args.trailing_comma and not args.short_indent
        )
        if not only_basic_formatting:
            rts_usable = False
    args.rts_parser = rts_usable

    if not os.path.isdir(args.debian_directory):
        parser.error(
            f'Debian directory not found, expecting "{args.debian_directory}".'
        )

    missing = [f for f in args.files if not os.path.isfile(f)]
    if missing:
        parser.error(f"Specified files not found: {', '.join(missing)}")

    if not args.files:
        args.files = get_files(args.debian_directory)

    modified_files = wrap_and_sort(args)

    # Only report at the end, to avoid potential clash with --verbose
    if modified_files and (args.verbose or args.dry_run):
        print(
            "--- Dry run, these files would be modified ---"
            if args.dry_run
            else "--- Modified files ---"
        )
        print("\n".join(modified_files))
    elif args.verbose:
        print("--- No file needs modification ---")

# Run the command line interface only when executed as a script, not when
# the file is imported as a module.
if __name__ == "__main__":
    main()
