#!/usr/bin/env python3

# hhh
# Copyright (c) 2023 by Stacy Prowell.  All rights reserved.
# https://gitlab.com/sprowell/hhh

'''
Find, and update, the comment header of all source files in the repository.

To avoid errors and false positives, we look for the copyright block in the first
part of the file (see `MAXIMUM_LINES`) and require it to be a minimum size
(see `MINIMUM_LINES`).  We require it to contain a series of marker strings
(see `MARKERS`).  If all criteria are satisfied, it is discarded and replaced with
the given copyright block (see `COPYRIGHT`), with each line prefixed by the
single-line comment marker inferred from the file.

This means you can start a new file with just the minimum number of lines of comments
that contains all the markers, and it will be expanded magically.  Right now, that
looks as follows.

// hhh
// Copyright
'''

import datetime
import sys
import os
from typing import List, Tuple

# Directories whose files are scanned.  Every entry is walked *recursively*,
# so never list a directory that already lives beneath another entry.
FOLDERS = [
    "src",
    "etc",
    "examples",
    "docs/src",
]

# Individual files scanned in addition to the folders.  Paths are relative
# to the root folder of the distribution.
FILES = [
    "build.rs",
    "Cargo.toml",
]

# Fewest lines a comment block may have and still be considered.  Keep the
# module documentation in sync when changing this.
MINIMUM_LINES = 2

# Line limit for the header search: only comment blocks near the top of the
# file, inside this many lines, are considered.
MAXIMUM_LINES = 50

# Marker strings.  Every one of them must be found in a comment block before
# the file will be processed.  Keep the module documentation in sync when
# changing this.
MARKERS = ["hhh", "copyright"]

# Prefixes that introduce a single-line comment.
COMMENT_STARTS = ["#", "//"]

# Text of the header block to place in each file, one header line per text
# line.  The placeholder YYYY is replaced with the current four digit year.
COPYRIGHT = (
    "hhh\n"
    "Copyright (c) YYYY by Stacy Prowell.  All rights reserved.\n"
    "https://gitlab.com/sprowell/hhh\n"
)

def extract_comment_block(lines: List[str], lno: int, start: str) -> Tuple[List[str], int, bool]:
    """Extract the contiguous comment block that begins at ``lines[lno]``.

    The block is the run of consecutive lines, starting at index ``lno``,
    that begin with the comment marker ``start``.  The block is treated as
    the copyright header when all of the following hold:

    * the index of the line just past the block is below ``MAXIMUM_LINES``,
    * the block has at least ``MINIMUM_LINES`` lines, and
    * every string in ``MARKERS`` occurs somewhere in the block (compared
      without regard to case, so ``copyright`` matches ``Copyright``).

    In that case a freshly generated header, built from ``COPYRIGHT`` with
    ``YYYY`` replaced by the current year, is returned instead of the block.

    Args:
        lines: All lines of the file.
        lno: Index of the first line of the comment block.
        start: The single-line comment marker used by the block.

    Returns:
        A tuple ``(block, next_lno, replaced)``: the lines to emit for this
        block, the index of the first line after the block, and whether the
        block was recognized and replaced.
    """
    block = []
    # Markers not seen yet; the block qualifies only if this ends up empty.
    missing = {marker.lower() for marker in MARKERS}
    # Bound the scan by the file length: a comment block may run all the way
    # to the last line of the file.
    while lno < len(lines) and lines[lno].startswith(start):
        line = lines[lno]
        block.append(line)
        lno += 1
        lowered = line.lower()
        missing = {marker for marker in missing if marker not in lowered}

    # See if the block matches all criteria.  If so, replace it and
    # indicate that we found the block.
    if lno < MAXIMUM_LINES and len(block) >= MINIMUM_LINES and not missing:
        year = str(datetime.datetime.now().year)
        newblock = []
        for text in COPYRIGHT.splitlines():
            text = text.replace("YYYY", year)
            # strip() keeps an empty header line from leaving trailing
            # whitespace after the comment marker.
            newblock.append(f"{start} {text}".strip() + "\n")
        return (newblock, lno, True)

    # Return the comment block as-is.
    return (block, lno, False)

def process_file(file: str) -> bool:
    """Process a single file, replacing its copyright header if one is found.

    While no header has been found and the current line index is below
    ``MAXIMUM_LINES``, every line that starts with one of ``COMMENT_STARTS``
    is handed to ``extract_comment_block``.  Once a block is recognized as
    the header (or the line limit is reached) the remainder of the file is
    copied through unchanged.  The file is rewritten in place only when a
    header was replaced.

    Files that cannot be decoded as UTF-8, or that cannot be read or
    written, are reported on standard output.

    Args:
        file: Path of the file to process.

    Returns:
        True if a header was found and the file was rewritten successfully;
        False if no header was found or an error occurred.
    """
    # Read the whole file.  Source files are small, so there is no need to
    # stream them.
    try:
        with open(file, "rt", encoding="utf-8") as file_in:
            lines = file_in.readlines()
    except UnicodeDecodeError:
        # No newline: the caller finishes the line with its own status.
        print("Not readable text file... ", end="")
        return False
    except IOError:
        print("Unexpected error:", sys.exc_info()[0])
        return False

    # Scan the head of the file for the header block.  Any kind of comment
    # marker is allowed.
    newlines = []
    found_marker = False
    lno = 0
    while lno < len(lines) and lno < MAXIMUM_LINES and not found_marker:
        hasstart = ""
        for start in COMMENT_STARTS:
            if lines[lno].startswith(start):
                hasstart = start
        if hasstart:
            # We may have found the header.  The helper returns either the
            # replacement block or the original lines, and where to resume.
            (block, lno, found_marker) = extract_comment_block(lines, lno, hasstart)
            newlines.extend(block)
        else:
            newlines.append(lines[lno])
            lno += 1

    if not found_marker:
        # Nothing to replace, so leave the file untouched.
        return False

    # Everything after the header (or past the line limit) is copied as-is.
    newlines.extend(lines[lno:])

    try:
        with open(file, "wt", encoding="utf-8") as file_out:
            file_out.writelines(newlines)
    except IOError:
        # Do not claim the file was updated when the write failed.
        print("Unexpected error:", sys.exc_info()[0])
        return False

    return True

def _process_and_report(filename: str) -> None:
    """Run ``process_file`` on *filename* and print whether it was updated."""
    print(f"Processing file {filename}... ", end="")
    if process_file(filename):
        print("Updated")
    else:
        print("Ignored")

def main() -> None:
    """Process files and update the copyright heading in the file.

    Lists the folders (``FOLDERS``) and extra files (``FILES``) that will be
    touched, asks for confirmation, and then processes every extra file
    followed by every file found by walking each folder recursively.  Any
    answer that does not begin with ``y`` or ``Y`` cancels the run.
    """
    print("This will update the comment block in all files under these folders:")
    for folder in FOLDERS:
        print(f"  * {folder}")
    print()
    print("The following additional files will be updated, also:")
    for filename in FILES:
        print(f"  * {filename}")
    print()
    print("No backups are made.  Please commit or stash all work before continuing!")
    print()
    proceed = input("Proceed? (y/N) ")
    # Accept "y", "Y", "yes", ... and tolerate stray leading whitespace; the
    # default (anything else, including just pressing Enter) is "no".
    if not proceed.strip().lower().startswith("y"):
        return

    for filename in FILES:
        _process_and_report(filename)
    for folder in FOLDERS:
        for root, _dirs, files in os.walk(folder):
            for basename in files:
                _process_and_report(os.path.join(root, basename))


if __name__ == "__main__":
    main()