#!/usr/bin/env python3
# Lint as: python3
"""
This python script can dividing monolithic mock headers into different mock classes. We need to
remove the over-included header files in generated class codes and resolve dependencies in the
corresponding Bazel files manually.
"""
import argparse
import os
import subprocess
import sys
from typing import Type, List, Tuple, Dict

# libclang imports
import clang.cindex
from clang.cindex import TranslationUnit, Index, CursorKind, Cursor


def to_filename(classname: str) -> str:
  """
  converts a C++ mock class name into the matching file stem under the Envoy naming convention.
  The first occurrence of "Mock" is dropped and the remaining CamelCase name is rewritten in
  snake_case, e.g. "MockAdminStream" becomes "admin_stream".

  Args:
      classname: mock class name as spelled in the C++ source

  Returns:
      the lower-case, underscore-separated file name (without extension)
  """
  stem = classname.replace("Mock", "", 1)  # only the first "Mock" is stripped
  # every upper-case character except a leading one starts a new word
  pieces = [("_" + char) if (char.isupper() and position > 0) else char
            for position, char in enumerate(stem)]
  return "".join(pieces).lower()


def get_directives(translation_unit: TranslationUnit) -> str:
  """
  "extracts" all header include statements and other directives from the target source code file

  for instance:
      foo.h:
      #pragma once
      #include "a.h"
      #include "b.h"

      int foo(){
      }
  this function should return the three directive lines that precede "int foo(){".

  Args:
      translation_unit: parsing result (a TranslationUnit instance) of the target source code
          by libclang

  Returns:
      A string holding the file contents up to the start offset of the first cursor that is
      located in the parsed file itself, i.e. the include statements and other preprocessor
      directives before the first non-directive statement. An empty string is returned when
      no cursor is located in the parsed file.

  Notes:
      clang lib provides API like translation_unit.get_includes() to get include directives.
      But we can't use it as it requires presence of the included files to return the full list.
      We choose to return a string instead of a list of includes since the include statements
      are simply copy-pasted into the generated headers.
      NOTE(review): the cut position is the offset reported by libclang while the file is read
      in text mode; presumably these only agree for content where one character is one byte
      - confirm before using on non-ASCII headers.
  """
  cursor = translation_unit.cursor
  for descendant in cursor.walk_preorder():
    source_file = descendant.location.file
    # skip cursors without a file and cursors that come from included headers
    if source_file is None or source_file.name != cursor.displayname:
      continue
    contents = read_file_contents(source_file.name)
    # everything in front of the first in-file cursor is treated as directives
    return contents[:descendant.extent.start.offset]
  return ""


def cursors_in_same_file(cursor: Cursor) -> List[Cursor]:
  """
  collects, in pre-order, every cursor reachable from the input cursor whose location is in the
  file named by the input cursor's displayname

  Args:
    cursor: cursor of parsing result of target source code by libclang

  Returns:
    a list of cursors located in the input file
  """

  def located_in_input_file(node):
    # Cursors from other files are rejected, otherwise we would pick up definitions from every
    # header included while clang parsed the input file (e.g. class definitions such as
    # std::string). Cursors without any file are rejected as well.
    node_file = node.location.file
    return node_file is not None and node_file.name == cursor.displayname

  return [node for node in cursor.walk_preorder() if located_in_input_file(node)]


def class_definitions(cursor: Cursor) -> List[Cursor]:
  """
  finds the class definitions in the file pointed to by cursor (typically mocks.h)

  A cursor is kept only when it is a class declaration, is a definition (not just a forward
  declaration), and its semantic parent is a namespace.

  Args:
      cursor: cursor of parsing result of target source code by libclang

  Returns:
      a list of cursors, each pointing to a class definition.
  """
  return [
      candidate for candidate in cursors_in_same_file(cursor)
      if candidate.kind == CursorKind.CLASS_DECL and candidate.is_definition() and
      candidate.semantic_parent.kind == CursorKind.NAMESPACE
  ]


def class_implementations(cursor: Cursor) -> List[Cursor]:
  """
  finds the class member implementations in the file pointed to by cursor (typically mocks.cc)

  Every cursor located in the file whose semantic parent is a class declaration is kept;
  namespace cursors and cursors without a semantic parent are skipped.

  Args:
      cursor: cursor of parsing result of target source code by libclang

  Returns:
      a list of cursors, each pointing to a class member implementation.
  """
  members = []
  for node in cursors_in_same_file(cursor):
    if node.kind == CursorKind.NAMESPACE:
      continue
    owner = node.semantic_parent
    # a member implementation is recognised by being semantically owned by a class
    if owner is not None and owner.kind == CursorKind.CLASS_DECL:
      members.append(node)
  return members


def extract_definition(cursor: Cursor, classnames: List[str]) -> Tuple[str, str, set]:
  """
  extracts the class definition source code pointed to by the cursor parameter
  and finds dependent mock classes by name look up.

  Args:
      cursor: libclang cursor pointing to the target mock class definition.
      classnames: all mock class names defined in the definition header that needs to be
          divided, used to parse class dependencies.
  Returns:
      class_name: a string representing the mock class name.
      class_defn: a string containing the whole class definition body, terminated with ";" and
          wrapped in its enclosing named namespaces.
      deps: a set of strings, the names from classnames (other than class_name) that occur in
          class_defn.

  Note:
      It can not detect and resolve forward declaration and cyclic dependency. Need to address
      manually.
      The dependency look up is a plain substring test, so a name that is a substring of another
      identifier used in the definition is reported as a dependency too.
  """
  filename = cursor.location.file.name
  contents = read_file_contents(filename)
  class_name = cursor.spelling
  extent = cursor.extent
  # the extent stops before the terminating semicolon, so it is appended here
  class_defn = contents[extent.start.offset:extent.end.offset] + ";"
  # wrap the definition in its enclosing namespaces, innermost first; stop at the first
  # parent that is not a namespace or that has an empty spelling
  parent_cursor = cursor.semantic_parent
  while parent_cursor.kind == CursorKind.NAMESPACE and parent_cursor.spelling != "":
    class_defn = "namespace {} {{\n".format(parent_cursor.spelling) + class_defn + "\n}\n"
    parent_cursor = parent_cursor.semantic_parent
  # resolve dependencies by simple name look up in the wrapped definition text
  deps = {name for name in classnames if name != class_name and name in class_defn}
  return class_name, class_defn, deps


def get_implline(cursor: Cursor) -> int:
  """
  returns the zero-indexed line on which the extent of the given cursor starts, i.e. the first
  line of the implementation source code of the class method the cursor points to.

  Args:
      cursor: libclang cursor pointing to the target class method implementation.

  Returns:
      an integer, the line number of the first line of the corresponding method implementation
      code (zero indexed)

  Note:
      Only a line number is returned. In certain cases libclang fails while parsing a method
      body and stops parsing early (see headersplit_test.test_class_implementations_error for
      details), so the implementation file is parsed with the flag that asks clang to ignore
      function bodies. Because the bodies are skipped, the function body can not be sliced out
      by extent offsets the way extract_definition() does it, and the line number of the last
      line is not reliable either. The start line of the method is still available.
  """
  start = cursor.extent.start
  # libclang line numbers start at 1
  return start.line - 1


def _source_lines(source_code):
  """
  returns source_code as a list of lines, each line keeping its trailing newline character.

  A string is cut at newline characters only, so that list indices match zero-indexed source
  line numbers. Anything that is not a string is assumed to already be a sequence of lines and
  is copied into a list unchanged.
  """
  if not isinstance(source_code, str):
    return list(source_code)
  pieces = source_code.split("\n")
  lines = [piece + "\n" for piece in pieces[:-1]]
  # text after the final newline forms a last, unterminated line
  if pieces[-1]:
    lines.append(pieces[-1])
  return lines


def extract_implementations(impl_cursors: List[Cursor], source_code: str) -> Dict[str, str]:
  """
  extracts the method implementation text for each cursor in impl_cursors from the source code
  and groups it by class name to help generating the divided {classname}.cc

  The text of a method runs from its first line up to (not including) the first line of the
  next cursor in impl_cursors. For the last cursor it runs up to the first following line
  containing "// namespace" (the closing brackets of the namespaces), or to the end of the
  source when there is no such line.

  Args:
      impl_cursors: list of cursors, each pointing to a mock class member function
          implementation.
      source_code: the source code for the implementations (e.g. mocks.cc), either as one
          string or as a list of lines that keep their line endings.

  Returns:
      classname_to_impl: a dict mapping class name to the concatenation of all its member
      method implementations, each followed by a newline.
  """
  # line numbers are used as indices below, so the source has to be indexed by line
  lines = _source_lines(source_code)
  classname_to_impl = {}
  for i, cursor in enumerate(impl_cursors):
    classname = cursor.semantic_parent.spelling
    # first line of the method implementation
    impl_start = get_implline(cursor)
    if i + 1 < len(impl_cursors):
      # not the last method: it ends where the next method starts
      impl_end = get_implline(impl_cursors[i + 1])
    else:
      # last method: stop in front of the lines closing the namespaces
      impl_end = impl_start
      while impl_end < len(lines) and "// namespace" not in lines[impl_end]:
        impl_end += 1
    impl = "".join(lines[impl_start:impl_end])
    classname_to_impl[classname] = classname_to_impl.get(classname, "") + impl + "\n"
  return classname_to_impl


def get_enclosing_namespace(defn: Cursor) -> Tuple[str, str]:
  """
  builds the text that opens and closes all named namespaces enclosing the class pointed to
  by defn. This is necessary to construct the generated mock class files.
  e.g.:
  defn is pointing to MockClass in the following source code:

  namespace Envoy {
  namespace Server {
  class MockClass2 {...}
  namespace Configuration {
  class MockClass {...}
        ^
        defn
  }
  }
  }

  The prefix then opens Envoy, Server and Configuration (outermost first, one
  "namespace <name> {" line each) and the suffix consists of one closing bracket line per
  namespace, preceded and followed by a newline.

  Args:
      defn: libclang Cursor pointing to a mock class

  Returns:
      namespace_prefix, namespace_suffix: a pair of strings, representing the enclosing
      namespaces
  """
  # walk outwards, collecting namespace names innermost first; stop at the first parent that
  # is not a namespace or that has an empty spelling
  names = []
  scope = defn.semantic_parent
  while scope.kind == CursorKind.NAMESPACE and scope.spelling != "":
    names.append(scope.spelling)
    scope = scope.semantic_parent
  # the opening lines have to be emitted outermost first
  namespace_prefix = "".join("namespace {} {{\n".format(name) for name in reversed(names))
  namespace_suffix = "\n}" * len(names) + "\n"
  return namespace_prefix, namespace_suffix


def read_file_contents(path):
  """
  reads the file at path in text mode and returns its whole content as one string

  Args:
      path: path of the file to read

  Returns:
      the complete text of the file
  """
  with open(path, mode="r") as source:
    text = source.read()
  return text


def write_file_contents(class_name, class_defn, class_impl):
  """
  writes the generated header and implementation of one mock class and registers a Bazel
  target for it.

  The files are named after to_filename(class_name) with the extensions .h and .cc. The file
  "BUILD" is opened for reading and updating, so it has to exist already; an envoy_cc_mock
  stanza is appended to it unless it already contains a target of the same name. The deps of
  the stanza are left empty and need to be filled in manually.

  Args:
      class_name: name of the mock class, used to derive the file and target names
      class_defn: text written to the header file
      class_impl: text written to the implementation file
  """
  stem = to_filename(class_name)
  header_path = "{}.h".format(stem)
  source_path = "{}.cc".format(stem)
  with open(header_path, "w") as decl_file:
    decl_file.write(class_defn)
  with open(source_path, "w") as impl_file:
    impl_file.write(class_impl)
  # generating bazel build file, need to fill dependency manually
  bazel_text = """
envoy_cc_mock(
  name = "{}_mocks",
  srcs = ["{}.cc"],
  hdrs = ["{}.h"],
  deps = [

  ]
)
""".format(stem, stem, stem)
  target_marker = 'name = "{}_mocks"'.format(stem)
  with open("BUILD", "r+") as bazel_file:
    existing = bazel_file.read()
    # after reading, the file position is at the end, so the stanza is appended
    if target_marker not in existing:
      bazel_file.write(bazel_text)


def main(args):
  """
  divides the monolithic mock file into different mock class files.

  The implementation file is parsed with function bodies skipped and the header is parsed as
  C++. For every class definition found in the header, a {class}.h and a {class}.cc file are
  written and a Bazel target is registered through write_file_contents().

  Args:
      args: a dict with the keys "decl" (path of the monolithic header) and "impl" (path of
          the monolithic implementation file)
  """
  decl_filename = args["decl"]
  impl_filename = args["impl"]
  idx = Index.create()
  impl_translation_unit = TranslationUnit.from_source(
      impl_filename, options=TranslationUnit.PARSE_SKIP_FUNCTION_BODIES)
  impl_includes = get_directives(impl_translation_unit)
  decl_translation_unit = idx.parse(decl_filename, ["-x", "c++"])
  defns = class_definitions(decl_translation_unit.cursor)
  decl_includes = get_directives(decl_translation_unit)
  impl_cursors = class_implementations(impl_translation_unit.cursor)
  contents = read_file_contents(impl_filename)
  classname_to_impl = extract_implementations(impl_cursors, contents)
  classnames = [cursor.spelling for cursor in defns]
  for defn in defns:
    # writing {class}.h and {classname}.cc
    class_name, class_defn, deps = extract_definition(defn, classnames)
    # deps is unordered; sorting keeps the generated include block identical between runs
    includes = "".join('#include "{}.h"\n'.format(to_filename(name)) for name in sorted(deps))
    class_defn = decl_includes + includes + class_defn
    class_impl = ""
    if class_name not in classname_to_impl:
      print("Warning: empty class {}".format(class_name))
    else:
      # the implementation includes the new per-class header instead of the monolithic one
      impl_include = impl_includes.replace(decl_filename, "{}.h".format(to_filename(class_name)))
      # we need to enclose methods with namespaces
      namespace_prefix, namespace_suffix = get_enclosing_namespace(defn)
      class_impl = impl_include + namespace_prefix + \
          classname_to_impl[class_name] + namespace_suffix
    write_file_contents(class_name, class_defn, class_impl)


if __name__ == "__main__":
  # Command line entry point: both paths are optional and default to the conventional
  # mocks.h / mocks.cc in the current directory.
  CLI_OPTIONS = (
      ("-d", "--decl", "mocks.h",
       "Path to the monolithic header .h file that needs to be splitted"),
      ("-i", "--impl", "mocks.cc",
       "Path to the implementation code .cc file that needs to be splitted"),
  )
  PARSER = argparse.ArgumentParser()
  for short_flag, long_flag, fallback, description in CLI_OPTIONS:
    PARSER.add_argument(short_flag, long_flag, default=fallback, help=description)
  PARSED_ARGS = PARSER.parse_args()
  main(vars(PARSED_ARGS))