#!/usr/bin/env python3

import json
import os
import shlex

# Program names (after any "-suffix" has been stripped from the basename of
# argv[0]) that Command treats as compiler drivers.
kDrivers = [
    "cc",
    "c++",
    "cc1",
    "gcc",
    "g++",
    "clang",
    "clang++",
]

# File-name suffixes of arguments that extractSourceFile accepts as the
# translation unit of a compile step.
kSourceExtensions = [
    ".c",
    ".cc",
    ".cpp",
    ".cxx",
    ".c++",
]

# File-name suffixes that make extractSourceFile discard the whole command.
kAssemblyExtensions = [
    ".s",
    ".S",
]


class Command(object):
    """One executed process, as recorded on a single line of the trace log.

    Attributes:
        cwd: Working directory of the process (must be a str).
        parent: The Command regarded as this process's ancestor, or None.
        argv: Argument vector of the process; argv[0] is the program name.
        line: Line number of the record in the log, used in warnings.
        isCompilerDriver: True when the basename of argv[0], cut at its
            first '-', is one of kDrivers.  False for an empty argv.
    """

    def __init__(self, cwd, parent, argv, line):
        assert isinstance(cwd, str)
        assert parent is None or isinstance(parent, Command)
        self.cwd = cwd
        self.parent = parent
        self.argv = argv
        self.line = line
        # A record with an empty argv has no program name to inspect; treat
        # it as "not a driver" instead of failing on argv[0].
        #
        # Only the text before the first '-' is compared, so "gcc-9" matches
        # "gcc", while a target-prefixed name such as "arm-none-eabi-gcc" is
        # reduced to "arm" and does not match.
        self.isCompilerDriver = bool(argv) and (
            os.path.basename(argv[0]).split('-', 1)[0] in kDrivers)


def readPidList(pidlist):
    """Decode the delta-encoded "pidlist" field of a record.

    pidlist is a flat sequence [pid0, start0, dPid1, dStart1, ...]: the first
    pair is absolute and every later pair is the difference from the pair
    before it.  The result is the list of absolute (pid, startTime) tuples,
    in the same order.

    The input must contain an even number of values and at least one pair.
    """
    assert len(pidlist) >= 2 and len(pidlist) % 2 == 0

    pid, startTime = pidlist[0], pidlist[1]
    pairs = [(pid, startTime)]
    # Walk the remaining values two at a time, accumulating the deltas.
    for pidDelta, startTimeDelta in zip(pidlist[2::2], pidlist[3::2]):
        pid = pid + pidDelta
        startTime = startTime + startTimeDelta
        pairs.append((pid, startTime))
    return pairs


def readFile(path):
    """Parse a trace log into a list of Command objects.

    Each line of the file at path is one JSON record with at least the keys
    "pidlist", "cwd" and "argv".  The last (pid, startTime) pair decoded from
    "pidlist" identifies the record's own process; the pairs before it are
    looked up among the processes already read, and the last one found
    becomes the command's parent (None when none is known).

    Returns the commands in file order; each carries its 1-based line number.
    """
    commands = []
    commandByProcess = {}

    with open(path) as fp:
        for lineNumber, recordString in enumerate(fp, 1):
            record = json.loads(recordString)
            procList = readPidList(record["pidlist"])

            # Later entries override earlier ones, so the parent ends up
            # being the last listed ancestor that has a known command.
            parent = None
            for ancestorID in procList[:-1]:
                if ancestorID in commandByProcess:
                    parent = commandByProcess[ancestorID]

            command = Command(record["cwd"], parent, record["argv"],
                              lineNumber)
            commands.append(command)
            commandByProcess[procList[-1]] = command

    return commands


def endsWithOneOf(text, extensions):
    """Return True if text ends with at least one entry of extensions."""
    return any(text.endswith(suffix) for suffix in extensions)


def joinCommandLine(argv):
    """Join an argument vector into one shell-quoted command string.

    Every argument is passed through shlex.quote, so arguments containing
    spaces, quotes or other shell metacharacters (including the empty
    string) survive as single arguments: shlex.split() of the result yields
    the original list.  Arguments made only of "safe" characters are left
    untouched, so ordinary command lines look the same as a plain join.

    Args:
        argv: Iterable of str arguments.

    Returns:
        The arguments joined with single spaces, each one POSIX-shell quoted.
    """
    # TODO: Review what Clang's libtooling and CMake do, especially on
    # Windows -- shlex.quote only produces POSIX shell quoting.
    return " ".join(shlex.quote(arg) for arg in argv)


def isChildOfCompilerDriver(command):
    """Return True if any ancestor of command is a compiler driver.

    Follows the .parent links upwards; command itself is not examined.
    """
    ancestor = command.parent
    # Stop at the first driver found or when the chain runs out.
    while ancestor is not None and not ancestor.isCompilerDriver:
        ancestor = ancestor.parent
    return ancestor is not None


def compileMode(argv):
    """Return the last of "-c", "-S" or "-E" found in argv, or None."""
    modeFlags = ("-c", "-S", "-E")
    seen = [arg for arg in argv if arg in modeFlags]
    if not seen:
        return None
    # When several mode flags are present, the final one is reported.
    return seen[-1]


def extractSourceFile(command, include_path):
    """Attempt to extract a compile step from a driver command line.

    Args:
        command: The Command to inspect (its argv, cwd, line, parent and
            isCompilerDriver attributes are used).
        include_path: Directory added to the recorded command line as an
            extra "-I" option.

    Returns:
        A dict with the keys "directory", "command" and "file" describing
        the compile step, or None when the command is skipped: it is not a
        compiler driver, it runs underneath another driver, its compile mode
        is not "-c", it names an assembly file, it names no source file or
        more than one, or its directory / input file no longer exists (the
        last three cases print a warning).
    """

    # Accommodate ccache.  With ccache, a parent process will be exec'ed using
    # a program name like "g++", but the g++ is really a symlink to a ccache
    # program.  ccache invokes children g++ subprocesses, but with altered
    # arguments.  e.g. Instead of:
    #    g++ -c file.cc -o file.o
    # we get something like this:
    #    g++ -E file.cc   [output redirected to a tmp file]
    #    g++ -c $HOME/.ccache/tmp/tmp.i -o $HOME/.ccache/tmp.o.1234
    # The translation unit compilation is now split into two, and the output
    # filename is lost.  The approach taken here is to ignore any subprocesses
    # of a compiler driver invocation.

    # TODO: this misses gcc foo.c -o foo; it only catches gcc -c foo.c -o foo.
    # Can't we default to assuming it's -c?

    if not command.isCompilerDriver or isChildOfCompilerDriver(command) or \
            compileMode(command.argv) != "-c":
        return None

    inputFile = None
    for arg in command.argv[1:]:
        # startswith() is also safe for an empty-string argument.
        if arg.startswith("-"):
            continue
        if endsWithOneOf(arg, kSourceExtensions):
            if inputFile is not None:
                # One database entry describes one translation unit; skip
                # the command rather than abort the whole conversion.
                print('warning: line %d: multiple source files (%s, %s), skipping' %
                    (command.line, inputFile, arg))
                return None
            inputFile = arg
        elif endsWithOneOf(arg, kAssemblyExtensions):
            return None

    if inputFile is None:
        return None
    if not os.path.isdir(command.cwd):
        print('warning: line %d: directory %s does not exist, skipping' %
            (command.line, command.cwd))
        return None
    absoluteInputFile = os.path.join(command.cwd, inputFile)
    if not os.path.isfile(absoluteInputFile):
        print('warning: line %d: input file %s does not exist, skipping' %
            (command.line, absoluteInputFile))
        return None

    # Hand the extra include option to joinCommandLine as a regular argument
    # so it receives the same joining/quoting treatment as the rest.
    cmd = joinCommandLine(list(command.argv) + ["-I" + include_path])

    return {"directory": command.cwd, "command": cmd, "file": inputFile}


def main():
    """Convert ./btrace.log into ./compile_commands.json.

    Expects the include path as the first command-line argument; it is passed
    to extractSourceFile for every command.  Prints a usage message and
    returns when the argument is missing.  Raises RuntimeError when no
    command in the log yields a compilation database entry.
    """
    import sys

    if len(sys.argv) < 2:
        print("Need to specify include path.")
        print("usage: %s <clang_system_headers>" % sys.argv[0])
        return
    include_path = sys.argv[1]

    # readFile() runs to completion before the first command is examined.
    entries = (extractSourceFile(command, include_path)
               for command in readFile("btrace.log"))
    results = [entry for entry in entries if entry is not None]

    if not results:
        raise RuntimeError("No source files found by sw-btrace-to-compilerdb")

    with open("compile_commands.json", "w") as f:
        json.dump(results, f, indent=4)


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
