#!/usr/bin/python

# dependencies - visualization of source code dependencies
# Copyright (C) 2009-2022  Joachim Reichel <joachim.reichel@posteo.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

'''Visualizes source code dependencies as graph in dot format.'''

# pylint: disable=invalid-name

import argparse
import enum
import fnmatch
import os
import re
import sys

#
# cache for os.path.normpath
#

# pylint: disable=too-few-public-methods
class Normpath_cache:
    '''Caches results of os.path.normpath per directory.

    Paths passed in typically share a small number of directories. The
    cache therefore remembers, for each directory part as returned by
    os.path.split, the normalized text that precedes the file name.'''
    def __init__(self):
        '''Initializes the cache.'''
        # Maps the (unnormalized) head of a path to the normalized path
        # without its last component, i.e., including a trailing separator
        # where one is needed. The value is empty for paths that normalize
        # to a bare file name.
        self.cache = {}
        # optimization
        self.os_path_split = os.path.split
        self.os_sep = os.sep
        self.os_path_normpath = os.path.normpath
    def __call__(self, path):
        '''Normalizes the path.

        Returns the same result as os.path.normpath(path).'''
        (head, tail) = self.os_path_split(path)
        # An empty, "." or ".." tail is consumed by the normalization itself,
        # so neither the fast path nor the cache can be used for it.
        if tail in ("", ".", ".."):
            return self.os_path_normpath(path)
        try:
            # The cached prefix already ends with a separator if required.
            # Unconditionally inserting one would turn "bar" into "/bar" and
            # "/bar" into "//bar".
            return self.cache[head] + tail
        except KeyError:
            path = self.os_path_normpath(path)
            # The normalized path ends with the unchanged tail; everything
            # before it is the reusable prefix for this head.
            if path.endswith(tail):
                self.cache[head] = path[:len(path) - len(tail)]
            return path

#
# global variables
#

# Parsed command line options. This is only a placeholder; main() replaces it
# with the result of parse().
options = ()
# Filters in the order in which they are applied to the graph. Entries are
# appended by FilterAction while the command line is parsed.
filters = []
# Shared path normalizer used by the process_file_* functions.
normpath_cache = Normpath_cache()

#
# misc helper functions
#

def find(root, glob):
    '''Collects the files below root whose name matches the glob pattern.

    Comparable to "find $root -name "$glob"" restricted to non-directory
    entries. The match is case-sensitive and only looks at the file name,
    not at the directory part.'''
    found = []
    for directory, _subdirectories, files in os.walk(root):
        found += ["{}{}{}".format(directory, os.sep, name)
                  for name in files
                  if fnmatch.fnmatchcase(name, glob)]
    return found


def get_include_level_and_filename(s):
    '''Splits the line s into its include level and filename.

    A valid line consists of one or more dots, a single space and the
    filename. The number of dots is the include level. For any other line,
    (-1, "") is returned.'''
    dots, separator, filename = s.partition(" ")
    # Reject lines without a space, without leading dots, or with anything
    # but dots in front of the first space.
    if not separator or not dots or dots.strip("."):
        return (-1, "")
    return (len(dots), filename)


def remove_prefix(s, prefix):
    '''Strips prefix from the beginning of the string s.

    The string s is returned unchanged if it does not start with prefix.'''
    return s[len(prefix):] if s.startswith(prefix) else s


def remove_suffix(s, suffix):
    '''Returns the string s with the given suffix removed.

    If the suffix does not match or is empty, s is returned.'''
    # An empty suffix needs to be excluded explicitly: every string ends
    # with "", but s[:-0] is the empty string and not s.
    if suffix and s.endswith(suffix):
        return s[:-len(suffix)]
    return s


def remove_prefixes(s, prefixes):
    '''Returns the string s with the first matching prefix removed.

    If the first prefix matches, the remaining prefixes are still
    considered. If no prefix matches or prefixes is empty, s is returned.'''
    if not prefixes:
        return s
    # The first prefix is special: it is stripped if it matches, but it
    # does not stop the processing of the remaining prefixes.
    first = prefixes[0]
    if s.startswith(first):
        s = s[len(first):]
    # Among the remaining prefixes, the first match wins.
    for p in prefixes[1:]:
        if s.startswith(p):
            return s[len(p):]
    return s


def dirname(s):
    '''Returns the directory part of s, with '.' standing in for an empty
    directory part.'''
    return os.path.dirname(s) or "."


#
# graph related methods
#

def get_nodes(graph):
    '''Collects all nodes that occur as source or target of an edge.'''
    nodes = {source for (source, _target) in graph}
    nodes |= {target for (_source, target) in graph}
    return nodes


def stats_graph(graph):
    '''Counts the nodes and edges of the dependency graph.

    Returns the pair (number of nodes, number of edges).'''
    node_count = len(get_nodes(graph))
    edge_count = len(graph)
    return (node_count, edge_count)


def induced_subgraph(graph, nodes):
    '''Returns the subgraph of graph induced by nodes.

    The result is the set of all edges of graph whose source and target are
    both contained in nodes. nodes may be any iterable of (hashable) nodes.'''
    # Membership is tested twice per edge. Use a set such that this stays
    # cheap for lists and also works for one-shot iterables.
    if isinstance(nodes, (set, frozenset)):
        members = nodes
    else:
        members = set(nodes)
    return {(s, t) for (s, t) in graph if s in members and t in members}


def print_graph(graph):
    '''Prints graph in .dot format.

    The output is written to the file named by options.output, or to stdout
    if options.output is None. Two comment lines with the number of nodes
    and edges come first, followed by all nodes and all edges in sorted
    order.'''

    def quoted(name):
        '''Returns name as double-quoted DOT identifier.'''
        # A double quote inside a quoted DOT identifier has to be escaped,
        # otherwise it terminates the identifier prematurely.
        return '"%s"' % str(name).replace('"', '\\"')

    nodes = sorted(get_nodes(graph))
    edges = sorted(graph)

    if options.output is not None:
        f = open(options.output, 'w')
    else:
        f = sys.stdout

    try:
        print("// %d nodes" % len(nodes), file=f)
        print("// %d edges" % len(edges), file=f)
        print("digraph foo {", file=f)

        for node in nodes:
            print("  %s;" % quoted(node), file=f)

        for (source, target) in edges:
            print("  %s -> %s;" % (quoted(source), quoted(target)), file=f)

        print("}", file=f)
    finally:
        # Close the output file even if printing fails, but never stdout.
        if f is not sys.stdout:
            f.close()


def graph_traversal(graph, initial):
    '''Does a BFS graph traversal starting from initial node list.

    graph is an iterable of (source, target) edges and initial an iterable
    of start nodes. Returns a new list with the start nodes followed by all
    other reachable nodes in the order in which they were discovered. The
    argument initial is not modified.'''
    # Convert graph into adjacency list representation
    adjacency = {}
    for (source, target) in graph:
        adjacency.setdefault(source, []).append(target)
    # Run BFS on graph. The list of reached nodes doubles as the queue: all
    # nodes from index position onwards still have to be expanded.
    reached = list(initial)
    seen = set(reached)
    position = 0
    while position < len(reached):
        source = reached[position]
        position += 1
        for target in adjacency.get(source, ()):
            if target not in seen:
                seen.add(target)
                reached.append(target)
    return reached


#
# filters
#

def trace_filter_name(name):
    '''Announces the filter with the given name in verbose mode.

    No newline is written after the message.'''
    if not options.verbose:
        return
    sys.stdout.write("Applying filter %s" % name)


def trace_filter_stats(graph):
    '''Reports the number of remaining nodes and edges in verbose mode.'''
    if not options.verbose:
        return
    # stats_graph returns the pair (nodes, edges) expected by the format.
    print(", %d nodes and %d edges remain" % stats_graph(graph))


def filter_contract_directories(graph):
    '''Contracts all files of a directory into a single node, i.e., every
    edge is replaced by the edge between the directories of its endpoints.'''
    trace_filter_name("contract_directories")
    contracted = {(dirname(src), dirname(dst)) for (src, dst) in graph}
    trace_filter_stats(contracted)
    return contracted


def filter_drop_absolute_paths(graph):
    '''Drops every edge with an endpoint that is an absolute path.'''
    trace_filter_name("drop_absolute_paths")
    is_absolute = os.path.isabs
    kept = {(src, dst) for (src, dst) in graph
            if not (is_absolute(src) or is_absolute(dst))}
    trace_filter_stats(kept)
    return kept


def filter_drop_loops(graph):
    '''Drops all loops, i.e., edges whose source equals their target.'''
    trace_filter_name("drop_loops")
    kept = {(src, dst) for (src, dst) in graph if src != dst}
    trace_filter_stats(kept)
    return kept


def filter_source(graph, regex):
    '''Keeps only the edges whose source matches regex.'''
    trace_filter_name("source")
    kept = {(src, dst) for (src, dst) in graph if regex.search(src)}
    trace_filter_stats(kept)
    return kept


def filter_target(graph, regex):
    '''Keeps only the edges whose target matches regex.'''
    trace_filter_name("target")
    kept = {(src, dst) for (src, dst) in graph if regex.search(dst)}
    trace_filter_stats(kept)
    return kept


def filter_source_and_target(graph, regex):
    '''Keeps only the edges whose source and target both match regex.'''
    trace_filter_name("source_and_target")
    kept = {(src, dst) for (src, dst) in graph
            if regex.search(src) and regex.search(dst)}
    trace_filter_stats(kept)
    return kept


def filter_drop_source(graph, regex):
    '''Drops the edges whose source matches regex.'''
    trace_filter_name("drop_source")
    kept = {(src, dst) for (src, dst) in graph if not regex.search(src)}
    trace_filter_stats(kept)
    return kept


def filter_drop_target(graph, regex):
    '''Drops the edges whose target matches regex.'''
    trace_filter_name("drop_target")
    kept = {(src, dst) for (src, dst) in graph if not regex.search(dst)}
    trace_filter_stats(kept)
    return kept


def filter_drop_source_and_target(graph, regex):
    '''Drops the edges whose source and target both match regex.'''
    trace_filter_name("drop_source_and_target")
    # An edge survives as soon as one of its endpoints does not match.
    kept = {(src, dst) for (src, dst) in graph
            if not regex.search(src) or not regex.search(dst)}
    trace_filter_stats(kept)
    return kept

def filter_dependencies(graph, regex):
    '''Restricts the graph to the nodes reachable from the edge sources
    matching regex.'''
    trace_filter_name("dependencies")
    # Only nodes with outgoing edges are considered as start nodes.
    candidates = set(src for (src, _dst) in graph)
    start_nodes = [node for node in candidates if regex.search(node)]
    reachable = graph_traversal(graph, start_nodes)
    subgraph = induced_subgraph(graph, reachable)
    trace_filter_stats(subgraph)
    return subgraph


def filter_reverse_dependencies(graph, regex):
    '''Restricts the graph to the nodes from which an edge target matching
    regex can be reached.'''
    trace_filter_name("reverse_dependencies")
    # Only nodes with incoming edges are considered as start nodes.
    candidates = set(dst for (_src, dst) in graph)
    start_nodes = [node for node in candidates if regex.search(node)]
    # Traversing the flipped edges finds everything that reaches the start
    # nodes in the original graph.
    flipped = set((dst, src) for (src, dst) in graph)
    reachable = graph_traversal(flipped, start_nodes)
    subgraph = induced_subgraph(graph, reachable)
    trace_filter_stats(subgraph)
    return subgraph


#pylint: disable=eval-used
def filter_generic(graph, lambda_string):
    '''Maps every edge through the function described by lambda_string.

    lambda_string is evaluated to a callable that is invoked with source and
    target of each edge. Edges for which it returns None are dropped, any
    other return value is used as the new edge.'''
    trace_filter_name("generic")
    # NOTE(review): eval executes arbitrary code. Within this file the
    # string originates from the --filter-generic command line option; do
    # not feed it from untrusted input.
    edge_function = eval(lambda_string)
    mapped = (edge_function(src, dst) for (src, dst) in graph)
    kept = {edge for edge in mapped if edge is not None}
    trace_filter_stats(kept)
    return kept


#
# command line parsing
#

class FilterKind(enum.Enum):
    '''Kinds of filters, distinguished by the argument they take.'''
    # Filter without any argument.
    NONE = enum.auto()
    # Filter with one regex argument.
    REGEX = enum.auto()
    # Filter with one eval argument.
    EVAL = enum.auto()

class FilterAction(argparse.Action):
    '''Custom parser action for filters.

    Each occurrence of the option appends a callable taking the graph as
    its only argument to the global list filters. Besides the usual
    arguments of argparse.Action, the constructor requires two keyword
    arguments:

    filter      -- the filter function to be applied to the graph
    filter_kind -- the FilterKind describing the argument of the filter'''

    def __init__(self, option_strings, dest, **kwargs):
        '''Initializes the action; nargs is derived from filter_kind.'''
        # argparse.Action does not know these two keyword arguments, so
        # they have to be removed before delegating to the base class.
        self.filter = kwargs.pop("filter")
        self.filter_kind = kwargs.pop("filter_kind")
        super().__init__(option_strings, dest, nargs=self.get_nargs(), **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        '''Registers the filter for one occurrence of the option.

        Raises argparse.ArgumentError for an invalid regex (reported by
        argparse as usage error) and ValueError for an unknown filter kind.'''
        assert len(values) == self.get_nargs()
        if self.filter_kind == FilterKind.NONE:
            filters.append(self.filter)
        elif self.filter_kind == FilterKind.REGEX:
            # Compile right away such that an invalid regex is reported
            # while parsing the command line and not only when the filter
            # is applied later.
            try:
                regex = re.compile(values[0])
            except re.error as error:
                raise argparse.ArgumentError(
                    self, "invalid regex %r: %s" % (values[0], error)) from error
            filters.append(lambda graph: self.filter(graph, regex))
        elif self.filter_kind == FilterKind.EVAL:
            argument = values[0]
            filters.append(lambda graph: self.filter(graph, argument))
        else:
            # Not an assert: those are skipped with -O, which would silently
            # ignore the filter.
            raise ValueError("unsupported filter kind: %r" % (self.filter_kind,))

    def get_nargs(self):
        '''Returns the number of arguments for this filter arg kind.'''
        return 0 if self.filter_kind == FilterKind.NONE else 1

def parse():
    '''Parses the command line.

    Returns the namespace with the parsed options. In the result, root is
    an absolute path, strip_prefix is a list starting with the root
    directory (including a trailing separator), and format uses underscores
    instead of hyphens ("gcc_minus_h" or "cmake"). As a side effect, the
    filter options register their filters via FilterAction.'''

    parser = argparse.ArgumentParser(
        prog="dependencies.py")

    parser.add_argument(
        "-v", "--version",
        action="version",
        version="%(prog)s 0.9.3")

    parser.add_argument(
        "-f", "--format",
        default="gcc_minus_h",
        choices=['gcc-minus-h', 'cmake'],
        help="The format of the dependency information, either \"gcc-minus-h\" "
            + "or \"cmake\" (default: \"gcc-minus-h\").")

    parser.add_argument(
        "-r", "--root",
        default=".",
        help="The root of the subtree with dependency information "
            + "(default: \".\").")

    parser.add_argument(
        "-s", "--suffix",
        default=".d",
        help="The suffix of the files with dependency information "
            + "(default: \".d\"). Use \"depend.make\" for cmake dependency format.")

    parser.add_argument(
        "-o", "--output",
        metavar="FILE",
        help="The output is written to this file (default: stdout).")

    parser.add_argument(
        "--strip-prefix",
        metavar="PREFIX",
        action="append",
        help="Strips PREFIX from all filenames if matching. The root directory "
            + "is always stripped. Useful for out-of-source builds or weird "
            + "automounters. Can be given multiple times. The prefixes are tried "
            + "in the order of their appearance on the command line. The first "
            + "match (except the root directory) stops processing of the "
            + "remaining prefixes.")

    parser.add_argument(
        "--verbose",
        default=False,
        action="store_true",
        help="Enables the verbose mode.")

    # add_argument_group() already registers the group with the parser.
    group = parser.add_argument_group(
        "Filter options",
        "The following options control filters to postprocess the dependency "
            + "graph. All filter options can be given multiple times and are "
            + "applied in the order of their appearance on the command line.")

    group.add_argument(
        "--filter-contract-directories",
        action=FilterAction,
        filter=filter_contract_directories,
        filter_kind=FilterKind.NONE,
        help="Replaces all nodes representing files in a particular "
            + "directory by a single node for that directory.")

    group.add_argument(
        "--filter-drop-absolute-paths",
        action=FilterAction,
        filter=filter_drop_absolute_paths,
        filter_kind=FilterKind.NONE,
        help="Removes all nodes representing files with an absolute path.")

    group.add_argument(
        "--filter-drop-loops",
        action=FilterAction,
        filter=filter_drop_loops,
        filter_kind=FilterKind.NONE,
        help="Removes all loops, i.e., edges where source and target are "
            + "equal.")

    group.add_argument(
        "--filter-source",
        metavar="REGEX",
        action=FilterAction,
        filter=filter_source,
        filter_kind=FilterKind.REGEX,
        help="Removes all edges with sources not matching REGEX.")

    group.add_argument(
        "--filter-target",
        metavar="REGEX",
        action=FilterAction,
        filter=filter_target,
        filter_kind=FilterKind.REGEX,
        help="Removes all edges with targets not matching REGEX.")

    group.add_argument(
        "--filter-source-and-target",
        metavar="REGEX",
        action=FilterAction,
        filter=filter_source_and_target,
        filter_kind=FilterKind.REGEX,
        help="Removes all edges with sources and targets not matching "
            + "REGEX.")

    group.add_argument(
        "--filter-drop-source",
        metavar="REGEX",
        action=FilterAction,
        filter=filter_drop_source,
        filter_kind=FilterKind.REGEX,
        help="Removes all edges with sources matching REGEX.")

    group.add_argument(
        "--filter-drop-target",
        metavar="REGEX",
        action=FilterAction,
        filter=filter_drop_target,
        filter_kind=FilterKind.REGEX,
        help="Removes all edges with targets matching REGEX.")

    group.add_argument(
        "--filter-drop-source-and-target",
        metavar="REGEX",
        action=FilterAction,
        filter=filter_drop_source_and_target,
        filter_kind=FilterKind.REGEX,
        help="Removes all edges with sources and targets matching REGEX.")

    group.add_argument(
        "--filter-dependencies",
        metavar="REGEX",
        action=FilterAction,
        filter=filter_dependencies,
        filter_kind=FilterKind.REGEX,
        help="Removes all nodes not reachable from nodes matching REGEX by "
            + "a forward graph traversal, i.e., only the dependencies of "
            + "nodes matching REGEX remain.")

    group.add_argument(
        "--filter-reverse-dependencies",
        metavar="REGEX",
        action=FilterAction,
        filter=filter_reverse_dependencies,
        filter_kind=FilterKind.REGEX,
        help="Removes all nodes not reachable from nodes matching REGEX by "
            + "a backward graph traversal, i.e., only the nodes depending on "
            + "nodes matching REGEX remain.")

    group.add_argument(
        "--filter-generic",
        metavar="LAMBDA",
        action=FilterAction,
        filter=filter_generic,
        filter_kind=FilterKind.EVAL,
        help="Applies the given lambda function to all edges of the graph. The "
            + "lambda function must accept two string arguments. It is called for "
            + "all edges with the source and target nodes as arguments. It must "
            + "return None, which removes the edge, or a pair of two strings. In "
            + "the latter case, this pair is interpreted as new source and target "
            + "node of the edge. For example, \"lambda s, t: (t, s)\" reverts the "
            + "direction of all edges.")

    options = parser.parse_args()

    # The command line spelling is "gcc-minus-h", whereas the default value
    # and the dispatch in main() use "gcc_minus_h". Normalize to the latter
    # such that passing the format explicitly works as well.
    options.format = options.format.replace("-", "_")

    options.root = os.path.abspath(options.root)

    # The root directory is always the first prefix to be stripped.
    if options.strip_prefix is None:
        options.strip_prefix = []
    options.strip_prefix = [options.root + os.sep] + options.strip_prefix

    return options


#
# collecting dependency information
#

def process_file_gcc_minus_h(filename, graph):
    '''Collects dependency information from filename in graph (assumes
    dependency information in "gcc -H" format).

    Each valid line adds the edge (including file, included file) to the
    set graph. Relative paths are resolved against the working directory,
    which is read from the first line of the file filename + ".cwd" if that
    file is readable, and is the directory of filename otherwise. Processing
    stops at the "Multiple include guards" section or at the first line
    whose include level skips a level.'''
    if options.verbose:
        print("Processing file %s" % filename)

    # optimization
    os_sep = os.sep
    os_path_isabs = os.path.isabs
    options_strip_prefix = options.strip_prefix

    cwd_filename = filename + ".cwd"
    if os.access(cwd_filename, os.R_OK):
        with open(cwd_filename) as f2:
            working_directory = f2.readline().rstrip('\n')
        # avoid os.path.join for performance reasons
        fn = working_directory + os_sep + os.path.basename(filename)
    else:
        working_directory = os.path.dirname(filename)
        fn = filename
    fn = normpath_cache(fn)
    fn = remove_prefixes(fn, options_strip_prefix)
    fn = remove_suffix(fn, options.suffix)
    # stack[i] is the file that is currently being included at level i;
    # stack[0] is the file the dependency information belongs to.
    stack = [fn]

    with open(filename) as f:
        for (line_number, line) in enumerate(f, 1):
            s = line.rstrip('\n')

            if s == "Multiple include guards may be useful for:":
                break

            (level, fn) = get_include_level_and_filename(s)

            # Ignore error messages which might have been captured with the
            # dependency information. Sometimes, the file being compiled is
            # also contained in the output of the -H option.
            if level < 0:
                continue

            if level > len(stack):
                print(("%s:%d: indentation error, skipping " \
                    + "remainder") % (stack[0], line_number), file=sys.stderr)
                print("  %s" % line, file=sys.stderr)
                break

            # Drop the includes that have been left again.
            stack = stack[:level]

            if not os_path_isabs(fn):
                # avoid os.path.join for performance reasons
                fn = working_directory + os_sep + fn
            fn = normpath_cache(fn)
            fn = remove_prefixes(fn, options_strip_prefix)

            graph.add((stack[-1], fn))
            stack.append(fn)


def process_file_cmake(filename, graph):
    '''Collects dependency information from filename in graph (assumes
    dependency information in cmake format (as in depend.make files)).

    Every line containing ".o: " is split at its first occurrence into a
    source (the text before it) and a target (the text after it). Both are
    normalized and stripped of the configured prefixes before the edge
    (source, target) is added to the set graph. All other lines are
    ignored.'''
    if options.verbose:
        print("Processing file %s" % filename)

    filename = os.path.abspath(filename)

    # optimization
    options_strip_prefix = options.strip_prefix

    with open(filename) as f:
        for line in f:
            s = line.rstrip('\n')

            pos = s.find(".o: ")
            if pos < 0:
                continue

            source = s[:pos]
            # skip the 4 characters of the ".o: " separator
            target = s[pos+4:]

            source = normpath_cache(source)
            target = normpath_cache(target)
            source = remove_prefixes(source, options_strip_prefix)
            target = remove_prefixes(target, options_strip_prefix)

            graph.add((source, target))


#
# main
#

def main():
    '''Generates the dependency graph.

    Parses the command line, collects the dependency information from all
    matching files below the root directory, applies the filters in the
    order given on the command line, and prints the resulting graph.

    Raises ValueError if the dependency format is not supported.'''
    global options
    options = parse()

    processors = {
        "gcc_minus_h": process_file_gcc_minus_h,
        "cmake": process_file_cmake,
    }
    # The format may be spelled with hyphens (as on the command line) or
    # with underscores (as the default value); accept both.
    format_name = options.format.replace("-", "_")
    try:
        process_file = processors[format_name]
    except KeyError:
        raise ValueError(
            "unsupported dependency format: %r" % options.format) from None

    files = find(options.root, "*" + options.suffix)
    graph = set()

    for f in files:
        process_file(f, graph)

    if options.verbose:
        (nodes, edges) = stats_graph(graph)
        print("Obtained %d nodes and %d edges" % (nodes, edges))

    for f in filters:
        graph = f(graph)

    print_graph(graph)


# Run the tool only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
