Fix coverage (#2603)

* Add utility to prepare C coverage for upload to coveralls.io It turned out that it is easier to write our own script than to debug and fix the coveralls utility (https://github.com/eddyxu/cpp-coveralls). gcov-coveralls.py can be used as a replacement for coveralls. * Save coverage data from .gcda files only once Coverage data in .gcda files is merged after each binary invocation, so we can run all tests and then gather coverage data. If we dump them twice, execution counts will be higher than they should be. * Switch from coveralls (cpp-coveralls) to own script The problem with coveralls was that coverage for source files outside the build directory was not added to the report. * Add tool to dump info from json for coveralls.io * Add debug * Fix: don't die if there is no service_job_id in json * Debug * Fix dump_coveralls.py * Rename to gcov_coveralls.py (s/-/_/) For most files in this repo '_' is used as separator. * Don't add source code to coveralls JSON According to https://docs.coveralls.io/api-introduction Coveralls doesn't need source code, only an MD5 digest to track changes. Anyway source code is already added by luacov-coveralls and source_digest is added by cpp-coveralls and gcov_coveralls.py Both options seem to work for now. * Provide path to source directory to merge_coveralls.py merge_coveralls.py has code to filter files and remove prefixes. When --root points to source directory merge_coveralls.py can strip prefix from absolute path in JSONs generated by luacov-coveralls. * Style Don't add parameters with default values. * Make --output optional It is useful mainly for debugging. We can send report without saving it. * Log CI_COMMIT_AUTHOR env var It is not clear from drone.io source how CI_COMMIT_AUTHOR variable is set. Log it to see what it means. * Move merge_coveralls.py to test/tools This script is used not only for functional test coverage, but for rspamd-test coverage too. * Remove debug * Style Use more compact formatting.
* Write comment about parallel tests running [SKIP CI] Document why running tests in parallel may be a bad idea (but still do so). * Fix typo [SKIP CI]
author: Anton Yuzhaninov <citrin+git@citrin.ru> 2018-10-20 04:15:40 -0400
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-10-20 09:15:40 +0100
commit: 97a9de385402020d3add12517c7bccae93af593b (patch)
tree: ff53c36cd794f85f2159cbea19ed3bd7a1466583
parent: 97662a53f4dce272df30dac9b6aa4fc4013b0fc4 (diff)
download: rspamd-97a9de385402020d3add12517c7bccae93af593b.tar.gz
rspamd-97a9de385402020d3add12517c7bccae93af593b.zip
5 files changed, 304 insertions, 27 deletions
diff --git a/.circleci/config.yml b/.circleci/config.yml
index 83316945a..fd47c110b 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -34,7 +34,7 @@ references:
               # Merge Lua coverage (collected into lua_coverage_report.json) and with C-coverage
               # (in coverage.rspamd-test.dump, coverage.functional.dump, see &capture_coverage_data)
               # and finally upload it into coveralls.io
-              test/functional/util/merge_coveralls.py --input coverage.functional.dump coverage.rspamd-test.dump lua_coverage_report.json unit_test_lua.json --output out.josn --token=${COVERALLS_REPO_TOKEN}
+              test/tools/merge_coveralls.py --input coverage.functional.dump coverage.rspamd-test.dump lua_coverage_report.json unit_test_lua.json --output out.josn --token=${COVERALLS_REPO_TOKEN}
             fi
         fi
 
diff --git a/.drone.yml b/.drone.yml
index 9c1b5d09a..24d4d8ec8 100644
--- a/.drone.yml
+++ b/.drone.yml
@@ -12,6 +12,8 @@ pipeline:
       - install -d -o nobody -g nogroup /rspamd/build /rspamd/install
       # lua-torch CMakeLists writes to src dir
       - chown nobody $CI_WORKSPACE/contrib/lua-torch/nn
+      # for debug
+      - echo $CI_COMMIT_AUTHOR
 
   build:
     # https://github.com/rspamd/rspamd-build-docker/blob/master/ubuntu-build/Dockerfile
@@ -44,6 +46,16 @@ pipeline:
       # checks are configured in .tidyallrc at the top of rspamd repo
       - tidyall --all --root-dir $CI_WORKSPACE --check-only --no-cache --data-dir /tmp/tidyall
 
+  # We run rspamd-test (unit tests) and functional tests (run by robot) in
+  # parallel to save time. To avoid conflicts in saving Lua coverage we run them
+  # from different directories. For C code, coverage counters are saved to .gcda
+  # files and the binary contains absolute paths to them, so rspamd-test and
+  # processes started by functional tests write to the same files. On process
+  # exit, new coverage data is merged with the existing content of the .gcda file.
+  # A race is possible if rspamd-test and some rspamd process in functional tests
+  # try to write a .gcda file simultaneously. But it is very unlikely, and
+  # performance is more important than correct coverage data.
+
   rspamd-test:
     # https://github.com/rspamd/rspamd-build-docker/blob/master/ubuntu-test/Dockerfile
     image: rspamd/ci-ubuntu-test
@@ -68,8 +80,6 @@ pipeline:
       # luacov-coveralls reads luacov.stats.out written by rspamd-test using luacov module
       # and writes json report for coveralls.io service
       - luacov-coveralls -o /rspamd/build/unit_test_lua.json --dryrun
-      - cd /rspamd/build
-      - coveralls --dump coverage.rspamd-test.dump
       - exit $EXIT_CODE
 
   functional:
@@ -82,18 +92,19 @@ pipeline:
       # some rspamd processes during this test work as root and some as nobody
       # use umask to create world-writable files so nobody can write to *.gcda files created by root
       - umask 0000
-      - set +e
-      - RSPAMD_INSTALLROOT=/rspamd/install robot --xunit xunit.xml --exclude isbroken $CI_WORKSPACE/test/functional/cases; EXIT_CODE=$?
-      - set -e
-      - coveralls --dump coverage.functional.dump
-      - exit $EXIT_CODE
+      - RSPAMD_INSTALLROOT=/rspamd/install robot --xunit xunit.xml --exclude isbroken $CI_WORKSPACE/test/functional/cases
 
   send-coverage:
     image: rspamd/ci-ubuntu-test
     secrets: [ coveralls_repo_token ]
     commands:
       - cd /rspamd/build
-      - $CI_WORKSPACE/test/functional/util/merge_coveralls.py --input coverage.functional.dump coverage.rspamd-test.dump unit_test_lua.json lua_coverage_report.json --output out.josn --token=$COVERALLS_REPO_TOKEN
+      # extract coverage data for C code from .gcda files and save it in a format suitable for coveralls.io
+      - $CI_WORKSPACE/test/tools/gcov_coveralls.py --exclude test --prefix /rspamd/build --prefix $CI_WORKSPACE --out coverage.c.json
+      # * merge coverage for C and Lua code
+      # * remove prefixes from absolute paths (in luacov-coveralls files), filter test, contrib, etc.
+      # * upload report to coveralls.io
+      - $CI_WORKSPACE/test/tools/merge_coveralls.py --root $CI_WORKSPACE --input coverage.c.json unit_test_lua.json lua_coverage_report.json --token=$COVERALLS_REPO_TOKEN
     when:
       branch: master
       # don't send coverage report for pull request
diff --git a/test/tools/dump_coveralls.py b/test/tools/dump_coveralls.py
new file mode 100755
index 000000000..c453d0511
--- /dev/null
+++ b/test/tools/dump_coveralls.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+
+# Small tool to dump JSON payload for coveralls.io API
+
+import json
+from operator import itemgetter
+import os
+import sys
+
+
+def warn(*args, **kwargs):
+    """Print a message to stderr; all arguments are passed through to print()."""
+    print(*args, file=sys.stderr, **kwargs)
+
+
+def dump_file(json_file):
+    """Print a per-file coverage summary for a coveralls.io API payload.
+
+    The payload is read from json_file. For every entry of its 'source_files'
+    list (sorted by name) the share of covered lines among the lines that
+    have coverage data (entries that are not None) is printed to stdout.
+
+    Args:
+        json_file: path to a JSON file with a coveralls.io payload.
+
+    Returns:
+        0 if successful, 1 otherwise (the file can not be read or decoded,
+        is not valid JSON, or has no 'source_files' key).
+    """
+    try:
+        with open(json_file, encoding='utf8') as f:
+            data = json.load(f)
+    except OSError as err:
+        warn(err)
+        # Report every failure as 1, as documented: the caller combines these
+        # values into the process exit status.
+        return 1
+    except ValueError:
+        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors
+        warn("{}: json parsing error".format(json_file))
+        return 1
+
+    # A valid JSON document is not necessarily an object (could be a list, ...)
+    if not isinstance(data, dict) or 'source_files' not in data:
+        warn("{}: no source_files, not a coveralls.io payload?".format(json_file))
+        return 1
+
+    print("{} ({} soource files)".format(json_file, len(data['source_files'])))
+
+    for src_file in sorted(data['source_files'], key=itemgetter('name')):
+        # None entries are lines without coverage data, they are not counted
+        counts = [cnt for cnt in src_file['coverage'] if cnt is not None]
+        if counts:
+            covered_lines = sum(1 for cnt in counts if cnt > 0)
+            coverage = "{:.0%}".format(covered_lines / len(counts))
+        else:
+            coverage = 'N/A'
+
+        print("\t{:>3} {}".format(coverage, src_file['name']))
+
+    return 0
+
+
+def main():
+    """Dump every coveralls.io payload file named on the command line.
+
+    Returns:
+        Process exit status: os.EX_USAGE if no file was given, 0 if all files
+        were dumped successfully, 1 if at least one of them failed.
+    """
+    if len(sys.argv) < 2:
+        warn("usage: {} file.json ...".format(sys.argv[0]))
+        return os.EX_USAGE
+
+    exit_status = 0
+    for f in sys.argv[1:]:
+        # Don't sum the per-file results: the exit status is truncated to
+        # 8 bits, so a large enough sum could wrap around to 0 ("success").
+        if dump_file(f) != 0:
+            exit_status = 1
+
+    return exit_status
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/test/tools/gcov_coveralls.py b/test/tools/gcov_coveralls.py
new file mode 100755
index 000000000..71aa48b7b
--- /dev/null
+++ b/test/tools/gcov_coveralls.py
@@ -0,0 +1,206 @@
+#!/usr/bin/env python3
+"""
+Script to save coverage info for C source files in JSON for coveralls.io
+
+When C code is compiled with the --coverage flag, a *.gcno file is generated
+for each object file. It contains information to reconstruct the basic block
+graphs and assign source line numbers to blocks.
+
+When the binary is executed, a *.gcda file is written on exit, with the same base
+name as the corresponding *.gcno file. It contains some summary information, counters, etc.
+
+The gcov(1) utility can be used to get information from a *.gcda file and write text
+reports to *.gcov files (one file for each source file from which the object was compiled).
+
+The script finds *.gcno files, uses gcov to generate *.gcov files, parses them
+and accumulates statistics for all source files.
+
+This script was written with quite a few assumptions:
+
+    * Code was built using an absolute path to the source directory (and the
+      absolute path is stored in object file debug symbols).
+
+    * Current directory is writable and there are no useful *.gcov files in it
+      (because they will be deleted).
+
+    * Object file has the same base name as the *.gcno file (e.g. foo.c.gcno and foo.c.o).
+      This is the case for cmake builds, but probably not for other build systems.
+
+    * Source file names contain only ASCII characters.
+"""
+
+import argparse
+from collections import defaultdict
+from glob import glob
+import hashlib
+import json
+import os
+from os.path import isabs, join, normpath, relpath
+import os.path
+import subprocess
+import sys
+
+
+def warn(*args, **kwargs):
+    """Print a message to stderr; all arguments are passed through to print()."""
+    print(*args, file=sys.stderr, **kwargs)
+
+
+def parse_gcov_file(gcov_file):
+    """Parses the content of .gcov file written by gcov --intermediate-format
+
+    Every line of the report is a 'tag:value' record. Only 'file' and 'lcount'
+    records are used, all other records are ignored.
+
+    NOTE(review): if a report contains several 'file' records, all line counts
+    are attributed to the last one - confirm against the gcov version in use.
+
+    Args:
+      gcov_file: path to the .gcov report
+
+    Returns:
+      str: Source file name
+      dict: coverage info { line_number: hits }
+
+    Raises:
+      ValueError: if the report has no 'file' record or a malformed 'lcount'
+    """
+    src_file = None
+    count = {}
+    with open(gcov_file) as fh:
+        for line in fh:
+            # Split on the first ':' only: the value itself may contain colons
+            # (e. g. a C++ name in a 'function' record), and a line without
+            # any ':' is simply not a record we are interested in.
+            tag, _, value = line.partition(':')
+            if tag == 'file':
+                src_file = value.rstrip()
+            elif tag == 'lcount':
+                # Use only the first two fields: a record may carry additional
+                # comma separated fields after the execution count.
+                line_num, exec_count = value.split(',')[:2]
+                count[int(line_num)] = int(exec_count)
+
+    if src_file is None:
+        raise ValueError("{}: no 'file' record found".format(gcov_file))
+
+    return src_file, count
+
+
+def run_gcov(filename, coverage, args):
+    """Run gcov on one file and fold the reported line counts into coverage.
+
+    Steps:
+      * call 'gcov --intermediate-format' on filename
+      * parse every *.gcov report found in the current directory afterwards
+      * add the execution counts to coverage (keyed by source file name)
+      * delete each report once it has been parsed
+
+    Args:
+      filename: file name passed to gcov
+      coverage: dict { src_file_name: { line_number: hits } }, updated in place
+      args: parsed command line arguments, only args.verbose is used
+    """
+    command = ['gcov', '--intermediate-format', filename]
+    if args.verbose:
+        warn("calling:", *command)
+
+    # gcov is noisy and has no quiet flag, so unless we are verbose its
+    # stdout goes to /dev/null
+    gcov_stdout = None if args.verbose else subprocess.DEVNULL
+    subprocess.check_call(command, stdout=gcov_stdout)
+
+    for report in glob('*.gcov'):
+        if args.verbose:
+            warn('parsing', report)
+        source_name, line_hits = parse_gcov_file(report)
+        os.remove(report)
+
+        known = coverage.get(source_name)
+        if known is None:
+            # first report for this source file
+            coverage[source_name] = defaultdict(int, line_hits)
+            continue
+
+        # source file already seen: sum execution counts line by line
+        for line_no, hits in line_hits.items():
+            known[line_no] += hits
+
+
+def main():
+    """Collect gcov coverage below the current directory and save it as JSON.
+
+    * parse command line arguments
+    * delete stale *.gcov files from the current directory
+    * walk the current directory (skipping --exclude dirs) and run gcov on
+      every *.gcno file found
+    * filter source files by --prefix and strip the matching prefix
+    * write the coveralls.io payload (name, coverage array and MD5 digest for
+      each source file) to the --out file
+    """
+    parser = argparse.ArgumentParser(
+        description='Save gcov coverage results in JSON file for coveralls.io.')
+    parser.add_argument(
+        '-v',
+        '--verbose',
+        action="store_true",
+        help='Display additional informaton and gcov command output.')
+    parser.add_argument(
+        '-e',
+        '--exclude',
+        action='append',
+        metavar='DIR',
+        help=
+        ("Don't look for .gcno/.gcda files in this directories (repeat option to skip several directories). "
+         "Path is relative to the dirictory where script was started, e. g. '.git'"))
+    parser.add_argument(
+        '-p',
+        '--prefix',
+        action='append',
+        help=
+        ("Strip this prefix from absolute path to source file. "
+         "If this option is provided, then only files with given prefixex in absolute path "
+         "will be added to coverage (option can be repeated)."))
+    parser.add_argument(
+        '--out',
+        type=argparse.FileType('w'),
+        required=True,
+        metavar='FILE',
+        help='Save JSON payload to this file')
+    args = parser.parse_args()
+
+    # args.exclude is None when the option was not given at all; normalize the
+    # paths the same way as the walked ones so 'test/' and './test' match too
+    excluded = {normpath(path) for path in args.exclude or ()}
+
+    # ensure that there is no unrelated .gcov files in current directory
+    for gcov_file in glob('*.gcov'):
+        os.remove(gcov_file)
+        warn("Warning: {} deleted".format(gcov_file))
+
+    # dict { src_file_name: {line1: exec_count1, line2: exec_count2, ...} }
+    coverage = {}
+
+    # find . -name '*.gcno' (respecting args.exclude)
+    for root, dirs, files in os.walk('.'):
+        for f in files:
+            # Usually gcov is called with a source file as an argument, but
+            # this name is used only to find .gcno and .gcda files.  To find
+            # the source file, information from debug symbols is used.  So we
+            # can call gcov on the .gcno file.
+            if f.endswith('.gcno'):
+                run_gcov(join(root, f), coverage, args)
+
+        # don't look into excluded dirs
+        kept_dirs = []
+        for subdir in dirs:
+            # path relative to start dir
+            path = normpath(join(root, subdir))
+            if path in excluded:
+                if args.verbose:
+                    warn('directory "{}" excluded'.format(path))
+            else:
+                kept_dirs.append(subdir)
+        # Prune in place with a single slice assignment (os.walk looks at this
+        # very list); removing items while iterating over it skips entries.
+        dirs[:] = kept_dirs
+
+    # prepare JSON payload for coveralls.io API
+    # https://docs.coveralls.io/api-introduction
+    coveralls_data = {'source_files': []}
+
+    for src_file, line_counts in coverage.items():
+        # filter by prefix and save path with stripped prefix
+        src_file_rel = src_file
+        if args.prefix and isabs(src_file):
+            for prefix in args.prefix:
+                if src_file.startswith(prefix):
+                    src_file_rel = relpath(src_file, start=prefix)
+                    break
+            else:
+                # skip file outside given prefixes
+                # it can be e. g. library include file
+                if args.verbose:
+                    warn('file "{}" is not mathced by prefix, skipping'.format(src_file))
+                continue
+
+        try:
+            with open(src_file, mode='rb') as fh:
+                line_count = sum(1 for _ in fh)
+                fh.seek(0)
+                md5 = hashlib.md5(fh.read()).hexdigest()
+        except OSError as err:
+            # skip files for which source file is not available
+            warn(err, 'not adding to coverage')
+            continue
+
+        # None means "no coverage data for this line"
+        coverage_array = [None] * line_count
+
+        for line_num, exe_cnt in line_counts.items():
+            # item at index 0 representing the coverage for line 1 of the source code
+            assert 1 <= line_num <= line_count
+            coverage_array[line_num - 1] = exe_cnt
+
+        coveralls_data['source_files'].append({
+            'name': src_file_rel,
+            'coverage': coverage_array,
+            'source_digest': md5
+        })
+
+    args.out.write(json.dumps(coveralls_data))
+    # argparse opened the file for us; close it so the payload is flushed to
+    # disk before we return ('-' maps to stdout, which must stay open)
+    if args.out is not sys.stdout:
+        args.out.close()
+
+    if args.verbose:
+        warn('Coverage for {} source files was written'.format(
+            len(coveralls_data['source_files'])))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/test/functional/util/merge_coveralls.py b/test/tools/merge_coveralls.py
index c3976b48f..8fad0f55b 100755
--- a/test/functional/util/merge_coveralls.py
+++ b/test/tools/merge_coveralls.py
@@ -37,12 +37,13 @@ path_mapping = [
 ]
 
 parser = argparse.ArgumentParser(description='')
-parser.add_argument('--input', type=str, required=True, nargs='+', help='input files')
-parser.add_argument('--output', type=str, required=True, help='output file)')
-parser.add_argument('--root', type=str, required=False, default="/rspamd/src/github.com/rspamd/rspamd", help='repository root)')
-parser.add_argument('--install-dir', type=str, required=False, default="/rspamd/install", help='install root)')
-parser.add_argument('--build-dir', type=str, required=False, default="/rspamd/build", help='build root)')
-parser.add_argument('--token', type=str, help='If present, the file will be uploaded to coveralls)')
+parser.add_argument('--input', required=True, nargs='+', help='input files')
+parser.add_argument('--output', help='output file)')
+parser.add_argument('--root', default="/rspamd/src/github.com/rspamd/rspamd", help='repository root)')
+parser.add_argument('--install-dir', default="/rspamd/install", help='install root)')
+parser.add_argument('--build-dir', default="/rspamd/build", help='build root)')
+parser.add_argument('--token', help='If present, the file will be uploaded to coveralls)')
+
 
 def merge_coverage_vectors(c1, c2):
     assert(len(c1) == len(c2))
@@ -85,11 +86,6 @@ def merge(files, j1):
         else:
             sf['name'] = name
             files[name] = sf
-            if not ('source' in sf):
-                path = "%s/%s" % (repository_root, sf['name'])
-                if os.path.isfile(path):
-                    with open(path) as f:
-                        files[name]['source'] = f.read()
 
     return files
 
@@ -127,11 +123,10 @@ if __name__ == '__main__':
         if 'service_job_id' not in j1 and 'service_job_id' in j2:
             j1['service_job_id'] = j2['service_job_id']
 
-        if not j1['service_job_id'] and 'CIRCLE_BUILD_NUM' in os.environ:
-            j1['service_job_id'] = os.environ['CIRCLE_BUILD_NUM']
 
-        if 'CIRCLECI' in os.environ and os.environ['CIRCLECI']:
+        if os.getenv('CIRCLECI'):
             j1['service_name'] = 'circleci'
+            j1['service_job_id'] = os.getenv('CIRCLE_BUILD_NUM')
         elif os.getenv('CI') == 'drone':
             j1['service_name'] = 'drone'
             j1['service_branch'] = os.getenv('CI_COMMIT_BRANCH')
@@ -159,8 +154,9 @@ if __name__ == '__main__':
 
     j1['source_files'] = list(files.values())
 
-    with open(args.output, 'w') as f:
-        f.write(json.dumps(j1))
+    if args.output:
+        with open(args.output, 'w') as f:
+            f.write(json.dumps(j1))
 
     if args.token:
         j1['repo_token'] = args.token
@@ -173,5 +169,3 @@ if __name__ == '__main__':
 
     # post https://coveralls.io/api/v1/jobs
     # print args
-
-
author	Anton Yuzhaninov <citrin+git@citrin.ru>	2018-10-20 04:15:40 -0400
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>	2018-10-20 09:15:40 +0100
commit	97a9de385402020d3add12517c7bccae93af593b (patch)
tree	ff53c36cd794f85f2159cbea19ed3bd7a1466583
parent	97662a53f4dce272df30dac9b6aa4fc4013b0fc4 (diff)
download	rspamd-97a9de385402020d3add12517c7bccae93af593b.tar.gz rspamd-97a9de385402020d3add12517c7bccae93af593b.zip