Update Google Benchmark (nw)

Miodrag Milanovic 2016-09-03 14:42:01 +02:00
parent fe95be105b
commit c5f0d660c7
58 changed files with 4077 additions and 1407 deletions

3rdparty/benchmark/.clang-format vendored Normal file
View File

@ -0,0 +1,96 @@
---
Language: Cpp
# BasedOnStyle: Google
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: true
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
IncludeCategories:
- Regex: '^<.*\.h>'
Priority: 1
- Regex: '^<.*'
Priority: 2
- Regex: '.*'
Priority: 3
IncludeIsMainRegex: '([-_](test|unittest))?$'
IndentCaseLabels: true
IndentWidth: 2
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
ReflowComments: true
SortIncludes: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Auto
TabWidth: 8
UseTab: Never
...

View File

@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Install a newer CMake version
curl -sSL https://cmake.org/files/v3.6/cmake-3.6.1-Linux-x86_64.sh -o install-cmake.sh
chmod +x install-cmake.sh
sudo ./install-cmake.sh --prefix=/usr/local --skip-license
# Checkout LLVM sources
git clone --depth=1 https://github.com/llvm-mirror/llvm.git llvm-source
git clone --depth=1 https://github.com/llvm-mirror/libcxx.git llvm-source/projects/libcxx
git clone --depth=1 https://github.com/llvm-mirror/libcxxabi.git llvm-source/projects/libcxxabi
# Build and install libc++ (Use unstable ABI for better sanitizer coverage)
mkdir llvm-build && cd llvm-build
cmake -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${COMPILER} \
-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=/usr \
-DLIBCXX_ABI_UNSTABLE=ON \
-DLLVM_USE_SANITIZER=${LIBCXX_SANITIZER} \
../llvm-source
make cxx -j2
sudo make install-cxxabi install-cxx
cd ../

View File

@ -1,26 +0,0 @@
#!/usr/bin/env bash
# Before install
sudo add-apt-repository -y ppa:kalakris/cmake
if [ "$STD" = "c++11" ]; then
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
if [ "$CXX" = "clang++" ]; then
wget -O - http://llvm.org/apt/llvm-snapshot.gpg.key | sudo apt-key add -
sudo add-apt-repository -y "deb http://llvm.org/apt/precise/ llvm-toolchain-precise-3.6 main"
fi
fi
sudo apt-get update -qq
# Install
sudo apt-get install -qq cmake
if [ "$STD" = "c++11" ] && [ "$CXX" = "g++" ]; then
sudo apt-get install -qq gcc-4.8 g++-4.8
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.8 90
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 90
elif [ "$CXX" = "clang++" ]; then
sudo apt-get install -qq clang-3.6
sudo update-alternatives --install /usr/local/bin/clang clang /usr/bin/clang-3.6 90
sudo update-alternatives --install /usr/local/bin/clang++ clang++ /usr/bin/clang++-3.6 90
export PATH=/usr/local/bin:$PATH
fi

View File

@ -1,26 +1,75 @@
sudo: required
dist: trusty
language: cpp
env:
global:
- /usr/local/bin:$PATH
# NOTE: The COMPILER variable is unused. It simply makes the display on
# travis-ci.org more readable.
matrix:
include:
- compiler: gcc
env: COMPILER=g++-4.6 STD=c++0x BUILD_TYPE=Coverage
addons:
apt:
packages:
- lcov
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Coverage
- compiler: gcc
env: COMPILER=g++-4.6 STD=c++0x BUILD_TYPE=Debug
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Debug
- compiler: gcc
env: COMPILER=g++-4.6 STD=c++0x BUILD_TYPE=Release
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Release
- compiler: gcc
env: COMPILER=g++-4.8 STD=c++11 BUILD_TYPE=Debug
- compiler: gcc
env: COMPILER=g++-4.8 STD=c++11 BUILD_TYPE=Release
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-6
env:
- COMPILER=g++-6 C_COMPILER=gcc-6 BUILD_TYPE=Debug
- EXTRA_FLAGS="-fno-omit-frame-pointer -g -O2 -fsanitize=undefined,address -fuse-ld=gold"
- compiler: clang
env: COMPILER=clang++-3.6 STD=c++11 BUILD_TYPE=Debug
env: COMPILER=clang++ C_COMPILER=clang BUILD_TYPE=Debug
- compiler: clang
env: COMPILER=clang++-3.6 STD=c++11 BUILD_TYPE=Release
env: COMPILER=clang++ C_COMPILER=clang BUILD_TYPE=Release
# Clang w/ libc++
- compiler: clang
addons:
apt:
packages:
clang-3.8
env:
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug
- LIBCXX_BUILD=1
- EXTRA_FLAGS="-stdlib=libc++"
# Clang w/ libc++, ASAN, UBSAN
- compiler: clang
addons:
apt:
packages:
clang-3.8
env:
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug
- LIBCXX_BUILD=1 LIBCXX_SANITIZER="Undefined;Address"
- EXTRA_FLAGS="-stdlib=libc++ -fno-omit-frame-pointer -g -O2 -fsanitize=undefined,address -fno-sanitize-recover=all"
- UBSAN_OPTIONS=print_stacktrace=1
# Clang w/ libc++ and MSAN
- compiler: clang
addons:
apt:
packages:
clang-3.8
env:
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug
- LIBCXX_BUILD=1 LIBCXX_SANITIZER=MemoryWithOrigins
- EXTRA_FLAGS="-stdlib=libc++ -g -O2 -fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins"
before_script:
- source .travis-setup.sh
- if [ -n "${LIBCXX_BUILD}" ]; then
source .travis-libcxx-setup.sh;
fi
- mkdir build && cd build
install:
@ -31,7 +80,7 @@ install:
fi
script:
- cmake .. -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_CXX_FLAGS="-std=${STD}"
- cmake -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_CXX_FLAGS="${EXTRA_FLAGS}" ..
- make
- make CTEST_OUTPUT_ON_FAILURE=1 test

View File

@ -8,14 +8,17 @@
#
# Please keep the list sorted.
Albert Pretorius <pretoalb@gmail.com>
Arne Beer <arne@twobeer.de>
Christopher Seymour <chris.j.seymour@hotmail.com>
David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
Dominic Hamon <dma@stripysock.com>
Eric Fiselier <eric@efcs.ca>
Eugene Zhuk <eugene.zhuk@gmail.com>
Evgeny Safronov <division494@gmail.com>
Felix Homann <linuxaudio@showlabor.de>
Google Inc.
Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
JianXiong Zhou <zhoujianxiong2@gmail.com>
Jussi Knuuttila <jussi.knuuttila@gmail.com>
Kaito Udagawa <umireon@gmail.com>

View File

@ -3,6 +3,7 @@ project (benchmark)
foreach(p
CMP0054 # CMake 3.1
CMP0056 # export EXE_LINKER_FLAGS to try_run
)
if(POLICY ${p})
cmake_policy(SET ${p} NEW)
@ -11,6 +12,7 @@ endforeach()
option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON)
option(BENCHMARK_ENABLE_LTO "Enable link time optimisation of the benchmark library." OFF)
option(BENCHMARK_USE_LIBCXX "Build and test using libc++ as the standard library." OFF)
# Make sure we can import our CMake functions
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
@ -77,14 +79,15 @@ else()
add_cxx_compiler_flag(-pedantic-errors)
add_cxx_compiler_flag(-Wshorten-64-to-32)
add_cxx_compiler_flag(-Wfloat-equal)
add_cxx_compiler_flag(-Wzero-as-null-pointer-constant)
add_cxx_compiler_flag(-fstrict-aliasing)
if (NOT BENCHMARK_USE_LIBCXX)
add_cxx_compiler_flag(-Wzero-as-null-pointer-constant)
endif()
if (HAVE_CXX_FLAG_FSTRICT_ALIASING)
add_cxx_compiler_flag(-Wstrict-aliasing)
endif()
add_cxx_compiler_flag(-Wthread-safety)
if (HAVE_WTHREAD_SAFETY)
add_definitions(-DHAVE_WTHREAD_SAFETY)
if (HAVE_CXX_FLAG_WTHREAD_SAFETY)
cxx_feature_check(THREAD_SAFETY_ATTRIBUTES)
endif()
@ -125,12 +128,29 @@ else()
add_cxx_compiler_flag(--coverage COVERAGE)
endif()
if (BENCHMARK_USE_LIBCXX)
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
add_cxx_compiler_flag(-stdlib=libc++)
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR
"${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
add_cxx_compiler_flag(-nostdinc++)
message("libc++ header path must be manually specified using CMAKE_CXX_FLAGS")
# Adding -nodefaultlibs directly to CMAKE_<TYPE>_LINKER_FLAGS will break
# configuration checks such as 'find_package(Threads)'
list(APPEND BENCHMARK_CXX_LINKER_FLAGS -nodefaultlibs)
# -lc++ cannot be added directly to CMAKE_<TYPE>_LINKER_FLAGS because
# linker flags appear before all linker inputs and -lc++ must appear after.
list(APPEND BENCHMARK_CXX_LIBRARIES c++)
else()
message(FATAL "-DBENCHMARK_USE_LIBCXX:BOOL=ON is not supported for compiler")
endif()
endif(BENCHMARK_USE_LIBCXX)
# C++ feature checks
cxx_feature_check(STD_REGEX)
cxx_feature_check(GNU_POSIX_REGEX)
cxx_feature_check(POSIX_REGEX)
cxx_feature_check(STEADY_CLOCK)
# Ensure we have pthreads
find_package(Threads REQUIRED)

View File

@ -22,15 +22,18 @@
#
# Please keep the list sorted.
Albert Pretorius <pretoalb@gmail.com>
Arne Beer <arne@twobeer.de>
Billy Robert O'Neal III <billy.oneal@gmail.com> <bion@microsoft.com>
Chris Kennelly <ckennelly@google.com> <ckennelly@ckennelly.com>
Christopher Seymour <chris.j.seymour@hotmail.com>
David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
Dominic Hamon <dma@stripysock.com>
Eric Fiselier <eric@efcs.ca>
Eugene Zhuk <eugene.zhuk@gmail.com>
Evgeny Safronov <division494@gmail.com>
Felix Homann <linuxaudio@showlabor.de>
Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
JianXiong Zhou <zhoujianxiong2@gmail.com>
Jussi Knuuttila <jussi.knuuttila@gmail.com>
Kaito Udagawa <umireon@gmail.com>

View File

@ -9,6 +9,8 @@ Discussion group: https://groups.google.com/d/forum/benchmark-discuss
IRC channel: https://freenode.net #googlebenchmark
[Known issues and common problems](#known-issues)
## Example usage
### Basic usage
Define a function that executes the code to be measured.
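The unchanged basic example is elided from this hunk; a minimal sketch of such a function, assuming the standard `BENCHMARK` and `BENCHMARK_MAIN` macros from `benchmark/benchmark.h` (the function name is illustrative):
```c++
#include "benchmark/benchmark.h"
#include <string>

// Measure the cost of constructing an empty std::string.
static void BM_StringCreation(benchmark::State& state) {
  while (state.KeepRunning())
    std::string empty_string;
}
// Register the function as a benchmark.
BENCHMARK(BM_StringCreation);

// Generates a main() that runs all registered benchmarks.
BENCHMARK_MAIN();
```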
@ -40,13 +42,13 @@ measuring the speed of `memcpy()` calls of different lengths:
```c++
static void BM_memcpy(benchmark::State& state) {
char* src = new char[state.range_x()];
char* dst = new char[state.range_x()];
memset(src, 'x', state.range_x());
char* src = new char[state.range(0)];
char* dst = new char[state.range(0)];
memset(src, 'x', state.range(0));
while (state.KeepRunning())
memcpy(dst, src, state.range_x());
memcpy(dst, src, state.range(0));
state.SetBytesProcessed(int64_t(state.iterations()) *
int64_t(state.range_x()));
int64_t(state.range(0)));
delete[] src;
delete[] dst;
}
@ -61,7 +63,16 @@ the specified range and will generate a benchmark for each such argument.
BENCHMARK(BM_memcpy)->Range(8, 8<<10);
```
You might have a benchmark that depends on two inputs. For example, the
By default the arguments in the range are generated in multiples of eight and
the command above selects [ 8, 64, 512, 4k, 8k ]. In the following code the
range multiplier is changed to multiples of two.
```c++
BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8<<10);
```
Now arguments generated are [ 8, 16, 32, 64, 128, 256, 512, 1024, 2k, 4k, 8k ].
You might have a benchmark that depends on two or more inputs. For example, the
following code defines a family of benchmarks for measuring the speed of set
insertion.
@ -69,21 +80,21 @@ insertion.
static void BM_SetInsert(benchmark::State& state) {
while (state.KeepRunning()) {
state.PauseTiming();
std::set<int> data = ConstructRandomSet(state.range_x());
std::set<int> data = ConstructRandomSet(state.range(0));
state.ResumeTiming();
for (int j = 0; j < state.range_y(); ++j)
for (int j = 0; j < state.range(1); ++j)
data.insert(RandomNumber());
}
}
BENCHMARK(BM_SetInsert)
->ArgPair(1<<10, 1)
->ArgPair(1<<10, 8)
->ArgPair(1<<10, 64)
->ArgPair(1<<10, 512)
->ArgPair(8<<10, 1)
->ArgPair(8<<10, 8)
->ArgPair(8<<10, 64)
->ArgPair(8<<10, 512);
->Args({1<<10, 1})
->Args({1<<10, 8})
->Args({1<<10, 64})
->Args({1<<10, 512})
->Args({8<<10, 1})
->Args({8<<10, 8})
->Args({8<<10, 64})
->Args({8<<10, 512});
```
The preceding code is quite repetitive, and can be replaced with the following
@ -92,7 +103,7 @@ product of the two specified ranges and will generate a benchmark for each such
pair.
```c++
BENCHMARK(BM_SetInsert)->RangePair(1<<10, 8<<10, 1, 512);
BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {1, 512}});
```
For more complex patterns of inputs, passing a custom function to `Apply` allows
@ -104,11 +115,45 @@ and a sparse range on the second.
static void CustomArguments(benchmark::internal::Benchmark* b) {
for (int i = 0; i <= 10; ++i)
for (int j = 32; j <= 1024*1024; j *= 8)
b->ArgPair(i, j);
b->Args({i, j});
}
BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
```
### Calculate asymptotic complexity (Big O)
Asymptotic complexity might be calculated for a family of benchmarks. The
following code will calculate the coefficient for the high-order term in the
running time and the normalized root-mean square error of string comparison.
```c++
static void BM_StringCompare(benchmark::State& state) {
std::string s1(state.range(0), '-');
std::string s2(state.range(0), '-');
while (state.KeepRunning()) {
benchmark::DoNotOptimize(s1.compare(s2));
}
state.SetComplexityN(state.range(0));
}
BENCHMARK(BM_StringCompare)
->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(benchmark::oN);
```
As shown in the following invocation, asymptotic complexity might also be
calculated automatically.
```c++
BENCHMARK(BM_StringCompare)
->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity();
```
The following code will specify asymptotic complexity with a lambda function
that might be used to customize the high-order term calculation.
```c++
BENCHMARK(BM_StringCompare)->RangeMultiplier(2)
->Range(1<<10, 1<<18)->Complexity([](int n)->double{return n; });
```
### Templated benchmarks
Templated benchmarks work the same way: This example produces and consumes
messages of size `sizeof(v)` `range_x` times. It also outputs throughput in the
@ -119,14 +164,14 @@ template <class Q> int BM_Sequential(benchmark::State& state) {
Q q;
typename Q::value_type v;
while (state.KeepRunning()) {
for (int i = state.range_x(); i--; )
for (int i = state.range(0); i--; )
q.push(v);
for (int e = state.range_x(); e--; )
for (int e = state.range(0); e--; )
q.Wait(&v);
}
// actually messages, not bytes:
state.SetBytesProcessed(
static_cast<int64_t>(state.iterations())*state.range_x());
static_cast<int64_t>(state.iterations())*state.range(0));
}
BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
```
@ -143,6 +188,54 @@ Three macros are provided for adding benchmark templates.
#define BENCHMARK_TEMPLATE2(func, arg1, arg2)
```
## Passing arbitrary arguments to a benchmark
In C++11 it is possible to define a benchmark that takes an arbitrary number
of extra arguments. The `BENCHMARK_CAPTURE(func, test_case_name, ...args)`
macro creates a benchmark that invokes `func` with the `benchmark::State` as
the first argument followed by the specified `args...`.
The `test_case_name` is appended to the name of the benchmark and
should describe the values passed.
```c++
template <class ...ExtraArgs>
void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
[...]
}
// Registers a benchmark named "BM_takes_args/int_string_test" that passes
// the specified values to `extra_args`.
BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
```
Note that elements of `...args` may refer to global variables. Users should
avoid modifying global state inside of a benchmark.
## Using RegisterBenchmark(name, fn, args...)
The `RegisterBenchmark(name, func, args...)` function provides an alternative
way to create and register benchmarks.
`RegisterBenchmark(name, func, args...)` creates, registers, and returns a
pointer to a new benchmark with the specified `name` that invokes
`func(st, args...)` where `st` is a `benchmark::State` object.
Unlike the `BENCHMARK` registration macros, which can only be used at the global
scope, `RegisterBenchmark` can be called anywhere. This allows
benchmark tests to be registered programmatically.
Additionally, `RegisterBenchmark` allows any callable object, including
capturing lambdas and function objects, to be registered as a benchmark.
For example:
```c++
auto BM_test = [](benchmark::State& st, auto Inputs) { /* ... */ };
int main(int argc, char** argv) {
for (auto& test_input : { /* ... */ })
benchmark::RegisterBenchmark(test_input.name(), BM_test, test_input);
benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
}
```
### Multithreaded benchmarks
In a multithreaded test (benchmark invoked by multiple threads simultaneously),
it is guaranteed that none of the threads will start until all have called
@ -193,7 +286,7 @@ can be reported back with `SetIterationTime`.
```c++
static void BM_ManualTiming(benchmark::State& state) {
int microseconds = state.range_x();
int microseconds = state.range(0);
std::chrono::duration<double, std::micro> sleep_duration {
static_cast<double>(microseconds)
};
@ -216,7 +309,8 @@ BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime();
### Preventing optimisation
To prevent a value or expression from being optimized away by the compiler
the `benchmark::DoNotOptimize(...)` function can be used.
the `benchmark::DoNotOptimize(...)` and `benchmark::ClobberMemory()`
functions can be used.
```c++
static void BM_test(benchmark::State& state) {
@ -229,6 +323,48 @@ static void BM_test(benchmark::State& state) {
}
```
`DoNotOptimize(<expr>)` forces the *result* of `<expr>` to be stored in either
memory or a register. For GNU based compilers it acts as read/write barrier
for global memory. More specifically it forces the compiler to flush pending
writes to memory and reload any other values as necessary.
Note that `DoNotOptimize(<expr>)` does not prevent optimizations on `<expr>`
in any way. `<expr>` may even be removed entirely when the result is already
known. For example:
```c++
/* Example 1: `<expr>` is removed entirely. */
int foo(int x) { return x + 42; }
while (...) DoNotOptimize(foo(0)); // Optimized to DoNotOptimize(42);
/* Example 2: Result of '<expr>' is only reused */
int bar(int) __attribute__((const));
while (...) DoNotOptimize(bar(0)); // Optimized to:
// int __result__ = bar(0);
// while (...) DoNotOptimize(__result__);
```
The second tool for preventing optimizations is `ClobberMemory()`. In essence
`ClobberMemory()` forces the compiler to perform all pending writes to global
memory. Memory managed by block scope objects must be "escaped" using
`DoNotOptimize(...)` before it can be clobbered. In the below example
`ClobberMemory()` prevents the call to `v.push_back(42)` from being optimized
away.
```c++
static void BM_vector_push_back(benchmark::State& state) {
while (state.KeepRunning()) {
std::vector<int> v;
v.reserve(1);
benchmark::DoNotOptimize(v.data()); // Allow v.data() to be clobbered.
v.push_back(42);
benchmark::ClobberMemory(); // Force 42 to be written to memory.
}
}
```
Note that `ClobberMemory()` is only available for GNU based compilers.
### Set time unit manually
If a benchmark runs for a few milliseconds it may be hard to visually compare the
measured times, since the output data is given in nanoseconds by default. In
@ -246,6 +382,24 @@ the minimum time, or the wallclock time is 5x minimum time. The minimum time is
set as a flag `--benchmark_min_time` or per benchmark by calling `MinTime` on
the registered benchmark object.
## Reporting the mean and standard deviation by repeated benchmarks
By default each benchmark is run once and that single result is reported.
However benchmarks are often noisy and a single result may not be representative
of the overall behavior. For this reason it's possible to repeatedly rerun the
benchmark.
The number of runs of each benchmark is specified globally by the
`--benchmark_repetitions` flag or on a per benchmark basis by calling
`Repetitions` on the registered benchmark object. When a benchmark is run
more than once the mean and standard deviation of the runs will be reported.
Additionally the `--benchmark_report_aggregates_only={true|false}` flag or
`ReportAggregatesOnly(bool)` function can be used to change how repeated tests
are reported. By default the result of each repeated run is reported. When this
option is 'true' only the mean and standard deviation of the runs are reported.
Calling `ReportAggregatesOnly(bool)` on a registered benchmark object overrides
the value of the flag for that benchmark.
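As a short illustration of the calls named above (a sketch, assuming a benchmark function `BM_test` is already defined):
```c++
// Run BM_test ten times; report only the mean and standard deviation.
BENCHMARK(BM_test)->Repetitions(10)->ReportAggregatesOnly(true);
```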
## Fixtures
Fixture tests are created by
first defining a type that derives from ::benchmark::Fixture and then
@ -276,12 +430,44 @@ BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2);
/* BarTest is now registered */
```
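The fixture definition itself is unchanged and therefore elided from this hunk; a minimal sketch of what such a type might look like (`FooTest` is an illustrative name and the `SetUp`/`TearDown` bodies are placeholders):
```c++
class MyFixture : public ::benchmark::Fixture {
 public:
  void SetUp(::benchmark::State& st) { /* acquire per-run resources */ }
  void TearDown(::benchmark::State& st) { /* release them */ }
};

// Defines and registers MyFixture/FooTest in one step.
BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) {
  while (st.KeepRunning()) {
    // code to measure, with access to fixture members
  }
}
```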
## Exiting Benchmarks in Error
When errors caused by external influences, such as file I/O and network
communication, occur within a benchmark the
`State::SkipWithError(const char* msg)` function can be used to skip that run
of the benchmark and report the error. Note that only future iterations of the
`KeepRunning()` loop are skipped. Users may explicitly `return` to exit the
benchmark immediately.
The `SkipWithError(...)` function may be used at any point within the benchmark,
including before and after the `KeepRunning()` loop.
For example:
```c++
static void BM_test(benchmark::State& state) {
auto resource = GetResource();
if (!resource.good()) {
state.SkipWithError("Resource is not good!");
// KeepRunning() loop will not be entered.
}
while (state.KeepRunning()) {
auto data = resource.read_data();
if (!resource.good()) {
state.SkipWithError("Failed to read data!");
break; // Needed to skip the rest of the iteration.
}
do_stuff(data);
}
}
```
## Output Formats
The library supports multiple output formats. Use the
`--benchmark_format=<tabular|json>` flag to set the format type. `tabular` is
the default format.
`--benchmark_format=<console|json|csv>` flag to set the format type. `console`
is the default format.
The Tabular format is intended to be a human readable format. By default
The Console format is intended to be a human readable format. By default
the format generates color output. Context is output on stderr and the
tabular data on stdout. Example tabular output looks like:
```
@ -344,6 +530,12 @@ name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label
"BM_SetInsert/1024/10",106365,17238.4,8421.53,4.74973e+06,1.18743e+06,
```
## Output Files
The library supports writing the output of the benchmark to a file specified
by `--benchmark_out=<filename>`. The format of the output can be specified
using `--benchmark_out_format={json|console|csv}`. Specifying
`--benchmark_out` does not suppress the console output.
## Debug vs Release
By default, benchmark builds as a debug library. You will see a warning in the output when this is the case. To build it as a release library instead, use:
@ -358,4 +550,29 @@ cmake -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_LTO=true
```
## Linking against the library
When using gcc, it is necessary to link against pthread to avoid runtime exceptions. This is due to how gcc implements std::thread. See [issue #67](https://github.com/google/benchmark/issues/67) for more details.
When using gcc, it is necessary to link against pthread to avoid runtime exceptions.
This is due to how gcc implements std::thread.
See [issue #67](https://github.com/google/benchmark/issues/67) for more details.
## Compiler Support
Google Benchmark uses C++11 when building the library. As such we require
a modern C++ toolchain, both compiler and standard library.
The following minimum versions are strongly recommended to build the library:
* GCC 4.8
* Clang 3.4
* Visual Studio 2013
Anything older *may* work.
Note: Using the library and its headers in C++03 is supported. C++11 is only
required to build the library.
# Known Issues
### Windows
* Users must manually link `shlwapi.lib`. Failure to do so may result
in unresolved symbols.

View File

@ -1,21 +1,50 @@
version: '{build}'
configuration:
- Static Debug
- Static Release
# - Shared Debug
# - Shared Release
platform:
- x86
- x64
- Debug
- Release
environment:
matrix:
- compiler: msvc-12-seh
generator: "Visual Studio 12 2013"
- compiler: msvc-12-seh
generator: "Visual Studio 12 2013 Win64"
- compiler: msvc-14-seh
- compiler: gcc-4.9.2-posix
# - compiler: gcc-4.8.4-posix
generator: "Visual Studio 14 2015"
- compiler: msvc-14-seh
generator: "Visual Studio 14 2015 Win64"
- compiler: gcc-5.3.0-posix
generator: "MinGW Makefiles"
cxx_path: 'C:\mingw-w64\i686-5.3.0-posix-dwarf-rt_v4-rev0\mingw32\bin'
matrix:
fast_finish: true
install:
# git bash conflicts with MinGW makefiles
- if "%generator%"=="MinGW Makefiles" (set "PATH=%PATH:C:\Program Files\Git\usr\bin;=%")
- if not "%cxx_path%"=="" (set "PATH=%PATH%;%cxx_path%")
# TODO Remove this. This is a hack to work around bogus warning messages
# See http://goo.gl/euguBI for more information.
before_build:
- del "C:\Program Files (x86)\MSBuild\14.0\Microsoft.Common.targets\ImportAfter\Xamarin.Common.targets"
- del "C:\Program Files (x86)\MSBuild\12.0\Microsoft.Common.targets\ImportAfter\Xamarin.Common.targets"
build_script:
- md _build -Force
- cd _build
- echo %configuration%
- cmake -G "%generator%" "-DCMAKE_BUILD_TYPE=%configuration%" ..
- cmake --build . --config %configuration%
test_script:
- ctest -c %configuration% --timeout 300 --output-on-failure
artifacts:
- path: '_build/CMakeFiles/*.log'
@ -23,105 +52,3 @@ artifacts:
- path: '_build/Testing/**/*.xml'
name: test_results
install:
# derive some extra information
- for /f "tokens=1-2" %%a in ("%configuration%") do (@set "linkage=%%a")
- for /f "tokens=1-2" %%a in ("%configuration%") do (@set "variant=%%b")
- if "%linkage%"=="Shared" (set shared=YES) else (set shared=NO)
- for /f "tokens=1-3 delims=-" %%a in ("%compiler%") do (@set "compiler_name=%%a")
- for /f "tokens=1-3 delims=-" %%a in ("%compiler%") do (@set "compiler_version=%%b")
- for /f "tokens=1-3 delims=-" %%a in ("%compiler%") do (@set "compiler_threading=%%c")
- if "%platform%"=="x64" (set arch=x86_64)
- if "%platform%"=="x86" (set arch=i686)
# download the specific version of MinGW
- if "%compiler_name%"=="gcc" (for /f %%a in ('python mingw.py --quiet --version "%compiler_version%" --arch "%arch%" --threading "%compiler_threading%" --location "C:\mingw-builds"') do @set "compiler_path=%%a")
before_build:
# Set up mingw commands
- if "%compiler_name%"=="gcc" (set "generator=MinGW Makefiles")
- if "%compiler_name%"=="gcc" (set "build=mingw32-make -j4")
- if "%compiler_name%"=="gcc" (set "test=mingw32-make CTEST_OUTPUT_ON_FAILURE=1 test")
# msvc specific commands
- if "%compiler_name%"=="msvc" if "%compiler_version%"=="12" if "%platform%"=="x86" (set "generator=Visual Studio 12 2013")
- if "%compiler_name%"=="msvc" if "%compiler_version%"=="12" if "%platform%"=="x64" (set "generator=Visual Studio 12 2013 Win64")
- if "%compiler_name%"=="msvc" if "%compiler_version%"=="14" if "%platform%"=="x86" (set "generator=Visual Studio 14 2015")
- if "%compiler_name%"=="msvc" if "%compiler_version%"=="14" if "%platform%"=="x64" (set "generator=Visual Studio 14 2015 Win64")
- if "%compiler_name%"=="msvc" (set "build=cmake --build . --config %variant%")
- if "%compiler_name%"=="msvc" (set "test=ctest -c Release -D CTEST_OUTPUT_ON_FAILURE:STRING=1")
# add the compiler path if needed
- if not "%compiler_path%"=="" (set "PATH=%PATH%;%compiler_path%")
# git bash conflicts with MinGW makefiles
- if "%generator%"=="MinGW Makefiles" (set "PATH=%PATH:C:\Program Files\Git\usr\bin;=%")
build_script:
- ps: |
md _build -Force
cd _build
& cmake -G "$env:generator" "-DCMAKE_BUILD_TYPE=$env:variant" "-DBUILD_SHARED_LIBS=$env:shared" ..
if ($LastExitCode -ne 0) {
throw "Exec: $ErrorMessage"
}
iex "& $env:build"
if ($LastExitCode -ne 0) {
throw "Exec: $ErrorMessage"
}
test_script:
- ps: |
iex "& $env:test"
if ($LastExitCode -ne 0) {
throw "Exec: $ErrorMessage"
}
function Add-CTest-Result($testResult)
{
$tests = ([xml](get-content $testResult)).Site.Testing
$testsCount = 0
$anyFailures = $FALSE
foreach ($test in $tests.test) {
$testsCount++
$testName = $test.Name
$testpath = $test.Path
$timeNode = $test.SelectSingleNode('Results/NamedMeasurement[@name="Execution Time"]/Value')
if ($test.status -eq "failure") {
$time = ([double]$timeNode.InnerText * 1000)
Add-AppveyorTest $testName -Outcome Failed -FileName $testpath -Duration $time -ErrorMessage $($test.results.measurement.value)
Add-AppveyorMessage `"$testName failed`" -Category Error
$anyFailures = $TRUE
}
elseif ($test.status -eq "skipped") {
Add-AppveyorTest $testName -Outcome Ignored -Filename $testpath
}
else {
$time = ([double]$timeNode.InnerText * 1000)
Add-AppveyorTest $testName -Outcome Passed -FileName $testpath -Duration $time -StdOut $($test.results.measurement.value)
}
}
return $testsCount, $anyFailures
}
$testsCount = 0
$anyFailures = $FALSE
# Run tests and upload results to AppVeyor one by one
Get-ChildItem ".\Testing\*.xml" -Recurse | foreach {
$testfile = $_.fullname
$count, $testsResult = Add-CTest-Result $testfile
Write-Host "Found $testfile with $count tests"
$testsCount = $testsCount + $count
$anyFailures = $anyFailures -or $testsResult
}
Write-Host "There are $testsCount tests found"
if ($anyFailures -eq $TRUE){
Write-Host "Failing build as there are broken tests"
$host.SetShouldExit(1)
}
matrix:
fast_finish: true
cache:
- C:\mingw-builds

View File

@ -21,12 +21,17 @@ function(cxx_feature_check FILE)
string(TOLOWER ${FILE} FILE)
string(TOUPPER ${FILE} VAR)
string(TOUPPER "HAVE_${VAR}" FEATURE)
if (DEFINED HAVE_${VAR})
return()
endif()
message("-- Performing Test ${FEATURE}")
try_run(RUN_${FEATURE} COMPILE_${FEATURE}
${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp)
${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
if(RUN_${FEATURE} EQUAL 0)
message("-- Performing Test ${FEATURE} -- success")
set(HAVE_${VAR} 1 PARENT_SCOPE)
set(HAVE_${VAR} 1 CACHE INTERNAL "Feature test for ${FILE}" PARENT_SCOPE)
add_definitions(-DHAVE_${VAR})
else()
if(NOT COMPILE_${FEATURE})

View File

@ -38,12 +38,12 @@ int main(int argc, char** argv) {
// of memcpy() calls of different lengths:
static void BM_memcpy(benchmark::State& state) {
char* src = new char[state.range_x()]; char* dst = new char[state.range_x()];
memset(src, 'x', state.range_x());
char* src = new char[state.range(0)]; char* dst = new char[state.range(0)];
memset(src, 'x', state.range(0));
while (state.KeepRunning())
memcpy(dst, src, state.range_x());
memcpy(dst, src, state.range(0));
state.SetBytesProcessed(int64_t(state.iterations()) *
int64_t(state.range_x()));
int64_t(state.range(0)));
delete[] src; delete[] dst;
}
BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
@ -60,27 +60,27 @@ BENCHMARK(BM_memcpy)->Range(8, 8<<10);
static void BM_SetInsert(benchmark::State& state) {
while (state.KeepRunning()) {
state.PauseTiming();
set<int> data = ConstructRandomSet(state.range_x());
set<int> data = ConstructRandomSet(state.range(0));
state.ResumeTiming();
for (int j = 0; j < state.range_y(); ++j)
for (int j = 0; j < state.range(1); ++j)
data.insert(RandomNumber());
}
}
BENCHMARK(BM_SetInsert)
->ArgPair(1<<10, 1)
->ArgPair(1<<10, 8)
->ArgPair(1<<10, 64)
->ArgPair(1<<10, 512)
->ArgPair(8<<10, 1)
->ArgPair(8<<10, 8)
->ArgPair(8<<10, 64)
->ArgPair(8<<10, 512);
->Args({1<<10, 1})
->Args({1<<10, 8})
->Args({1<<10, 64})
->Args({1<<10, 512})
->Args({8<<10, 1})
->Args({8<<10, 8})
->Args({8<<10, 64})
->Args({8<<10, 512});
// The preceding code is quite repetitive, and can be replaced with
// the following short-hand. The following macro will pick a few
// appropriate arguments in the product of the two specified ranges
// and will generate a microbenchmark for each such pair.
BENCHMARK(BM_SetInsert)->RangePair(1<<10, 8<<10, 1, 512);
BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {1, 512}});
// For more complex patterns of inputs, passing a custom function
// to Apply allows programmatic specification of an
@ -90,7 +90,7 @@ BENCHMARK(BM_SetInsert)->RangePair(1<<10, 8<<10, 1, 512);
static void CustomArguments(benchmark::internal::Benchmark* b) {
for (int i = 0; i <= 10; ++i)
for (int j = 32; j <= 1024*1024; j *= 8)
b->ArgPair(i, j);
b->Args({i, j});
}
BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
@ -101,14 +101,14 @@ template <class Q> int BM_Sequential(benchmark::State& state) {
Q q;
typename Q::value_type v;
while (state.KeepRunning()) {
for (int i = state.range_x(); i--; )
for (int i = state.range(0); i--; )
q.push(v);
for (int e = state.range_x(); e--; )
for (int e = state.range(0); e--; )
q.Wait(&v);
}
// actually messages, not bytes:
state.SetBytesProcessed(
static_cast<int64_t>(state.iterations())*state.range_x());
static_cast<int64_t>(state.iterations())*state.range(0));
}
BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
@ -153,17 +153,36 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
#include <stddef.h>
#include <stdint.h>
#include <vector>
#include "macros.h"
#if defined(BENCHMARK_HAS_CXX11)
#include <type_traits>
#include <utility>
#endif
namespace benchmark {
class BenchmarkReporter;
void Initialize(int* argc, char** argv);
// Otherwise, run all benchmarks specified by the --benchmark_filter flag,
// and exit after running the benchmarks.
void RunSpecifiedBenchmarks();
void RunSpecifiedBenchmarks(BenchmarkReporter* reporter);
// Generate a list of benchmarks matching the specified --benchmark_filter flag
// and if --benchmark_list_tests is specified return after printing the name
// of each matching benchmark. Otherwise run each matching benchmark and
// report the results.
//
// The second and third overload use the specified 'console_reporter' and
// 'file_reporter' respectively. 'file_reporter' will write to the file specified
// by '--benchmark_output'. If '--benchmark_output' is not given the
// 'file_reporter' is ignored.
//
// RETURNS: The number of matching benchmarks.
size_t RunSpecifiedBenchmarks();
size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter);
size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter,
BenchmarkReporter* file_reporter);
// If this routine is called, peak memory allocation past this point in the
// benchmark is reported at the end of the benchmark report line. (It is
@ -195,32 +214,33 @@ void UseCharPointer(char const volatile*);
// registered benchmark.
Benchmark* RegisterBenchmarkInternal(Benchmark*);
// Ensure that the standard streams are properly initialized in every TU.
int InitializeStreams();
BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
} // end namespace internal
// The DoNotOptimize(...) function can be used to prevent a value or
// expression from being optimized away by the compiler. This function is
// intented to add little to no overhead.
// See: http://stackoverflow.com/questions/28287064
#if defined(__clang__) && defined(__GNUC__)
// TODO(ericwf): Clang has a bug where it tries to always use a register
// even if value must be stored in memory. This causes codegen to fail.
// To work around this we remove the "r" modifier so the operand is always
// loaded into memory.
// intended to add little to no overhead.
// See: https://youtu.be/nXaxk27zwlk?t=2441
#if defined(__GNUC__)
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
asm volatile("" : "+m" (const_cast<Tp&>(value)));
asm volatile("" : : "g"(value) : "memory");
}
#elif defined(__GNUC__)
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
asm volatile("" : "+rm" (const_cast<Tp&>(value)));
// Force the compiler to flush pending writes to global memory. Acts as an
// effective read/write barrier
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
asm volatile("" : : : "memory");
}
#else
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
}
// FIXME Add ClobberMemory() for non-gnu compilers
#endif
// TimeUnit is passed to a benchmark in order to specify the order of magnitude
@ -231,67 +251,98 @@ enum TimeUnit {
kMillisecond
};
// BigO is passed to a benchmark in order to specify the asymptotic computational
// complexity for the benchmark. In case oAuto is selected, complexity will be
// calculated automatically to the best fit.
enum BigO {
oNone,
o1,
oN,
oNSquared,
oNCubed,
oLogN,
oNLogN,
oAuto,
oLambda
};
// BigOFunc is passed to a benchmark in order to specify the asymptotic
// computational complexity for the benchmark.
typedef double(BigOFunc)(int);
namespace internal {
class ThreadTimer;
class ThreadManager;
}
// State is passed to a running Benchmark and contains state for the
// benchmark to use.
class State {
public:
State(size_t max_iters, bool has_x, int x, bool has_y, int y, int thread_i, int n_threads);
// Returns true iff the benchmark should continue through another iteration.
// Returns true if the benchmark should continue through another iteration.
// NOTE: A benchmark may not return from the test until KeepRunning() has
// returned false.
bool KeepRunning() {
if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
ResumeTiming();
started_ = true;
StartKeepRunning();
}
bool const res = total_iterations_++ < max_iterations;
if (BENCHMARK_BUILTIN_EXPECT(!res, false)) {
assert(started_);
PauseTiming();
// Total iterations now is one greater than max iterations. Fix this.
total_iterations_ = max_iterations;
FinishKeepRunning();
}
return res;
}
// REQUIRES: timer is running
// REQUIRES: timer is running and 'SkipWithError(...)' has not been called
// by the current thread.
// Stop the benchmark timer. If not called, the timer will be
// automatically stopped after KeepRunning() returns false for the first time.
//
// For threaded benchmarks the PauseTiming() function acts
// like a barrier. I.e., the ith call by a particular thread to this
// function will block until all threads have made their ith call.
// The timer will stop when the last thread has called this function.
// For threaded benchmarks the PauseTiming() function only pauses the timing
// for the current thread.
//
// NOTE: The "real time" measurement is per-thread. If different threads
// report different measurements the largest one is reported.
//
// NOTE: PauseTiming()/ResumeTiming() are relatively
// heavyweight, and so their use should generally be avoided
// within each benchmark iteration, if possible.
void PauseTiming();
// REQUIRES: timer is not running
// REQUIRES: timer is not running and 'SkipWithError(...)' has not been called
// by the current thread.
// Start the benchmark timer. The timer is NOT running on entrance to the
// benchmark function. It begins running after the first call to KeepRunning()
//
// For threaded benchmarks the ResumeTiming() function acts
// like a barrier. I.e., the ith call by a particular thread to this
// function will block until all threads have made their ith call.
// The timer will start when the last thread has called this function.
//
// NOTE: PauseTiming()/ResumeTiming() are relatively
// heavyweight, and so their use should generally be avoided
// within each benchmark iteration, if possible.
void ResumeTiming();
// REQUIRES: 'SkipWithError(...)' has not been called previously by the
// current thread.
// Skip any future iterations of the 'KeepRunning()' loop in the current
// thread and report an error with the specified 'msg'. After this call
// the user may explicitly 'return' from the benchmark.
//
// For threaded benchmarks only the current thread stops executing and future
// calls to `KeepRunning()` will block until all threads have completed
// the `KeepRunning()` loop. If multiple threads report an error only the
// first error message is used.
//
// NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
// the current scope immediately. If the function is called from within
// the 'KeepRunning()' loop the current iteration will finish. It is the users
// responsibility to exit the scope as needed.
void SkipWithError(const char* msg);
// REQUIRES: called exactly once per iteration of the KeepRunning loop.
// Set the manually measured time for this benchmark iteration, which
// is used instead of automatically measured time if UseManualTime() was
// specified.
//
// For threaded benchmarks the SetIterationTime() function acts
// like a barrier. I.e., the ith call by a particular thread to this
// function will block until all threads have made their ith call.
// The time will be set by the last thread to call this function.
// For threaded benchmarks the final value will be set to the largest
// reported values.
void SetIterationTime(double seconds);
// Set the number of bytes processed by the current benchmark
@ -311,6 +362,19 @@ public:
return bytes_processed_;
}
// If this routine is called with complexity_n > 0 and a complexity report is requested for the
// benchmark family, then the current benchmark will be part of the computation and complexity_n will
// represent the length of N.
BENCHMARK_ALWAYS_INLINE
void SetComplexityN(int complexity_n) {
complexity_n_ = complexity_n;
}
BENCHMARK_ALWAYS_INLINE
int complexity_length_n() {
return complexity_n_;
}
// If this routine is called with items > 0, then an items/s
// label is printed on the benchmark report line for the currently
// executing benchmark. It is typically called at the end of a processing
@ -353,35 +417,35 @@ public:
// Range arguments for this run. CHECKs if the argument has been set.
BENCHMARK_ALWAYS_INLINE
int range_x() const {
assert(has_range_x_);
((void)has_range_x_); // Prevent unused warning.
return range_x_;
int range(std::size_t pos = 0) const {
assert(range_.size() > pos);
return range_[pos];
}
BENCHMARK_ALWAYS_INLINE
int range_y() const {
assert(has_range_y_);
((void)has_range_y_); // Prevent unused warning.
return range_y_;
}
BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead")
int range_x() const { return range(0); }
BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead")
int range_y() const { return range(1); }
BENCHMARK_ALWAYS_INLINE
size_t iterations() const { return total_iterations_; }
private:
bool started_;
bool finished_;
size_t total_iterations_;
bool has_range_x_;
int range_x_;
bool has_range_y_;
int range_y_;
std::vector<int> range_;
size_t bytes_processed_;
size_t items_processed_;
int complexity_n_;
public:
// FIXME: Make this private somehow.
bool error_occurred_;
public:
// Index of the executing thread. Values from [0, threads).
const int thread_index;
@ -389,7 +453,16 @@ public:
const int threads;
const size_t max_iterations;
private:
// TODO make me private
State(size_t max_iters, const std::vector<int>& ranges, int thread_i,
int n_threads, internal::ThreadTimer* timer,
internal::ThreadManager* manager);
private:
void StartKeepRunning();
void FinishKeepRunning();
internal::ThreadTimer* timer_;
internal::ThreadManager* manager_;
BENCHMARK_DISALLOW_COPY_AND_ASSIGN(State);
};
@ -423,31 +496,64 @@ public:
// REQUIRES: The function passed to the constructor must accept an arg1.
Benchmark* Range(int start, int limit);
// Run this benchmark once for every value in the range [start..limit]
// Run this benchmark once for all values in the range [start..limit] with specific step
// REQUIRES: The function passed to the constructor must accept an arg1.
Benchmark* DenseRange(int start, int limit);
Benchmark* DenseRange(int start, int limit, int step = 1);
// Run this benchmark once with "x,y" as the extra arguments passed
// Run this benchmark once with "args" as the extra arguments passed
// to the function.
// REQUIRES: The function passed to the constructor must accept arg1,arg2.
Benchmark* ArgPair(int x, int y);
// REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
Benchmark* Args(const std::vector<int>& args);
// Pick a set of values A from the range [lo1..hi1] and a set
// of values B from the range [lo2..hi2]. Run the benchmark for
// every pair of values in the cartesian product of A and B
// (i.e., for all combinations of the values in A and B).
// REQUIRES: The function passed to the constructor must accept arg1,arg2.
Benchmark* RangePair(int lo1, int hi1, int lo2, int hi2);
// Equivalent to Args({x, y})
// NOTE: This is a legacy C++03 interface provided for compatibility only.
// New code should use 'Args'.
Benchmark* ArgPair(int x, int y) {
std::vector<int> args;
args.push_back(x);
args.push_back(y);
return Args(args);
}
// Run this benchmark once for a number of values picked from the
// ranges [start..limit]. (starts and limits are always picked.)
// REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
Benchmark* Ranges(const std::vector<std::pair<int, int> >& ranges);
// Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}).
// NOTE: This is a legacy C++03 interface provided for compatibility only.
// New code should use 'Ranges'.
Benchmark* RangePair(int lo1, int hi1, int lo2, int hi2) {
std::vector<std::pair<int, int> > ranges;
ranges.push_back(std::make_pair(lo1, hi1));
ranges.push_back(std::make_pair(lo2, hi2));
return Ranges(ranges);
}
// Pass this benchmark object to *func, which can customize
// the benchmark by calling various methods like Arg, ArgPair,
// the benchmark by calling various methods like Arg, Args,
// Threads, etc.
Benchmark* Apply(void (*func)(Benchmark* benchmark));
// Set the range multiplier for non-dense range. If not called, the range multiplier
// kRangeMultiplier will be used.
Benchmark* RangeMultiplier(int multiplier);
// Set the minimum amount of time to use when running this benchmark. This
// option overrides the `benchmark_min_time` flag.
// REQUIRES: `t > 0`
Benchmark* MinTime(double t);
// Specify the amount of times to repeat this benchmark. This option overrides
// the `benchmark_repetitions` flag.
// REQUIRES: `n > 0`
Benchmark* Repetitions(int n);
// Specify if each repetition of the benchmark should be reported separately
// or if only the final statistics should be reported. If the benchmark
// is not repeated then the single result is always reported.
Benchmark* ReportAggregatesOnly(bool v = true);
// If a particular benchmark is I/O bound, runs multiple threads internally or
// if for some reason CPU timings are not representative, call this method. If
// called, the elapsed time will be used to control how many iterations are
@ -462,6 +568,14 @@ public:
// or MB/second values.
Benchmark* UseManualTime();
// Set the asymptotic computational complexity for the benchmark. If called
// the asymptotic computational complexity will be shown on the output.
Benchmark* Complexity(BigO complexity = benchmark::oAuto);
// Set the asymptotic computational complexity for the benchmark. If called
// the asymptotic computational complexity will be shown on the output.
Benchmark* Complexity(BigOFunc* complexity);
// Support for running multiple copies of the same benchmark concurrently
// in multiple threads. This may be useful when measuring the scaling
// of some piece of code.
@ -503,6 +617,20 @@ private:
Benchmark& operator=(Benchmark const&);
};
} // namespace internal
// Create and register a benchmark with the specified 'name' that invokes
// the specified functor 'fn'.
//
// RETURNS: A pointer to the registered benchmark.
internal::Benchmark* RegisterBenchmark(const char* name, internal::Function* fn);
#if defined(BENCHMARK_HAS_CXX11)
template <class Lambda>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn);
#endif
namespace internal {
// The class used to hold all Benchmarks created from static functions
// (i.e. those created using the BENCHMARK(...) macros).
class FunctionBenchmark : public Benchmark {
@ -516,8 +644,57 @@ private:
Function* func_;
};
#ifdef BENCHMARK_HAS_CXX11
template <class Lambda>
class LambdaBenchmark : public Benchmark {
public:
virtual void Run(State& st) { lambda_(st); }
private:
template <class OLambda>
LambdaBenchmark(const char* name, OLambda&& lam)
: Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}
LambdaBenchmark(LambdaBenchmark const&) = delete;
private:
template <class Lam>
friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&);
Lambda lambda_;
};
#endif
} // end namespace internal
inline internal::Benchmark*
RegisterBenchmark(const char* name, internal::Function* fn) {
return internal::RegisterBenchmarkInternal(
::new internal::FunctionBenchmark(name, fn));
}
#ifdef BENCHMARK_HAS_CXX11
template <class Lambda>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
using BenchType = internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
return internal::RegisterBenchmarkInternal(
::new BenchType(name, std::forward<Lambda>(fn)));
}
#endif
#if defined(BENCHMARK_HAS_CXX11) && \
(!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
template <class Lambda, class ...Args>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn,
Args&&... args) {
return benchmark::RegisterBenchmark(name,
[=](benchmark::State& st) { fn(st, args...); });
}
#else
#define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
#endif
// The base class for all fixture tests.
class Fixture: public internal::Benchmark {
public:
@ -529,8 +706,12 @@ public:
this->TearDown(st);
}
// These will be deprecated ...
virtual void SetUp(const State&) {}
virtual void TearDown(const State&) {}
// ... In favor of these.
virtual void SetUp(State& st) { SetUp(const_cast<const State&>(st)); }
virtual void TearDown(State& st) { TearDown(const_cast<const State&>(st)); }
protected:
virtual void BenchmarkCase(State&) = 0;
@ -568,11 +749,33 @@ protected:
// Old-style macros
#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->ArgPair((a1), (a2))
#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)})
#define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
BENCHMARK(n)->RangePair((l1), (h1), (l2), (h2))
BENCHMARK(n)->RangePair({{(l1), (h1)}, {(l2), (h2)}})
#if __cplusplus >= 201103L
// Register a benchmark which invokes the function specified by `func`
// with the additional arguments specified by `...`.
//
// For example:
//
// template <class ...ExtraArgs>
// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
// [...]
//}
// /* Registers a benchmark named "BM_takes_args/int_string_test" */
// BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
#define BENCHMARK_CAPTURE(func, test_case_name, ...) \
BENCHMARK_PRIVATE_DECLARE(func) = \
(::benchmark::internal::RegisterBenchmarkInternal( \
new ::benchmark::internal::FunctionBenchmark( \
#func "/" #test_case_name, \
[](::benchmark::State& st) { func(st, __VA_ARGS__); })))
#endif // __cplusplus >= 11
// This will register a benchmark for a templatized function. For example:
//

View File

@ -14,7 +14,11 @@
#ifndef BENCHMARK_MACROS_H_
#define BENCHMARK_MACROS_H_
#if __cplusplus < 201103L
#if __cplusplus >= 201103L
#define BENCHMARK_HAS_CXX11
#endif
#ifndef BENCHMARK_HAS_CXX11
# define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&); \
TypeName& operator=(const TypeName&)
@ -28,21 +32,35 @@
# define BENCHMARK_UNUSED __attribute__((unused))
# define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
# define BENCHMARK_NOEXCEPT noexcept
# define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
#elif defined(_MSC_VER) && !defined(__clang__)
# define BENCHMARK_UNUSED
# define BENCHMARK_ALWAYS_INLINE __forceinline
# define BENCHMARK_NOEXCEPT
# if _MSC_VER >= 1900
# define BENCHMARK_NOEXCEPT noexcept
# define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
# else
# define BENCHMARK_NOEXCEPT
# define BENCHMARK_NOEXCEPT_OP(x)
# endif
# define __func__ __FUNCTION__
#else
# define BENCHMARK_UNUSED
# define BENCHMARK_ALWAYS_INLINE
# define BENCHMARK_NOEXCEPT
# define BENCHMARK_NOEXCEPT_OP(x)
#endif
#if defined(__GNUC__)
# define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
# define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
#else
# define BENCHMARK_BUILTIN_EXPECT(x, y) x
# define BENCHMARK_DEPRECATED_MSG(msg)
#endif
#if defined(__GNUC__) && !defined(__clang__)
#define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
#endif
#endif // BENCHMARK_MACROS_H_

View File

@ -14,16 +14,16 @@
#ifndef BENCHMARK_REPORTER_H_
#define BENCHMARK_REPORTER_H_
#include <cassert>
#include <iosfwd>
#include <string>
#include <utility>
#include <vector>
#include "benchmark_api.h" // For forward declaration of BenchmarkReporter
#include "benchmark_api.h" // For forward declaration of BenchmarkReporter
namespace benchmark {
typedef std::pair<const char*,double> TimeUnitMultiplier;
// Interface for custom benchmark result printers.
// By default, benchmark reports are printed to stdout. However an application
// can control the destination of the reports by calling
@ -42,29 +42,62 @@ class BenchmarkReporter {
struct Run {
Run() :
error_occurred(false),
iterations(1),
time_unit(kNanosecond),
real_accumulated_time(0),
cpu_accumulated_time(0),
bytes_per_second(0),
items_per_second(0),
max_heapbytes_used(0) {}
max_heapbytes_used(0),
complexity(oNone),
complexity_n(0),
report_big_o(false),
report_rms(false) {}
std::string benchmark_name;
std::string report_label; // Empty if not set by benchmark.
bool error_occurred;
std::string error_message;
int64_t iterations;
TimeUnit time_unit;
double real_accumulated_time;
double cpu_accumulated_time;
// Return a value representing the real time per iteration in the unit
// specified by 'time_unit'.
// NOTE: If 'iterations' is zero the returned value represents the
// accumulated time.
double GetAdjustedRealTime() const;
// Return a value representing the cpu time per iteration in the unit
// specified by 'time_unit'.
// NOTE: If 'iterations' is zero the returned value represents the
// accumulated time.
double GetAdjustedCPUTime() const;
// Zero if not set by benchmark.
double bytes_per_second;
double items_per_second;
// This is set to 0.0 if memory tracing is not enabled.
double max_heapbytes_used;
// Keep track of arguments to compute asymptotic complexity
BigO complexity;
BigOFunc* complexity_lambda;
int complexity_n;
// Inform print function whether the current run is a complexity report
bool report_big_o;
bool report_rms;
};
// Construct a BenchmarkReporter with the output stream set to 'std::cout'
// and the error stream set to 'std::cerr'
BenchmarkReporter();
// Called once for every suite of benchmarks run.
// The parameter "context" contains information that the
// reporter may wish to use when generating its report, for example the
@ -74,55 +107,119 @@ class BenchmarkReporter {
virtual bool ReportContext(const Context& context) = 0;
// Called once for each group of benchmark runs, gives information about
// cpu-time and heap memory usage during the benchmark run.
// Note that all the grouped benchmark runs should refer to the same
// benchmark, thus have the same name.
// cpu-time and heap memory usage during the benchmark run. If the group
// of runs contained more than two entries then 'report' contains additional
// elements representing the mean and standard deviation of those runs.
// Additionally if this group of runs was the last in a family of benchmarks
// 'reports' contains additional entries representing the asymptotic
// complexity and RMS of that benchmark family.
virtual void ReportRuns(const std::vector<Run>& report) = 0;
// Called once and only once after every group of benchmarks is run and
// reported.
virtual void Finalize();
virtual void Finalize() {}
// REQUIRES: The object referenced by 'out' is valid for the lifetime
// of the reporter.
void SetOutputStream(std::ostream* out) {
assert(out);
output_stream_ = out;
}
// REQUIRES: The object referenced by 'err' is valid for the lifetime
// of the reporter.
void SetErrorStream(std::ostream* err) {
assert(err);
error_stream_ = err;
}
std::ostream& GetOutputStream() const {
return *output_stream_;
}
std::ostream& GetErrorStream() const {
return *error_stream_;
}
virtual ~BenchmarkReporter();
protected:
static void ComputeStats(std::vector<Run> const& reports, Run* mean, Run* stddev);
static TimeUnitMultiplier GetTimeUnitAndMultiplier(TimeUnit unit);
// Write a human readable string to 'out' representing the specified
// 'context'.
// REQUIRES: 'out' is non-null.
static void PrintBasicContext(std::ostream* out, Context const& context);
private:
std::ostream* output_stream_;
std::ostream* error_stream_;
};
// Simple reporter that outputs benchmark data to the console. This is the
// default reporter used by RunSpecifiedBenchmarks().
class ConsoleReporter : public BenchmarkReporter {
public:
public:
enum OutputOptions {
OO_None,
OO_Color
};
explicit ConsoleReporter(OutputOptions color_output = OO_Color)
: color_output_(color_output == OO_Color) {}
virtual bool ReportContext(const Context& context);
virtual void ReportRuns(const std::vector<Run>& reports);
protected:
protected:
virtual void PrintRunData(const Run& report);
size_t name_field_width_;
private:
bool color_output_;
};
class JSONReporter : public BenchmarkReporter {
public:
public:
JSONReporter() : first_report_(true) {}
virtual bool ReportContext(const Context& context);
virtual void ReportRuns(const std::vector<Run>& reports);
virtual void Finalize();
private:
private:
void PrintRunData(const Run& report);
bool first_report_;
};
class CSVReporter : public BenchmarkReporter {
public:
public:
virtual bool ReportContext(const Context& context);
virtual void ReportRuns(const std::vector<Run>& reports);
private:
private:
void PrintRunData(const Run& report);
};
} // end namespace benchmark
#endif // BENCHMARK_REPORTER_H_
inline const char* GetTimeUnitString(TimeUnit unit) {
switch (unit) {
case kMillisecond:
return "ms";
case kMicrosecond:
return "us";
case kNanosecond:
default:
return "ns";
}
}
inline double GetTimeUnitMultiplier(TimeUnit unit) {
switch (unit) {
case kMillisecond:
return 1e3;
case kMicrosecond:
return 1e6;
case kNanosecond:
default:
return 1e9;
}
}
} // end namespace benchmark
#endif // BENCHMARK_REPORTER_H_
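
As a reading aid, a minimal sketch (not part of this commit) of a custom reporter built on the interface above; it uses only members visible in this header, and names such as TerseReporter are invented:

#include "benchmark/reporter.h"

class TerseReporter : public benchmark::BenchmarkReporter {
 public:
  virtual bool ReportContext(const Context& context) {
    PrintBasicContext(&GetErrorStream(), context);  // protected static helper above
    return true;
  }
  virtual void ReportRuns(const std::vector<Run>& reports) {
    for (const Run& run : reports) {
      GetOutputStream() << run.benchmark_name << " "
                        << run.GetAdjustedCPUTime() << " "
                        << benchmark::GetTimeUnitString(run.time_unit) << "\n";
    }
  }
};
// Typical wiring, assuming the usual entry point in benchmark_api.h:
//   TerseReporter r;  benchmark::RunSpecifiedBenchmarks(&r);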

View File

@ -1,11 +1,27 @@
# Allow the source files to find headers in src/
include_directories(${PROJECT_SOURCE_DIR}/src)
if (DEFINED BENCHMARK_CXX_LINKER_FLAGS)
list(APPEND CMAKE_SHARED_LINKER_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS})
list(APPEND CMAKE_MODULE_LINKER_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS})
endif()
# Define the source files
set(SOURCE_FILES "benchmark.cc" "colorprint.cc" "commandlineflags.cc"
"console_reporter.cc" "csv_reporter.cc" "json_reporter.cc"
"log.cc" "reporter.cc" "sleep.cc" "string_util.cc"
"sysinfo.cc" "walltime.cc")
"console_reporter.cc" "csv_reporter.cc"
"json_reporter.cc" "reporter.cc" "sleep.cc"
"string_util.cc" "sysinfo.cc" "complexity.cc" "timers.cc")
# Add headers to the list of source files. cmake does not require this,
# but IDEs such as Visual Studio need this to add the headers
# to the generated project.
set(_d "${PROJECT_SOURCE_DIR}/include/benchmark")
list(APPEND SOURCE_FILES "${_d}/benchmark.h" "${_d}/benchmark_api.h"
"${_d}/macros.h" "${_d}/reporter.h" "arraysize.h" "check.h"
"colorprint.h" "commandlineflags.h" "complexity.h"
"cycleclock.h" "internal_macros.h" "log.h" "mutex.h"
"re.h" "sleep.h" "stat.h" "string_util.h" "sysinfo.h" "timers.h")
unset(_d)
# Determine the correct regular expression engine to use
if(HAVE_STD_REGEX)
set(RE_FILES "re_std.cc")
@ -19,7 +35,6 @@ endif()
add_library(benchmark ${SOURCE_FILES} ${RE_FILES})
set_target_properties(benchmark PROPERTIES
OUTPUT_NAME "benchmark"
VERSION ${GENERIC_LIB_VERSION}
@ -27,7 +42,7 @@ set_target_properties(benchmark PROPERTIES
)
# Link threads.
target_link_libraries(benchmark ${CMAKE_THREAD_LIBS_INIT})
target_link_libraries(benchmark ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
# We need extra libraries on Windows
if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")

File diff suppressed because it is too large.

View File

@ -10,6 +10,18 @@
namespace benchmark {
namespace internal {
typedef void(AbortHandlerT)();
inline AbortHandlerT*& GetAbortHandler() {
static AbortHandlerT* handler = &std::abort;
return handler;
}
BENCHMARK_NORETURN inline void CallAbortHandler() {
GetAbortHandler()();
std::abort(); // fallback to enforce noreturn
}
// CheckHandler is the class constructed by failing CHECK macros. CheckHandler
// will log information about the failures and abort when it is destructed.
class CheckHandler {
@ -21,20 +33,18 @@ public:
<< check << "' failed. ";
}
std::ostream& GetLog() {
return log_;
}
LogType& GetLog() { return log_; }
BENCHMARK_NORETURN ~CheckHandler() {
BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) {
log_ << std::endl;
std::abort();
CallAbortHandler();
}
CheckHandler & operator=(const CheckHandler&) = delete;
CheckHandler(const CheckHandler&) = delete;
CheckHandler() = delete;
private:
std::ostream& log_;
LogType& log_;
};
} // end namespace internal
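
A small sketch of how the handler hook above can be used by a test to turn CHECK failures into exceptions instead of process aborts (the throwing handler below is hypothetical):

#include <stdexcept>

static void ThrowingAbortHandler() {
  throw std::logic_error("CHECK failed");
}

void InstallThrowingHandler() {
  // GetAbortHandler() returns a mutable reference, so assignment replaces the
  // default &std::abort; CallAbortHandler() will then throw out of the failing
  // CheckHandler destructor, which is declared noexcept(false) above.
  benchmark::internal::GetAbortHandler() = &ThrowingAbortHandler;
}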

View File

@ -16,16 +16,17 @@
#include <cstdarg>
#include <cstdio>
#include <cstdarg>
#include <string>
#include <memory>
#include "commandlineflags.h"
#include "check.h"
#include "internal_macros.h"
#ifdef BENCHMARK_OS_WINDOWS
#include <Windows.h>
#endif
DECLARE_bool(color_print);
namespace benchmark {
namespace {
#ifdef BENCHMARK_OS_WINDOWS
@ -74,19 +75,56 @@ PlatformColorCode GetPlatformColorCode(LogColor color) {
};
#endif
}
} // end namespace
void ColorPrintf(LogColor color, const char* fmt, ...) {
std::string FormatString(const char *msg, va_list args) {
// we might need a second shot at this, so pre-emptively make a copy
va_list args_cp;
va_copy(args_cp, args);
std::size_t size = 256;
char local_buff[256];
auto ret = std::vsnprintf(local_buff, size, msg, args_cp);
va_end(args_cp);
// currently there is no error handling for failure, so this is a hack.
CHECK(ret >= 0);
if (ret == 0) // handle empty expansion
return {};
else if (static_cast<size_t>(ret) < size)
return local_buff;
else {
// we did not provide a long enough buffer on our first attempt.
size = (size_t)ret + 1; // + 1 for the null byte
std::unique_ptr<char[]> buff(new char[size]);
ret = std::vsnprintf(buff.get(), size, msg, args);
CHECK(ret > 0 && ((size_t)ret) < size);
return buff.get();
}
}
std::string FormatString(const char *msg, ...) {
va_list args;
va_start(args, msg);
auto tmp = FormatString(msg, args);
va_end(args);
return tmp;
}
void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...) {
va_list args;
va_start(args, fmt);
ColorPrintf(out, color, fmt, args);
va_end(args);
}
if (!FLAGS_color_print) {
vprintf(fmt, args);
va_end(args);
return;
}
void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, va_list args) {
#ifdef BENCHMARK_OS_WINDOWS
((void)out); // suppress unused warning
const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);
// Gets the current text color.
@ -107,10 +145,10 @@ void ColorPrintf(LogColor color, const char* fmt, ...) {
SetConsoleTextAttribute(stdout_handle, old_color_attrs);
#else
const char* color_code = GetPlatformColorCode(color);
if (color_code) fprintf(stdout, "\033[0;3%sm", color_code);
vprintf(fmt, args);
printf("\033[m"); // Resets the terminal to default.
if (color_code) out << FormatString("\033[0;3%sm", color_code);
out << FormatString(fmt, args) << "\033[m";
#endif
va_end(args);
}
} // end namespace benchmark

View File

@ -1,6 +1,10 @@
#ifndef BENCHMARK_COLORPRINT_H_
#define BENCHMARK_COLORPRINT_H_
#include <cstdarg>
#include <string>
#include <iostream>
namespace benchmark {
enum LogColor {
COLOR_DEFAULT,
@ -13,7 +17,12 @@ enum LogColor {
COLOR_WHITE
};
void ColorPrintf(LogColor color, const char* fmt, ...);
std::string FormatString(const char* msg, va_list args);
std::string FormatString(const char* msg, ...);
void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, va_list args);
void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...);
} // end namespace benchmark
#endif // BENCHMARK_COLORPRINT_H_
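
For orientation, a brief sketch of the stream-based API declared above (the previous stdout-only ColorPrintf overload is gone); the values are made up:

#include <sstream>
#include "colorprint.h"

std::string RenderDemoLine() {
  std::ostringstream out;
  // Note: per colorprint.cc above, the Windows color path ignores 'out' and
  // writes directly to the console handle.
  benchmark::ColorPrintf(out, benchmark::COLOR_GREEN, "%-20s %10.3f ns\n",
                         "BM_Example", 42.0);
  // FormatString() is the printf-style helper both overloads build on.
  out << benchmark::FormatString("%d iterations", 1000);
  return out.str();
}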

283
3rdparty/benchmark/src/complexity.cc vendored Normal file
View File

@ -0,0 +1,283 @@
// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Source project : https://github.com/ismaelJimenez/cpp.leastsq
// Adapted to be used with google benchmark
#include "benchmark/benchmark_api.h"
#include <algorithm>
#include <cmath>
#include "check.h"
#include "complexity.h"
#include "stat.h"
namespace benchmark {
// Internal function to calculate the different scalability forms
BigOFunc* FittingCurve(BigO complexity) {
switch (complexity) {
case oN:
return [](int n) -> double { return n; };
case oNSquared:
return [](int n) -> double { return std::pow(n, 2); };
case oNCubed:
return [](int n) -> double { return std::pow(n, 3); };
case oLogN:
return [](int n) { return std::log2(n); };
case oNLogN:
return [](int n) { return n * std::log2(n); };
case o1:
default:
return [](int) { return 1.0; };
}
}
// Function to return a string for the calculated complexity
std::string GetBigOString(BigO complexity) {
switch (complexity) {
case oN:
return "N";
case oNSquared:
return "N^2";
case oNCubed:
return "N^3";
case oLogN:
return "lgN";
case oNLogN:
return "NlgN";
case o1:
return "(1)";
default:
return "f(N)";
}
}
// Find the coefficient for the high-order term in the running time, by
// minimizing the sum of squares of relative error, for the fitting curve
// given by the lambda expression.
// - n : Vector containing the size of the benchmark tests.
// - time : Vector containing the times for the benchmark tests.
// - fitting_curve : lambda expression (e.g. [](int n) { return n; }).
// For a deeper explanation of the algorithm logic, see the README at
// http://github.com/ismaelJimenez/Minimal-Cpp-Least-Squared-Fit
LeastSq MinimalLeastSq(const std::vector<int>& n,
const std::vector<double>& time,
BigOFunc* fitting_curve) {
double sigma_gn = 0.0;
double sigma_gn_squared = 0.0;
double sigma_time = 0.0;
double sigma_time_gn = 0.0;
// Calculate least square fitting parameter
for (size_t i = 0; i < n.size(); ++i) {
double gn_i = fitting_curve(n[i]);
sigma_gn += gn_i;
sigma_gn_squared += gn_i * gn_i;
sigma_time += time[i];
sigma_time_gn += time[i] * gn_i;
}
LeastSq result;
result.complexity = oLambda;
// Calculate complexity.
result.coef = sigma_time_gn / sigma_gn_squared;
// Calculate RMS
double rms = 0.0;
for (size_t i = 0; i < n.size(); ++i) {
double fit = result.coef * fitting_curve(n[i]);
rms += pow((time[i] - fit), 2);
}
// Normalize RMS by the mean of the observed values
double mean = sigma_time / n.size();
result.rms = sqrt(rms / n.size()) / mean;
return result;
}
// Find the coefficient for the high-order term in the running time, by
// minimizing the sum of squares of relative error.
// - n : Vector containing the size of the benchmark tests.
// - time : Vector containing the times for the benchmark tests.
// - complexity : If different from oAuto, the fitting curve will stick to
// this one. If it is oAuto, the best-fitting curve will be
// calculated.
LeastSq MinimalLeastSq(const std::vector<int>& n,
const std::vector<double>& time,
const BigO complexity) {
CHECK_EQ(n.size(), time.size());
CHECK_GE(n.size(), 2);  // Do not compute a fitting curve if fewer than two
// benchmark runs are given
CHECK_NE(complexity, oNone);
LeastSq best_fit;
if (complexity == oAuto) {
std::vector<BigO> fit_curves = {oLogN, oN, oNLogN, oNSquared, oNCubed};
// Take o1 as default best fitting curve
best_fit = MinimalLeastSq(n, time, FittingCurve(o1));
best_fit.complexity = o1;
// Compute all possible fitting curves and stick to the best one
for (const auto& fit : fit_curves) {
LeastSq current_fit = MinimalLeastSq(n, time, FittingCurve(fit));
if (current_fit.rms < best_fit.rms) {
best_fit = current_fit;
best_fit.complexity = fit;
}
}
} else {
best_fit = MinimalLeastSq(n, time, FittingCurve(complexity));
best_fit.complexity = complexity;
}
return best_fit;
}
std::vector<BenchmarkReporter::Run> ComputeStats(
const std::vector<BenchmarkReporter::Run>& reports) {
typedef BenchmarkReporter::Run Run;
std::vector<Run> results;
auto error_count =
std::count_if(reports.begin(), reports.end(),
[](Run const& run) { return run.error_occurred; });
if (reports.size() - error_count < 2) {
// We don't report aggregated data if there was a single run.
return results;
}
// Accumulators.
Stat1_d real_accumulated_time_stat;
Stat1_d cpu_accumulated_time_stat;
Stat1_d bytes_per_second_stat;
Stat1_d items_per_second_stat;
// All repetitions should be run with the same number of iterations so we
// can take this information from the first benchmark.
int64_t const run_iterations = reports.front().iterations;
// Populate the accumulators.
for (Run const& run : reports) {
CHECK_EQ(reports[0].benchmark_name, run.benchmark_name);
CHECK_EQ(run_iterations, run.iterations);
if (run.error_occurred) continue;
real_accumulated_time_stat +=
Stat1_d(run.real_accumulated_time / run.iterations, run.iterations);
cpu_accumulated_time_stat +=
Stat1_d(run.cpu_accumulated_time / run.iterations, run.iterations);
items_per_second_stat += Stat1_d(run.items_per_second, run.iterations);
bytes_per_second_stat += Stat1_d(run.bytes_per_second, run.iterations);
}
// Get the data from the accumulator to BenchmarkReporter::Run's.
Run mean_data;
mean_data.benchmark_name = reports[0].benchmark_name + "_mean";
mean_data.iterations = run_iterations;
mean_data.real_accumulated_time =
real_accumulated_time_stat.Mean() * run_iterations;
mean_data.cpu_accumulated_time =
cpu_accumulated_time_stat.Mean() * run_iterations;
mean_data.bytes_per_second = bytes_per_second_stat.Mean();
mean_data.items_per_second = items_per_second_stat.Mean();
// Only add label to mean/stddev if it is the same for all runs
mean_data.report_label = reports[0].report_label;
for (std::size_t i = 1; i < reports.size(); i++) {
if (reports[i].report_label != reports[0].report_label) {
mean_data.report_label = "";
break;
}
}
Run stddev_data;
stddev_data.benchmark_name = reports[0].benchmark_name + "_stddev";
stddev_data.report_label = mean_data.report_label;
stddev_data.iterations = 0;
stddev_data.real_accumulated_time = real_accumulated_time_stat.StdDev();
stddev_data.cpu_accumulated_time = cpu_accumulated_time_stat.StdDev();
stddev_data.bytes_per_second = bytes_per_second_stat.StdDev();
stddev_data.items_per_second = items_per_second_stat.StdDev();
results.push_back(mean_data);
results.push_back(stddev_data);
return results;
}
std::vector<BenchmarkReporter::Run> ComputeBigO(
const std::vector<BenchmarkReporter::Run>& reports) {
typedef BenchmarkReporter::Run Run;
std::vector<Run> results;
if (reports.size() < 2) return results;
// Accumulators.
std::vector<int> n;
std::vector<double> real_time;
std::vector<double> cpu_time;
// Populate the accumulators.
for (const Run& run : reports) {
CHECK_GT(run.complexity_n, 0) << "Did you forget to call SetComplexityN?";
n.push_back(run.complexity_n);
real_time.push_back(run.real_accumulated_time / run.iterations);
cpu_time.push_back(run.cpu_accumulated_time / run.iterations);
}
LeastSq result_cpu;
LeastSq result_real;
if (reports[0].complexity == oLambda) {
result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity_lambda);
result_real = MinimalLeastSq(n, real_time, reports[0].complexity_lambda);
} else {
result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity);
result_real = MinimalLeastSq(n, real_time, result_cpu.complexity);
}
std::string benchmark_name =
reports[0].benchmark_name.substr(0, reports[0].benchmark_name.find('/'));
// Get the data from the accumulator to BenchmarkReporter::Run's.
Run big_o;
big_o.benchmark_name = benchmark_name + "_BigO";
big_o.iterations = 0;
big_o.real_accumulated_time = result_real.coef;
big_o.cpu_accumulated_time = result_cpu.coef;
big_o.report_big_o = true;
big_o.complexity = result_cpu.complexity;
double multiplier = GetTimeUnitMultiplier(reports[0].time_unit);
// Carry the label over from the original runs to the BigO/RMS rows
Run rms;
big_o.report_label = reports[0].report_label;
rms.benchmark_name = benchmark_name + "_RMS";
rms.report_label = big_o.report_label;
rms.iterations = 0;
rms.real_accumulated_time = result_real.rms / multiplier;
rms.cpu_accumulated_time = result_cpu.rms / multiplier;
rms.report_rms = true;
rms.complexity = result_cpu.complexity;
results.push_back(big_o);
results.push_back(rms);
return results;
}
} // end namespace benchmark
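
The fit computed by MinimalLeastSq above is the closed-form one-parameter least-squares solution, coef = sum(time_i * g(n_i)) / sum(g(n_i)^2), with the RMS normalized by the mean time. A tiny self-contained sketch of the same arithmetic on made-up numbers:

#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  // Pretend measurements that grow roughly linearly: time ~ 2 * n.
  std::vector<int> n = {1000, 2000, 4000};
  std::vector<double> time = {2100.0, 3900.0, 8100.0};
  double sigma_gn_squared = 0.0, sigma_time_gn = 0.0, sigma_time = 0.0;
  for (std::size_t i = 0; i < n.size(); ++i) {
    double gn_i = n[i];                        // fitting curve g(n) = n, i.e. oN
    sigma_gn_squared += gn_i * gn_i;
    sigma_time_gn += time[i] * gn_i;
    sigma_time += time[i];
  }
  double coef = sigma_time_gn / sigma_gn_squared;  // ~2.01, the reported coefficient
  double rss = 0.0;
  for (std::size_t i = 0; i < n.size(); ++i)
    rss += std::pow(time[i] - coef * n[i], 2);
  double rms = std::sqrt(rss / n.size()) / (sigma_time / n.size());  // ~0.02, i.e. 2%
  std::printf("coef=%.3f rms=%.3f\n", coef, rms);
  return 0;
}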

64
3rdparty/benchmark/src/complexity.h vendored Normal file
View File

@ -0,0 +1,64 @@
// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Source project : https://github.com/ismaelJimenez/cpp.leastsq
// Adapted to be used with google benchmark
#ifndef COMPLEXITY_H_
#define COMPLEXITY_H_
#include <string>
#include <vector>
#include "benchmark/benchmark_api.h"
#include "benchmark/reporter.h"
namespace benchmark {
// Return a vector containing the mean and standard deviation information for
// the specified list of reports. If 'reports' contains fewer than two
// non-errored runs an empty vector is returned.
std::vector<BenchmarkReporter::Run> ComputeStats(
const std::vector<BenchmarkReporter::Run>& reports);
// Return a vector containing the bigO and RMS information for the specified
// list of reports. If 'reports.size() < 2' an empty vector is returned.
std::vector<BenchmarkReporter::Run> ComputeBigO(
const std::vector<BenchmarkReporter::Run>& reports);
// This data structure will contain the result returned by MinimalLeastSq
// - coef : Estimated coefficient for the high-order term as
// interpolated from data.
// - rms : Normalized Root Mean Squared Error.
// - complexity : Scalability form (e.g. oN, oNLogN). In case a scalability
// form has been provided to MinimalLeastSq this will return
// the same value. In case BigO::oAuto has been selected, this
// parameter will return the best fitting curve detected.
struct LeastSq {
LeastSq() :
coef(0.0),
rms(0.0),
complexity(oNone) {}
double coef;
double rms;
BigO complexity;
};
// Function to return a string for the calculated complexity
std::string GetBigOString(BigO complexity);
} // end namespace benchmark
#endif // COMPLEXITY_H_
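
A usage sketch of how this machinery is driven from benchmark code (it assumes the benchmark_api.h entry points of this vendored version; in particular the problem-size accessor is spelled state.range_x() here):

#include <vector>
#include "benchmark/benchmark_api.h"

static void BM_PushBack(benchmark::State& state) {
  const int n = state.range_x();        // problem size for this run
  while (state.KeepRunning()) {
    std::vector<int> v;
    for (int i = 0; i < n; ++i) v.push_back(i);
  }
  // Fills Run::complexity_n; ComputeBigO() checks that it was set.
  state.SetComplexityN(n);
}
// Complexity(benchmark::oN) requests a linear fit; passing benchmark::oAuto
// instead asks MinimalLeastSq above to pick the curve with the lowest RMS.
BENCHMARK(BM_PushBack)->Range(1 << 10, 1 << 16)->Complexity(benchmark::oN);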

View File

@ -13,7 +13,9 @@
// limitations under the License.
#include "benchmark/reporter.h"
#include "complexity.h"
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <iostream>
@ -23,63 +25,63 @@
#include "check.h"
#include "colorprint.h"
#include "commandlineflags.h"
#include "internal_macros.h"
#include "string_util.h"
#include "walltime.h"
#include "timers.h"
namespace benchmark {
bool ConsoleReporter::ReportContext(const Context& context) {
name_field_width_ = context.name_field_width;
std::cerr << "Run on (" << context.num_cpus << " X " << context.mhz_per_cpu
<< " MHz CPU " << ((context.num_cpus > 1) ? "s" : "") << ")\n";
PrintBasicContext(&GetErrorStream(), context);
std::cerr << LocalDateTimeString() << "\n";
if (context.cpu_scaling_enabled) {
std::cerr << "***WARNING*** CPU scaling is enabled, the benchmark "
"real time measurements may be noisy and will incur extra "
"overhead.\n";
#ifdef BENCHMARK_OS_WINDOWS
if (color_output_ && &std::cout != &GetOutputStream()) {
GetErrorStream() << "Color printing is only supported for stdout on windows."
" Disabling color printing\n";
color_output_ = false;
}
#ifndef NDEBUG
std::cerr << "***WARNING*** Library was built as DEBUG. Timings may be "
"affected.\n";
#endif
int output_width = fprintf(stdout, "%-*s %13s %13s %10s\n",
std::string str = FormatString("%-*s %13s %13s %10s\n",
static_cast<int>(name_field_width_), "Benchmark",
"Time", "CPU", "Iterations");
std::cout << std::string(output_width - 1, '-') << "\n";
GetOutputStream() << str << std::string(str.length() - 1, '-') << "\n";
return true;
}
void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) {
if (reports.empty()) {
return;
}
for (Run const& run : reports) {
CHECK_EQ(reports[0].benchmark_name, run.benchmark_name);
for (const auto& run : reports)
PrintRunData(run);
}
}
if (reports.size() < 2) {
// We don't report aggregated data if there was a single run.
return;
}
Run mean_data;
Run stddev_data;
BenchmarkReporter::ComputeStats(reports, &mean_data, &stddev_data);
// Output using PrintRun.
PrintRunData(mean_data);
PrintRunData(stddev_data);
static void IgnoreColorPrint(std::ostream& out, LogColor,
const char* fmt, ...)
{
va_list args;
va_start(args, fmt);
out << FormatString(fmt, args);
va_end(args);
}
void ConsoleReporter::PrintRunData(const Run& result) {
typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...);
auto& Out = GetOutputStream();
PrinterFn* printer = color_output_ ? (PrinterFn*)ColorPrintf
: IgnoreColorPrint;
auto name_color =
(result.report_big_o || result.report_rms) ? COLOR_BLUE : COLOR_GREEN;
printer(Out, name_color, "%-*s ", name_field_width_,
result.benchmark_name.c_str());
if (result.error_occurred) {
printer(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'",
result.error_message.c_str());
printer(Out, COLOR_DEFAULT, "\n");
return;
}
// Format bytes per second
std::string rate;
if (result.bytes_per_second > 0) {
@ -91,46 +93,41 @@ void ConsoleReporter::PrintRunData(const Run& result) {
if (result.items_per_second > 0) {
items = StrCat(" ", HumanReadableNumber(result.items_per_second),
" items/s");
}
}
double multiplier;
const char* timeLabel;
std::tie(timeLabel, multiplier) = GetTimeUnitAndMultiplier(result.time_unit);
const double real_time = result.GetAdjustedRealTime();
const double cpu_time = result.GetAdjustedCPUTime();
ColorPrintf(COLOR_GREEN, "%-*s ",
name_field_width_, result.benchmark_name.c_str());
if (result.iterations == 0) {
ColorPrintf(COLOR_YELLOW, "%10.0f %s %10.0f %s ",
result.real_accumulated_time * multiplier,
timeLabel,
result.cpu_accumulated_time * multiplier,
timeLabel);
if (result.report_big_o) {
std::string big_o = GetBigOString(result.complexity);
printer(Out, COLOR_YELLOW, "%10.2f %s %10.2f %s ", real_time,
big_o.c_str(), cpu_time, big_o.c_str());
} else if (result.report_rms) {
printer(Out, COLOR_YELLOW, "%10.0f %% %10.0f %% ", real_time * 100,
cpu_time * 100);
} else {
ColorPrintf(COLOR_YELLOW, "%10.0f %s %10.0f %s ",
(result.real_accumulated_time * multiplier) /
(static_cast<double>(result.iterations)),
timeLabel,
(result.cpu_accumulated_time * multiplier) /
(static_cast<double>(result.iterations)),
timeLabel);
const char* timeLabel = GetTimeUnitString(result.time_unit);
printer(Out, COLOR_YELLOW, "%10.0f %s %10.0f %s ", real_time, timeLabel,
cpu_time, timeLabel);
}
ColorPrintf(COLOR_CYAN, "%10lld", result.iterations);
if (!result.report_big_o && !result.report_rms) {
printer(Out, COLOR_CYAN, "%10lld", result.iterations);
}
if (!rate.empty()) {
ColorPrintf(COLOR_DEFAULT, " %*s", 13, rate.c_str());
printer(Out, COLOR_DEFAULT, " %*s", 13, rate.c_str());
}
if (!items.empty()) {
ColorPrintf(COLOR_DEFAULT, " %*s", 18, items.c_str());
printer(Out, COLOR_DEFAULT, " %*s", 18, items.c_str());
}
if (!result.report_label.empty()) {
ColorPrintf(COLOR_DEFAULT, " %s", result.report_label.c_str());
printer(Out, COLOR_DEFAULT, " %s", result.report_label.c_str());
}
ColorPrintf(COLOR_DEFAULT, "\n");
printer(Out, COLOR_DEFAULT, "\n");
}
} // end namespace benchmark

View File

@ -13,7 +13,9 @@
// limitations under the License.
#include "benchmark/reporter.h"
#include "complexity.h"
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
@ -21,90 +23,96 @@
#include <vector>
#include "string_util.h"
#include "walltime.h"
#include "timers.h"
// File format reference: http://edoceo.com/utilitas/csv-file-format.
namespace benchmark {
namespace {
std::vector<std::string> elements = {
"name",
"iterations",
"real_time",
"cpu_time",
"time_unit",
"bytes_per_second",
"items_per_second",
"label",
"error_occurred",
"error_message"
};
}
bool CSVReporter::ReportContext(const Context& context) {
std::cerr << "Run on (" << context.num_cpus << " X " << context.mhz_per_cpu
<< " MHz CPU " << ((context.num_cpus > 1) ? "s" : "") << ")\n";
PrintBasicContext(&GetErrorStream(), context);
std::cerr << LocalDateTimeString() << "\n";
if (context.cpu_scaling_enabled) {
std::cerr << "***WARNING*** CPU scaling is enabled, the benchmark "
"real time measurements may be noisy and will incur extra "
"overhead.\n";
std::ostream& Out = GetOutputStream();
for (auto B = elements.begin(); B != elements.end(); ) {
Out << *B++;
if (B != elements.end())
Out << ",";
}
#ifndef NDEBUG
std::cerr << "***WARNING*** Library was built as DEBUG. Timings may be "
"affected.\n";
#endif
std::cout << "name,iterations,real_time,cpu_time,time_unit,bytes_per_second,"
"items_per_second,label\n";
Out << "\n";
return true;
}
void CSVReporter::ReportRuns(std::vector<Run> const& reports) {
if (reports.empty()) {
return;
}
std::vector<Run> reports_cp = reports;
if (reports.size() >= 2) {
Run mean_data;
Run stddev_data;
BenchmarkReporter::ComputeStats(reports, &mean_data, &stddev_data);
reports_cp.push_back(mean_data);
reports_cp.push_back(stddev_data);
}
for (auto it = reports_cp.begin(); it != reports_cp.end(); ++it) {
PrintRunData(*it);
}
void CSVReporter::ReportRuns(const std::vector<Run> & reports) {
for (const auto& run : reports)
PrintRunData(run);
}
void CSVReporter::PrintRunData(Run const& run) {
double multiplier;
const char* timeLabel;
std::tie(timeLabel, multiplier) = GetTimeUnitAndMultiplier(run.time_unit);
double cpu_time = run.cpu_accumulated_time * multiplier;
double real_time = run.real_accumulated_time * multiplier;
if (run.iterations != 0) {
real_time = real_time / static_cast<double>(run.iterations);
cpu_time = cpu_time / static_cast<double>(run.iterations);
}
void CSVReporter::PrintRunData(const Run & run) {
std::ostream& Out = GetOutputStream();
// A field with embedded double-quote characters must have each quote doubled
// and the field itself delimited with double-quotes.
std::string name = run.benchmark_name;
ReplaceAll(&name, "\"", "\"\"");
std::cout << "\"" << name << "\",";
Out << '"' << name << "\",";
if (run.error_occurred) {
Out << std::string(elements.size() - 3, ',');
Out << "true,";
std::string msg = run.error_message;
ReplaceAll(&msg, "\"", "\"\"");
Out << '"' << msg << "\"\n";
return;
}
std::cout << run.iterations << ",";
std::cout << real_time << ",";
std::cout << cpu_time << ",";
std::cout << timeLabel << ",";
// Do not print iterations on bigO and RMS reports
if (!run.report_big_o && !run.report_rms) {
Out << run.iterations;
}
Out << ",";
Out << run.GetAdjustedRealTime() << ",";
Out << run.GetAdjustedCPUTime() << ",";
// Do not print timeLabel on bigO and RMS reports
if (run.report_big_o) {
Out << GetBigOString(run.complexity);
} else if (!run.report_rms) {
Out << GetTimeUnitString(run.time_unit);
}
Out << ",";
if (run.bytes_per_second > 0.0) {
std::cout << run.bytes_per_second;
Out << run.bytes_per_second;
}
std::cout << ",";
Out << ",";
if (run.items_per_second > 0.0) {
std::cout << run.items_per_second;
Out << run.items_per_second;
}
std::cout << ",";
Out << ",";
if (!run.report_label.empty()) {
// A field with embedded double-quote characters must have each quote doubled
// and the field itself delimited with double-quotes.
std::string label = run.report_label;
ReplaceAll(&label, "\"", "\"\"");
std::cout << "\"" << label << "\"";
Out << "\"" << label << "\"";
}
std::cout << '\n';
Out << ",,"; // for error_occurred and error_message
Out << '\n';
}
} // end namespace benchmark
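
The quoting applied twice above is standard CSV escaping; a worked example using the same ReplaceAll helper seen above (CsvQuote is an invented name):

#include <string>
#include "string_util.h"   // benchmark::ReplaceAll, as used above

std::string CsvQuote(std::string field) {
  benchmark::ReplaceAll(&field, "\"", "\"\"");
  return "\"" + field + "\"";
}
// CsvQuote(R"(copy "fast" path)") yields "copy ""fast"" path" including the
// outer quotes, which is how PrintRunData() emits names, messages and labels.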

View File

@ -113,11 +113,11 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
uint32_t pmuseren;
uint32_t pmcntenset;
// Read the user mode perf monitor counter access permissions.
asm("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren));
asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren));
if (pmuseren & 1) { // Allows reading perfmon counters for user mode code.
asm("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset));
asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset));
if (pmcntenset & 0x80000000ul) { // Is it counting?
asm("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr));
asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr));
// The counter is set up to count every 64th cycle
return static_cast<int64_t>(pmccntr) * 64; // Should optimize to << 6
}

View File

@ -7,10 +7,20 @@
# define __has_feature(x) 0
#endif
#if defined(__clang__)
# define COMPILER_CLANG
#elif defined(_MSC_VER)
# define COMPILER_MSVC
#elif defined(__GNUC__)
# define COMPILER_GCC
#endif
#if __has_feature(cxx_attributes)
# define BENCHMARK_NORETURN [[noreturn]]
#elif defined(__GNUC__)
# define BENCHMARK_NORETURN __attribute__((noreturn))
#elif defined(COMPILER_MSVC)
# define BENCHMARK_NORETURN __declspec(noreturn)
#else
# define BENCHMARK_NORETURN
#endif
@ -29,12 +39,5 @@
# define BENCHMARK_OS_LINUX 1
#endif
#if defined(__clang__)
# define COMPILER_CLANG
#elif defined(_MSC_VER)
# define COMPILER_MSVC
#elif defined(__GNUC__)
# define COMPILER_GCC
#endif
#endif // BENCHMARK_INTERNAL_MACROS_H_

View File

@ -13,7 +13,9 @@
// limitations under the License.
#include "benchmark/reporter.h"
#include "complexity.h"
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
@ -21,7 +23,7 @@
#include <vector>
#include "string_util.h"
#include "walltime.h"
#include "timers.h"
namespace benchmark {
@ -52,7 +54,7 @@ int64_t RoundDouble(double v) {
} // end namespace
bool JSONReporter::ReportContext(const Context& context) {
std::ostream& out = std::cout;
std::ostream& out = GetOutputStream();
out << "{\n";
std::string inner_indent(2, ' ');
@ -91,76 +93,86 @@ void JSONReporter::ReportRuns(std::vector<Run> const& reports) {
return;
}
std::string indent(4, ' ');
std::ostream& out = std::cout;
std::ostream& out = GetOutputStream();
if (!first_report_) {
out << ",\n";
}
first_report_ = false;
std::vector<Run> reports_cp = reports;
if (reports.size() >= 2) {
Run mean_data;
Run stddev_data;
BenchmarkReporter::ComputeStats(reports, &mean_data, &stddev_data);
reports_cp.push_back(mean_data);
reports_cp.push_back(stddev_data);
}
for (auto it = reports_cp.begin(); it != reports_cp.end(); ++it) {
out << indent << "{\n";
PrintRunData(*it);
out << indent << '}';
auto it_cp = it;
if (++it_cp != reports_cp.end()) {
out << ",\n";
}
for (auto it = reports.begin(); it != reports.end(); ++it) {
out << indent << "{\n";
PrintRunData(*it);
out << indent << '}';
auto it_cp = it;
if (++it_cp != reports.end()) {
out << ",\n";
}
}
}
void JSONReporter::Finalize() {
// Close the list of benchmarks and the top level object.
std::cout << "\n ]\n}\n";
// Close the list of benchmarks and the top level object.
GetOutputStream() << "\n ]\n}\n";
}
void JSONReporter::PrintRunData(Run const& run) {
double multiplier;
const char* timeLabel;
std::tie(timeLabel, multiplier) = GetTimeUnitAndMultiplier(run.time_unit);
double cpu_time = run.cpu_accumulated_time * multiplier;
double real_time = run.real_accumulated_time * multiplier;
if (run.iterations != 0) {
real_time = real_time / static_cast<double>(run.iterations);
cpu_time = cpu_time / static_cast<double>(run.iterations);
}
std::string indent(6, ' ');
std::ostream& out = std::cout;
std::string indent(6, ' ');
std::ostream& out = GetOutputStream();
out << indent
<< FormatKV("name", run.benchmark_name)
<< ",\n";
if (run.error_occurred) {
out << indent
<< FormatKV("error_occurred", run.error_occurred)
<< ",\n";
out << indent
<< FormatKV("error_message", run.error_message)
<< ",\n";
}
if (!run.report_big_o && !run.report_rms) {
out << indent
<< FormatKV("iterations", run.iterations)
<< ",\n";
out << indent
<< FormatKV("real_time", RoundDouble(run.GetAdjustedRealTime()))
<< ",\n";
out << indent
<< FormatKV("cpu_time", RoundDouble(run.GetAdjustedCPUTime()));
out << ",\n" << indent
<< FormatKV("time_unit", GetTimeUnitString(run.time_unit));
} else if (run.report_big_o) {
out << indent
<< FormatKV("iterations", run.iterations)
<< FormatKV("cpu_coefficient", RoundDouble(run.GetAdjustedCPUTime()))
<< ",\n";
out << indent
<< FormatKV("real_time", RoundDouble(real_time))
<< FormatKV("real_coefficient", RoundDouble(run.GetAdjustedRealTime()))
<< ",\n";
out << indent
<< FormatKV("cpu_time", RoundDouble(cpu_time))
<< ",\n";
out << indent
<< FormatKV("time_unit", timeLabel);
if (run.bytes_per_second > 0.0) {
out << ",\n" << indent
<< FormatKV("bytes_per_second", RoundDouble(run.bytes_per_second));
}
if (run.items_per_second > 0.0) {
out << ",\n" << indent
<< FormatKV("items_per_second", RoundDouble(run.items_per_second));
}
if (!run.report_label.empty()) {
out << ",\n" << indent
<< FormatKV("label", run.report_label);
}
out << '\n';
<< FormatKV("big_o", GetBigOString(run.complexity))
<< ",\n";
out << indent
<< FormatKV("time_unit", GetTimeUnitString(run.time_unit));
} else if(run.report_rms) {
out << indent
<< FormatKV("rms", RoundDouble(run.GetAdjustedCPUTime()*100))
<< '%';
}
if (run.bytes_per_second > 0.0) {
out << ",\n"
<< indent
<< FormatKV("bytes_per_second", RoundDouble(run.bytes_per_second));
}
if (run.items_per_second > 0.0) {
out << ",\n"
<< indent
<< FormatKV("items_per_second", RoundDouble(run.items_per_second));
}
if (!run.report_label.empty()) {
out << ",\n"
<< indent
<< FormatKV("label", run.report_label);
}
out << '\n';
}
} // end namespace benchmark
} // end namespace benchmark

View File

@ -1,40 +0,0 @@
#include "log.h"
#include <iostream>
namespace benchmark {
namespace internal {
int& LoggingLevelImp() {
static int level = 0;
return level;
}
void SetLogLevel(int value) {
LoggingLevelImp() = value;
}
int GetLogLevel() {
return LoggingLevelImp();
}
class NullLogBuffer : public std::streambuf
{
public:
int overflow(int c) {
return c;
}
};
std::ostream& GetNullLogInstance() {
static NullLogBuffer log_buff;
static std::ostream null_log(&log_buff);
return null_log;
}
std::ostream& GetErrorLogInstance() {
return std::clog;
}
} // end namespace internal
} // end namespace benchmark

View File

@ -1,19 +1,63 @@
#ifndef BENCHMARK_LOG_H_
#define BENCHMARK_LOG_H_
#include <iostream>
#include <ostream>
#include "benchmark/macros.h"
namespace benchmark {
namespace internal {
int GetLogLevel();
void SetLogLevel(int level);
typedef std::basic_ostream<char>&(EndLType)(std::basic_ostream<char>&);
std::ostream& GetNullLogInstance();
std::ostream& GetErrorLogInstance();
class LogType {
friend LogType& GetNullLogInstance();
friend LogType& GetErrorLogInstance();
inline std::ostream& GetLogInstanceForLevel(int level) {
if (level <= GetLogLevel()) {
// FIXME: Add locking to output.
template <class Tp>
friend LogType& operator<<(LogType&, Tp const&);
friend LogType& operator<<(LogType&, EndLType*);
private:
LogType(std::ostream* out) : out_(out) {}
std::ostream* out_;
BENCHMARK_DISALLOW_COPY_AND_ASSIGN(LogType);
};
template <class Tp>
LogType& operator<<(LogType& log, Tp const& value) {
if (log.out_) {
*log.out_ << value;
}
return log;
}
inline LogType& operator<<(LogType& log, EndLType* m) {
if (log.out_) {
*log.out_ << m;
}
return log;
}
inline int& LogLevel() {
static int log_level = 0;
return log_level;
}
inline LogType& GetNullLogInstance() {
static LogType log(nullptr);
return log;
}
inline LogType& GetErrorLogInstance() {
static LogType log(&std::clog);
return log;
}
inline LogType& GetLogInstanceForLevel(int level) {
if (level <= LogLevel()) {
return GetErrorLogInstance();
}
return GetNullLogInstance();

View File

@ -4,6 +4,8 @@
#include <mutex>
#include <condition_variable>
#include "check.h"
// Enable thread safety attributes only with clang.
// The attributes can be safely erased when compiling with other compilers.
#if defined(HAVE_THREAD_SAFETY_ATTRIBUTES)
@ -105,36 +107,58 @@ private:
MutexLockImp ml_;
};
class Barrier {
public:
Barrier(int num_threads) : running_threads_(num_threads) {}
class Notification
{
public:
Notification() : notified_yet_(false) { }
void WaitForNotification() const EXCLUDES(mutex_) {
MutexLock m_lock(mutex_);
auto notified_fn = [this]() REQUIRES(mutex_) {
return this->HasBeenNotified();
};
cv_.wait(m_lock.native_handle(), notified_fn);
}
void Notify() EXCLUDES(mutex_) {
// Called by each thread
bool wait() EXCLUDES(lock_) {
bool last_thread = false;
{
MutexLock lock(mutex_);
notified_yet_ = 1;
MutexLock ml(lock_);
last_thread = createBarrier(ml);
}
cv_.notify_all();
if (last_thread) phase_condition_.notify_all();
return last_thread;
}
private:
bool HasBeenNotified() const REQUIRES(mutex_) {
return notified_yet_;
void removeThread() EXCLUDES(lock_) {
MutexLock ml(lock_);
--running_threads_;
if (entered_ != 0) phase_condition_.notify_all();
}
mutable Mutex mutex_;
mutable std::condition_variable cv_;
bool notified_yet_ GUARDED_BY(mutex_);
private:
Mutex lock_;
Condition phase_condition_;
int running_threads_;
// State for barrier management
int phase_number_ = 0;
int entered_ = 0; // Number of threads that have entered this barrier
// Enter the barrier and wait until all other threads have also
// entered the barrier. Returns true iff this is the last thread to
// enter the barrier.
bool createBarrier(MutexLock& ml) REQUIRES(lock_) {
CHECK_LT(entered_, running_threads_);
entered_++;
if (entered_ < running_threads_) {
// Wait for all threads to enter
int phase_number_cp = phase_number_;
auto cb = [this, phase_number_cp]() {
return this->phase_number_ > phase_number_cp ||
entered_ == running_threads_; // A thread has aborted in error
};
phase_condition_.wait(ml.native_handle(), cb);
if (phase_number_ > phase_number_cp) return false;
// else (running_threads_ == entered_) and we are the last thread.
}
// Last thread has reached the barrier
phase_number_++;
entered_ = 0;
return true;
}
};
} // end namespace benchmark
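
A sketch (not from this commit) of how the Barrier above coordinates a pool of worker threads; the per-phase work is a placeholder:

#include <thread>
#include <vector>
#include "mutex.h"

void RunInPhases(int num_threads, int num_phases) {
  benchmark::Barrier barrier(num_threads);
  std::vector<std::thread> pool;
  for (int t = 0; t < num_threads; ++t) {
    pool.emplace_back([&barrier, num_phases] {
      for (int phase = 0; phase < num_phases; ++phase) {
        // ... per-thread work for this phase goes here ...
        if (barrier.wait()) {
          // Exactly one caller (the last to arrive) returns true, which is
          // handy for once-per-phase bookkeeping.
        }
      }
    });
  }
  for (std::thread& th : pool) th.join();
}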

View File

@ -13,86 +13,63 @@
// limitations under the License.
#include "benchmark/reporter.h"
#include "timers.h"
#include <cstdlib>
#include <iostream>
#include <vector>
#include <tuple>
#include "check.h"
#include "stat.h"
namespace benchmark {
void BenchmarkReporter::ComputeStats(
const std::vector<Run>& reports,
Run* mean_data, Run* stddev_data) {
CHECK(reports.size() >= 2) << "Cannot compute stats for less than 2 reports";
// Accumulators.
Stat1_d real_accumulated_time_stat;
Stat1_d cpu_accumulated_time_stat;
Stat1_d bytes_per_second_stat;
Stat1_d items_per_second_stat;
// All repetitions should be run with the same number of iterations so we
// can take this information from the first benchmark.
int64_t const run_iterations = reports.front().iterations;
// Populate the accumulators.
for (Run const& run : reports) {
CHECK_EQ(reports[0].benchmark_name, run.benchmark_name);
CHECK_EQ(run_iterations, run.iterations);
real_accumulated_time_stat +=
Stat1_d(run.real_accumulated_time/run.iterations, run.iterations);
cpu_accumulated_time_stat +=
Stat1_d(run.cpu_accumulated_time/run.iterations, run.iterations);
items_per_second_stat += Stat1_d(run.items_per_second, run.iterations);
bytes_per_second_stat += Stat1_d(run.bytes_per_second, run.iterations);
}
// Get the data from the accumulator to BenchmarkReporter::Run's.
mean_data->benchmark_name = reports[0].benchmark_name + "_mean";
mean_data->iterations = run_iterations;
mean_data->real_accumulated_time = real_accumulated_time_stat.Mean() *
run_iterations;
mean_data->cpu_accumulated_time = cpu_accumulated_time_stat.Mean() *
run_iterations;
mean_data->bytes_per_second = bytes_per_second_stat.Mean();
mean_data->items_per_second = items_per_second_stat.Mean();
// Only add label to mean/stddev if it is same for all runs
mean_data->report_label = reports[0].report_label;
for (std::size_t i = 1; i < reports.size(); i++) {
if (reports[i].report_label != reports[0].report_label) {
mean_data->report_label = "";
break;
}
}
stddev_data->benchmark_name = reports[0].benchmark_name + "_stddev";
stddev_data->report_label = mean_data->report_label;
stddev_data->iterations = 0;
stddev_data->real_accumulated_time =
real_accumulated_time_stat.StdDev();
stddev_data->cpu_accumulated_time =
cpu_accumulated_time_stat.StdDev();
stddev_data->bytes_per_second = bytes_per_second_stat.StdDev();
stddev_data->items_per_second = items_per_second_stat.StdDev();
}
TimeUnitMultiplier BenchmarkReporter::GetTimeUnitAndMultiplier(TimeUnit unit) {
switch (unit) {
case kMillisecond:
return std::make_pair("ms", 1e3);
case kMicrosecond:
return std::make_pair("us", 1e6);
case kNanosecond:
default:
return std::make_pair("ns", 1e9);
}
}
void BenchmarkReporter::Finalize() {
BenchmarkReporter::BenchmarkReporter()
: output_stream_(&std::cout), error_stream_(&std::cerr)
{
}
BenchmarkReporter::~BenchmarkReporter() {
}
void BenchmarkReporter::PrintBasicContext(std::ostream *out_ptr,
Context const &context) {
CHECK(out_ptr) << "cannot be null";
auto& Out = *out_ptr;
Out << "Run on (" << context.num_cpus << " X " << context.mhz_per_cpu
<< " MHz CPU " << ((context.num_cpus > 1) ? "s" : "") << ")\n";
Out << LocalDateTimeString() << "\n";
if (context.cpu_scaling_enabled) {
Out << "***WARNING*** CPU scaling is enabled, the benchmark "
"real time measurements may be noisy and will incur extra "
"overhead.\n";
}
#ifndef NDEBUG
Out << "***WARNING*** Library was built as DEBUG. Timings may be "
"affected.\n";
#endif
}
double BenchmarkReporter::Run::GetAdjustedRealTime() const {
double new_time = real_accumulated_time * GetTimeUnitMultiplier(time_unit);
if (iterations != 0)
new_time /= static_cast<double>(iterations);
return new_time;
}
double BenchmarkReporter::Run::GetAdjustedCPUTime() const {
double new_time = cpu_accumulated_time * GetTimeUnitMultiplier(time_unit);
if (iterations != 0)
new_time /= static_cast<double>(iterations);
return new_time;
}
} // end namespace benchmark
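
Worked example of the adjustment above (illustrative numbers): with time_unit == kMicrosecond, real_accumulated_time == 0.002 s and iterations == 1000, GetAdjustedRealTime() returns 0.002 * 1e6 / 1000 = 2, i.e. 2 us per iteration; for rows constructed with iterations == 0 (the stddev, BigO and RMS entries) the accumulated value is only converted to the time unit, not divided.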

View File

@ -52,7 +52,6 @@ namespace {
std::once_flag cpuinfo_init;
double cpuinfo_cycles_per_second = 1.0;
int cpuinfo_num_cpus = 1; // Conservative guess
std::mutex cputimens_mutex;
#if !defined BENCHMARK_OS_MACOSX
const int64_t estimate_time_ms = 1000;
@ -239,6 +238,7 @@ void InitializeSystemInfo() {
}
// TODO: also figure out cpuinfo_num_cpus
#elif defined BENCHMARK_OS_WINDOWS
// In NT, read MHz from the registry. If we fail to do so or we're in win9x
// then make a crude estimate.
@ -251,7 +251,12 @@ void InitializeSystemInfo() {
cpuinfo_cycles_per_second = static_cast<double>((int64_t)data * (int64_t)(1000 * 1000)); // was mhz
else
cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
// TODO: also figure out cpuinfo_num_cpus
SYSTEM_INFO sysinfo;
// Use memset as opposed to = {} to avoid GCC missing initializer false positives.
std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO));
GetSystemInfo(&sysinfo);
cpuinfo_num_cpus = sysinfo.dwNumberOfProcessors; // number of logical processors in the current group
#elif defined BENCHMARK_OS_MACOSX
// returning "mach time units" per second. the current number of elapsed
@ -282,102 +287,9 @@ void InitializeSystemInfo() {
cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
#endif
}
} // end namespace
// getrusage() based implementation of MyCPUUsage
static double MyCPUUsageRUsage() {
#ifndef BENCHMARK_OS_WINDOWS
struct rusage ru;
if (getrusage(RUSAGE_SELF, &ru) == 0) {
return (static_cast<double>(ru.ru_utime.tv_sec) +
static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
static_cast<double>(ru.ru_stime.tv_sec) +
static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
} else {
return 0.0;
}
#else
HANDLE proc = GetCurrentProcess();
FILETIME creation_time;
FILETIME exit_time;
FILETIME kernel_time;
FILETIME user_time;
ULARGE_INTEGER kernel;
ULARGE_INTEGER user;
GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, &user_time);
kernel.HighPart = kernel_time.dwHighDateTime;
kernel.LowPart = kernel_time.dwLowDateTime;
user.HighPart = user_time.dwHighDateTime;
user.LowPart = user_time.dwLowDateTime;
return (static_cast<double>(kernel.QuadPart) +
static_cast<double>(user.QuadPart)) * 1e-7;
#endif // OS_WINDOWS
}
#ifndef BENCHMARK_OS_WINDOWS
static bool MyCPUUsageCPUTimeNsLocked(double* cputime) {
static int cputime_fd = -1;
if (cputime_fd == -1) {
cputime_fd = open("/proc/self/cputime_ns", O_RDONLY);
if (cputime_fd < 0) {
cputime_fd = -1;
return false;
}
}
char buff[64];
memset(buff, 0, sizeof(buff));
if (pread(cputime_fd, buff, sizeof(buff) - 1, 0) <= 0) {
close(cputime_fd);
cputime_fd = -1;
return false;
}
unsigned long long result = strtoull(buff, nullptr, 0);
if (result == (std::numeric_limits<unsigned long long>::max)()) {
close(cputime_fd);
cputime_fd = -1;
return false;
}
*cputime = static_cast<double>(result) / 1e9;
return true;
}
#endif // OS_WINDOWS
double MyCPUUsage() {
#ifndef BENCHMARK_OS_WINDOWS
{
std::lock_guard<std::mutex> l(cputimens_mutex);
static bool use_cputime_ns = true;
if (use_cputime_ns) {
double value;
if (MyCPUUsageCPUTimeNsLocked(&value)) {
return value;
}
// Once MyCPUUsageCPUTimeNsLocked fails once fall back to getrusage().
VLOG(1) << "Reading /proc/self/cputime_ns failed. Using getrusage().\n";
use_cputime_ns = false;
}
}
#endif // OS_WINDOWS
return MyCPUUsageRUsage();
}
double ChildrenCPUUsage() {
#ifndef BENCHMARK_OS_WINDOWS
struct rusage ru;
if (getrusage(RUSAGE_CHILDREN, &ru) == 0) {
return (static_cast<double>(ru.ru_utime.tv_sec) +
static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
static_cast<double>(ru.ru_stime.tv_sec) +
static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
} else {
return 0.0;
}
#else
// TODO: Not sure what this even means on Windows
return 0.0;
#endif // OS_WINDOWS
}
double CyclesPerSecond(void) {
std::call_once(cpuinfo_init, InitializeSystemInfo);
return cpuinfo_cycles_per_second;

View File

@ -2,8 +2,6 @@
#define BENCHMARK_SYSINFO_H_
namespace benchmark {
double MyCPUUsage();
double ChildrenCPUUsage();
int NumCPUs();
double CyclesPerSecond();
bool CpuScalingEnabled();

195
3rdparty/benchmark/src/timers.cc vendored Normal file
View File

@ -0,0 +1,195 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "timers.h"
#include "internal_macros.h"
#ifdef BENCHMARK_OS_WINDOWS
#include <Shlwapi.h>
#include <VersionHelpers.h>
#include <Windows.h>
#else
#include <fcntl.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
#include <unistd.h>
#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX
#include <sys/sysctl.h>
#endif
#if defined(BENCHMARK_OS_MACOSX)
#include <mach/mach_init.h>
#include <mach/mach_port.h>
#include <mach/thread_act.h>
#endif
#endif
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>
#include <limits>
#include <mutex>
#include "check.h"
#include "log.h"
#include "sleep.h"
#include "string_util.h"
namespace benchmark {
// Suppress unused warnings on helper functions.
#if defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-function"
#endif
namespace {
#if defined(BENCHMARK_OS_WINDOWS)
double MakeTime(FILETIME const& kernel_time, FILETIME const& user_time) {
ULARGE_INTEGER kernel;
ULARGE_INTEGER user;
kernel.HighPart = kernel_time.dwHighDateTime;
kernel.LowPart = kernel_time.dwLowDateTime;
user.HighPart = user_time.dwHighDateTime;
user.LowPart = user_time.dwLowDateTime;
return (static_cast<double>(kernel.QuadPart) +
static_cast<double>(user.QuadPart)) *
1e-7;
}
#else
double MakeTime(struct timespec const& ts) {
return ts.tv_sec + (static_cast<double>(ts.tv_nsec) * 1e-9);
}
double MakeTime(struct rusage ru) {
return (static_cast<double>(ru.ru_utime.tv_sec) +
static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
static_cast<double>(ru.ru_stime.tv_sec) +
static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
}
#endif
#if defined(BENCHMARK_OS_MACOSX)
double MakeTime(thread_basic_info_data_t const& info) {
return (static_cast<double>(info.user_time.seconds) +
static_cast<double>(info.user_time.microseconds) * 1e-6 +
static_cast<double>(info.system_time.seconds) +
static_cast<double>(info.system_time.microseconds) * 1e-6);
}
#endif
BENCHMARK_NORETURN static void DiagnoseAndExit(const char* msg) {
std::cerr << "ERROR: " << msg << std::endl;
std::exit(EXIT_FAILURE);
}
} // end namespace
#if defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
double ProcessCPUUsage() {
#if defined(BENCHMARK_OS_WINDOWS)
HANDLE proc = GetCurrentProcess();
FILETIME creation_time;
FILETIME exit_time;
FILETIME kernel_time;
FILETIME user_time;
if (GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, &user_time))
return MakeTime(kernel_time, user_time);
DiagnoseAndExit("GetProccessTimes() failed");
#elif defined(CLOCK_PROCESS_CPUTIME_ID)
struct timespec spec;
if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0)
return MakeTime(spec);
DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed");
#else
struct rusage ru;
if (getrusage(RUSAGE_SELF, &ru) == 0)
return MakeTime(ru);
DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed");
#endif
}
double ThreadCPUUsage() {
#if defined(BENCHMARK_OS_WINDOWS)
HANDLE this_thread = GetCurrentThread();
FILETIME creation_time;
FILETIME exit_time;
FILETIME kernel_time;
FILETIME user_time;
GetThreadTimes(this_thread, &creation_time, &exit_time, &kernel_time,
&user_time);
return MakeTime(kernel_time, user_time);
#elif defined(CLOCK_THREAD_CPUTIME_ID)
struct timespec ts;
if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0)
return MakeTime(ts);
DiagnoseAndExit("clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...) failed");
#elif defined(BENCHMARK_OS_MACOSX)
mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
thread_basic_info_data_t info;
mach_port_t thread = pthread_mach_thread_np(pthread_self());
if (thread_info(thread, THREAD_BASIC_INFO, (thread_info_t) &info, &count)
== KERN_SUCCESS) {
return MakeTime(info);
}
DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info");
#else
#error Per-thread timing is not available on your system.
#endif
}
namespace {
std::string DateTimeString(bool local) {
typedef std::chrono::system_clock Clock;
std::time_t now = Clock::to_time_t(Clock::now());
const std::size_t kStorageSize = 128;
char storage[kStorageSize];
std::size_t written;
if (local) {
#if defined(BENCHMARK_OS_WINDOWS)
written =
std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now));
#else
std::tm timeinfo;
std::memset(&timeinfo, 0, sizeof(std::tm));
::localtime_r(&now, &timeinfo);
written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo);
#endif
} else {
#if defined(BENCHMARK_OS_WINDOWS)
written = std::strftime(storage, sizeof(storage), "%x %X", ::gmtime(&now));
#else
std::tm timeinfo;
std::memset(&timeinfo, 0, sizeof(std::tm));
::gmtime_r(&now, &timeinfo);
written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo);
#endif
}
CHECK(written < kStorageSize);
((void)written); // prevent unused variable in optimized mode.
return std::string(storage);
}
} // end namespace
std::string LocalDateTimeString() { return DateTimeString(true); }
} // end namespace benchmark

48
3rdparty/benchmark/src/timers.h vendored Normal file
View File

@ -0,0 +1,48 @@
#ifndef BENCHMARK_TIMERS_H
#define BENCHMARK_TIMERS_H
#include <chrono>
#include <string>
namespace benchmark {
// Return the CPU usage of the current process
double ProcessCPUUsage();
// Return the CPU usage of the children of the current process
double ChildrenCPUUsage();
// Return the CPU usage of the current thread
double ThreadCPUUsage();
#if defined(HAVE_STEADY_CLOCK)
template <bool HighResIsSteady = std::chrono::high_resolution_clock::is_steady>
struct ChooseSteadyClock {
typedef std::chrono::high_resolution_clock type;
};
template <>
struct ChooseSteadyClock<false> {
typedef std::chrono::steady_clock type;
};
#endif
struct ChooseClockType {
#if defined(HAVE_STEADY_CLOCK)
typedef ChooseSteadyClock<>::type type;
#else
typedef std::chrono::high_resolution_clock type;
#endif
};
inline double ChronoClockNow() {
typedef ChooseClockType::type ClockType;
using FpSeconds = std::chrono::duration<double, std::chrono::seconds::period>;
return FpSeconds(ClockType::now().time_since_epoch()).count();
}
std::string LocalDateTimeString();
} // end namespace benchmark
#endif // BENCHMARK_TIMERS_H
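
A minimal sketch (not part of this commit) of timing a region with the helpers declared above; DoExpensiveWork is a placeholder:

#include <cstdio>
#include "timers.h"

void DoExpensiveWork();   // hypothetical workload

void TimeIt() {
  const double wall_start = benchmark::ChronoClockNow();  // seconds, steady clock if available
  const double cpu_start = benchmark::ThreadCPUUsage();   // seconds of this thread's CPU time
  DoExpensiveWork();
  std::printf("wall=%.6fs cpu=%.6fs\n",
              benchmark::ChronoClockNow() - wall_start,
              benchmark::ThreadCPUUsage() - cpu_start);
}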

View File

@ -1,263 +0,0 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "benchmark/macros.h"
#include "internal_macros.h"
#include "walltime.h"
#if defined(BENCHMARK_OS_WINDOWS)
#include <time.h>
#include <winsock.h> // for timeval
#else
#include <sys/time.h>
#endif
#include <cstdio>
#include <cstdint>
#include <cstring>
#include <ctime>
#include <atomic>
#include <chrono>
#include <limits>
#include "arraysize.h"
#include "check.h"
#include "cycleclock.h"
#include "log.h"
#include "sysinfo.h"
namespace benchmark {
namespace walltime {
namespace {
#if defined(HAVE_STEADY_CLOCK)
template <bool HighResIsSteady = std::chrono::high_resolution_clock::is_steady>
struct ChooseSteadyClock {
typedef std::chrono::high_resolution_clock type;
};
template <>
struct ChooseSteadyClock<false> {
typedef std::chrono::steady_clock type;
};
#endif
struct ChooseClockType {
#if defined(HAVE_STEADY_CLOCK)
typedef ChooseSteadyClock<>::type type;
#else
typedef std::chrono::high_resolution_clock type;
#endif
};
class WallTimeImp
{
public:
WallTime Now();
static WallTimeImp& GetWallTimeImp() {
static WallTimeImp* imp = new WallTimeImp();
return *imp;
}
private:
WallTimeImp();
// Helper routines to load/store a float from an AtomicWord. Required because
// g++ < 4.7 doesn't support std::atomic<float> correctly. I cannot wait to
// get rid of this horror show.
void SetDrift(float f) {
int32_t w;
memcpy(&w, &f, sizeof(f));
std::atomic_store(&drift_adjust_, w);
}
float GetDrift() const {
float f;
int32_t w = std::atomic_load(&drift_adjust_);
memcpy(&f, &w, sizeof(f));
return f;
}
WallTime Slow() const {
struct timeval tv;
#if defined(BENCHMARK_OS_WINDOWS)
FILETIME file_time;
SYSTEMTIME system_time;
ULARGE_INTEGER ularge;
const unsigned __int64 epoch = 116444736000000000LL;
GetSystemTime(&system_time);
SystemTimeToFileTime(&system_time, &file_time);
ularge.LowPart = file_time.dwLowDateTime;
ularge.HighPart = file_time.dwHighDateTime;
tv.tv_sec = (long)((ularge.QuadPart - epoch) / (10L * 1000 * 1000));
tv.tv_usec = (long)(system_time.wMilliseconds * 1000);
#else
gettimeofday(&tv, nullptr);
#endif
return tv.tv_sec + tv.tv_usec * 1e-6;
}
private:
static_assert(sizeof(float) <= sizeof(int32_t),
"type sizes don't allow the drift_adjust hack");
WallTime base_walltime_;
int64_t base_cycletime_;
int64_t cycles_per_second_;
double seconds_per_cycle_;
uint32_t last_adjust_time_;
std::atomic<int32_t> drift_adjust_;
int64_t max_interval_cycles_;
BENCHMARK_DISALLOW_COPY_AND_ASSIGN(WallTimeImp);
};
WallTime WallTimeImp::Now() {
WallTime now = 0.0;
WallTime result = 0.0;
int64_t ct = 0;
uint32_t top_bits = 0;
do {
ct = cycleclock::Now();
int64_t cycle_delta = ct - base_cycletime_;
result = base_walltime_ + cycle_delta * seconds_per_cycle_;
top_bits = static_cast<uint32_t>(uint64_t(ct) >> 32);
// Recompute drift no more often than every 2^32 cycles.
// I.e., @2GHz, ~ every two seconds
if (top_bits == last_adjust_time_) { // don't need to recompute drift
return result + GetDrift();
}
now = Slow();
} while (cycleclock::Now() - ct > max_interval_cycles_);
// We are now sure that "now" and "result" were produced within
// kMaxErrorInterval of one another.
SetDrift(static_cast<float>(now - result));
last_adjust_time_ = top_bits;
return now;
}
WallTimeImp::WallTimeImp()
: base_walltime_(0.0), base_cycletime_(0),
cycles_per_second_(0), seconds_per_cycle_(0.0),
last_adjust_time_(0), drift_adjust_(0),
max_interval_cycles_(0) {
const double kMaxErrorInterval = 100e-6;
cycles_per_second_ = static_cast<int64_t>(CyclesPerSecond());
CHECK(cycles_per_second_ != 0);
seconds_per_cycle_ = 1.0 / cycles_per_second_;
max_interval_cycles_ =
static_cast<int64_t>(cycles_per_second_ * kMaxErrorInterval);
do {
base_cycletime_ = cycleclock::Now();
base_walltime_ = Slow();
} while (cycleclock::Now() - base_cycletime_ > max_interval_cycles_);
// We are now sure that "base_walltime" and "base_cycletime" were produced
// within kMaxErrorInterval of one another.
SetDrift(0.0);
last_adjust_time_ = static_cast<uint32_t>(uint64_t(base_cycletime_) >> 32);
}
WallTime CPUWalltimeNow() {
static WallTimeImp& imp = WallTimeImp::GetWallTimeImp();
return imp.Now();
}
WallTime ChronoWalltimeNow() {
typedef ChooseClockType::type Clock;
typedef std::chrono::duration<WallTime, std::chrono::seconds::period>
FPSeconds;
static_assert(std::chrono::treat_as_floating_point<WallTime>::value,
"This type must be treated as a floating point type.");
auto now = Clock::now().time_since_epoch();
return std::chrono::duration_cast<FPSeconds>(now).count();
}
bool UseCpuCycleClock() {
bool useWallTime = !CpuScalingEnabled();
if (useWallTime) {
VLOG(1) << "Using the CPU cycle clock to provide walltime::Now().\n";
} else {
VLOG(1) << "Using std::chrono to provide walltime::Now().\n";
}
return useWallTime;
}
} // end anonymous namespace
// WallTimeImp doesn't work when CPU Scaling is enabled. If CPU Scaling is
// enabled at the start of the program then std::chrono::system_clock is used
// instead.
WallTime Now()
{
static bool useCPUClock = UseCpuCycleClock();
if (useCPUClock) {
return CPUWalltimeNow();
} else {
return ChronoWalltimeNow();
}
}
} // end namespace walltime
namespace {
std::string DateTimeString(bool local) {
typedef std::chrono::system_clock Clock;
std::time_t now = Clock::to_time_t(Clock::now());
char storage[128];
std::size_t written;
if (local) {
#if defined(BENCHMARK_OS_WINDOWS)
written = std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now));
#else
std::tm timeinfo;
std::memset(&timeinfo, 0, sizeof(std::tm));
::localtime_r(&now, &timeinfo);
written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo);
#endif
} else {
#if defined(BENCHMARK_OS_WINDOWS)
written = std::strftime(storage, sizeof(storage), "%x %X", ::gmtime(&now));
#else
std::tm timeinfo;
std::memset(&timeinfo, 0, sizeof(std::tm));
::gmtime_r(&now, &timeinfo);
written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo);
#endif
}
CHECK(written < arraysize(storage));
((void)written); // prevent unused variable in optimized mode.
return std::string(storage);
}
} // end namespace
std::string LocalDateTimeString() {
return DateTimeString(true);
}
} // end namespace benchmark
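The heart of the deleted WallTimeImp::Now() above is a cycle-count extrapolation: wall time is the base wall time plus the elapsed cycles scaled by seconds-per-cycle, corrected by the periodically recomputed drift. A minimal sketch of that arithmetic, with illustrative names rather than the actual class members:
#include <cstdint>
// walltime ~ base_walltime + (cycles_now - base_cycles) * seconds_per_cycle + drift
double ExtrapolatedWallTime(double base_walltime, int64_t base_cycles,
                            int64_t cycles_now, double seconds_per_cycle,
                            double drift) {
  const int64_t cycle_delta = cycles_now - base_cycles;
  return base_walltime + cycle_delta * seconds_per_cycle + drift;
}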

View File

@@ -1,17 +0,0 @@
#ifndef BENCHMARK_WALLTIME_H_
#define BENCHMARK_WALLTIME_H_
#include <string>
namespace benchmark {
typedef double WallTime;
namespace walltime {
WallTime Now();
} // end namespace walltime
std::string LocalDateTimeString();
} // end namespace benchmark
#endif // BENCHMARK_WALLTIME_H_

View File

@@ -2,15 +2,27 @@
find_package(Threads REQUIRED)
set(CXX03_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-std=c++11" "-std=c++03" CXX03_FLAGS "${CXX03_FLAGS}")
string(REPLACE "-std=c++0x" "-std=c++03" CXX03_FLAGS "${CXX03_FLAGS}")
# NOTE: These flags must be added after find_package(Threads REQUIRED) otherwise
# they will break the configuration check.
if (DEFINED BENCHMARK_CXX_LINKER_FLAGS)
list(APPEND CMAKE_EXE_LINKER_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS})
endif()
add_library(output_test_helper STATIC output_test_helper.cc)
macro(compile_benchmark_test name)
add_executable(${name} "${name}.cc")
target_link_libraries(${name} benchmark ${CMAKE_THREAD_LIBS_INIT})
endmacro(compile_benchmark_test)
macro(compile_output_test name)
add_executable(${name} "${name}.cc" output_test.h)
target_link_libraries(${name} output_test_helper benchmark
${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
endmacro(compile_output_test)
# Demonstration executable
compile_benchmark_test(benchmark_test)
add_test(benchmark benchmark_test --benchmark_min_time=0.01)
@@ -18,6 +30,7 @@ add_test(benchmark benchmark_test --benchmark_min_time=0.01)
compile_benchmark_test(filter_test)
macro(add_filter_test name filter expect)
add_test(${name} filter_test --benchmark_min_time=0.01 --benchmark_filter=${filter} ${expect})
add_test(${name}_list_only filter_test --benchmark_list_tests --benchmark_filter=${filter} ${expect})
endmacro(add_filter_test)
add_filter_test(filter_simple "Foo" 3)
@@ -36,16 +49,50 @@ add_test(options_benchmarks options_test --benchmark_min_time=0.01)
compile_benchmark_test(basic_test)
add_test(basic_benchmark basic_test --benchmark_min_time=0.01)
compile_benchmark_test(diagnostics_test)
add_test(diagnostics_test diagnostics_test --benchmark_min_time=0.01)
compile_benchmark_test(skip_with_error_test)
add_test(skip_with_error_test skip_with_error_test --benchmark_min_time=0.01)
compile_benchmark_test(donotoptimize_test)
add_test(donotoptimize_test donotoptimize_test --benchmark_min_time=0.01)
compile_benchmark_test(fixture_test)
add_test(fixture_test fixture_test --benchmark_min_time=0.01)
compile_benchmark_test(register_benchmark_test)
add_test(register_benchmark_test register_benchmark_test --benchmark_min_time=0.01)
compile_benchmark_test(map_test)
add_test(map_test map_test --benchmark_min_time=0.01)
compile_benchmark_test(cxx03_test)
set_target_properties(cxx03_test
PROPERTIES COMPILE_FLAGS "${CXX03_FLAGS}")
add_test(cxx03 cxx03_test --benchmark_min_time=0.01)
compile_benchmark_test(multiple_ranges_test)
add_test(multiple_ranges_test multiple_ranges_test --benchmark_min_time=0.01)
compile_output_test(reporter_output_test)
add_test(reporter_output_test reporter_output_test --benchmark_min_time=0.01)
check_cxx_compiler_flag(-std=c++03 BENCHMARK_HAS_CXX03_FLAG)
if (BENCHMARK_HAS_CXX03_FLAG)
set(CXX03_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-std=c++11" "-std=c++03" CXX03_FLAGS "${CXX03_FLAGS}")
string(REPLACE "-std=c++0x" "-std=c++03" CXX03_FLAGS "${CXX03_FLAGS}")
compile_benchmark_test(cxx03_test)
set_target_properties(cxx03_test
PROPERTIES COMPILE_FLAGS "${CXX03_FLAGS}")
add_test(cxx03 cxx03_test --benchmark_min_time=0.01)
endif()
# Attempt to work around flaky test failures when running on Appveyor servers.
if (DEFINED ENV{APPVEYOR})
set(COMPLEXITY_MIN_TIME "0.5")
else()
set(COMPLEXITY_MIN_TIME "0.01")
endif()
compile_output_test(complexity_test)
add_test(complexity_benchmark complexity_test --benchmark_min_time=${COMPLEXITY_MIN_TIME})
# Add the coverage command(s)
if(CMAKE_BUILD_TYPE)
@@ -66,7 +113,7 @@ if (${CMAKE_BUILD_TYPE_LOWER} MATCHES "coverage")
COMMAND ${LCOV} -q -a before.lcov -a after.lcov --output-file final.lcov
COMMAND ${LCOV} -q -r final.lcov "'${CMAKE_SOURCE_DIR}/test/*'" -o final.lcov
COMMAND ${GENHTML} final.lcov -o lcov --demangle-cpp --sort -p "${CMAKE_BINARY_DIR}" -t benchmark
DEPENDS filter_test benchmark_test options_test basic_test fixture_test cxx03_test
DEPENDS filter_test benchmark_test options_test basic_test fixture_test cxx03_test complexity_test
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
COMMENT "Running LCOV"
)

View File

@@ -14,7 +14,7 @@ BENCHMARK(BM_empty)->ThreadPerCpu();
void BM_spin_empty(benchmark::State& state) {
while (state.KeepRunning()) {
for (int x = 0; x < state.range_x(); ++x) {
for (int x = 0; x < state.range(0); ++x) {
benchmark::DoNotOptimize(x);
}
}
@@ -23,11 +23,11 @@ BASIC_BENCHMARK_TEST(BM_spin_empty);
BASIC_BENCHMARK_TEST(BM_spin_empty)->ThreadPerCpu();
void BM_spin_pause_before(benchmark::State& state) {
for (int i = 0; i < state.range_x(); ++i) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
while(state.KeepRunning()) {
for (int i = 0; i < state.range_x(); ++i) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
}
@@ -39,11 +39,11 @@ BASIC_BENCHMARK_TEST(BM_spin_pause_before)->ThreadPerCpu();
void BM_spin_pause_during(benchmark::State& state) {
while(state.KeepRunning()) {
state.PauseTiming();
for (int i = 0; i < state.range_x(); ++i) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
state.ResumeTiming();
for (int i = 0; i < state.range_x(); ++i) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
}
@@ -64,11 +64,11 @@ BENCHMARK(BM_pause_during)->UseRealTime()->ThreadPerCpu();
void BM_spin_pause_after(benchmark::State& state) {
while(state.KeepRunning()) {
for (int i = 0; i < state.range_x(); ++i) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
}
for (int i = 0; i < state.range_x(); ++i) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
}
@@ -77,15 +77,15 @@ BASIC_BENCHMARK_TEST(BM_spin_pause_after)->ThreadPerCpu();
void BM_spin_pause_before_and_after(benchmark::State& state) {
for (int i = 0; i < state.range_x(); ++i) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
while(state.KeepRunning()) {
for (int i = 0; i < state.range_x(); ++i) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
}
for (int i = 0; i < state.range_x(); ++i) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
}
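The substantive change in this file is mechanical: the old range_x()/range_y() accessors give way to the indexed state.range(i). A minimal sketch of the new spelling (a hypothetical benchmark, not part of the test):
#include "benchmark/benchmark.h"
static void BM_SpinExample(benchmark::State& state) {
  const int n = state.range(0);  // previously state.range_x()
  while (state.KeepRunning()) {
    for (int i = 0; i < n; ++i) benchmark::DoNotOptimize(i);
  }
}
BENCHMARK(BM_SpinExample)->Range(8, 8 << 10);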

View File

@@ -16,6 +16,7 @@
#include <vector>
#include <chrono>
#include <thread>
#include <utility>
#if defined(__GNUC__)
# define BENCHMARK_NOINLINE __attribute__((noinline))
@@ -66,7 +67,7 @@ BENCHMARK(BM_Factorial)->UseRealTime();
static void BM_CalculatePiRange(benchmark::State& state) {
double pi = 0.0;
while (state.KeepRunning())
pi = CalculatePi(state.range_x());
pi = CalculatePi(state.range(0));
std::stringstream ss;
ss << pi;
state.SetLabel(ss.str());
@@ -86,25 +87,25 @@ BENCHMARK(BM_CalculatePi)->ThreadPerCpu();
static void BM_SetInsert(benchmark::State& state) {
while (state.KeepRunning()) {
state.PauseTiming();
std::set<int> data = ConstructRandomSet(state.range_x());
std::set<int> data = ConstructRandomSet(state.range(0));
state.ResumeTiming();
for (int j = 0; j < state.range_y(); ++j)
for (int j = 0; j < state.range(1); ++j)
data.insert(rand());
}
state.SetItemsProcessed(state.iterations() * state.range_y());
state.SetBytesProcessed(state.iterations() * state.range_y() * sizeof(int));
state.SetItemsProcessed(state.iterations() * state.range(1));
state.SetBytesProcessed(state.iterations() * state.range(1) * sizeof(int));
}
BENCHMARK(BM_SetInsert)->RangePair(1<<10,8<<10, 1,10);
BENCHMARK(BM_SetInsert)->Ranges({{1<<10,8<<10}, {1,10}});
template<typename Container, typename ValueType = typename Container::value_type>
static void BM_Sequential(benchmark::State& state) {
ValueType v = 42;
while (state.KeepRunning()) {
Container c;
for (int i = state.range_x(); --i; )
for (int i = state.range(0); --i; )
c.push_back(v);
}
const size_t items_processed = state.iterations() * state.range_x();
const size_t items_processed = state.iterations() * state.range(0);
state.SetItemsProcessed(items_processed);
state.SetBytesProcessed(items_processed * sizeof(v));
}
@@ -116,8 +117,8 @@ BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>, int)->Arg(512);
#endif
static void BM_StringCompare(benchmark::State& state) {
std::string s1(state.range_x(), '-');
std::string s2(state.range_x(), '-');
std::string s1(state.range(0), '-');
std::string s2(state.range(0), '-');
while (state.KeepRunning())
benchmark::DoNotOptimize(s1.compare(s2));
}
@@ -146,14 +147,14 @@ BENCHMARK(BM_SetupTeardown)->ThreadPerCpu();
static void BM_LongTest(benchmark::State& state) {
double tracker = 0.0;
while (state.KeepRunning()) {
for (int i = 0; i < state.range_x(); ++i)
for (int i = 0; i < state.range(0); ++i)
benchmark::DoNotOptimize(tracker += i);
}
}
BENCHMARK(BM_LongTest)->Range(1<<16,1<<28);
static void BM_ParallelMemset(benchmark::State& state) {
int size = state.range_x() / sizeof(int);
int size = state.range(0) / sizeof(int);
int thread_size = size / state.threads;
int from = thread_size * state.thread_index;
int to = from + thread_size;
@@ -178,7 +179,7 @@ BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4);
static void BM_ManualTiming(benchmark::State& state) {
size_t slept_for = 0;
int microseconds = state.range_x();
int microseconds = state.range(0);
std::chrono::duration<double, std::micro> sleep_duration {
static_cast<double>(microseconds)
};
@@ -202,5 +203,22 @@ static void BM_ManualTiming(benchmark::State& state) {
BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseRealTime();
BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseManualTime();
#if __cplusplus >= 201103L
template <class ...Args>
void BM_with_args(benchmark::State& state, Args&&...) {
while (state.KeepRunning()) {}
}
BENCHMARK_CAPTURE(BM_with_args, int_test, 42, 43, 44);
BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test,
std::string("abc"), std::pair<int, double>(42, 3.8));
void BM_non_template_args(benchmark::State& state, int, double) {
while(state.KeepRunning()) {}
}
BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0);
#endif // __cplusplus >= 201103L
BENCHMARK_MAIN()
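The BENCHMARK_CAPTURE cases added at the end of this file forward extra arguments into the benchmark function at registration time. A minimal sketch of the pattern, with hypothetical names:
#include "benchmark/benchmark.h"
void BM_labelled(benchmark::State& state, const char* label) {
  while (state.KeepRunning()) {}
  state.SetLabel(label);  // the captured argument is available inside the benchmark body
}
BENCHMARK_CAPTURE(BM_labelled, fast_case, "fast");
BENCHMARK_CAPTURE(BM_labelled, slow_case, "slow");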

View File

@@ -0,0 +1,153 @@
#undef NDEBUG
#include "benchmark/benchmark.h"
#include "output_test.h"
#include <cassert>
#include <vector>
#include <algorithm>
#include <cstdlib>
#include <cmath>
namespace {
#define ADD_COMPLEXITY_CASES(...) \
int CONCAT(dummy, __LINE__) = AddComplexityTest(__VA_ARGS__)
int AddComplexityTest(std::string big_o_test_name,
std::string rms_test_name, std::string big_o) {
SetSubstitutions({
{"%bigo_name", big_o_test_name},
{"%rms_name", rms_test_name},
{"%bigo_str", "[ ]*" + std::string(dec_re) + " " + big_o},
{"%bigo", big_o},
{"%rms", "[ ]*[0-9]+ %"}
});
AddCases(TC_ConsoleOut, {
{"^%bigo_name %bigo_str %bigo_str[ ]*$"},
{"^%bigo_name", MR_Not}, // Assert we didn't only match a name.
{"^%rms_name %rms %rms[ ]*$", MR_Next}
});
AddCases(TC_JSONOut, {
{"\"name\": \"%bigo_name\",$"},
{"\"cpu_coefficient\": [0-9]+,$", MR_Next},
{"\"real_coefficient\": [0-9]{1,5},$", MR_Next},
{"\"big_o\": \"%bigo\",$", MR_Next},
{"\"time_unit\": \"ns\"$", MR_Next},
{"}", MR_Next},
{"\"name\": \"%rms_name\",$"},
{"\"rms\": [0-9]+%$", MR_Next},
{"}", MR_Next}
});
AddCases(TC_CSVOut, {
{"^\"%bigo_name\",,%float,%float,%bigo,,,,,$"},
{"^\"%bigo_name\"", MR_Not},
{"^\"%rms_name\",,%float,%float,,,,,,$", MR_Next}
});
return 0;
}
} // end namespace
// ========================================================================= //
// --------------------------- Testing BigO O(1) --------------------------- //
// ========================================================================= //
void BM_Complexity_O1(benchmark::State& state) {
while (state.KeepRunning()) {
for (int i=0; i < 1024; ++i) {
benchmark::DoNotOptimize(&i);
}
}
state.SetComplexityN(state.range(0));
}
BENCHMARK(BM_Complexity_O1) -> Range(1, 1<<18) -> Complexity(benchmark::o1);
BENCHMARK(BM_Complexity_O1) -> Range(1, 1<<18) -> Complexity();
BENCHMARK(BM_Complexity_O1) -> Range(1, 1<<18) -> Complexity([](int){return 1.0; });
const char* big_o_1_test_name = "BM_Complexity_O1_BigO";
const char* rms_o_1_test_name = "BM_Complexity_O1_RMS";
const char* enum_big_o_1 = "\\([0-9]+\\)";
// FIXME: Tolerate both '(1)' and 'lgN' as output when the complexity is auto deduced.
// See https://github.com/google/benchmark/issues/272
const char* auto_big_o_1 = "(\\([0-9]+\\))|(lgN)";
const char* lambda_big_o_1 = "f\\(N\\)";
// Add enum tests
ADD_COMPLEXITY_CASES(big_o_1_test_name, rms_o_1_test_name, enum_big_o_1);
// Add auto enum tests
ADD_COMPLEXITY_CASES(big_o_1_test_name, rms_o_1_test_name, auto_big_o_1);
// Add lambda tests
ADD_COMPLEXITY_CASES(big_o_1_test_name, rms_o_1_test_name, lambda_big_o_1);
// ========================================================================= //
// --------------------------- Testing BigO O(N) --------------------------- //
// ========================================================================= //
std::vector<int> ConstructRandomVector(int size) {
std::vector<int> v;
v.reserve(size);
for (int i = 0; i < size; ++i) {
v.push_back(std::rand() % size);
}
return v;
}
void BM_Complexity_O_N(benchmark::State& state) {
auto v = ConstructRandomVector(state.range(0));
const int item_not_in_vector = state.range(0)*2; // Test worst case scenario (item not in vector)
while (state.KeepRunning()) {
benchmark::DoNotOptimize(std::find(v.begin(), v.end(), item_not_in_vector));
}
state.SetComplexityN(state.range(0));
}
BENCHMARK(BM_Complexity_O_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity(benchmark::oN);
BENCHMARK(BM_Complexity_O_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity([](int n) -> double{return n; });
BENCHMARK(BM_Complexity_O_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity();
const char* big_o_n_test_name = "BM_Complexity_O_N_BigO";
const char* rms_o_n_test_name = "BM_Complexity_O_N_RMS";
const char* enum_auto_big_o_n = "N";
const char* lambda_big_o_n = "f\\(N\\)";
// Add enum tests
ADD_COMPLEXITY_CASES(big_o_n_test_name, rms_o_n_test_name, enum_auto_big_o_n);
// Add lambda tests
ADD_COMPLEXITY_CASES(big_o_n_test_name, rms_o_n_test_name, lambda_big_o_n);
// ========================================================================= //
// ------------------------- Testing BigO O(N*lgN) ------------------------- //
// ========================================================================= //
static void BM_Complexity_O_N_log_N(benchmark::State& state) {
auto v = ConstructRandomVector(state.range(0));
while (state.KeepRunning()) {
std::sort(v.begin(), v.end());
}
state.SetComplexityN(state.range(0));
}
BENCHMARK(BM_Complexity_O_N_log_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity(benchmark::oNLogN);
BENCHMARK(BM_Complexity_O_N_log_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity([](int n) {return n * std::log2(n); });
BENCHMARK(BM_Complexity_O_N_log_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity();
const char* big_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_BigO";
const char* rms_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_RMS";
const char* enum_auto_big_o_n_lg_n = "NlgN";
const char* lambda_big_o_n_lg_n = "f\\(N\\)";
// Add enum tests
ADD_COMPLEXITY_CASES(big_o_n_lg_n_test_name, rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n);
// Add lambda tests
ADD_COMPLEXITY_CASES(big_o_n_lg_n_test_name, rms_o_n_lg_n_test_name, lambda_big_o_n_lg_n);
// ========================================================================= //
// --------------------------- TEST CASES END ------------------------------ //
// ========================================================================= //
int main(int argc, char* argv[]) {
RunOutputTests(argc, argv);
}
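For contrast with the O(1), O(N) and O(N lg N) cases above, a hedged sketch of how a quadratic benchmark would be registered through the same interface; it is not part of the test, and benchmark::oNSquared is assumed to be the corresponding BigO enumerator.
#include "benchmark/benchmark.h"
static void BM_Complexity_O_N_Squared_Sketch(benchmark::State& state) {
  const int n = state.range(0);
  while (state.KeepRunning()) {
    for (int i = 0; i < n; ++i)
      for (int j = 0; j < n; ++j)
        benchmark::DoNotOptimize(i * j);
  }
  state.SetComplexityN(n);
}
// benchmark::oNSquared assumed to name the quadratic complexity enumerator.
BENCHMARK(BM_Complexity_O_N_Squared_Sketch)->RangeMultiplier(2)->Range(1<<4, 1<<8)->Complexity(benchmark::oNSquared);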

View File

@@ -1,5 +1,6 @@
#undef NDEBUG
#include <cstddef>
#include <cassert>
#include "benchmark/benchmark.h"
@@ -15,6 +16,16 @@ void BM_empty(benchmark::State& state) {
}
BENCHMARK(BM_empty);
// The new C++11 interface for args/ranges requires initializer list support.
// Therefore we provide the old interface to support C++03.
void BM_old_arg_range_interface(benchmark::State& state) {
assert((state.range(0) == 1 && state.range(1) == 2) ||
(state.range(0) == 5 && state.range(1) == 6));
while (state.KeepRunning()) {
}
}
BENCHMARK(BM_old_arg_range_interface)->ArgPair(1, 2)->RangePair(5, 5, 6, 6);
template <class T, class U>
void BM_template2(benchmark::State& state) {
BM_empty(state);

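As the comment near the top of this hunk explains, the initializer-list interface needs C++11, so the old pairwise spellings remain available. Side by side, the same registration through both interfaces (illustrative, not part of the test):
#include "benchmark/benchmark.h"
void BM_two_args(benchmark::State& state) {
  // reads state.range(0) and state.range(1)
  while (state.KeepRunning()) {}
}
// C++03-compatible interface, kept for old compilers:
BENCHMARK(BM_two_args)->ArgPair(1, 2)->RangePair(5, 5, 6, 6);
// Equivalent C++11 interface using initializer lists:
BENCHMARK(BM_two_args)->Args({1, 2})->Ranges({{5, 5}, {6, 6}});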
View File

@@ -0,0 +1,61 @@
// Testing:
// State::PauseTiming()
// State::ResumeTiming()
// Test that CHECKs within these functions diagnose when they are called
// outside of the KeepRunning() loop.
//
// NOTE: Users should NOT include or use src/check.h. This is only done in
// order to test library internals.
#include "benchmark/benchmark_api.h"
#include "../src/check.h"
#include <stdexcept>
#include <cstdlib>
#if defined(__GNUC__) && !defined(__EXCEPTIONS)
#define TEST_HAS_NO_EXCEPTIONS
#endif
void TestHandler() {
#ifndef TEST_HAS_NO_EXCEPTIONS
throw std::logic_error("");
#else
std::abort();
#endif
}
void try_invalid_pause_resume(benchmark::State& state) {
#if !defined(NDEBUG) && !defined(TEST_HAS_NO_EXCEPTIONS)
try {
state.PauseTiming();
std::abort();
} catch (std::logic_error const&) {}
try {
state.ResumeTiming();
std::abort();
} catch (std::logic_error const&) {}
#else
(void)state; // avoid unused warning
#endif
}
void BM_diagnostic_test(benchmark::State& state) {
static bool called_once = false;
if (called_once == false) try_invalid_pause_resume(state);
while (state.KeepRunning()) {
benchmark::DoNotOptimize(state.iterations());
}
if (called_once == false) try_invalid_pause_resume(state);
called_once = true;
}
BENCHMARK(BM_diagnostic_test);
int main(int argc, char** argv) {
benchmark::internal::GetAbortHandler() = &TestHandler;
benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
}
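For contrast with the invalid calls this test provokes, the legal pattern keeps PauseTiming()/ResumeTiming() inside the KeepRunning() loop. A short illustrative sketch (not part of the test):
#include "benchmark/benchmark.h"
void BM_with_untimed_setup(benchmark::State& state) {
  while (state.KeepRunning()) {
    state.PauseTiming();   // legal: called from inside the KeepRunning() loop
    // ... per-iteration setup excluded from the measurement ...
    state.ResumeTiming();
    // ... timed work ...
  }
}
BENCHMARK(BM_with_untimed_setup);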

View File

@@ -0,0 +1,36 @@
#include "benchmark/benchmark.h"
#include <cstdint>
namespace {
#if defined(__GNUC__)
std::uint64_t double_up(const std::uint64_t x) __attribute__ ((const));
#endif
std::uint64_t double_up(const std::uint64_t x) {
return x * 2;
}
}
int main(int, char*[]) {
// this test verifies compilation of DoNotOptimize() for some types
char buffer8[8];
benchmark::DoNotOptimize(buffer8);
char buffer20[20];
benchmark::DoNotOptimize(buffer20);
char buffer1024[1024];
benchmark::DoNotOptimize(buffer1024);
benchmark::DoNotOptimize(&buffer1024[0]);
int x = 123;
benchmark::DoNotOptimize(x);
benchmark::DoNotOptimize(&x);
benchmark::DoNotOptimize(x += 42);
benchmark::DoNotOptimize(double_up(x));
return 0;
}
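Beyond this compile-only check, DoNotOptimize() exists so a benchmarked expression is not optimized away entirely. A typical, illustrative use inside a benchmark loop:
#include "benchmark/benchmark.h"
static void BM_Accumulate(benchmark::State& state) {
  while (state.KeepRunning()) {
    int sum = 0;
    for (int i = 0; i < 1000; ++i) sum += i;
    benchmark::DoNotOptimize(sum);  // forces the compiler to materialize the result
  }
}
BENCHMARK(BM_Accumulate);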

View File

@@ -68,24 +68,38 @@ BENCHMARK(BM_FooBa);
int main(int argc, char* argv[]) {
int main(int argc, char** argv) {
bool list_only = false;
for (int i=0; i < argc; ++i)
list_only |= std::string(argv[i]).find("--benchmark_list_tests") != std::string::npos;
benchmark::Initialize(&argc, argv);
TestReporter test_reporter;
benchmark::RunSpecifiedBenchmarks(&test_reporter);
const size_t returned_count = benchmark::RunSpecifiedBenchmarks(&test_reporter);
if (argc == 2) {
// Make sure we ran all of the tests
std::stringstream ss(argv[1]);
size_t expected;
ss >> expected;
size_t expected_return;
ss >> expected_return;
const size_t count = test_reporter.GetCount();
if (count != expected) {
std::cerr << "ERROR: Expected " << expected << " tests to be ran but only "
<< count << " completed" << std::endl;
if (returned_count != expected_return) {
std::cerr << "ERROR: Expected " << expected_return
<< " tests to match the filter but returned_count = "
<< returned_count << std::endl;
return -1;
}
const size_t expected_reports = list_only ? 0 : expected_return;
const size_t reports_count = test_reporter.GetCount();
if (reports_count != expected_reports) {
std::cerr << "ERROR: Expected " << expected_reports
<< " tests to be run but reported_count = " << reports_count
<< std::endl;
return -1;
}
}
return 0;
}

View File

@@ -44,7 +44,7 @@ BENCHMARK_DEFINE_F(MyFixture, Bar)(benchmark::State& st) {
assert(data.get() != nullptr);
assert(*data == 42);
}
st.SetItemsProcessed(st.range_x());
st.SetItemsProcessed(st.range(0));
}
BENCHMARK_REGISTER_F(MyFixture, Bar)->Arg(42);
BENCHMARK_REGISTER_F(MyFixture, Bar)->Arg(42)->ThreadPerCpu();

View File

@@ -17,7 +17,7 @@ std::map<int, int> ConstructRandomMap(int size) {
// Basic version.
static void BM_MapLookup(benchmark::State& state) {
const int size = state.range_x();
const int size = state.range(0);
while (state.KeepRunning()) {
state.PauseTiming();
std::map<int, int> m = ConstructRandomMap(size);
@@ -34,7 +34,7 @@ BENCHMARK(BM_MapLookup)->Range(1 << 3, 1 << 12);
class MapFixture : public ::benchmark::Fixture {
public:
void SetUp(const ::benchmark::State& st) {
m = ConstructRandomMap(st.range_x());
m = ConstructRandomMap(st.range(0));
}
void TearDown(const ::benchmark::State&) {
@@ -45,7 +45,7 @@ class MapFixture : public ::benchmark::Fixture {
};
BENCHMARK_DEFINE_F(MapFixture, Lookup)(benchmark::State& state) {
const int size = state.range_x();
const int size = state.range(0);
while (state.KeepRunning()) {
for (int i = 0; i < size; ++i) {
benchmark::DoNotOptimize(m.find(rand() % size));

View File

@@ -0,0 +1,61 @@
#include "benchmark/benchmark.h"
#include <set>
#include <cassert>
class MultipleRangesFixture : public ::benchmark::Fixture {
public:
MultipleRangesFixture()
: expectedValues({
{1, 3, 5}, {1, 3, 8}, {1, 3, 15}, {2, 3, 5}, {2, 3, 8}, {2, 3, 15},
{1, 4, 5}, {1, 4, 8}, {1, 4, 15}, {2, 4, 5}, {2, 4, 8}, {2, 4, 15},
{1, 7, 5}, {1, 7, 8}, {1, 7, 15}, {2, 7, 5}, {2, 7, 8}, {2, 7, 15},
{7, 6, 3}
})
{
}
void SetUp(const ::benchmark::State& state) {
std::vector<int> ranges = {state.range(0), state.range(1), state.range(2)};
assert(expectedValues.find(ranges) != expectedValues.end());
actualValues.insert(ranges);
}
virtual ~MultipleRangesFixture() {
assert(actualValues.size() == expectedValues.size());
}
std::set<std::vector<int>> expectedValues;
std::set<std::vector<int>> actualValues;
};
BENCHMARK_DEFINE_F(MultipleRangesFixture, Empty)(benchmark::State& state) {
while (state.KeepRunning()) {
int product = state.range(0) * state.range(1) * state.range(2);
for (int x = 0; x < product; x++) {
benchmark::DoNotOptimize(x);
}
}
}
BENCHMARK_REGISTER_F(MultipleRangesFixture, Empty)->RangeMultiplier(2)
->Ranges({{1, 2}, {3, 7}, {5, 15}})->Args({7, 6, 3});
void BM_CheckDefaultArgument(benchmark::State& state) {
// Test that the 'range()' without an argument is the same as 'range(0)'.
assert(state.range() == state.range(0));
assert(state.range() != state.range(1));
while (state.KeepRunning()) {}
}
BENCHMARK(BM_CheckDefaultArgument)->Ranges({{1, 5}, {6, 10}});
static void BM_MultipleRanges(benchmark::State& st) {
while (st.KeepRunning()) {}
}
BENCHMARK(BM_MultipleRanges)->Ranges({{5, 5}, {6, 6}});
BENCHMARK_MAIN()
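The fixture's expectedValues above are exactly what the Ranges() call generates. A comment-only sketch of the expansion, assuming the standard behaviour that each {lo, hi} pair expands to lo, the multiplier powers strictly between the bounds, and hi, with the benchmark then run on the cross product:
// Ranges({{1, 2}, {3, 7}, {5, 15}}) with RangeMultiplier(2) expands per argument to:
//   arg 0: 1, 2
//   arg 1: 3, 4, 7
//   arg 2: 5, 8, 15
// giving 2 * 3 * 3 = 18 argument triples, plus the explicit Args({7, 6, 3}):
// the 19 entries listed in expectedValues above.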

View File

@@ -9,7 +9,7 @@ void BM_basic(benchmark::State& state) {
}
void BM_basic_slow(benchmark::State& state) {
std::chrono::milliseconds sleep_duration(state.range_x());
std::chrono::milliseconds sleep_duration(state.range(0));
while (state.KeepRunning()) {
std::this_thread::sleep_for(
std::chrono::duration_cast<std::chrono::nanoseconds>(sleep_duration)
@@ -23,13 +23,15 @@ BENCHMARK(BM_basic_slow)->Arg(10)->Unit(benchmark::kNanosecond);
BENCHMARK(BM_basic_slow)->Arg(100)->Unit(benchmark::kMicrosecond);
BENCHMARK(BM_basic_slow)->Arg(1000)->Unit(benchmark::kMillisecond);
BENCHMARK(BM_basic)->Range(1, 8);
BENCHMARK(BM_basic)->RangeMultiplier(2)->Range(1, 8);
BENCHMARK(BM_basic)->DenseRange(10, 15);
BENCHMARK(BM_basic)->ArgPair(42, 42);
BENCHMARK(BM_basic)->RangePair(64, 512, 64, 512);
BENCHMARK(BM_basic)->Args({42, 42});
BENCHMARK(BM_basic)->Ranges({{64, 512}, {64, 512}});
BENCHMARK(BM_basic)->MinTime(0.7);
BENCHMARK(BM_basic)->UseRealTime();
BENCHMARK(BM_basic)->ThreadRange(2, 4);
BENCHMARK(BM_basic)->ThreadPerCpu();
BENCHMARK(BM_basic)->Repetitions(3);
void CustomArgs(benchmark::internal::Benchmark* b) {
for (int i = 0; i < 10; ++i) {

72
3rdparty/benchmark/test/output_test.h vendored Normal file
View File

@@ -0,0 +1,72 @@
#ifndef TEST_OUTPUT_TEST_H
#define TEST_OUTPUT_TEST_H
#undef NDEBUG
#include "benchmark/benchmark.h"
#include "../src/re.h"
#include <vector>
#include <string>
#include <initializer_list>
#include <memory>
#include <utility>
#define CONCAT2(x, y) x##y
#define CONCAT(x, y) CONCAT2(x, y)
#define ADD_CASES(...) \
int CONCAT(dummy, __LINE__) = ::AddCases(__VA_ARGS__)
#define SET_SUBSTITUTIONS(...) \
int CONCAT(dummy, __LINE__) = ::SetSubstitutions(__VA_ARGS__)
enum MatchRules {
MR_Default, // Skip non-matching lines until a match is found.
MR_Next, // Match must occur on the next line.
MR_Not // No line between the current position and the next match matches
// the regex
};
struct TestCase {
TestCase(std::string re, int rule = MR_Default);
std::string regex_str;
int match_rule;
std::string substituted_regex;
std::shared_ptr<benchmark::Regex> regex;
};
enum TestCaseID {
TC_ConsoleOut,
TC_ConsoleErr,
TC_JSONOut,
TC_JSONErr,
TC_CSVOut,
TC_CSVErr,
TC_NumID // PRIVATE
};
// Add a list of test cases to be run against the output specified by
// 'ID'
int AddCases(TestCaseID ID, std::initializer_list<TestCase> il);
// Add or set a list of substitutions to be performed on constructed regex's
// See 'output_test_helper.cc' for a list of default substitutions.
int SetSubstitutions(
std::initializer_list<std::pair<std::string, std::string>> il);
// Run all output tests.
void RunOutputTests(int argc, char* argv[]);
// ========================================================================= //
// --------------------------- Misc Utilities ------------------------------ //
// ========================================================================= //
namespace {
const char* const dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?";
} // end namespace
#endif // TEST_OUTPUT_TEST_H
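A minimal sketch of how a test translation unit uses this header, following the pattern of the output tests later in this commit (names are illustrative):
#include "output_test.h"  // also pulls in benchmark/benchmark.h
void BM_demo(benchmark::State& state) { while (state.KeepRunning()) {} }
BENCHMARK(BM_demo);
// Expect one console line for BM_demo; %console_report is a built-in substitution.
ADD_CASES(TC_ConsoleOut, {{"^BM_demo %console_report$"}});
int main(int argc, char* argv[]) { RunOutputTests(argc, argv); }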

View File

@@ -0,0 +1,224 @@
#include "output_test.h"
#include "../src/check.h" // NOTE: check.h is for internal use only!
#include "../src/re.h" // NOTE: re.h is for internal use only
#include <memory>
#include <map>
#include <iostream>
#include <sstream>
// ========================================================================= //
// ------------------------------ Internals -------------------------------- //
// ========================================================================= //
namespace internal { namespace {
using TestCaseList = std::vector<TestCase>;
// Use a vector because the order in which elements are added matters during iteration.
// std::map/unordered_map don't guarantee that.
// For example:
// SetSubstitutions({{"%HelloWorld", "Hello"}, {"%Hello", "Hi"}});
// Substitute("%HelloWorld") // Always expands to Hello.
using SubMap = std::vector<std::pair<std::string, std::string>>;
TestCaseList& GetTestCaseList(TestCaseID ID) {
// Uses function-local statics to ensure initialization occurs
// before first use.
static TestCaseList lists[TC_NumID];
return lists[ID];
}
SubMap& GetSubstitutions() {
// Don't use 'dec_re' from header because it may not yet be initialized.
static std::string dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?";
static SubMap map = {
{"%float", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"},
{"%int", "[ ]*[0-9]+"},
{" %s ", "[ ]+"},
{"%time", "[ ]*[0-9]{1,5} ns"},
{"%console_report", "[ ]*[0-9]{1,5} ns [ ]*[0-9]{1,5} ns [ ]*[0-9]+"},
{"%csv_report", "[0-9]+," + dec_re + "," + dec_re + ",ns,,,,,"}
};
return map;
}
std::string PerformSubstitutions(std::string source) {
SubMap const& subs = GetSubstitutions();
using SizeT = std::string::size_type;
for (auto const& KV : subs) {
SizeT pos;
SizeT next_start = 0;
while ((pos = source.find(KV.first, next_start)) != std::string::npos) {
next_start = pos + KV.second.size();
source.replace(pos, KV.first.size(), KV.second);
}
}
return source;
}
void CheckCase(std::stringstream& remaining_output, TestCase const& TC,
TestCaseList const& not_checks)
{
std::string first_line;
bool on_first = true;
std::string line;
while (remaining_output.eof() == false) {
CHECK(remaining_output.good());
std::getline(remaining_output, line);
if (on_first) {
first_line = line;
on_first = false;
}
for (auto& NC : not_checks) {
CHECK(!NC.regex->Match(line))
<< "Unexpected match for line \"" << line
<< "\" for MR_Not regex \"" << NC.regex_str << "\""
<< "\n actual regex string \"" << TC.substituted_regex << "\""
<< "\n started matching near: " << first_line;
}
if (TC.regex->Match(line)) return;
CHECK(TC.match_rule != MR_Next)
<< "Expected line \"" << line << "\" to match regex \"" << TC.regex_str << "\""
<< "\n actual regex string \"" << TC.substituted_regex << "\""
<< "\n started matching near: " << first_line;
}
CHECK(remaining_output.eof() == false)
<< "End of output reached before match for regex \"" << TC.regex_str
<< "\" was found"
<< "\n actual regex string \"" << TC.substituted_regex << "\""
<< "\n started matching near: " << first_line;
}
void CheckCases(TestCaseList const& checks, std::stringstream& output) {
std::vector<TestCase> not_checks;
for (size_t i=0; i < checks.size(); ++i) {
const auto& TC = checks[i];
if (TC.match_rule == MR_Not) {
not_checks.push_back(TC);
continue;
}
CheckCase(output, TC, not_checks);
not_checks.clear();
}
}
class TestReporter : public benchmark::BenchmarkReporter {
public:
TestReporter(std::vector<benchmark::BenchmarkReporter*> reps)
: reporters_(reps) {}
virtual bool ReportContext(const Context& context) {
bool last_ret = false;
bool first = true;
for (auto rep : reporters_) {
bool new_ret = rep->ReportContext(context);
CHECK(first || new_ret == last_ret)
<< "Reports return different values for ReportContext";
first = false;
last_ret = new_ret;
}
return last_ret;
}
void ReportRuns(const std::vector<Run>& report)
{ for (auto rep : reporters_) rep->ReportRuns(report); }
void Finalize() { for (auto rep : reporters_) rep->Finalize(); }
private:
std::vector<benchmark::BenchmarkReporter*> reporters_;
};
}} // end namespace internal
// ========================================================================= //
// -------------------------- Public API Definitions------------------------ //
// ========================================================================= //
TestCase::TestCase(std::string re, int rule)
: regex_str(std::move(re)), match_rule(rule),
substituted_regex(internal::PerformSubstitutions(regex_str)),
regex(std::make_shared<benchmark::Regex>())
{
std::string err_str;
regex->Init(substituted_regex, &err_str);
CHECK(err_str.empty())
<< "Could not construct regex \"" << substituted_regex << "\""
<< "\n originally \"" << regex_str << "\""
<< "\n got error: " << err_str;
}
int AddCases(TestCaseID ID, std::initializer_list<TestCase> il) {
auto& L = internal::GetTestCaseList(ID);
L.insert(L.end(), il);
return 0;
}
int SetSubstitutions(std::initializer_list<std::pair<std::string, std::string>> il) {
auto& subs = internal::GetSubstitutions();
for (auto const& KV : il) {
bool exists = false;
for (auto& EKV : subs) {
if (EKV.first == KV.first) {
EKV.second = KV.second;
exists = true;
break;
}
}
if (!exists) subs.push_back(KV);
}
return 0;
}
void RunOutputTests(int argc, char* argv[]) {
using internal::GetTestCaseList;
benchmark::Initialize(&argc, argv);
benchmark::ConsoleReporter CR(benchmark::ConsoleReporter::OO_None);
benchmark::JSONReporter JR;
benchmark::CSVReporter CSVR;
struct ReporterTest {
const char* name;
std::vector<TestCase>& output_cases;
std::vector<TestCase>& error_cases;
benchmark::BenchmarkReporter& reporter;
std::stringstream out_stream;
std::stringstream err_stream;
ReporterTest(const char* n,
std::vector<TestCase>& out_tc,
std::vector<TestCase>& err_tc,
benchmark::BenchmarkReporter& br)
: name(n), output_cases(out_tc), error_cases(err_tc), reporter(br) {
reporter.SetOutputStream(&out_stream);
reporter.SetErrorStream(&err_stream);
}
} TestCases[] = {
{"ConsoleReporter", GetTestCaseList(TC_ConsoleOut),
GetTestCaseList(TC_ConsoleErr), CR},
{"JSONReporter", GetTestCaseList(TC_JSONOut),
GetTestCaseList(TC_JSONErr), JR},
{"CSVReporter", GetTestCaseList(TC_CSVOut),
GetTestCaseList(TC_CSVErr), CSVR},
};
// Create the test reporter and run the benchmarks.
std::cout << "Running benchmarks...\n";
internal::TestReporter test_rep({&CR, &JR, &CSVR});
benchmark::RunSpecifiedBenchmarks(&test_rep);
for (auto& rep_test : TestCases) {
std::string msg = std::string("\nTesting ") + rep_test.name + " Output\n";
std::string banner(msg.size() - 1, '-');
std::cout << banner << msg << banner << "\n";
std::cerr << rep_test.err_stream.str();
std::cout << rep_test.out_stream.str();
internal::CheckCases(rep_test.error_cases,rep_test.err_stream);
internal::CheckCases(rep_test.output_cases, rep_test.out_stream);
std::cout << "\n";
}
}

View File

@@ -0,0 +1,149 @@
#undef NDEBUG
#include "benchmark/benchmark.h"
#include "../src/check.h" // NOTE: check.h is for internal use only!
#include <cassert>
#include <vector>
namespace {
class TestReporter : public benchmark::ConsoleReporter {
public:
virtual void ReportRuns(const std::vector<Run>& report) {
all_runs_.insert(all_runs_.end(), begin(report), end(report));
ConsoleReporter::ReportRuns(report);
}
std::vector<Run> all_runs_;
};
struct TestCase {
std::string name;
const char* label;
TestCase(const char* xname) : name(xname), label(nullptr) {}
TestCase(const char* xname, const char* xlabel)
: name(xname), label(xlabel) {}
typedef benchmark::BenchmarkReporter::Run Run;
void CheckRun(Run const& run) const {
CHECK(name == run.benchmark_name) << "expected " << name
<< " got " << run.benchmark_name;
if (label) {
CHECK(run.report_label == label) << "expected " << label
<< " got " << run.report_label;
} else {
CHECK(run.report_label == "");
}
}
};
std::vector<TestCase> ExpectedResults;
int AddCases(std::initializer_list<TestCase> const& v) {
for (auto N : v) {
ExpectedResults.push_back(N);
}
return 0;
}
#define CONCAT(x, y) CONCAT2(x, y)
#define CONCAT2(x, y) x##y
#define ADD_CASES(...) \
int CONCAT(dummy, __LINE__) = AddCases({__VA_ARGS__})
} // end namespace
typedef benchmark::internal::Benchmark* ReturnVal;
//----------------------------------------------------------------------------//
// Test RegisterBenchmark with no additional arguments
//----------------------------------------------------------------------------//
void BM_function(benchmark::State& state) { while (state.KeepRunning()) {} }
BENCHMARK(BM_function);
ReturnVal dummy = benchmark::RegisterBenchmark(
"BM_function_manual_registration",
BM_function);
ADD_CASES({"BM_function"}, {"BM_function_manual_registration"});
//----------------------------------------------------------------------------//
// Test RegisterBenchmark with additional arguments
// Note: GCC versions <= 4.8 do not support this form of RegisterBenchmark because they
// reject the variadic pack expansion of lambda captures.
//----------------------------------------------------------------------------//
#ifndef BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
void BM_extra_args(benchmark::State& st, const char* label) {
while (st.KeepRunning()) {}
st.SetLabel(label);
}
int RegisterFromFunction() {
std::pair<const char*, const char*> cases[] = {
{"test1", "One"},
{"test2", "Two"},
{"test3", "Three"}
};
for (auto& c : cases)
benchmark::RegisterBenchmark(c.first, &BM_extra_args, c.second);
return 0;
}
int dummy2 = RegisterFromFunction();
ADD_CASES(
{"test1", "One"},
{"test2", "Two"},
{"test3", "Three"}
);
#endif // BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
//----------------------------------------------------------------------------//
// Test RegisterBenchmark with different callable types
//----------------------------------------------------------------------------//
struct CustomFixture {
void operator()(benchmark::State& st) {
while (st.KeepRunning()) {}
}
};
void TestRegistrationAtRuntime() {
#ifdef BENCHMARK_HAS_CXX11
{
CustomFixture fx;
benchmark::RegisterBenchmark("custom_fixture", fx);
AddCases({"custom_fixture"});
}
#endif
#ifndef BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
{
int x = 42;
auto capturing_lam = [=](benchmark::State& st) {
while (st.KeepRunning()) {}
st.SetLabel(std::to_string(x));
};
benchmark::RegisterBenchmark("lambda_benchmark", capturing_lam);
AddCases({{"lambda_benchmark", "42"}});
}
#endif
}
int main(int argc, char* argv[]) {
TestRegistrationAtRuntime();
benchmark::Initialize(&argc, argv);
TestReporter test_reporter;
benchmark::RunSpecifiedBenchmarks(&test_reporter);
typedef benchmark::BenchmarkReporter::Run Run;
auto EB = ExpectedResults.begin();
for (Run const& run : test_reporter.all_runs_) {
assert(EB != ExpectedResults.end());
EB->CheckRun(run);
++EB;
}
assert(EB == ExpectedResults.end());
return 0;
}

View File

@@ -0,0 +1,158 @@
#undef NDEBUG
#include "benchmark/benchmark.h"
#include "output_test.h"
#include <utility>
// ========================================================================= //
// ---------------------- Testing Prologue Output -------------------------- //
// ========================================================================= //
ADD_CASES(TC_ConsoleOut, {
{"^Benchmark %s Time %s CPU %s Iterations$", MR_Next},
{"^[-]+$", MR_Next}
});
ADD_CASES(TC_CSVOut, {
{"name,iterations,real_time,cpu_time,time_unit,bytes_per_second,items_per_second,"
"label,error_occurred,error_message"}
});
// ========================================================================= //
// ------------------------ Testing Basic Output --------------------------- //
// ========================================================================= //
void BM_basic(benchmark::State& state) {
while (state.KeepRunning()) {}
}
BENCHMARK(BM_basic);
ADD_CASES(TC_ConsoleOut, {
{"^BM_basic %console_report$"}
});
ADD_CASES(TC_JSONOut, {
{"\"name\": \"BM_basic\",$"},
{"\"iterations\": %int,$", MR_Next},
{"\"real_time\": %int,$", MR_Next},
{"\"cpu_time\": %int,$", MR_Next},
{"\"time_unit\": \"ns\"$", MR_Next},
{"}", MR_Next}
});
ADD_CASES(TC_CSVOut, {
{"^\"BM_basic\",%csv_report$"}
});
// ========================================================================= //
// ------------------------ Testing Error Output --------------------------- //
// ========================================================================= //
void BM_error(benchmark::State& state) {
state.SkipWithError("message");
while(state.KeepRunning()) {}
}
BENCHMARK(BM_error);
ADD_CASES(TC_ConsoleOut, {
{"^BM_error[ ]+ERROR OCCURRED: 'message'$"}
});
ADD_CASES(TC_JSONOut, {
{"\"name\": \"BM_error\",$"},
{"\"error_occurred\": true,$", MR_Next},
{"\"error_message\": \"message\",$", MR_Next}
});
ADD_CASES(TC_CSVOut, {
{"^\"BM_error\",,,,,,,,true,\"message\"$"}
});
// ========================================================================= //
// ----------------------- Testing Complexity Output ----------------------- //
// ========================================================================= //
void BM_Complexity_O1(benchmark::State& state) {
while (state.KeepRunning()) {
}
state.SetComplexityN(state.range(0));
}
BENCHMARK(BM_Complexity_O1)->Range(1, 1<<18)->Complexity(benchmark::o1);
SET_SUBSTITUTIONS({
{"%bigOStr", "[ ]*[0-9]+\\.[0-9]+ \\([0-9]+\\)"},
{"%RMS", "[ ]*[0-9]+ %"}
});
ADD_CASES(TC_ConsoleOut, {
{"^BM_Complexity_O1_BigO %bigOStr %bigOStr[ ]*$"},
{"^BM_Complexity_O1_RMS %RMS %RMS[ ]*$"}
});
// ========================================================================= //
// ----------------------- Testing Aggregate Output ------------------------ //
// ========================================================================= //
// Test that non-aggregate data is printed by default
void BM_Repeat(benchmark::State& state) { while (state.KeepRunning()) {} }
BENCHMARK(BM_Repeat)->Repetitions(3);
ADD_CASES(TC_ConsoleOut, {
{"^BM_Repeat/repeats:3 %console_report$"},
{"^BM_Repeat/repeats:3 %console_report$"},
{"^BM_Repeat/repeats:3 %console_report$"},
{"^BM_Repeat/repeats:3_mean %console_report$"},
{"^BM_Repeat/repeats:3_stddev %console_report$"}
});
ADD_CASES(TC_JSONOut, {
{"\"name\": \"BM_Repeat/repeats:3\",$"},
{"\"name\": \"BM_Repeat/repeats:3\",$"},
{"\"name\": \"BM_Repeat/repeats:3\",$"},
{"\"name\": \"BM_Repeat/repeats:3_mean\",$"},
{"\"name\": \"BM_Repeat/repeats:3_stddev\",$"}
});
ADD_CASES(TC_CSVOut, {
{"^\"BM_Repeat/repeats:3\",%csv_report$"},
{"^\"BM_Repeat/repeats:3\",%csv_report$"},
{"^\"BM_Repeat/repeats:3\",%csv_report$"},
{"^\"BM_Repeat/repeats:3_mean\",%csv_report$"},
{"^\"BM_Repeat/repeats:3_stddev\",%csv_report$"}
});
// Test that a non-repeated test still prints non-aggregate results even when
// only-aggregate reports have been requested
void BM_RepeatOnce(benchmark::State& state) { while (state.KeepRunning()) {} }
BENCHMARK(BM_RepeatOnce)->Repetitions(1)->ReportAggregatesOnly();
ADD_CASES(TC_ConsoleOut, {
{"^BM_RepeatOnce/repeats:1 %console_report$"}
});
ADD_CASES(TC_JSONOut, {
{"\"name\": \"BM_RepeatOnce/repeats:1\",$"}
});
ADD_CASES(TC_CSVOut, {
{"^\"BM_RepeatOnce/repeats:1\",%csv_report$"}
});
// Test that non-aggregate data is not reported
void BM_SummaryRepeat(benchmark::State& state) { while (state.KeepRunning()) {} }
BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->ReportAggregatesOnly();
ADD_CASES(TC_ConsoleOut, {
{".*BM_SummaryRepeat/repeats:3 ", MR_Not},
{"^BM_SummaryRepeat/repeats:3_mean %console_report$"},
{"^BM_SummaryRepeat/repeats:3_stddev %console_report$"}
});
ADD_CASES(TC_JSONOut, {
{".*BM_SummaryRepeat/repeats:3 ", MR_Not},
{"\"name\": \"BM_SummaryRepeat/repeats:3_mean\",$"},
{"\"name\": \"BM_SummaryRepeat/repeats:3_stddev\",$"}
});
ADD_CASES(TC_CSVOut, {
{".*BM_SummaryRepeat/repeats:3 ", MR_Not},
{"^\"BM_SummaryRepeat/repeats:3_mean\",%csv_report$"},
{"^\"BM_SummaryRepeat/repeats:3_stddev\",%csv_report$"}
});
// ========================================================================= //
// --------------------------- TEST CASES END ------------------------------ //
// ========================================================================= //
int main(int argc, char* argv[]) {
RunOutputTests(argc, argv);
}

View File

@@ -0,0 +1,161 @@
#undef NDEBUG
#include "benchmark/benchmark.h"
#include "../src/check.h" // NOTE: check.h is for internal use only!
#include <cassert>
#include <vector>
namespace {
class TestReporter : public benchmark::ConsoleReporter {
public:
virtual bool ReportContext(const Context& context) {
return ConsoleReporter::ReportContext(context);
};
virtual void ReportRuns(const std::vector<Run>& report) {
all_runs_.insert(all_runs_.end(), begin(report), end(report));
ConsoleReporter::ReportRuns(report);
}
TestReporter() {}
virtual ~TestReporter() {}
mutable std::vector<Run> all_runs_;
};
struct TestCase {
std::string name;
bool error_occurred;
std::string error_message;
typedef benchmark::BenchmarkReporter::Run Run;
void CheckRun(Run const& run) const {
CHECK(name == run.benchmark_name) << "expected " << name << " got " << run.benchmark_name;
CHECK(error_occurred == run.error_occurred);
CHECK(error_message == run.error_message);
if (error_occurred) {
//CHECK(run.iterations == 0);
} else {
CHECK(run.iterations != 0);
}
}
};
std::vector<TestCase> ExpectedResults;
int AddCases(const char* base_name, std::initializer_list<TestCase> const& v) {
for (auto TC : v) {
TC.name = base_name + TC.name;
ExpectedResults.push_back(std::move(TC));
}
return 0;
}
#define CONCAT(x, y) CONCAT2(x, y)
#define CONCAT2(x, y) x##y
#define ADD_CASES(...) \
int CONCAT(dummy, __LINE__) = AddCases(__VA_ARGS__)
} // end namespace
void BM_error_before_running(benchmark::State& state) {
state.SkipWithError("error message");
while (state.KeepRunning()) {
assert(false);
}
}
BENCHMARK(BM_error_before_running);
ADD_CASES("BM_error_before_running",
{{"", true, "error message"}});
void BM_error_during_running(benchmark::State& state) {
bool first_iter = true;
while (state.KeepRunning()) {
if (state.range(0) == 1 && state.thread_index <= (state.threads / 2)) {
assert(first_iter);
first_iter = false;
state.SkipWithError("error message");
} else {
state.PauseTiming();
state.ResumeTiming();
}
}
}
BENCHMARK(BM_error_during_running)->Arg(1)->Arg(2)->ThreadRange(1, 8);
ADD_CASES(
"BM_error_during_running",
{{"/1/threads:1", true, "error message"},
{"/1/threads:2", true, "error message"},
{"/1/threads:4", true, "error message"},
{"/1/threads:8", true, "error message"},
{"/2/threads:1", false, ""},
{"/2/threads:2", false, ""},
{"/2/threads:4", false, ""},
{"/2/threads:8", false, ""}}
);
void BM_error_after_running(benchmark::State& state) {
while (state.KeepRunning()) {
benchmark::DoNotOptimize(state.iterations());
}
if (state.thread_index <= (state.threads / 2))
state.SkipWithError("error message");
}
BENCHMARK(BM_error_after_running)->ThreadRange(1, 8);
ADD_CASES(
"BM_error_after_running",
{{"/threads:1", true, "error message"},
{"/threads:2", true, "error message"},
{"/threads:4", true, "error message"},
{"/threads:8", true, "error message"}}
);
void BM_error_while_paused(benchmark::State& state) {
bool first_iter = true;
while (state.KeepRunning()) {
if (state.range(0) == 1 && state.thread_index <= (state.threads / 2)) {
assert(first_iter);
first_iter = false;
state.PauseTiming();
state.SkipWithError("error message");
} else {
state.PauseTiming();
state.ResumeTiming();
}
}
}
BENCHMARK(BM_error_while_paused)->Arg(1)->Arg(2)->ThreadRange(1, 8);
ADD_CASES(
"BM_error_while_paused",
{{"/1/threads:1", true, "error message"},
{"/1/threads:2", true, "error message"},
{"/1/threads:4", true, "error message"},
{"/1/threads:8", true, "error message"},
{"/2/threads:1", false, ""},
{"/2/threads:2", false, ""},
{"/2/threads:4", false, ""},
{"/2/threads:8", false, ""}}
);
int main(int argc, char* argv[]) {
benchmark::Initialize(&argc, argv);
TestReporter test_reporter;
benchmark::RunSpecifiedBenchmarks(&test_reporter);
typedef benchmark::BenchmarkReporter::Run Run;
auto EB = ExpectedResults.begin();
for (Run const& run : test_reporter.all_runs_) {
assert(EB != ExpectedResults.end());
EB->CheckRun(run);
++EB;
}
assert(EB == ExpectedResults.end());
return 0;
}
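Outside of this synthetic test, the intended use of SkipWithError() is to abandon a benchmark whose preconditions fail; once it has been called, KeepRunning() returns false immediately, which is what the cases above rely on. A hedged sketch with a hypothetical precondition:
#include "benchmark/benchmark.h"
static bool ResourceAvailable() { return false; }  // hypothetical stand-in for a real check
static void BM_NeedsResource(benchmark::State& state) {
  if (!ResourceAvailable()) {
    state.SkipWithError("resource unavailable");
  }
  while (state.KeepRunning()) {
    // timed work; never entered once SkipWithError() has been called
  }
}
BENCHMARK(BM_NeedsResource);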

View File

@@ -0,0 +1,30 @@
#!/usr/bin/env python
"""
compare_bench.py - Compare two benchmarks or their results and report the
difference.
"""
import sys
import gbench
from gbench import util, report
def main():
# Parse the command line flags
def usage():
print('compare_bench.py <test1> <test2> [benchmark options]...')
exit(1)
if '--help' in sys.argv or len(sys.argv) < 3:
usage()
tests = sys.argv[1:3]
bench_opts = sys.argv[3:]
bench_opts = list(bench_opts)
# Run the benchmarks and report the results
json1 = gbench.util.run_or_load_benchmark(tests[0], bench_opts)
json2 = gbench.util.run_or_load_benchmark(tests[1], bench_opts)
output_lines = gbench.report.generate_difference_report(json1, json2)
print('Comparing %s to %s' % (tests[0], tests[1]))
for ln in output_lines:
print(ln)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,46 @@
{
"context": {
"date": "2016-08-02 17:44:46",
"num_cpus": 4,
"mhz_per_cpu": 4228,
"cpu_scaling_enabled": false,
"library_build_type": "release"
},
"benchmarks": [
{
"name": "BM_SameTimes",
"iterations": 1000,
"real_time": 10,
"cpu_time": 10,
"time_unit": "ns"
},
{
"name": "BM_2xFaster",
"iterations": 1000,
"real_time": 50,
"cpu_time": 50,
"time_unit": "ns"
},
{
"name": "BM_2xSlower",
"iterations": 1000,
"real_time": 50,
"cpu_time": 50,
"time_unit": "ns"
},
{
"name": "BM_10PercentFaster",
"iterations": 1000,
"real_time": 100,
"cpu_time": 100,
"time_unit": "ns"
},
{
"name": "BM_10PercentSlower",
"iterations": 1000,
"real_time": 100,
"cpu_time": 100,
"time_unit": "ns"
}
]
}

View File

@@ -0,0 +1,46 @@
{
"context": {
"date": "2016-08-02 17:44:46",
"num_cpus": 4,
"mhz_per_cpu": 4228,
"cpu_scaling_enabled": false,
"library_build_type": "release"
},
"benchmarks": [
{
"name": "BM_SameTimes",
"iterations": 1000,
"real_time": 10,
"cpu_time": 10,
"time_unit": "ns"
},
{
"name": "BM_2xFaster",
"iterations": 1000,
"real_time": 25,
"cpu_time": 25,
"time_unit": "ns"
},
{
"name": "BM_2xSlower",
"iterations": 20833333,
"real_time": 100,
"cpu_time": 100,
"time_unit": "ns"
},
{
"name": "BM_10PercentFaster",
"iterations": 1000,
"real_time": 90,
"cpu_time": 90,
"time_unit": "ns"
},
{
"name": "BM_10PercentSlower",
"iterations": 1000,
"real_time": 110,
"cpu_time": 110,
"time_unit": "ns"
}
]
}

View File

@@ -0,0 +1,8 @@
"""Google Benchmark tooling"""
__author__ = 'Eric Fiselier'
__email__ = 'eric@efcs.ca'
__versioninfo__ = (0, 5, 0)
__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
__all__ = []

View File

@@ -0,0 +1,141 @@
"""report.py - Utilities for reporting statistics about benchmark results
"""
import os
class BenchmarkColor(object):
def __init__(self, name, code):
self.name = name
self.code = code
def __repr__(self):
return '%s%r' % (self.__class__.__name__,
(self.name, self.code))
def __format__(self, format):
return self.code
# Benchmark Colors Enumeration
BC_NONE = BenchmarkColor('NONE', '')
BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')
def color_format(use_color, fmt_str, *args, **kwargs):
"""
Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
is False then all color codes in 'args' and 'kwargs' are replaced with
the empty string.
"""
assert use_color is True or use_color is False
if not use_color:
args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE
for arg in args]
kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
for key, arg in kwargs.items()}
return fmt_str.format(*args, **kwargs)
def find_longest_name(benchmark_list):
"""
Return the length of the longest benchmark name in a given list of
benchmark JSON objects
"""
longest_name = 1
for bc in benchmark_list:
if len(bc['name']) > longest_name:
longest_name = len(bc['name'])
return longest_name
def calculate_change(old_val, new_val):
"""
Return a float representing the decimal change between old_val and new_val.
"""
if old_val == 0 and new_val == 0:
return 0.0
if old_val == 0:
return float(new_val - old_val) / (float(old_val + new_val) / 2)
return float(new_val - old_val) / abs(old_val)
def generate_difference_report(json1, json2, use_color=True):
"""
Calculate and report the difference between each test of two benchmarks
runs specified as 'json1' and 'json2'.
"""
first_col_width = find_longest_name(json1['benchmarks']) + 5
def find_test(name):
for b in json2['benchmarks']:
if b['name'] == name:
return b
return None
first_line = "{:<{}s} Time CPU Old New".format(
'Benchmark', first_col_width)
output_strs = [first_line, '-' * len(first_line)]
for bn in json1['benchmarks']:
other_bench = find_test(bn['name'])
if not other_bench:
continue
def get_color(res):
if res > 0.05:
return BC_FAIL
elif res > -0.07:
return BC_WHITE
else:
return BC_CYAN
fmt_str = "{}{:<{}s}{endc} {}{:+.2f}{endc} {}{:+.2f}{endc} {:4d} {:4d}"
tres = calculate_change(bn['real_time'], other_bench['real_time'])
cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time'])
output_strs += [color_format(use_color, fmt_str,
BC_HEADER, bn['name'], first_col_width,
get_color(tres), tres, get_color(cpures), cpures,
bn['cpu_time'], other_bench['cpu_time'],
endc=BC_ENDC)]
return output_strs
###############################################################################
# Unit tests
import unittest
class TestReportDifference(unittest.TestCase):
def load_results(self):
import json
testInputs = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Inputs')
testOutput1 = os.path.join(testInputs, 'test1_run1.json')
testOutput2 = os.path.join(testInputs, 'test1_run2.json')
with open(testOutput1, 'r') as f:
json1 = json.load(f)
with open(testOutput2, 'r') as f:
json2 = json.load(f)
return json1, json2
def test_basic(self):
expect_lines = [
['BM_SameTimes', '+0.00', '+0.00'],
['BM_2xFaster', '-0.50', '-0.50'],
['BM_2xSlower', '+1.00', '+1.00'],
['BM_10PercentFaster', '-0.10', '-0.10'],
['BM_10PercentSlower', '+0.10', '+0.10']
]
json1, json2 = self.load_results()
output_lines = generate_difference_report(json1, json2, use_color=False)
print(output_lines)
self.assertEqual(len(output_lines), len(expect_lines))
for i in xrange(0, len(output_lines)):
parts = [x for x in output_lines[i].split(' ') if x]
self.assertEqual(len(parts), 3)
self.assertEqual(parts, expect_lines[i])
if __name__ == '__main__':
unittest.main()

130
3rdparty/benchmark/tools/gbench/util.py vendored Normal file
View File

@@ -0,0 +1,130 @@
"""util.py - General utilities for running, loading, and processing benchmarks
"""
import json
import os
import tempfile
import subprocess
import sys
# Input file type enumeration
IT_Invalid = 0
IT_JSON = 1
IT_Executable = 2
_num_magic_bytes = 2 if sys.platform.startswith('win') else 4
def is_executable_file(filename):
"""
Return 'True' if 'filename' names a valid file which is likely
an executable. A file is considered an executable if it starts with the
magic bytes for an EXE, Mach-O, or ELF file.
"""
if not os.path.isfile(filename):
return False
with open(filename, 'r') as f:
magic_bytes = f.read(_num_magic_bytes)
if sys.platform == 'darwin':
return magic_bytes in [
'\xfe\xed\xfa\xce', # MH_MAGIC
'\xce\xfa\xed\xfe', # MH_CIGAM
'\xfe\xed\xfa\xcf', # MH_MAGIC_64
'\xcf\xfa\xed\xfe', # MH_CIGAM_64
'\xca\xfe\xba\xbe', # FAT_MAGIC
'\xbe\xba\xfe\xca' # FAT_CIGAM
]
elif sys.platform.startswith('win'):
return magic_bytes == 'MZ'
else:
return magic_bytes == '\x7FELF'
def is_json_file(filename):
"""
Returns 'True' if 'filename' names a valid JSON output file.
'False' otherwise.
"""
try:
with open(filename, 'r') as f:
json.load(f)
return True
except:
pass
return False
def classify_input_file(filename):
"""
Return a tuple (type, msg) where 'type' specifies the classified type
of 'filename'. If 'type' is 'IT_Invalid' then 'msg' is a human readable
string representing the error.
"""
ftype = IT_Invalid
err_msg = None
if not os.path.exists(filename):
err_msg = "'%s' does not exist" % filename
elif not os.path.isfile(filename):
err_msg = "'%s' does not name a file" % filename
elif is_executable_file(filename):
ftype = IT_Executable
elif is_json_file(filename):
ftype = IT_JSON
else:
err_msg = "'%s' does not name a valid benchmark executable or JSON file" % filename
return ftype, err_msg
def check_input_file(filename):
"""
Classify the file named by 'filename' and return the classification.
If the file is classified as 'IT_Invalid' print an error message and exit
the program.
"""
ftype, msg = classify_input_file(filename)
if ftype == IT_Invalid:
print("Invalid input file: %s" % msg)
sys.exit(1)
return ftype
def load_benchmark_results(fname):
"""
Read benchmark output from a file and return the JSON object.
REQUIRES: 'fname' names a file containing JSON benchmark output.
"""
with open(fname, 'r') as f:
return json.load(f)
def run_benchmark(exe_name, benchmark_flags):
"""
Run a benchmark specified by 'exe_name' with the specified
'benchmark_flags'. The benchmark is run directly as a subprocess to preserve
real time console output.
RETURNS: A JSON object representing the benchmark output
"""
thandle, tname = tempfile.mkstemp()
os.close(thandle)
cmd = [exe_name] + benchmark_flags
print("RUNNING: %s" % ' '.join(cmd))
exitCode = subprocess.call(cmd + ['--benchmark_out=%s' % tname])
if exitCode != 0:
print('TEST FAILED...')
sys.exit(exitCode)
json_res = load_benchmark_results(tname)
os.unlink(tname)
return json_res
def run_or_load_benchmark(filename, benchmark_flags):
"""
Get the results for a specified benchmark. If 'filename' specifies
an executable benchmark then the results are generated by running the
benchmark. Otherwise 'filename' must name a valid JSON output file,
which is loaded and the result returned.
"""
ftype = check_input_file(filename)
if ftype == IT_JSON:
return load_benchmark_results(filename)
elif ftype == IT_Executable:
return run_benchmark(filename, benchmark_flags)
else:
assert False # This branch is unreachable