C++学习笔记-基准测试

基准测试(Benchmarking)是测量和比较代码性能的重要技术。通过精确的时间测量,我们可以评估不同算法的效率,优化代码性能,并验证优化效果。进行基准测试时,一定要保证被测代码是真实有效的:它必须确实被执行,而没有被编译器优化掉,否则测得的时间没有意义。

基本计时器实现

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#include <iostream>
#include <chrono>
#include <array>
#include <memory>

/**
 * Scope-based stopwatch.
 *
 * The start time is captured on construction. On destruction the elapsed time
 * is written to std::cout as "<microseconds>μs (<milliseconds>ms)" followed by
 * a newline.
 */
class Timer
{
private:
    // steady_clock is monotonic, so the measured interval cannot be distorted
    // by wall-clock adjustments; high_resolution_clock gives no such guarantee.
    std::chrono::steady_clock::time_point m_StartTimepoint;

public:
    Timer()
        : m_StartTimepoint(std::chrono::steady_clock::now())
    {
    }

    // A Timer reports once, from its destructor. A copy would share the start
    // time and print a second report, so copying is disabled.
    Timer(const Timer&) = delete;
    Timer& operator=(const Timer&) = delete;

    ~Timer()
    {
        const auto endTimepoint = std::chrono::steady_clock::now();

        // Subtract first and truncate to whole microseconds once, instead of
        // truncating the start and end time points separately.
        const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
            endTimepoint - m_StartTimepoint).count(); // microseconds
        const double ms = duration * 0.001;           // milliseconds

        std::cout << duration << "μs (" << ms << "ms)" << std::endl;
    }
};

/**
 * Times the creation of 1000 Vector2 objects through four smart-pointer
 * construction paths and prints one labelled Timer report per path.
 */
void BasicTimerDemo()
{
    std::cout << "=== Basic Timer Demo ===" << std::endl;

    /*
     * A loop such as
     *     for (int i = 0; i < 1000000; i++)
     *         value += 2;
     * is not a valid benchmark subject: in a Release build the compiler can
     * compute the result at compile time and remove the loop entirely.
     * Real objects are therefore allocated below.
     */

    struct Vector2
    {
        float x, y;
    };

    // In each scope the array is declared before the Timer, so the Timer is
    // destroyed first and its report excludes releasing the array's elements.
    // Range-for replaces the index loop to avoid comparing int with size_t.

    {
        std::cout << "Make shared_ptr: ";
        std::array<std::shared_ptr<Vector2>, 1000> sharedPtrs;
        Timer timer;
        for (auto& slot : sharedPtrs)
            slot = std::make_shared<Vector2>();
    }

    {
        std::cout << "New shared_ptr: ";
        std::array<std::shared_ptr<Vector2>, 1000> sharedPtrs;
        Timer timer;
        for (auto& slot : sharedPtrs)
            slot = std::shared_ptr<Vector2>(new Vector2());
    }

    {
        std::cout << "Make unique_ptr: ";
        std::array<std::unique_ptr<Vector2>, 1000> uniquePtrs;
        Timer timer;
        for (auto& slot : uniquePtrs)
            slot = std::make_unique<Vector2>();
    }

    {
        std::cout << "New unique_ptr: ";
        std::array<std::unique_ptr<Vector2>, 1000> uniquePtrs;
        Timer timer;
        for (auto& slot : uniquePtrs)
            slot = std::unique_ptr<Vector2>(new Vector2());
    }
}

// Entry point of the basic example: runs the timer demo and exits.
int main()
{
    BasicTimerDemo();
    // Reaching the end of main is equivalent to returning 0.
}

高级基准测试框架

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#include <algorithm>
#include <chrono>
#include <cmath>
#include <functional>
#include <iomanip>
#include <iostream>
#include <limits>
#include <memory>
#include <numeric>
#include <string>
#include <vector>

/**
 * Times repeated calls of a callable and reports summary statistics.
 *
 * Every timed call is stored in nanoseconds; PrintResults() converts the
 * statistics to microseconds for display.
 */
class Benchmark
{
private:
    // BenchmarkSuite::PrintComparison() reads m_Name directly; without this
    // declaration that access to a private member does not compile.
    friend class BenchmarkSuite;

    std::string m_Name;
    std::vector<long long> m_Results; // elapsed time of each timed call, in nanoseconds
    int m_Iterations;                 // timed calls per Run(); never negative

public:
    /**
     * @param name        Label printed with the results.
     * @param iterations  Number of timed calls per Run(). Values below 1 are
     *                    treated as 0 (warm-up only). A negative value would
     *                    otherwise convert to a huge unsigned reservation and
     *                    make reserve() throw std::length_error.
     */
    Benchmark(const std::string& name, int iterations = 100)
        : m_Name(name), m_Iterations(iterations > 0 ? iterations : 0)
    {
        m_Results.reserve(static_cast<std::vector<long long>::size_type>(m_Iterations));
    }

    /**
     * Calls func 10 times untimed as a warm-up, then m_Iterations more times,
     * appending the duration of each timed call to the stored results.
     *
     * @param func  Callable invoked with no arguments.
     */
    template<typename Func>
    void Run(Func&& func)
    {
        std::cout << "Running benchmark: " << m_Name << " (" << m_Iterations << " iterations)" << std::endl;

        // Warm-up
        for (int i = 0; i < 10; ++i)
        {
            func();
        }

        // Timed runs. steady_clock is monotonic, so a wall-clock adjustment
        // cannot produce a negative or inflated sample.
        for (int i = 0; i < m_Iterations; ++i)
        {
            const auto start = std::chrono::steady_clock::now();
            func();
            const auto end = std::chrono::steady_clock::now();

            const auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start);
            m_Results.push_back(duration.count());
        }
    }

    /**
     * Prints average, median, min, max and population standard deviation of
     * the stored timings in microseconds, or a notice when nothing was timed.
     * The formatting state of std::cout is restored before returning.
     */
    void PrintResults() const
    {
        if (m_Results.empty())
        {
            std::cout << "No results to display" << std::endl;
            return;
        }

        const double average = GetAverageNanoseconds();

        // A sorted copy yields min, max and median without reordering m_Results.
        std::vector<long long> sorted = m_Results;
        std::sort(sorted.begin(), sorted.end());
        const long long minTime = sorted.front();
        const long long maxTime = sorted.back();

        // With an even number of samples the median is the mean of the two
        // middle values, not simply the upper one.
        const auto mid = sorted.size() / 2;
        const double median = (sorted.size() % 2 == 1)
            ? static_cast<double>(sorted[mid])
            : sorted[mid - 1] + (sorted[mid] - sorted[mid - 1]) / 2.0;

        // Population standard deviation
        double variance = 0.0;
        for (const long long result : m_Results)
        {
            const double deviation = result - average;
            variance += deviation * deviation;
        }
        variance /= m_Results.size();
        const double stddev = std::sqrt(variance);

        // std::fixed and setprecision are sticky; remember the stream state so
        // later output elsewhere is not silently reformatted.
        const std::ios_base::fmtflags savedFlags = std::cout.flags();
        const std::streamsize savedPrecision = std::cout.precision();

        std::cout << "Results for " << m_Name << ":" << std::endl;
        std::cout << std::fixed << std::setprecision(2);
        std::cout << "  Average: " << average / 1000.0 << " μs" << std::endl;
        std::cout << "  Median:  " << median / 1000.0 << " μs" << std::endl;
        std::cout << "  Min:     " << minTime / 1000.0 << " μs" << std::endl;
        std::cout << "  Max:     " << maxTime / 1000.0 << " μs" << std::endl;
        std::cout << "  Std Dev: " << stddev / 1000.0 << " μs" << std::endl;
        std::cout << std::endl;

        std::cout.flags(savedFlags);
        std::cout.precision(savedPrecision);
    }

    /**
     * @return Mean of the stored timings in nanoseconds, or 0.0 when there are none.
     */
    double GetAverageNanoseconds() const
    {
        if (m_Results.empty()) return 0.0;
        const long long total = std::accumulate(m_Results.begin(), m_Results.end(), 0LL);
        return static_cast<double>(total) / m_Results.size();
    }
};

// 基准测试管理器
class BenchmarkSuite
{
private:
    std::vector<std::unique_ptr<Benchmark>> m_Benchmarks;
    
public:
    template<typename Func>
    void AddBenchmark(const std::string& name, Func&& func, int iterations = 100)
    {
        auto benchmark = std::make_unique<Benchmark>(name, iterations);
        benchmark->Run(std::forward<Func>(func));
        m_Benchmarks.push_back(std::move(benchmark));
    }
    
    void PrintAllResults() const
    {
        std::cout << "=== Benchmark Suite Results ===" << std::endl;
        for (const auto& benchmark : m_Benchmarks)
        {
            benchmark->PrintResults();
        }
    }
    
    void PrintComparison() const
    {
        if (m_Benchmarks.size() < 2) return;
        
        std::cout << "=== Performance Comparison ===" << std::endl;
        
        // 找到最快的基准测试
        double fastestTime = std::numeric_limits<double>::max();
        std::string fastestName;
        
        for (const auto& benchmark : m_Benchmarks)
        {
            double avgTime = benchmark->GetAverageNanoseconds();
            if (avgTime < fastestTime)
            {
                fastestTime = avgTime;
                fastestName = benchmark->m_Name;
            }
        }
        
        std::cout << "Fastest: " << fastestName << std::endl;
        std::cout << "Relative performance:" << std::endl;
        
        for (const auto& benchmark : m_Benchmarks)
        {
            double avgTime = benchmark->GetAverageNanoseconds();
            double ratio = avgTime / fastestTime;
            std::cout << "  " << std::setw(20) << std::left << benchmark->m_Name 
                      << ": " << std::fixed << std::setprecision(2) << ratio << "x" << std::endl;
        }
        std::cout << std::endl;
    }
};

容器性能比较

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#include <vector>
#include <list>
#include <deque>
#include <set>
#include <unordered_set>
#include <random>

/**
 * Benchmarks inserting the same 10000 random integers into several standard
 * containers, then prints the per-benchmark results and a comparison.
 */
void ContainerBenchmarks()
{
    std::cout << "=== Container Performance Benchmarks ===" << std::endl;

    const int elementCount = 10000;

    // Shared input: elementCount random values drawn from [1, 100000].
    std::random_device seedSource;
    std::mt19937 engine(seedSource());
    std::uniform_int_distribution<> pick(1, 100000);

    std::vector<int> testData;
    for (int remaining = elementCount; remaining > 0; --remaining)
    {
        testData.push_back(pick(engine));
    }

    // Appends every test value to the back of a sequence container.
    const auto appendAll = [&testData](auto& container) {
        for (const int item : testData)
        {
            container.push_back(item);
        }
    };

    // Inserts every test value into a set-like container.
    const auto insertAll = [&testData](auto& container) {
        for (const int item : testData)
        {
            container.insert(item);
        }
    };

    BenchmarkSuite suite;

    // vector, growing on demand
    suite.AddBenchmark("Vector push_back", [&]() {
        std::vector<int> target;
        appendAll(target);
    }, 50);

    // vector with capacity reserved up front
    suite.AddBenchmark("Vector reserve + push_back", [&]() {
        std::vector<int> target;
        target.reserve(elementCount);
        appendAll(target);
    }, 50);

    // list
    suite.AddBenchmark("List push_back", [&]() {
        std::list<int> target;
        appendAll(target);
    }, 50);

    // deque
    suite.AddBenchmark("Deque push_back", [&]() {
        std::deque<int> target;
        appendAll(target);
    }, 50);

    // ordered set
    suite.AddBenchmark("Set insert", [&]() {
        std::set<int> target;
        insertAll(target);
    }, 20);

    // hash set
    suite.AddBenchmark("Unordered_set insert", [&]() {
        std::unordered_set<int> target;
        insertAll(target);
    }, 20);

    suite.PrintAllResults();
    suite.PrintComparison();
}

算法性能比较

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#include <algorithm>
#include <random>

/**
 * Benchmarks sorting, searching and summation over 100000 random integers,
 * then prints the per-benchmark results and a comparison.
 */
void AlgorithmBenchmarks()
{
    std::cout << "=== Algorithm Performance Benchmarks ===" << std::endl;

    const int dataSize = 100000;
    std::vector<int> originalData;
    originalData.reserve(dataSize); // size is known up front; avoid regrowth

    // Generate random data in [1, 1000000]
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<> dis(1, 1000000);

    for (int i = 0; i < dataSize; ++i)
    {
        originalData.push_back(dis(gen));
    }

    BenchmarkSuite suite;

    // Sorting: each run sorts a fresh copy, so the copy is part of the timing.
    suite.AddBenchmark("std::sort", [&]() {
        std::vector<int> data = originalData;
        std::sort(data.begin(), data.end());
    }, 10);

    suite.AddBenchmark("std::stable_sort", [&]() {
        std::vector<int> data = originalData;
        std::stable_sort(data.begin(), data.end());
    }, 10);

    // Searching (requires sorted data); the target is the middle element.
    std::vector<int> sortedData = originalData;
    std::sort(sortedData.begin(), sortedData.end());
    int searchValue = sortedData[dataSize / 2];

    // Results are stored into a volatile so they count as used.
    suite.AddBenchmark("Linear search", [&]() {
        auto it = std::find(sortedData.begin(), sortedData.end(), searchValue);
        volatile bool found = (it != sortedData.end());
        (void)found;
    }, 100);

    suite.AddBenchmark("Binary search", [&]() {
        volatile bool found = std::binary_search(sortedData.begin(), sortedData.end(), searchValue);
        (void)found;
    }, 100);

    // Summation
    suite.AddBenchmark("Accumulate", [&]() {
        volatile long long sum = std::accumulate(originalData.begin(), originalData.end(), 0LL);
        (void)sum;
    }, 50);

    suite.AddBenchmark("Manual sum", [&]() {
        // Accumulate in a plain local and publish once. Accumulating directly
        // into a volatile forces a load and a store on every iteration, which
        // would time volatile memory traffic instead of the summation and make
        // the comparison with "Accumulate" unfair.
        long long total = 0;
        for (int value : originalData)
        {
            total += value;
        }
        volatile long long sum = total;
        (void)sum;
    }, 50);

    suite.PrintAllResults();
    suite.PrintComparison();
}

内存分配性能测试

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#include <memory>

/**
 * Benchmarks creating 10000 ints through raw new/delete, unique_ptr and
 * shared_ptr, with a stack buffer as the no-allocation baseline, then prints
 * the per-benchmark results and a comparison.
 */
void MemoryAllocationBenchmarks()
{
    std::cout << "=== Memory Allocation Benchmarks ===" << std::endl;

    const int allocationCount = 10000;

    BenchmarkSuite suite;

    // Raw pointers: naked new/delete is the thing being measured here.
    suite.AddBenchmark("Raw pointer new/delete", [&]() {
        std::vector<int*> ptrs;
        ptrs.reserve(allocationCount);

        for (int i = 0; i < allocationCount; ++i)
        {
            ptrs.push_back(new int(i));
        }

        for (int* ptr : ptrs)
        {
            delete ptr;
        }
    }, 10);

    // unique_ptr: the objects are released when ptrs is destroyed at the end
    // of the lambda, so deallocation is inside the timed region as well.
    suite.AddBenchmark("unique_ptr", [&]() {
        std::vector<std::unique_ptr<int>> ptrs;
        ptrs.reserve(allocationCount);

        for (int i = 0; i < allocationCount; ++i)
        {
            ptrs.push_back(std::make_unique<int>(i));
        }
    }, 10);

    // shared_ptr
    suite.AddBenchmark("shared_ptr", [&]() {
        std::vector<std::shared_ptr<int>> ptrs;
        ptrs.reserve(allocationCount);

        for (int i = 0; i < allocationCount; ++i)
        {
            ptrs.push_back(std::make_shared<int>(i));
        }
    }, 10);

    // Stack allocation (baseline). A std::vector<int> keeps its elements in a
    // heap buffer, so it cannot serve as the stack case; a local array is used
    // instead. The array is volatile so that every store must be performed and
    // the loop cannot be removed by the optimizer.
    suite.AddBenchmark("Stack allocation", [&]() {
        volatile int values[allocationCount];

        for (int i = 0; i < allocationCount; ++i)
        {
            values[i] = i;
        }
    }, 10);

    suite.PrintAllResults();
    suite.PrintComparison();
}

字符串操作性能测试

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#include <string>
#include <sstream>

/**
 * Benchmarks four ways of building a string from 1000 copies of a short piece
 * and two ways of searching a long string, then prints the per-benchmark
 * results and a comparison.
 */
void StringBenchmarks()
{
    std::cout << "=== String Operation Benchmarks ===" << std::endl;

    const int iterations = 1000;
    const std::string baseString = "Hello, World! ";

    BenchmarkSuite suite;

    // --- Concatenation ---

    // operator+ builds a new string on every step.
    const auto joinWithPlus = [&]() {
        std::string joined;
        for (int step = iterations; step > 0; --step)
        {
            joined = joined + baseString;
        }
    };
    suite.AddBenchmark("String concatenation (+)", joinWithPlus, 10);

    // operator+= appends in place.
    const auto joinWithPlusAssign = [&]() {
        std::string joined;
        for (int step = iterations; step > 0; --step)
        {
            joined += baseString;
        }
    };
    suite.AddBenchmark("String concatenation (+=)", joinWithPlusAssign, 10);

    // Same as above, with the final capacity reserved first.
    const auto joinWithReserve = [&]() {
        std::string joined;
        joined.reserve(iterations * baseString.length());
        for (int step = iterations; step > 0; --step)
        {
            joined += baseString;
        }
    };
    suite.AddBenchmark("String reserve + concatenation", joinWithReserve, 10);

    // Stream insertion followed by one str() extraction.
    const auto joinWithStream = [&]() {
        std::stringstream buffer;
        for (int step = iterations; step > 0; --step)
        {
            buffer << baseString;
        }
        std::string joined = buffer.str();
    };
    suite.AddBenchmark("Stringstream", joinWithStream, 10);

    // --- Searching ---

    // Haystack: 10000 filler pieces with the needle at the very end.
    std::string longString;
    for (int piece = 10000; piece > 0; --piece)
    {
        longString.append("test string data ");
    }
    longString.append("target");

    // Forward search
    const auto searchForward = [&]() {
        volatile size_t pos = longString.find("target");
    };
    suite.AddBenchmark("String find", searchForward, 100);

    // Backward search
    const auto searchBackward = [&]() {
        volatile size_t pos = longString.rfind("target");
    };
    suite.AddBenchmark("String rfind", searchBackward, 100);

    suite.PrintAllResults();
    suite.PrintComparison();
}

编译器优化对比

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
/**
 * Contrasts three versions of the same summation loop to show how compiler
 * optimization can invalidate a benchmark, then prints the per-benchmark
 * results and a comparison.
 *
 * The sum of 0..999999 is 499999500000, which does not fit in a 32-bit int.
 * Signed overflow is undefined behavior, so every accumulator is long long.
 */
void OptimizationDemo()
{
    std::cout << "=== Compiler Optimization Demo ===" << std::endl;

    const int iterations = 1000000;

    BenchmarkSuite suite;

    // Code the compiler may optimize away
    suite.AddBenchmark("Optimizable loop", [&]() {
        long long sum = 0;
        for (int i = 0; i < iterations; ++i)
        {
            sum += i;
        }
        // sum is never used, so the compiler may remove this loop entirely.
    }, 10);

    // Code protected from optimization: volatile forces every read and write
    // of sum and i to actually happen.
    suite.AddBenchmark("Non-optimizable loop", [&]() {
        volatile long long sum = 0;
        for (volatile int i = 0; i < iterations; ++i)
        {
            sum += i;
        }
    }, 10);

    // Using the result to keep the computation alive
    suite.AddBenchmark("Result used", [&]() {
        long long sum = 0;
        for (int i = 0; i < iterations; ++i)
        {
            sum += i;
        }
        // Use the result so the computation cannot be discarded as dead code.
        if (sum == 0) std::cout << "Unexpected result" << std::endl;
    }, 10);

    suite.PrintAllResults();
    suite.PrintComparison();
}

// Entry point of the full example: runs the timer demo, then every benchmark
// group in a fixed order.
int main()
{
    BasicTimerDemo();
    std::cout << std::endl;

    ContainerBenchmarks();
    AlgorithmBenchmarks();
    MemoryAllocationBenchmarks();
    StringBenchmarks();
    OptimizationDemo();
    // Reaching the end of main is equivalent to returning 0.
}

总结

  1. 基准测试重要性:准确测量代码性能,指导优化决策
  2. 测试有效性
    • 确保测试的对象是实际有效的
    • 防止编译器优化掉测试代码
    • 使用volatile关键字或实际使用结果
  3. 测试方法
    • 预热阶段消除冷启动影响
    • 多次测试取统计值
    • 计算平均值、中位数、标准差等
  4. 性能比较
    • 容器性能(尾部插入):vector > deque > list,预分配(reserve)显著提升性能
    • 算法性能:二分查找 >> 线性查找
    • 内存分配:栈分配 > unique_ptr > shared_ptr > 原始指针(unique_ptr 相对原始 new/delete 几乎没有额外开销,这两者之间的差距更可能来自测量噪声,应以实测为准)
    • 字符串操作:预分配 > += > +,stringstream适合大量连接
  5. 注意事项
    • 编译器优化可能影响测试结果
    • 不同编译选项(Debug/Release)结果差异巨大
    • 测试环境要保持一致
  6. 最佳实践
    • 使用专业的基准测试框架
    • 测试真实的使用场景
    • 关注统计显著性
    • 在目标平台上测试
updated 2025-09-20