Performance Ninja -- False Sharing

  • Published 27 Oct 2024

COMMENTS • 3

  • @virajcz7143 · 1 year ago

    beautiful! watched so many videos but finally understood it after watching your video! great infographics!

  • @easyperf3992 · 2 years ago

    Here is the complete code of the solution:
    ```
    #include "solution.hpp"
    #include
    #include
    #include
    #include
    size_t solution(const std::vector &data, int thread_count) {
    // Using std::atomic counters to disallow compiler to promote `target`
    // memory location into a register. This way we ensure that the store
    // to `target` stays inside the loop.
    #define CACHELINE_ALIGN alignas(64)
    struct CACHELINE_ALIGN Accumulator {
    std::atomic value = 0;
    };
    std::vector accumulators(thread_count);
    #pragma omp parallel num_threads(thread_count) default(none) \
    shared(accumulators, data)
    {
    int target_index = omp_get_thread_num();
    auto &target = accumulators[target_index];
    #pragma omp for
    for (int i = 0; i < data.size(); i++) {
    // Perform computation on each input
    auto item = data[i];
    item += 1000;
    item ^= 0xADEDAE;
    item |= (item >> 24);
    // Write result to accumulator
    target.value += item % 13;
    }
    }
    size_t result = 0;
    for (const auto &accumulator : accumulators) {
    result += accumulator.value;
    }
    return result;
    }
    ```
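
    To see why the alignas(64) matters, here is a minimal standalone sketch (my own illustration, not from the lab): without the alignment, several 8-byte atomic counters pack into one 64-byte cache line, so threads incrementing neighbouring counters keep invalidating that line in each other's caches; padding each counter out to a full line gives every thread exclusive ownership of its line.

    ```
    #include <atomic>
    #include <cstdint>

    // Packed: plain per-thread counter, typically 8 bytes, so several of them
    // end up on the same 64-byte cache line -> false sharing between threads.
    struct Packed {
      std::atomic<uint64_t> value{0};
    };

    // Padded: the same counter, but aligned (and therefore padded out) to a
    // full cache line, so each thread's counter owns its line exclusively.
    struct alignas(64) Padded {
      std::atomic<uint64_t> value{0};
    };

    static_assert(alignof(Padded) == 64,
                  "each Padded counter starts on its own 64-byte line");
    ```

    On compilers that provide it, std::hardware_destructive_interference_size from <new> can be used in place of the hard-coded 64.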

  • @easyperf3992 · 2 years ago · +1

    perf c2c, the Linux perf tool for finding cache-line contention (false sharing):
    joemario.github.io/blog/2016/09/01/c2c-blog/