A (very) Brief Look at Multi-Threaded Computing¶

(Back to Overview)

Here we will look at how multi-threaded parallel computing can be achieved from within Julia. The nice thing about Julia is that parallel computing is baked into the standard library. This is only a very brief overview, but it should be enough to get everyone started.

Controlling the number of threads¶

Let's check the number of threads:

In [2]:
Threads.nthreads()
Out[2]:
4

Note that we can't change the number of threads of a running Julia process. It is set either from the command line by running julia -t n, where $n$ is the number of threads, or via the JULIA_NUM_THREADS environment variable.
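For example, either of the following launches Julia with 4 threads (shell commands; a sketch, adapt to your shell):

julia -t 4
JULIA_NUM_THREADS=4 julia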

Multi-Threaded for loops¶

The Threads.@threads macro spreads the iterations of a for loop across the available threads:

In [3]:
a = zeros(10)
Threads.@threads for i = 1:10
    a[i] = Threads.threadid()
end
println(a)
[1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0]

Warning: the assignment of iterations to threads is not deterministic and can change from run to run!
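If you need a reproducible assignment, newer Julia versions let you pass a schedule argument to the macro. A minimal sketch (our addition, not part of the original example) using the :static schedule, which pins one contiguous chunk of iterations to each thread:

a = zeros(10)
Threads.@threads :static for i = 1:10
    a[i] = Threads.threadid()  # with :static, chunk k always runs on thread k
end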

Let's compare the performance of the serial and multi-threaded versions, using @btime from BenchmarkTools:

In [4]:
function sqrt_array(A)
    B = similar(A)
    for i in eachindex(A)
        @inbounds B[i] = sqrt(A[i])
    end
    B
end
Out[4]:
sqrt_array (generic function with 1 method)
In [5]:
function threaded_sqrt_array(A)
    B = similar(A)
    Threads.@threads for i in eachindex(A)
        @inbounds B[i] = sqrt(A[i])
    end
    B
end
Out[5]:
threaded_sqrt_array (generic function with 1 method)
In [6]:
using BenchmarkTools

A = rand(1000, 1000)
@btime sqrt_array(A);
  1.506 ms (2 allocations: 7.63 MiB)
In [8]:
@btime threaded_sqrt_array(A);
  515.931 μs (27 allocations: 7.63 MiB)
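With 4 threads we see roughly a 3x speedup (1.506 ms down to about 516 μs).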

Reductions and multi-threaded code¶

Multi-threaded code can encounter race conditions. Let's look at this code, which computes the sum of the square roots of all elements (a map-reduce):

In [10]:
function sqrt_sum(A)
    s = zero(eltype(A))
    for i in eachindex(A)
        @inbounds s += sqrt(A[i])
    end
    return s
end
Out[10]:
sqrt_sum (generic function with 1 method)

A naive multi-threaded version would be:

In [12]:
function threaded_sqrt_sum(A)
    s = zero(eltype(A))
    Threads.@threads for i in eachindex(A)
        @inbounds s += sqrt(A[i])
    end
    return s
end
Out[12]:
threaded_sqrt_sum (generic function with 1 method)

Since multiple threads read and write s at the same time, we encounter a race condition: each s += sqrt(A[i]) is a non-atomic read-modify-write, so threads overwrite each other's updates and many of them are lost.

We can see this by comparing the results:

In [33]:
sqrt_sum(A)
Out[33]:
666276.2521367806
In [34]:
threaded_sqrt_sum(A)
Out[34]:
166545.27314915543

Atomic Operations¶

A work-around is to use atomic operations:

In [23]:
function threaded_sqrt_sum_atomic(A)
    s = Threads.Atomic{eltype(A)}(zero(eltype(A)))
    Threads.@threads for i in eachindex(A)
        @inbounds Threads.atomic_add!(s, sqrt(A[i]))
    end
    return s[]
end
Out[23]:
threaded_sqrt_sum_atomic (generic function with 1 method)
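Note that Threads.Atomic{T} only supports primitive types (Int64, Float64, and so on), which is fine here since eltype(A) is Float64.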

But this effectively serializes the code (only one atomic add can proceed at a time). Instead we can change the algorithm a bit:

Independent Work¶

Let's break up the sum into independent per-thread pieces of work:

In [28]:
function threaded_sqrt_sum_independent(A)
    # Independent work part
    partial = zeros(eltype(A), Threads.nthreads())
    Threads.@threads for i in eachindex(A)
        @inbounds partial[Threads.threadid()] += sqrt(A[i])
    end
    # Single-threaded part
    s = zero(eltype(A))
    for i in eachindex(partial)
        s += partial[i]
    end
    return s
end
Out[28]:
threaded_sqrt_sum_independent (generic function with 1 method)

And let's see how they compare:

In [18]:
@btime sqrt_sum(A)
  1.465 ms (1 allocation: 16 bytes)
Out[18]:
666276.2521367806
In [24]:
@btime threaded_sqrt_sum_atomic(A)
  42.910 ms (27 allocations: 2.05 KiB)
Out[24]:
666276.2521367898
In [29]:
@btime threaded_sqrt_sum_independent(A)
  677.865 μs (27 allocations: 2.12 KiB)
Out[29]:
666276.2521367706
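
As expected, the independent-work version beats both the serial version and the atomic version, which is far slower than even the serial loop.

One caveat: accumulating into partial[Threads.threadid()] assumes each iteration stays on the thread it started on; newer Julia versions allow tasks to migrate between threads, so the generally recommended pattern is to split the work into explicit chunks and reduce each chunk in its own task. Below is a minimal sketch of that idea using Threads.@spawn; the function name spawned_sqrt_sum and the chunking scheme are our own, not part of the material above:

function spawned_sqrt_sum(A)
    n = Threads.nthreads()
    # Split the linear indices of A into (up to) n contiguous chunks
    chunks = Iterators.partition(eachindex(A), cld(length(A), n))
    # Reduce each chunk in its own task; tasks share no mutable state
    tasks = map(chunks) do idx
        Threads.@spawn begin
            s = zero(eltype(A))
            for i in idx
                @inbounds s += sqrt(A[i])
            end
            s
        end
    end
    # Combine the per-task partial sums serially
    return sum(fetch, tasks)
end

Each task returns its own partial sum and fetch collects them at the end, so no two tasks ever write to the same location.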