Path: blob/main/src/callbacks_step/analysis_dg2d_parallel.jl
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
# Since these FMAs can increase the performance of many numerical algorithms,
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
#! format: noindent

function calc_error_norms(func, u, t, analyzer,
                          mesh::ParallelTreeMesh{2}, equations, initial_condition,
                          dg::DGSEM, cache, cache_analysis)
    l2_errors, linf_errors = calc_error_norms_per_element(func, u, t, analyzer,
                                                          mesh, equations,
                                                          initial_condition,
                                                          dg, cache, cache_analysis)

    # Collect local error norms for each element on root process. That way, when aggregating the L2
    # errors, the order of summation is the same as in the serial case to ensure exact equality.
    # This facilitates easier parallel development and debugging (see
    # https://github.com/trixi-framework/Trixi.jl/pull/850#pullrequestreview-757463943 for details).
    # Note that this approach does not scale.
    if mpi_isroot()
        global_l2_errors = zeros(eltype(l2_errors), cache.mpi_cache.n_elements_global)
        global_linf_errors = similar(global_l2_errors)

        n_elements_by_rank = parent(cache.mpi_cache.n_elements_by_rank) # convert OffsetArray to Array
        l2_buf = MPI.VBuffer(global_l2_errors, n_elements_by_rank)
        linf_buf = MPI.VBuffer(global_linf_errors, n_elements_by_rank)
        MPI.Gatherv!(l2_errors, l2_buf, mpi_root(), mpi_comm())
        MPI.Gatherv!(linf_errors, linf_buf, mpi_root(), mpi_comm())
    else
        MPI.Gatherv!(l2_errors, nothing, mpi_root(), mpi_comm())
        MPI.Gatherv!(linf_errors, nothing, mpi_root(), mpi_comm())
    end

    # Aggregate element error norms on root process
    if mpi_isroot()
        # sum(global_l2_errors) does not produce the same result as in the serial case, thus a
        # hand-written loop is used
        l2_error = zero(eltype(global_l2_errors))
        for error in global_l2_errors
            l2_error += error
        end
        linf_error = reduce((x, y) -> max.(x, y), global_linf_errors)

        # For L2 error, divide by total volume
        total_volume_ = total_volume(mesh)
        l2_error = @. sqrt(l2_error / total_volume_)
    else
        l2_error = convert(eltype(l2_errors), NaN * zero(eltype(l2_errors)))
        linf_error = convert(eltype(linf_errors), NaN * zero(eltype(linf_errors)))
    end

    return l2_error, linf_error
end

function calc_error_norms_per_element(func, u, t, analyzer,
                                      mesh::ParallelTreeMesh{2}, equations,
                                      initial_condition,
                                      dg::DGSEM, cache, cache_analysis)
    @unpack vandermonde, weights = analyzer
    @unpack node_coordinates = cache.elements
    @unpack u_local, u_tmp1, x_local, x_tmp1 = cache_analysis

    # Set up data structures
    T = typeof(zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)))
    l2_errors = zeros(T, nelements(dg, cache))
    linf_errors = copy(l2_errors)

    # Iterate over all elements for error calculations
    for element in eachelement(dg, cache)
        # Interpolate solution and node locations to analysis nodes
        multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1)
        multiply_dimensionwise!(x_local, vandermonde,
                                view(node_coordinates, :, :, :, element), x_tmp1)

        # Calculate errors at each analysis node
        volume_jacobian_ = volume_jacobian(element, mesh, cache)

        for j in eachnode(analyzer), i in eachnode(analyzer)
            u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j),
                                        t, equations)
            diff = func(u_exact, equations) -
                   func(get_node_vars(u_local, equations, dg, i, j), equations)
            l2_errors[element] += diff .^ 2 *
                                  (weights[i] * weights[j] * volume_jacobian_)
            linf_errors[element] = @. max(linf_errors[element], abs(diff))
        end
    end

    return l2_errors, linf_errors
end
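# Illustrative sketch (not part of Trixi.jl and not called anywhere): the
# gather-then-sum pattern used in `calc_error_norms` above, shown in isolation.
# `local_values` would hold one entry per local element and `counts_by_rank` the
# number of elements on each rank, analogous to
# `parent(cache.mpi_cache.n_elements_by_rank)`. Gathering in rank order and
# summing with an explicit loop reproduces the serial summation order exactly.
function _gather_and_sum_sketch(local_values, counts_by_rank)
    if mpi_isroot()
        # Receive all per-element values in rank order on the root process
        global_values = zeros(eltype(local_values), sum(counts_by_rank))
        MPI.Gatherv!(local_values, MPI.VBuffer(global_values, counts_by_rank),
                     mpi_root(), mpi_comm())

        # Sum in the same order as a serial run would
        total = zero(eltype(global_values))
        for value in global_values
            total += value
        end
        return total
    else
        # Non-root ranks only send; they do not obtain the global sum
        MPI.Gatherv!(local_values, nothing, mpi_root(), mpi_comm())
        return nothing
    end
end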
function calc_error_norms(func, u, t, analyzer,
                          mesh::Union{ParallelP4estMesh{2}, ParallelT8codeMesh{2}},
                          equations,
                          initial_condition, dg::DGSEM, cache, cache_analysis)
    @unpack vandermonde, weights = analyzer
    @unpack node_coordinates, inverse_jacobian = cache.elements
    @unpack u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1 = cache_analysis

    # Set up data structures
    l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations))
    linf_error = copy(l2_error)
    volume = zero(real(mesh))

    # Iterate over all elements for error calculations
    for element in eachelement(dg, cache)
        # Interpolate solution and node locations to analysis nodes
        multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1)
        multiply_dimensionwise!(x_local, vandermonde,
                                view(node_coordinates, :, :, :, element), x_tmp1)
        multiply_scalar_dimensionwise!(jacobian_local, vandermonde,
                                       inv.(view(inverse_jacobian, :, :, element)),
                                       jacobian_tmp1)

        # Calculate errors at each analysis node
        for j in eachnode(analyzer), i in eachnode(analyzer)
            u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j),
                                        t, equations)
            diff = func(u_exact, equations) -
                   func(get_node_vars(u_local, equations, dg, i, j), equations)
            # We take absolute value as we need the Jacobian here for the volume calculation
            abs_jacobian_local_ij = abs(jacobian_local[i, j])

            l2_error += diff .^ 2 * (weights[i] * weights[j] * abs_jacobian_local_ij)
            linf_error = @. max(linf_error, abs(diff))
            volume += weights[i] * weights[j] * abs_jacobian_local_ij
        end
    end

    # Accumulate local results on root process
    global_l2_error = Vector(l2_error)
    global_linf_error = Vector(linf_error)
    MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm())
    # Base.max instead of max needed, see comment in src/auxiliary/math.jl
    MPI.Reduce!(global_linf_error, Base.max, mpi_root(), mpi_comm())
    total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
    if mpi_isroot()
        l2_error = convert(typeof(l2_error), global_l2_error)
        linf_error = convert(typeof(linf_error), global_linf_error)
        # For L2 error, divide by total volume
        l2_error = @. sqrt(l2_error / total_volume)
    else
        l2_error = convert(typeof(l2_error), NaN * global_l2_error)
        linf_error = convert(typeof(linf_error), NaN * global_linf_error)
    end

    return l2_error, linf_error
end

function integrate_via_indices(func::Func, u,
                               mesh::ParallelTreeMesh{2}, equations, dg::DGSEM, cache,
                               args...; normalize = true) where {Func}
    # call the method accepting a general `mesh::TreeMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    local_integral = invoke(integrate_via_indices,
                            Tuple{typeof(func), typeof(u), TreeMesh{2},
                                  typeof(equations),
                                  typeof(dg), typeof(cache), map(typeof, args)...},
                            func, u, mesh, equations, dg, cache, args...,
                            normalize = normalize)

    # OBS! Global results are only calculated on MPI root, all other domains receive `nothing`
    global_integral = MPI.Reduce!(Ref(local_integral), +, mpi_root(), mpi_comm())
    if mpi_isroot()
        integral = convert(typeof(local_integral), global_integral[])
    else
        integral = convert(typeof(local_integral), NaN * local_integral)
    end

    return integral
end
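# Illustrative sketch (not part of Trixi.jl and not called anywhere): the
# root-only reduction pattern shared by the `integrate_via_indices` methods in
# this file. `local_integral` stands for a per-rank partial result (a number or
# `SVector`, as in the methods above); only the root process obtains the global
# sum, all other ranks return `NaN`s of the matching type so that accidental use
# of their result is easy to spot.
function _reduce_on_root_sketch(local_integral)
    global_integral = MPI.Reduce!(Ref(local_integral), +, mpi_root(), mpi_comm())
    if mpi_isroot()
        return convert(typeof(local_integral), global_integral[])
    else
        return convert(typeof(local_integral), NaN * local_integral)
    end
end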
function integrate_via_indices(func::Func, u,
                               mesh::Union{ParallelP4estMesh{2}, ParallelT8codeMesh{2}},
                               equations,
                               dg::DGSEM, cache, args...; normalize = true) where {Func}
    @unpack weights = dg.basis

    # Initialize integral with zeros of the right shape
    # Pass `zeros(eltype(u), nvariables(equations), nnodes(dg), nnodes(dg), 1)`
    # to `func` since `u` might be empty, if the current rank has no elements.
    # See also https://github.com/trixi-framework/Trixi.jl/issues/1096, and
    # https://github.com/trixi-framework/Trixi.jl/pull/2126/files/7cbc57cfcba93e67353566e10fce1f3edac27330#r1814483243.
    integral = zero(func(zeros(eltype(u), nvariables(equations), nnodes(dg), nnodes(dg),
                               1), 1, 1, 1, equations, dg, args...))
    volume = zero(real(mesh))

    # Use quadrature to numerically integrate over entire domain
    @batch reduction=((+, integral), (+, volume)) for element in eachelement(dg, cache)
        for j in eachnode(dg), i in eachnode(dg)
            volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, element]))
            integral += volume_jacobian * weights[i] * weights[j] *
                        func(u, i, j, element, equations, dg, args...)
            volume += volume_jacobian * weights[i] * weights[j]
        end
    end

    global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm())
    total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
    if mpi_isroot()
        integral = convert(typeof(integral), global_integral[])
    else
        integral = convert(typeof(integral), NaN * integral)
        total_volume = volume # non-root processes receive nothing from reduce -> overwrite
    end

    # Normalize with total volume
    if normalize
        integral = integral / total_volume
    end

    return integral
end
end # @muladd
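# Illustrative usage sketch (hypothetical, not part of this file): with
# `normalize = true`, `integrate_via_indices` divides the quadrature sum by the
# total volume and thus returns a volume-weighted mean of the integrand.
# Assuming `u`, `mesh`, `equations`, `dg`, and `cache` come from a running
# parallel simulation, integrating the first conservative variable could look like
#
#     mean_u1 = integrate_via_indices(u, mesh, equations, dg, cache;
#                                     normalize = true) do u, i, j, element,
#                                                          equations, dg
#         u[1, i, j, element]
#     end
#
# On the MPI root this yields the global mean; all other ranks receive `NaN`.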