GitHub Repository: trixi-framework/Trixi.jl
Path: blob/main/src/callbacks_step/analysis_dg2d_parallel.jl
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
# Since these FMAs can increase the performance of many numerical algorithms,
# we need to opt in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
#! format: noindent

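# Illustrative sketch: inside this `@muladd begin ... end` block, the `@muladd`
# macro (from MuladdMacro.jl) rewrites expressions of the form `a * b + c` into
# `muladd(a, b, c)`, which lets LLVM emit fused multiply-add instructions.
# The one-line function below is only a demonstration; its name is hypothetical
# and it is not used anywhere else.
fma_demo_sketch(a, b, c) = a * b + c # compiled as `muladd(a, b, c)` within this block
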
function calc_error_norms(func, u, t, analyzer,
                          mesh::ParallelTreeMesh{2}, equations, initial_condition,
                          dg::DGSEM, cache, cache_analysis)
    l2_errors, linf_errors = calc_error_norms_per_element(func, u, t, analyzer,
                                                          mesh, equations,
                                                          initial_condition,
                                                          dg, cache, cache_analysis)

    # Collect the local error norms for each element on the root process. That way, when
    # aggregating the L2 errors, the order of summation is the same as in the serial case,
    # ensuring exact equality. This facilitates easier parallel development and debugging (see
    # https://github.com/trixi-framework/Trixi.jl/pull/850#pullrequestreview-757463943 for details).
    # Note that this approach does not scale, since the root process gathers and stores one
    # value per global element.
    if mpi_isroot()
        global_l2_errors = zeros(eltype(l2_errors), cache.mpi_cache.n_elements_global)
        global_linf_errors = similar(global_l2_errors)

        n_elements_by_rank = parent(cache.mpi_cache.n_elements_by_rank) # convert OffsetArray to Array
        l2_buf = MPI.VBuffer(global_l2_errors, n_elements_by_rank)
        linf_buf = MPI.VBuffer(global_linf_errors, n_elements_by_rank)
        MPI.Gatherv!(l2_errors, l2_buf, mpi_root(), mpi_comm())
        MPI.Gatherv!(linf_errors, linf_buf, mpi_root(), mpi_comm())
    else
        MPI.Gatherv!(l2_errors, nothing, mpi_root(), mpi_comm())
        MPI.Gatherv!(linf_errors, nothing, mpi_root(), mpi_comm())
    end

    # Aggregate element error norms on the root process
    if mpi_isroot()
        # `sum(global_l2_errors)` does not produce the same result as in the serial case,
        # thus a hand-written loop is used
        l2_error = zero(eltype(global_l2_errors))
        for error in global_l2_errors
            l2_error += error
        end
        linf_error = reduce((x, y) -> max.(x, y), global_linf_errors)

        # For the L2 error, divide by the total volume
        total_volume_ = total_volume(mesh)
        l2_error = @. sqrt(l2_error / total_volume_)
    else
        l2_error = convert(eltype(l2_errors), NaN * zero(eltype(l2_errors)))
        linf_error = convert(eltype(linf_errors), NaN * zero(eltype(linf_errors)))
    end

    return l2_error, linf_error
end

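# Illustrative sketch of the gather pattern used above, reduced to its
# essentials: each rank contributes a vector whose length may differ between
# ranks, and the root process assembles the contributions, in rank order, into
# one contiguous array via `MPI.Gatherv!` and an `MPI.VBuffer` holding the
# per-rank counts. The function name and arguments are hypothetical;
# `mpi_isroot()`, `mpi_root()`, and `mpi_comm()` are the helpers already used
# in this file.
function gatherv_on_root_sketch(local_values::Vector{Float64},
                                counts_by_rank::Vector{Int})
    if mpi_isroot()
        global_values = zeros(Float64, sum(counts_by_rank))
        buf = MPI.VBuffer(global_values, counts_by_rank)
        MPI.Gatherv!(local_values, buf, mpi_root(), mpi_comm())
        return global_values
    else
        # Non-root ranks only send and thus pass `nothing` as the receive buffer
        MPI.Gatherv!(local_values, nothing, mpi_root(), mpi_comm())
        return nothing
    end
end
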
function calc_error_norms_per_element(func, u, t, analyzer,
                                      mesh::ParallelTreeMesh{2}, equations,
                                      initial_condition,
                                      dg::DGSEM, cache, cache_analysis)
    @unpack vandermonde, weights = analyzer
    @unpack node_coordinates = cache.elements
    @unpack u_local, u_tmp1, x_local, x_tmp1 = cache_analysis

    # Set up data structures
    T = typeof(zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)))
    l2_errors = zeros(T, nelements(dg, cache))
    linf_errors = copy(l2_errors)

    # Iterate over all elements for error calculations
    for element in eachelement(dg, cache)
        # Interpolate solution and node locations to analysis nodes
        multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1)
        multiply_dimensionwise!(x_local, vandermonde,
                                view(node_coordinates, :, :, :, element), x_tmp1)

        # Calculate errors at each analysis node
        volume_jacobian_ = volume_jacobian(element, mesh, cache)

        for j in eachnode(analyzer), i in eachnode(analyzer)
            u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j),
                                        t, equations)
            diff = func(u_exact, equations) -
                   func(get_node_vars(u_local, equations, dg, i, j), equations)
            l2_errors[element] += diff .^ 2 *
                                  (weights[i] * weights[j] * volume_jacobian_)
            linf_errors[element] = @. max(linf_errors[element], abs(diff))
        end
    end

    return l2_errors, linf_errors
end

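# For reference, the loops above approximate, per element,
#   l2_errors[element]   ≈ ∑_{i,j} w_i w_j J |func(u_h(x_ij)) - func(u_exact(x_ij))|²,
#   linf_errors[element] = max_{i,j} |func(u_h(x_ij)) - func(u_exact(x_ij))|,
# where w_i, w_j are the quadrature weights of the analysis nodes, J is the
# (element-wise constant) volume Jacobian on the TreeMesh, and x_ij are the
# analysis node coordinates. Taking the square root and dividing by the total
# volume happens only after the global sum in `calc_error_norms` above.
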
function calc_error_norms(func, u, t, analyzer,
                          mesh::Union{ParallelP4estMesh{2}, ParallelT8codeMesh{2}},
                          equations,
                          initial_condition, dg::DGSEM, cache, cache_analysis)
    @unpack vandermonde, weights = analyzer
    @unpack node_coordinates, inverse_jacobian = cache.elements
    @unpack u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1 = cache_analysis

    # Set up data structures
    l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations))
    linf_error = copy(l2_error)
    volume = zero(real(mesh))

    # Iterate over all elements for error calculations
    for element in eachelement(dg, cache)
        # Interpolate solution and node locations to analysis nodes
        multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1)
        multiply_dimensionwise!(x_local, vandermonde,
                                view(node_coordinates, :, :, :, element), x_tmp1)
        multiply_scalar_dimensionwise!(jacobian_local, vandermonde,
                                       inv.(view(inverse_jacobian, :, :, element)),
                                       jacobian_tmp1)

        # Calculate errors at each analysis node
        for j in eachnode(analyzer), i in eachnode(analyzer)
            u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j),
                                        t, equations)
            diff = func(u_exact, equations) -
                   func(get_node_vars(u_local, equations, dg, i, j), equations)
            # Take the absolute value, since the Jacobian is also needed here for the
            # volume calculation
            abs_jacobian_local_ij = abs(jacobian_local[i, j])

            l2_error += diff .^ 2 * (weights[i] * weights[j] * abs_jacobian_local_ij)
            linf_error = @. max(linf_error, abs(diff))
            volume += weights[i] * weights[j] * abs_jacobian_local_ij
        end
    end

    # Accumulate local results on the root process
    global_l2_error = Vector(l2_error)
    global_linf_error = Vector(linf_error)
    MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm())
    # `Base.max` instead of `max` is needed, see the comment in src/auxiliary/math.jl
    MPI.Reduce!(global_linf_error, Base.max, mpi_root(), mpi_comm())
    total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
    if mpi_isroot()
        l2_error = convert(typeof(l2_error), global_l2_error)
        linf_error = convert(typeof(linf_error), global_linf_error)
        # For the L2 error, divide by the total volume
        l2_error = @. sqrt(l2_error / total_volume)
    else
        l2_error = convert(typeof(l2_error), NaN * global_l2_error)
        linf_error = convert(typeof(linf_error), NaN * global_linf_error)
    end

    return l2_error, linf_error
end

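# Illustrative sketch of the reduction pattern used above: every rank holds a
# small error vector, the root process receives the elementwise sum (for the
# L2 error) and the elementwise maximum (for the Linf error) in place, and
# non-root ranks keep their local values. The function name is hypothetical.
function reduce_errors_on_root_sketch(local_l2::Vector{Float64},
                                      local_linf::Vector{Float64})
    global_l2 = copy(local_l2)
    global_linf = copy(local_linf)
    MPI.Reduce!(global_l2, +, mpi_root(), mpi_comm())
    # `Base.max` instead of `max`, as in the code above (see src/auxiliary/math.jl)
    MPI.Reduce!(global_linf, Base.max, mpi_root(), mpi_comm())
    return mpi_isroot() ? (global_l2, global_linf) : (nothing, nothing)
end
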
function integrate_via_indices(func::Func, u,
                               mesh::ParallelTreeMesh{2}, equations, dg::DGSEM, cache,
                               args...; normalize = true) where {Func}
    # Call the method accepting a general `mesh::TreeMesh{2}`.
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    local_integral = invoke(integrate_via_indices,
                            Tuple{typeof(func), typeof(u), TreeMesh{2},
                                  typeof(equations),
                                  typeof(dg), typeof(cache), map(typeof, args)...},
                            func, u, mesh, equations, dg, cache, args...,
                            normalize = normalize)

    # OBS! The global result is only calculated on the MPI root; all other ranks receive `nothing`
    global_integral = MPI.Reduce!(Ref(local_integral), +, mpi_root(), mpi_comm())
    if mpi_isroot()
        integral = convert(typeof(local_integral), global_integral[])
    else
        integral = convert(typeof(local_integral), NaN * local_integral)
    end

    return integral
end

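# Illustrative sketch of the `invoke` pattern used above: call a less specific
# method of a function even though a more specific method exists for the given
# argument types, here to reuse a generic implementation from a specialized
# one. All names below are hypothetical and only serve as a demonstration.
abstract type AbstractMeshSketch end
struct SerialMeshSketch <: AbstractMeshSketch end
struct ParallelMeshSketch <: AbstractMeshSketch end

describe_sketch(mesh::AbstractMeshSketch) = "generic code path"
function describe_sketch(mesh::ParallelMeshSketch)
    # Reuse the generic implementation instead of duplicating it, then extend it
    generic_result = invoke(describe_sketch, Tuple{AbstractMeshSketch}, mesh)
    return "parallel wrapper around: " * generic_result
end
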
function integrate_via_indices(func::Func, u,
                               mesh::Union{ParallelP4estMesh{2}, ParallelT8codeMesh{2}},
                               equations,
                               dg::DGSEM, cache, args...; normalize = true) where {Func}
    @unpack weights = dg.basis

    # Initialize the integral with zeros of the right shape.
    # Pass `zeros(eltype(u), nvariables(equations), nnodes(dg), nnodes(dg), 1)`
    # to `func`, since `u` might be empty if the current rank has no elements.
    # See also https://github.com/trixi-framework/Trixi.jl/issues/1096, and
    # https://github.com/trixi-framework/Trixi.jl/pull/2126/files/7cbc57cfcba93e67353566e10fce1f3edac27330#r1814483243.
    integral = zero(func(zeros(eltype(u), nvariables(equations), nnodes(dg), nnodes(dg),
                               1), 1, 1, 1, equations, dg, args...))
    volume = zero(real(mesh))

    # Use quadrature to numerically integrate over the entire domain
    @batch reduction=((+, integral), (+, volume)) for element in eachelement(dg, cache)
        for j in eachnode(dg), i in eachnode(dg)
            volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, element]))
            integral += volume_jacobian * weights[i] * weights[j] *
                        func(u, i, j, element, equations, dg, args...)
            volume += volume_jacobian * weights[i] * weights[j]
        end
    end

    global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm())
    total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
    if mpi_isroot()
        integral = convert(typeof(integral), global_integral[])
    else
        integral = convert(typeof(integral), NaN * integral)
        total_volume = volume # non-root ranks receive `nothing` from the reduce -> overwrite
    end

    # Normalize with the total volume
    if normalize
        integral = integral / total_volume
    end

    return integral
end
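# Illustrative sketch of the threaded reduction used above: Polyester.jl's
# `@batch reduction=...` runs the loop iterations on multiple threads and
# combines the listed accumulators with the given operators afterwards.
# The function and variable names are hypothetical.
function weighted_sum_sketch(values::Vector{Float64}, weights::Vector{Float64})
    total = zero(eltype(values))
    weight_sum = zero(eltype(weights))
    @batch reduction=((+, total), (+, weight_sum)) for i in eachindex(values, weights)
        total += weights[i] * values[i]
        weight_sum += weights[i]
    end
    return total / weight_sum # normalized, analogous to `normalize = true` above
end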
end # @muladd