GitHub Repository: trixi-framework/Trixi.jl
Path: blob/main/src/callbacks_step/stepsize_dg3d.jl
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
# Since these FMAs can increase the performance of many numerical algorithms,
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
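# For example, `@muladd` (provided by MuladdMacro.jl) rewrites an expression such as
#
#     Ja11 * lambda1 + Ja12 * lambda2 + Ja13 * lambda3
#
# into nested `muladd` calls, e.g.
#
#     muladd(Ja11, lambda1, muladd(Ja12, lambda2, Ja13 * lambda3))
#
# so that LLVM may emit FMA instructions without changing the code inside the block.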
@muladd begin
#! format: noindent

function max_dt(u, t, mesh::TreeMesh{3},
                constant_speed::False, equations, dg::DG, cache)
    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        max_lambda1 = max_lambda2 = max_lambda3 = zero(max_scaled_speed)
        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, k, element)
            lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations)
            max_lambda1 = max(max_lambda1, lambda1)
            max_lambda2 = max(max_lambda2, lambda2)
            max_lambda3 = max(max_lambda3, lambda3)
        end
        inv_jacobian = cache.elements.inverse_jacobian[element]
        max_scaled_speed = max(max_scaled_speed,
                               inv_jacobian * (max_lambda1 + max_lambda2 + max_lambda3))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end
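
# Step size sketch (illustrative, assuming cube elements of edge length `dx`, for
# which the stored inverse Jacobian of a `TreeMesh` element is `2 / dx`): the value
# returned above amounts to
#
#     dt = 2 / (nnodes(dg) * (2 / dx) * (lambda1 + lambda2 + lambda3))
#        = dx / (nnodes(dg) * (lambda1 + lambda2 + lambda3))
#
# i.e. a CFL-type restriction; the user-chosen CFL number is applied separately by
# the `StepsizeCallback`.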

function max_dt(u, t, mesh::TreeMesh{3},
                constant_speed::True, equations, dg::DG, cache)
    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations)

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        inv_jacobian = cache.elements.inverse_jacobian[element]
        max_scaled_speed = max(max_scaled_speed,
                               inv_jacobian * (max_lambda1 + max_lambda2 + max_lambda3))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end
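
# Dispatch note (illustrative sketch, not part of this file): which of the two
# `TreeMesh` methods above is called is decided at compile time via Trixi's `True`/
# `False` singleton types. For equations with solution-independent characteristic
# speeds the relevant definitions look roughly like
#
#     have_constant_speed(::LinearScalarAdvectionEquation3D) = True()
#     max_abs_speeds(equations::LinearScalarAdvectionEquation3D) = abs.(equations.advection_velocity)
#
# so the per-node evaluation of `max_abs_speeds(u_node, equations)` can be skipped.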

function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
                constant_speed::False, equations, dg::DG, cache)
    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    @unpack contravariant_vectors = cache.elements

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        max_lambda1 = max_lambda2 = max_lambda3 = zero(max_scaled_speed)
        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, k, element)
            lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations)

            Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors,
                                                        i, j, k, element)
            lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2 + Ja13 * lambda3)
            Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors,
                                                        i, j, k, element)
            lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2 + Ja23 * lambda3)
            Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors,
                                                        i, j, k, element)
            lambda3_transformed = abs(Ja31 * lambda1 + Ja32 * lambda2 + Ja33 * lambda3)

            inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element])

            max_lambda1 = max(max_lambda1, inv_jacobian * lambda1_transformed)
            max_lambda2 = max(max_lambda2, inv_jacobian * lambda2_transformed)
            max_lambda3 = max(max_lambda3, inv_jacobian * lambda3_transformed)
        end

        max_scaled_speed = max(max_scaled_speed,
                               max_lambda1 + max_lambda2 + max_lambda3)
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end
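
# Geometry note (illustrative, not part of this file): on curvilinear meshes the
# physical speeds are rotated into the three reference directions using the
# contravariant basis vectors Ja^i before the CFL-type estimate is formed, roughly
#
#     lambda_i_transformed = |Ja^i_1 * lambda1 + Ja^i_2 * lambda2 + Ja^i_3 * lambda3|
#
# and each node carries its own inverse Jacobian, in contrast to the single
# per-element value used for the Cartesian `TreeMesh` methods above.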

function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
                constant_speed::True, equations, dg::DG, cache)
    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    @unpack contravariant_vectors = cache.elements

    max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations)

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
            Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors,
                                                        i, j, k, element)
            lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2 +
                                      Ja13 * max_lambda3)
            Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors,
                                                        i, j, k, element)
            lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2 +
                                      Ja23 * max_lambda3)
            Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors,
                                                        i, j, k, element)
            lambda3_transformed = abs(Ja31 * max_lambda1 + Ja32 * max_lambda2 +
                                      Ja33 * max_lambda3)

            inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element])

            max_scaled_speed = max(max_scaled_speed,
                                   inv_jacobian *
                                   (lambda1_transformed + lambda2_transformed +
                                    lambda3_transformed))
        end
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t, mesh::ParallelP4estMesh{3},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::P4estMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end
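
# MPI note (illustrative sketch; the rank-local value below is hypothetical):
# `invoke` bypasses this `ParallelP4estMesh{3}` specialization and reuses the serial
# `P4estMesh{3}` method on the locally owned elements; the global step size is then
# the minimum over all ranks, e.g.
#
#     dt_local = 0.1 / (1 + mpi_rank())                # hypothetical per-rank estimate
#     dt_global = MPI.Allreduce(dt_local, Base.min, mpi_comm())
#
# The same pattern is repeated for the remaining parallel mesh types below.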

function max_dt(u, t, mesh::ParallelP4estMesh{3},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::P4estMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::ParallelT8codeMesh{3},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::T8codeMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::ParallelT8codeMesh{3},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::T8codeMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end
end # @muladd