Path: blob/main/src/callbacks_step/stepsize_dg2d.jl
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
# Since these FMAs can increase the performance of many numerical algorithms,
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
#! format: noindent

function max_dt(u, t, mesh::TreeMesh{2},
                constant_speed::False, equations, dg::DG, cache)
    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        max_lambda1 = max_lambda2 = zero(max_scaled_speed)
        for j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, element)
            lambda1, lambda2 = max_abs_speeds(u_node, equations)
            max_lambda1 = max(max_lambda1, lambda1)
            max_lambda2 = max(max_lambda2, lambda2)
        end
        inv_jacobian = cache.elements.inverse_jacobian[element]
        max_scaled_speed = max(max_scaled_speed,
                               inv_jacobian * (max_lambda1 + max_lambda2))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t, mesh::TreeMesh{2},
                constant_speed::True, equations, dg::DG, cache)
    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    max_lambda1, max_lambda2 = max_abs_speeds(equations)

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        inv_jacobian = cache.elements.inverse_jacobian[element]
        max_scaled_speed = max(max_scaled_speed,
                               inv_jacobian * (max_lambda1 + max_lambda2))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t, mesh::ParallelTreeMesh{2},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::TreeMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), TreeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::ParallelTreeMesh{2},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::TreeMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), TreeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t,
                mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2},
                            T8codeMesh{2}, StructuredMeshView{2}},
                constant_speed::False, equations, dg::DG, cache)
    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    @unpack contravariant_vectors, inverse_jacobian = cache.elements

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        max_lambda1 = max_lambda2 = zero(max_scaled_speed)
        for j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, element)
            lambda1, lambda2 = max_abs_speeds(u_node, equations)

            # Local speeds transformed to the reference element
            Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors,
                                                  i, j, element)
            lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2)
            Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors,
                                                  i, j, element)
            lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2)

            inv_jacobian = abs(inverse_jacobian[i, j, element])

            max_lambda1 = max(max_lambda1, lambda1_transformed * inv_jacobian)
            max_lambda2 = max(max_lambda2, lambda2_transformed * inv_jacobian)
        end

        max_scaled_speed = max(max_scaled_speed, max_lambda1 + max_lambda2)
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t,
                mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2},
                            P4estMeshView{2}, T8codeMesh{2}, StructuredMeshView{2}},
                constant_speed::True, equations, dg::DG, cache)
    @unpack contravariant_vectors, inverse_jacobian = cache.elements

    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    max_lambda1, max_lambda2 = max_abs_speeds(equations)

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        for j in eachnode(dg), i in eachnode(dg)
            # Local speeds transformed to the reference element
            Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors,
                                                  i, j, element)
            lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2)
            Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors,
                                                  i, j, element)
            lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2)

            inv_jacobian = abs(inverse_jacobian[i, j, element])
            max_scaled_speed = max(max_scaled_speed,
                                   inv_jacobian *
                                   (lambda1_transformed + lambda2_transformed))
        end
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t, mesh::ParallelP4estMesh{2},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::P4estMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::ParallelP4estMesh{2},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::P4estMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::ParallelT8codeMesh{2},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::T8codeMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::ParallelT8codeMesh{2},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::T8codeMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end
end # @muladd
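# Illustrative sketch (not part of the original file): every `max_dt` method above
# reduces a per-element "scaled speed" and returns the CFL-like estimate
# dt = 2 / (nnodes(dg) * max_scaled_speed). The toy function below mirrors that
# reduction pattern on plain arrays; the names `toy_max_dt_2d`, `speeds`, and
# `inv_jacobians` are hypothetical and only serve to illustrate the pattern.
function toy_max_dt_2d(speeds, inv_jacobians, nnodes)
    # `speeds[d, element]` holds the maximal absolute wave speed in direction d,
    # `inv_jacobians[element]` the per-element geometric scaling, as in the loops above.
    max_scaled_speed = nextfloat(zero(eltype(speeds)))  # avoid division by zero
    for element in axes(speeds, 2)
        max_scaled_speed = max(max_scaled_speed,
                               inv_jacobians[element] *
                               (speeds[1, element] + speeds[2, element]))
    end
    return 2 / (nnodes * max_scaled_speed)
end
# Usage with hypothetical values: `toy_max_dt_2d([1.0 2.0; 0.5 1.5], [4.0, 8.0], 4)`.
# A step-size callback would then typically scale the returned estimate by a
# user-chosen CFL number; on parallel meshes the per-rank results are combined with
# an MPI `Allreduce` using `min`, as in the `Parallel*Mesh` methods above.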