# Path: blob/main/src/callbacks_step/stepsize_dg3d.jl (2055 views)
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
# Since these FMAs can increase the performance of many numerical algorithms,
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
#! format: noindent

# The `max_dt` methods below compute the largest stable time step for a 3D DG
# discretization from the maximal characteristic speeds, scaled by the inverse
# Jacobian and the polynomial resolution (`nnodes(dg)`). Dispatch on
# `constant_speed::False/True` selects between speeds that depend on the local
# solution state and speeds that are constant for the whole domain.

function max_dt(u, t, mesh::TreeMesh{3},
                constant_speed::False, equations, dg::DG, cache)
    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        max_lambda1 = max_lambda2 = max_lambda3 = zero(max_scaled_speed)
        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, k, element)
            lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations)
            max_lambda1 = max(max_lambda1, lambda1)
            max_lambda2 = max(max_lambda2, lambda2)
            max_lambda3 = max(max_lambda3, lambda3)
        end
        # TreeMesh elements are Cartesian, so one inverse Jacobian per element
        inv_jacobian = cache.elements.inverse_jacobian[element]
        max_scaled_speed = max(max_scaled_speed,
                               inv_jacobian *
                               (max_lambda1 + max_lambda2 + max_lambda3))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t, mesh::TreeMesh{3},
                constant_speed::True, equations, dg::DG, cache)
    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    # speeds are constant in the whole domain, so query them once
    max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations)

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        inv_jacobian = cache.elements.inverse_jacobian[element]
        max_scaled_speed = max(max_scaled_speed,
                               inv_jacobian *
                               (max_lambda1 + max_lambda2 + max_lambda3))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
                constant_speed::False, equations, dg::DG, cache)
    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    @unpack contravariant_vectors = cache.elements

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        max_lambda1 = max_lambda2 = max_lambda3 = zero(max_scaled_speed)
        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, k, element)
            lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations)

            # transform the physical speeds into the reference coordinate
            # directions using the contravariant basis vectors
            Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors,
                                                        i, j, k, element)
            lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2 +
                                      Ja13 * lambda3)
            Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors,
                                                        i, j, k, element)
            lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2 +
                                      Ja23 * lambda3)
            Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors,
                                                        i, j, k, element)
            lambda3_transformed = abs(Ja31 * lambda1 + Ja32 * lambda2 +
                                      Ja33 * lambda3)

            # curved elements store one inverse Jacobian per node
            inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element])

            max_lambda1 = max(max_lambda1, inv_jacobian * lambda1_transformed)
            max_lambda2 = max(max_lambda2, inv_jacobian * lambda2_transformed)
            max_lambda3 = max(max_lambda3, inv_jacobian * lambda3_transformed)
        end

        max_scaled_speed = max(max_scaled_speed,
                               max_lambda1 + max_lambda2 + max_lambda3)
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
                constant_speed::True, equations, dg::DG, cache)
    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    @unpack contravariant_vectors = cache.elements

    # speeds are constant in the whole domain, so query them once
    max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations)

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
            # transform the constant physical speeds into the reference
            # coordinate directions using the contravariant basis vectors
            Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors,
                                                        i, j, k, element)
            lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2 +
                                      Ja13 * max_lambda3)
            Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors,
                                                        i, j, k, element)
            lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2 +
                                      Ja23 * max_lambda3)
            Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors,
                                                        i, j, k, element)
            lambda3_transformed = abs(Ja31 * max_lambda1 + Ja32 * max_lambda2 +
                                      Ja33 * max_lambda3)

            # curved elements store one inverse Jacobian per node
            inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element])

            max_scaled_speed = max(max_scaled_speed,
                                   inv_jacobian *
                                   (lambda1_transformed + lambda2_transformed +
                                    lambda3_transformed))
        end
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t, mesh::ParallelP4estMesh{3},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::P4estMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::ParallelP4estMesh{3},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::P4estMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::ParallelT8codeMesh{3},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::T8codeMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::ParallelT8codeMesh{3},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::T8codeMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end
end # @muladd