# Path: blob/main/src/callbacks_step/stepsize_dg3d.jl (2055 views)
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
# Since these FMAs can increase the performance of many numerical algorithms,
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
#! format: noindent

# The `max_dt` methods below compute the largest stable time step for a 3D DG
# discretization from the maximal characteristic speeds, scaled by the inverse
# Jacobian and the polynomial resolution (`nnodes(dg)`). Dispatch on
# `constant_speed::False/True` selects between speeds that depend on the local
# solution state and speeds that are constant for the whole domain.

function max_dt(u, t, mesh::TreeMesh{3},
                constant_speed::False, equations, dg::DG, cache)
    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        max_lambda1 = max_lambda2 = max_lambda3 = zero(max_scaled_speed)
        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, k, element)
            lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations)
            max_lambda1 = max(max_lambda1, lambda1)
            max_lambda2 = max(max_lambda2, lambda2)
            max_lambda3 = max(max_lambda3, lambda3)
        end
        # TreeMesh elements are Cartesian, so one inverse Jacobian per element
        inv_jacobian = cache.elements.inverse_jacobian[element]
        max_scaled_speed = max(max_scaled_speed,
                               inv_jacobian *
                               (max_lambda1 + max_lambda2 + max_lambda3))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t, mesh::TreeMesh{3},
                constant_speed::True, equations, dg::DG, cache)
    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    # speeds are constant in the whole domain, so query them once
    max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations)

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        inv_jacobian = cache.elements.inverse_jacobian[element]
        max_scaled_speed = max(max_scaled_speed,
                               inv_jacobian *
                               (max_lambda1 + max_lambda2 + max_lambda3))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
                constant_speed::False, equations, dg::DG, cache)
    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    @unpack contravariant_vectors = cache.elements

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        max_lambda1 = max_lambda2 = max_lambda3 = zero(max_scaled_speed)
        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, k, element)
            lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations)

            # transform the physical speeds into the reference coordinate
            # directions using the contravariant basis vectors
            Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors,
                                                        i, j, k, element)
            lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2 +
                                      Ja13 * lambda3)
            Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors,
                                                        i, j, k, element)
            lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2 +
                                      Ja23 * lambda3)
            Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors,
                                                        i, j, k, element)
            lambda3_transformed = abs(Ja31 * lambda1 + Ja32 * lambda2 +
                                      Ja33 * lambda3)

            # curved elements store one inverse Jacobian per node
            inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element])

            max_lambda1 = max(max_lambda1, inv_jacobian * lambda1_transformed)
            max_lambda2 = max(max_lambda2, inv_jacobian * lambda2_transformed)
            max_lambda3 = max(max_lambda3, inv_jacobian * lambda3_transformed)
        end

        max_scaled_speed = max(max_scaled_speed,
                               max_lambda1 + max_lambda2 + max_lambda3)
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
                constant_speed::True, equations, dg::DG, cache)
    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    @unpack contravariant_vectors = cache.elements

    # speeds are constant in the whole domain, so query them once
    max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations)

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
            # transform the constant physical speeds into the reference
            # coordinate directions using the contravariant basis vectors
            Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors,
                                                        i, j, k, element)
            lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2 +
                                      Ja13 * max_lambda3)
            Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors,
                                                        i, j, k, element)
            lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2 +
                                      Ja23 * max_lambda3)
            Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors,
                                                        i, j, k, element)
            lambda3_transformed = abs(Ja31 * max_lambda1 + Ja32 * max_lambda2 +
                                      Ja33 * max_lambda3)

            # curved elements store one inverse Jacobian per node
            inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element])

            max_scaled_speed = max(max_scaled_speed,
                                   inv_jacobian *
                                   (lambda1_transformed + lambda2_transformed +
                                    lambda3_transformed))
        end
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t, mesh::ParallelP4estMesh{3},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::P4estMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::ParallelP4estMesh{3},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::P4estMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::ParallelT8codeMesh{3},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::T8codeMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::ParallelT8codeMesh{3},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::T8codeMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end
end # @muladd