Path: blob/main/src/callbacks_step/stepsize_dg2d.jl
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
# Since these FMAs can increase the performance of many numerical algorithms,
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
#! format: noindent

function max_dt(u, t, mesh::TreeMesh{2},
                constant_speed::False, equations, dg::DG, cache)
    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        max_lambda1 = max_lambda2 = zero(max_scaled_speed)
        for j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, element)
            lambda1, lambda2 = max_abs_speeds(u_node, equations)
            max_lambda1 = max(max_lambda1, lambda1)
            max_lambda2 = max(max_lambda2, lambda2)
        end
        inv_jacobian = cache.elements.inverse_jacobian[element]
        max_scaled_speed = max(max_scaled_speed,
                               inv_jacobian * (max_lambda1 + max_lambda2))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t, mesh::TreeMesh{2},
                constant_speed::True, equations, dg::DG, cache)
    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    max_lambda1, max_lambda2 = max_abs_speeds(equations)

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        inv_jacobian = cache.elements.inverse_jacobian[element]
        max_scaled_speed = max(max_scaled_speed,
                               inv_jacobian * (max_lambda1 + max_lambda2))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t, mesh::ParallelTreeMesh{2},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::TreeMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), TreeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::ParallelTreeMesh{2},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::TreeMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), TreeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t,
                mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2},
                            T8codeMesh{2}, StructuredMeshView{2}},
                constant_speed::False, equations, dg::DG, cache)
    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    @unpack contravariant_vectors, inverse_jacobian = cache.elements

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        max_lambda1 = max_lambda2 = zero(max_scaled_speed)
        for j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, element)
            lambda1, lambda2 = max_abs_speeds(u_node, equations)

            # Local speeds transformed to the reference element
            Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors,
                                                  i, j, element)
            lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2)
            Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors,
                                                  i, j, element)
            lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2)

            inv_jacobian = abs(inverse_jacobian[i, j, element])

            max_lambda1 = max(max_lambda1, lambda1_transformed * inv_jacobian)
            max_lambda2 = max(max_lambda2, lambda2_transformed * inv_jacobian)
        end

        max_scaled_speed = max(max_scaled_speed, max_lambda1 + max_lambda2)
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t,
                mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2},
                            P4estMeshView{2}, T8codeMesh{2}, StructuredMeshView{2}},
                constant_speed::True, equations, dg::DG, cache)
    @unpack contravariant_vectors, inverse_jacobian = cache.elements

    # to avoid a division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    max_lambda1, max_lambda2 = max_abs_speeds(equations)

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        for j in eachnode(dg), i in eachnode(dg)
            # Local speeds transformed to the reference element
            Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors,
                                                  i, j, element)
            lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2)
            Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors,
                                                  i, j, element)
            lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2)

            inv_jacobian = abs(inverse_jacobian[i, j, element])
            max_scaled_speed = max(max_scaled_speed,
                                   inv_jacobian *
                                   (lambda1_transformed + lambda2_transformed))
        end
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t, mesh::ParallelP4estMesh{2},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::P4estMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::ParallelP4estMesh{2},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::P4estMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::ParallelT8codeMesh{2},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::T8codeMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::ParallelT8codeMesh{2},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::T8codeMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end
end # @muladd
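# Illustrative sketch (not part of the original file): every `max_dt` method above
# reduces a per-element "scaled speed" and returns the CFL-like estimate
# dt = 2 / (nnodes(dg) * max_scaled_speed). The toy function below mirrors that
# reduction pattern on plain arrays; the names `toy_max_dt_2d`, `speeds`, and
# `inv_jacobians` are hypothetical and only serve to illustrate the pattern.
function toy_max_dt_2d(speeds, inv_jacobians, nnodes)
    # `speeds[d, element]` holds the maximal absolute wave speed in direction d,
    # `inv_jacobians[element]` the per-element geometric scaling, as in the loops above.
    max_scaled_speed = nextfloat(zero(eltype(speeds)))  # avoid division by zero
    for element in axes(speeds, 2)
        max_scaled_speed = max(max_scaled_speed,
                               inv_jacobians[element] *
                               (speeds[1, element] + speeds[2, element]))
    end
    return 2 / (nnodes * max_scaled_speed)
end
# Usage with hypothetical values: `toy_max_dt_2d([1.0 2.0; 0.5 1.5], [4.0, 8.0], 4)`.
# A step-size callback would then typically scale the returned estimate by a
# user-chosen CFL number; on parallel meshes the per-rank results are combined with
# an MPI `Allreduce` using `min`, as in the `Parallel*Mesh` methods above.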