Path: blob/main/src/callbacks_step/analysis_dg2d_parallel.jl
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
# Since these FMAs can increase the performance of many numerical algorithms,
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
#! format: noindent

function calc_error_norms(func, u, t, analyzer,
                          mesh::ParallelTreeMesh{2}, equations, initial_condition,
                          dg::DGSEM, cache, cache_analysis)
    l2_errors, linf_errors = calc_error_norms_per_element(func, u, t, analyzer,
                                                          mesh, equations,
                                                          initial_condition,
                                                          dg, cache, cache_analysis)

    # Collect local error norms for each element on root process. That way, when aggregating the L2
    # errors, the order of summation is the same as in the serial case to ensure exact equality.
    # This facilitates easier parallel development and debugging (see
    # https://github.com/trixi-framework/Trixi.jl/pull/850#pullrequestreview-757463943 for details).
    # Note that this approach does not scale.
    if mpi_isroot()
        global_l2_errors = zeros(eltype(l2_errors), cache.mpi_cache.n_elements_global)
        global_linf_errors = similar(global_l2_errors)

        n_elements_by_rank = parent(cache.mpi_cache.n_elements_by_rank) # convert OffsetArray to Array
        l2_buf = MPI.VBuffer(global_l2_errors, n_elements_by_rank)
        linf_buf = MPI.VBuffer(global_linf_errors, n_elements_by_rank)
        MPI.Gatherv!(l2_errors, l2_buf, mpi_root(), mpi_comm())
        MPI.Gatherv!(linf_errors, linf_buf, mpi_root(), mpi_comm())
    else
        MPI.Gatherv!(l2_errors, nothing, mpi_root(), mpi_comm())
        MPI.Gatherv!(linf_errors, nothing, mpi_root(), mpi_comm())
    end

    # Aggregate element error norms on root process
    if mpi_isroot()
        # sum(global_l2_errors) does not produce the same result as in the serial case, thus a
        # hand-written loop is used
        l2_error = zero(eltype(global_l2_errors))
        for error in global_l2_errors
            l2_error += error
        end
        linf_error = reduce((x, y) -> max.(x, y), global_linf_errors)

        # For L2 error, divide by total volume
        total_volume_ = total_volume(mesh)
        l2_error = @. sqrt(l2_error / total_volume_)
    else
        l2_error = convert(eltype(l2_errors), NaN * zero(eltype(l2_errors)))
        linf_error = convert(eltype(linf_errors), NaN * zero(eltype(linf_errors)))
    end

    return l2_error, linf_error
end

function calc_error_norms_per_element(func, u, t, analyzer,
                                      mesh::ParallelTreeMesh{2}, equations,
                                      initial_condition,
                                      dg::DGSEM, cache, cache_analysis)
    @unpack vandermonde, weights = analyzer
    @unpack node_coordinates = cache.elements
    @unpack u_local, u_tmp1, x_local, x_tmp1 = cache_analysis

    # Set up data structures
    T = typeof(zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)))
    l2_errors = zeros(T, nelements(dg, cache))
    linf_errors = copy(l2_errors)

    # Iterate over all elements for error calculations
    for element in eachelement(dg, cache)
        # Interpolate solution and node locations to analysis nodes
        multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1)
        multiply_dimensionwise!(x_local, vandermonde,
                                view(node_coordinates, :, :, :, element), x_tmp1)

        # Calculate errors at each analysis node
        volume_jacobian_ = volume_jacobian(element, mesh, cache)

        for j in eachnode(analyzer), i in eachnode(analyzer)
            u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j),
                                        t, equations)
            diff = func(u_exact, equations) -
                   func(get_node_vars(u_local, equations, dg, i, j), equations)
            l2_errors[element] += diff .^ 2 *
                                  (weights[i] * weights[j] * volume_jacobian_)
            linf_errors[element] = @. max(linf_errors[element], abs(diff))
        end
    end

    return l2_errors, linf_errors
end
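# Illustrative sketch (not part of Trixi.jl and not called anywhere): the
# gather-then-sum pattern used in `calc_error_norms` above, shown in isolation.
# `local_values` would hold one entry per local element and `counts_by_rank` the
# number of elements on each rank, analogous to
# `parent(cache.mpi_cache.n_elements_by_rank)`. Gathering in rank order and
# summing with an explicit loop reproduces the serial summation order exactly.
function _gather_and_sum_sketch(local_values, counts_by_rank)
    if mpi_isroot()
        # Receive all per-element values in rank order on the root process
        global_values = zeros(eltype(local_values), sum(counts_by_rank))
        MPI.Gatherv!(local_values, MPI.VBuffer(global_values, counts_by_rank),
                     mpi_root(), mpi_comm())

        # Sum in the same order as a serial run would
        total = zero(eltype(global_values))
        for value in global_values
            total += value
        end
        return total
    else
        # Non-root ranks only send; they do not obtain the global sum
        MPI.Gatherv!(local_values, nothing, mpi_root(), mpi_comm())
        return nothing
    end
end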
function calc_error_norms(func, u, t, analyzer,
                          mesh::Union{ParallelP4estMesh{2}, ParallelT8codeMesh{2}},
                          equations,
                          initial_condition, dg::DGSEM, cache, cache_analysis)
    @unpack vandermonde, weights = analyzer
    @unpack node_coordinates, inverse_jacobian = cache.elements
    @unpack u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1 = cache_analysis

    # Set up data structures
    l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations))
    linf_error = copy(l2_error)
    volume = zero(real(mesh))

    # Iterate over all elements for error calculations
    for element in eachelement(dg, cache)
        # Interpolate solution and node locations to analysis nodes
        multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1)
        multiply_dimensionwise!(x_local, vandermonde,
                                view(node_coordinates, :, :, :, element), x_tmp1)
        multiply_scalar_dimensionwise!(jacobian_local, vandermonde,
                                       inv.(view(inverse_jacobian, :, :, element)),
                                       jacobian_tmp1)

        # Calculate errors at each analysis node
        for j in eachnode(analyzer), i in eachnode(analyzer)
            u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j),
                                        t, equations)
            diff = func(u_exact, equations) -
                   func(get_node_vars(u_local, equations, dg, i, j), equations)
            # We take absolute value as we need the Jacobian here for the volume calculation
            abs_jacobian_local_ij = abs(jacobian_local[i, j])

            l2_error += diff .^ 2 * (weights[i] * weights[j] * abs_jacobian_local_ij)
            linf_error = @. max(linf_error, abs(diff))
            volume += weights[i] * weights[j] * abs_jacobian_local_ij
        end
    end

    # Accumulate local results on root process
    global_l2_error = Vector(l2_error)
    global_linf_error = Vector(linf_error)
    MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm())
    # Base.max instead of max needed, see comment in src/auxiliary/math.jl
    MPI.Reduce!(global_linf_error, Base.max, mpi_root(), mpi_comm())
    total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
    if mpi_isroot()
        l2_error = convert(typeof(l2_error), global_l2_error)
        linf_error = convert(typeof(linf_error), global_linf_error)
        # For L2 error, divide by total volume
        l2_error = @. sqrt(l2_error / total_volume)
    else
        l2_error = convert(typeof(l2_error), NaN * global_l2_error)
        linf_error = convert(typeof(linf_error), NaN * global_linf_error)
    end

    return l2_error, linf_error
end

function integrate_via_indices(func::Func, u,
                               mesh::ParallelTreeMesh{2}, equations, dg::DGSEM, cache,
                               args...; normalize = true) where {Func}
    # call the method accepting a general `mesh::TreeMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    local_integral = invoke(integrate_via_indices,
                            Tuple{typeof(func), typeof(u), TreeMesh{2},
                                  typeof(equations),
                                  typeof(dg), typeof(cache), map(typeof, args)...},
                            func, u, mesh, equations, dg, cache, args...,
                            normalize = normalize)

    # OBS! Global results are only calculated on MPI root, all other domains receive `nothing`
    global_integral = MPI.Reduce!(Ref(local_integral), +, mpi_root(), mpi_comm())
    if mpi_isroot()
        integral = convert(typeof(local_integral), global_integral[])
    else
        integral = convert(typeof(local_integral), NaN * local_integral)
    end

    return integral
end
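# Illustrative sketch (not part of Trixi.jl and not called anywhere): the
# root-only reduction pattern shared by the `integrate_via_indices` methods in
# this file. `local_integral` stands for a per-rank partial result (a number or
# `SVector`, as in the methods above); only the root process obtains the global
# sum, all other ranks return `NaN`s of the matching type so that accidental use
# of their result is easy to spot.
function _reduce_on_root_sketch(local_integral)
    global_integral = MPI.Reduce!(Ref(local_integral), +, mpi_root(), mpi_comm())
    if mpi_isroot()
        return convert(typeof(local_integral), global_integral[])
    else
        return convert(typeof(local_integral), NaN * local_integral)
    end
end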
function integrate_via_indices(func::Func, u,
                               mesh::Union{ParallelP4estMesh{2}, ParallelT8codeMesh{2}},
                               equations,
                               dg::DGSEM, cache, args...; normalize = true) where {Func}
    @unpack weights = dg.basis

    # Initialize integral with zeros of the right shape
    # Pass `zeros(eltype(u), nvariables(equations), nnodes(dg), nnodes(dg), 1)`
    # to `func` since `u` might be empty, if the current rank has no elements.
    # See also https://github.com/trixi-framework/Trixi.jl/issues/1096, and
    # https://github.com/trixi-framework/Trixi.jl/pull/2126/files/7cbc57cfcba93e67353566e10fce1f3edac27330#r1814483243.
    integral = zero(func(zeros(eltype(u), nvariables(equations), nnodes(dg), nnodes(dg),
                               1), 1, 1, 1, equations, dg, args...))
    volume = zero(real(mesh))

    # Use quadrature to numerically integrate over entire domain
    @batch reduction=((+, integral), (+, volume)) for element in eachelement(dg, cache)
        for j in eachnode(dg), i in eachnode(dg)
            volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, element]))
            integral += volume_jacobian * weights[i] * weights[j] *
                        func(u, i, j, element, equations, dg, args...)
            volume += volume_jacobian * weights[i] * weights[j]
        end
    end

    global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm())
    total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
    if mpi_isroot()
        integral = convert(typeof(integral), global_integral[])
    else
        integral = convert(typeof(integral), NaN * integral)
        total_volume = volume # non-root processes receive nothing from reduce -> overwrite
    end

    # Normalize with total volume
    if normalize
        integral = integral / total_volume
    end

    return integral
end
end # @muladd
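# Illustrative usage sketch (hypothetical, not part of this file): with
# `normalize = true`, `integrate_via_indices` divides the quadrature sum by the
# total volume and thus returns a volume-weighted mean of the integrand.
# Assuming `u`, `mesh`, `equations`, `dg`, and `cache` come from a running
# parallel simulation, integrating the first conservative variable could look like
#
#     mean_u1 = integrate_via_indices(u, mesh, equations, dg, cache;
#                                     normalize = true) do u, i, j, element,
#                                                          equations, dg
#         u[1, i, j, element]
#     end
#
# On the MPI root this yields the global mean; all other ranks receive `NaN`.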