Path: blob/master/src/hotspot/cpu/x86/crc32c.h
41144 views
/*
 * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_X86_CRC32C_H
#define CPU_X86_CRC32C_H

// Tuning constants for the x86 CRC32C implementation: three "chunk" sizes
// (HIGH, MIDDLE, LOW) plus the number of precomputed constants derived from them.
enum {
  // S. Gueron / Information Processing Letters 112 (2012) 184
  // shows that anything above 6K and below 32K is a good choice;
  // 32K does not deliver any further performance gains.
  // 6K=8*256 (*3 as we compute 3 blocks together)
  //
  // Thus selecting the smallest value so it could apply to the largest number
  // of buffer sizes.
  CRC32C_HIGH = 8 * 256,

  // Empirical,
  // based on a ubench study using the methodology described in
  // V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 8
  //
  // Arbitrary value between 27 and 256.
  CRC32C_MIDDLE = 8 * 86,

  // V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 9
  // shows that 240 and 1024 are equally good choices as 216==8*27.
  //
  // Selecting the smallest value which resulted in a significant performance improvement over
  // the sequential version.
  CRC32C_LOW = 8 * 27,

  // Number of "chunk" sizes defined above (HIGH, MIDDLE, LOW).
  // Despite its name this is a count, not a size in bytes.
  CRC32C_NUM_ChunkSizeInBytes = 3,

  // We need to compute powers of 64N and 128N for each "chunk" size,
  // i.e. two constants per chunk size.
  CRC32C_NUM_PRECOMPUTED_CONSTANTS = ( 2 * CRC32C_NUM_ChunkSizeInBytes )
};
// Notes:
// 1. Why do we need to choose a "chunk" approach?
//    Overhead of computing the powers for an arbitrary buffer of size N is significant
//    (implementation approaches a library perf.)
// 2. Why only 3 "chunks"?
//    Performance experiment results showed that HIGH+LOW alone was not delivering a stable
//    speedup curve.
//
// Disclaimer:
// If you ever decide to increase/decrease the number of "chunks" be sure to modify
// a) constants table generation (hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp)
// b) constant fetch from that table (macroAssembler_x86.cpp)
// c) unrolled for loop (macroAssembler_x86.cpp)

#endif /* !CPU_X86_CRC32C_H */