Path: blob/master/src/java.base/share/native/libjimage/imageFile.hpp
41149 views
/*
 * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   - Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   - Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 *   - Neither the name of Oracle nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef LIBJIMAGE_IMAGEFILE_HPP
#define LIBJIMAGE_IMAGEFILE_HPP

#include <assert.h>

#include "endian.hpp"
#include "inttypes.hpp"

// Image files are an alternate file format for storing classes and resources. The
// goal is to supply file access which is faster and smaller than the jar format.
// It should be noted that unlike jars, information stored in an image is in native
// endian format. This allows the image to be mapped into memory without endian
// translation.  This also means that images are platform dependent.
//
// Image files are structured as three sections;
//
//         +-----------+
//         |  Header   |
//         +-----------+
//         |           |
//         |   Index   |
//         |           |
//         +-----------+
//         |           |
//         |           |
//         | Resources |
//         |           |
//         |           |
//         +-----------+
//
// The header contains information related to identification and description of
// contents.
//
//         +-------------------------+
//         |   Magic (0xCAFEDADA)    |
//         +------------+------------+
//         | Major Vers | Minor Vers |
//         +------------+------------+
//         |          Flags          |
//         +-------------------------+
//         |      Resource Count     |
//         +-------------------------+
//         |       Table Length      |
//         +-------------------------+
//         |      Attributes Size    |
//         +-------------------------+
//         |       Strings Size      |
//         +-------------------------+
//
// Magic - means of identifying validity of the file.  This avoids requiring a
//         special file extension.
// Major vers, minor vers - differences in version numbers indicate structural
//                          changes in the image.
// Flags - various image wide flags (future).
// Resource count - number of resources in the file.
// Table length - the length of lookup tables used in the index.
// Attributes size - number of bytes in the region used to store location attribute
//                   streams.
// Strings size - the size of the region used to store strings used by the
//                index and meta data.
//
// The index contains information related to resource lookup. The algorithm
// used for lookup is "A Practical Minimal Perfect Hashing Method"
// (http://homepages.dcc.ufmg.br/~nivio/papers/wea05.pdf). Given a path string
// in the form /<module>/<package>/<base>.<extension>  return the resource location
// information;
//
//     redirectIndex = hash(path, DEFAULT_SEED) % table_length;
//     redirect = redirectTable[redirectIndex];
//     if (redirect == 0) return not found;
//     locationIndex = redirect < 0 ? -1 - redirect : hash(path, redirect) % table_length;
//     location = locationTable[locationIndex];
//     if (!verify(location, path)) return not found;
//     return location;
//
// Note: The hash function takes an initial seed value.  A different seed value
// usually returns a different result for strings that would otherwise collide with
// other seeds. The verify function guarantees the found resource location is
// indeed the resource we are looking for.
//
// The following is the format of the index;
//
//         +-------------------+
//         |   Redirect Table  |
//         +-------------------+
//         | Attribute Offsets |
//         +-------------------+
//         |   Attribute Data  |
//         +-------------------+
//         |      Strings      |
//         +-------------------+
//
// Redirect Table - Array of 32-bit signed values representing actions that
//                  should take place for hashed strings that map to that
//                  value.  Negative values indicate no hash collision and can be
//                  quickly converted to indices into attribute offsets.  Positive
//                  values represent a new seed for hashing an index into attribute
//                  offsets.  Zero indicates not found.
// Attribute Offsets - Array of 32-bit unsigned values representing offsets into
//                     attribute data.  Attribute offsets can be iterated to do a
//                     full survey of resources in the image.  Offset of zero
//                     indicates no attributes.
// Attribute Data - Bytes representing compact attribute data for locations. (See
//                  comments in ImageLocation.)
// Strings - Collection of zero terminated UTF-8 strings used by the index and
//           image meta data.  Each string is accessed by offset.  Each string is
//           unique.  Offset zero is reserved for the empty string.
//
// Note that the memory mapped index assumes 32 bit alignment of each component
// in the index.
//
// Endianness of an image.
// An image booted by hotspot is always in native endian.  However, it is possible
// to read (by the JDK) in alternate endian format.
Primarily, this is during145// cross platform scenarios. Ex, where javac needs to read an embedded image146// to access classes for crossing compilation.147//148149class ImageFileReader; // forward declaration150151// Manage image file string table.152class ImageStrings {153private:154u1* _data; // Data bytes for strings.155u4 _size; // Number of bytes in the string table.156public:157enum {158// Not found result from find routine.159NOT_FOUND = -1,160// Prime used to generate hash for Perfect Hashing.161HASH_MULTIPLIER = 0x01000193162};163164ImageStrings(u1* data, u4 size) : _data(data), _size(size) {}165166// Return the UTF-8 string beginning at offset.167inline const char* get(u4 offset) const {168assert(offset < _size && "offset exceeds string table size");169return (const char*)(_data + offset);170}171172// Compute the Perfect Hashing hash code for the supplied UTF-8 string.173inline static u4 hash_code(const char* string) {174return hash_code(string, HASH_MULTIPLIER);175}176177// Compute the Perfect Hashing hash code for the supplied string, starting at seed.178static s4 hash_code(const char* string, s4 seed);179180// Match up a string in a perfect hash table. Result still needs validation181// for precise match.182static s4 find(Endian* endian, const char* name, s4* redirect, u4 length);183184// Test to see if UTF-8 string begins with the start UTF-8 string. If so,185// return non-NULL address of remaining portion of string. Otherwise, return186// NULL. Used to test sections of a path without copying from image string187// table.188static const char* starts_with(const char* string, const char* start);189190// Test to see if UTF-8 string begins with start char. If so, return non-NULL191// address of remaining portion of string. Otherwise, return NULL. Used192// to test a character of a path without copying.193inline static const char* starts_with(const char* string, const char ch) {194return *string == ch ? 
string + 1 : NULL;195}196};197198// Manage image file location attribute data. Within an image, a location's199// attributes are compressed into a stream of bytes. An attribute stream is200// composed of individual attribute sequences. Each attribute sequence begins with201// a header byte containing the attribute 'kind' (upper 5 bits of header) and the202// 'length' less 1 (lower 3 bits of header) of bytes that follow containing the203// attribute value. Attribute values present as most significant byte first.204//205// Ex. Container offset (ATTRIBUTE_OFFSET) 0x33562 would be represented as 0x22206// (kind = 4, length = 3), 0x03, 0x35, 0x62.207//208// An attribute stream is terminated with a header kind of ATTRIBUTE_END (header209// byte of zero.)210//211// ImageLocation inflates the stream into individual values stored in the long212// array _attributes. This allows an attribute value can be quickly accessed by213// direct indexing. Unspecified values default to zero.214//215// Notes:216// - Even though ATTRIBUTE_END is used to mark the end of the attribute stream,217// streams will contain zero byte values to represent lesser significant bits.218// Thus, detecting a zero byte is not sufficient to detect the end of an attribute219// stream.220// - ATTRIBUTE_OFFSET represents the number of bytes from the beginning of the region221// storing the resources. Thus, in an image this represents the number of bytes222// after the index.223// - Currently, compressed resources are represented by having a non-zero224// ATTRIBUTE_COMPRESSED value. This represents the number of bytes stored in the225// image, and the value of ATTRIBUTE_UNCOMPRESSED represents number of bytes of the226// inflated resource in memory. If the ATTRIBUTE_COMPRESSED is zero then the value227// of ATTRIBUTE_UNCOMPRESSED represents both the number of bytes in the image and228// in memory. 
In the future, additional compression techniques will be used and229// represented differently.230// - Package strings include trailing slash and extensions include prefix period.231//232class ImageLocation {233public:234enum {235ATTRIBUTE_END, // End of attribute stream marker236ATTRIBUTE_MODULE, // String table offset of module name237ATTRIBUTE_PARENT, // String table offset of resource path parent238ATTRIBUTE_BASE, // String table offset of resource path base239ATTRIBUTE_EXTENSION, // String table offset of resource path extension240ATTRIBUTE_OFFSET, // Container byte offset of resource241ATTRIBUTE_COMPRESSED, // In image byte size of the compressed resource242ATTRIBUTE_UNCOMPRESSED, // In memory byte size of the uncompressed resource243ATTRIBUTE_COUNT // Number of attribute kinds244};245246private:247// Values of inflated attributes.248u8 _attributes[ATTRIBUTE_COUNT];249250// Return the attribute value number of bytes.251inline static u1 attribute_length(u1 data) {252return (data & 0x7) + 1;253}254255// Return the attribute kind.256inline static u1 attribute_kind(u1 data) {257u1 kind = data >> 3;258assert(kind < ATTRIBUTE_COUNT && "invalid attribute kind");259return kind;260}261262// Return the attribute length.263inline static u8 attribute_value(u1* data, u1 n) {264assert(0 < n && n <= 8 && "invalid attribute value length");265u8 value = 0;266// Most significant bytes first.267for (u1 i = 0; i < n; i++) {268value <<= 8;269value |= data[i];270}271return value;272}273274public:275ImageLocation() {276clear_data();277}278279ImageLocation(u1* data) {280clear_data();281set_data(data);282}283284// Inflates the attribute stream into individual values stored in the long285// array _attributes. This allows an attribute value to be quickly accessed by286// direct indexing. 
Unspecified values default to zero.287void set_data(u1* data);288289// Zero all attribute values.290void clear_data();291292// Retrieve an attribute value from the inflated array.293inline u8 get_attribute(u1 kind) const {294assert(ATTRIBUTE_END < kind && kind < ATTRIBUTE_COUNT && "invalid attribute kind");295return _attributes[kind];296}297298// Retrieve an attribute string value from the inflated array.299inline const char* get_attribute(u4 kind, const ImageStrings& strings) const {300return strings.get((u4)get_attribute(kind));301}302};303304//305// Manage the image module meta data.306class ImageModuleData {307const ImageFileReader* _image_file; // Source image file308Endian* _endian; // Endian handler309310public:311ImageModuleData(const ImageFileReader* image_file);312~ImageModuleData();313314// Return the module in which a package resides. Returns NULL if not found.315const char* package_to_module(const char* package_name);316};317318// Image file header, starting at offset 0.319class ImageHeader {320private:321u4 _magic; // Image file marker322u4 _version; // Image file major version number323u4 _flags; // Image file flags324u4 _resource_count; // Number of resources in file325u4 _table_length; // Number of slots in index tables326u4 _locations_size; // Number of bytes in attribute table327u4 _strings_size; // Number of bytes in string table328329public:330u4 magic() const { return _magic; }331u4 magic(Endian* endian) const { return endian->get(_magic); }332void set_magic(Endian* endian, u4 magic) { return endian->set(_magic, magic); }333334u4 major_version(Endian* endian) const { return endian->get(_version) >> 16; }335u4 minor_version(Endian* endian) const { return endian->get(_version) & 0xFFFF; }336void set_version(Endian* endian, u4 major_version, u4 minor_version) {337return endian->set(_version, major_version << 16 | minor_version);338}339340u4 flags(Endian* endian) const { return endian->get(_flags); }341void set_flags(Endian* endian, u4 value) { 
return endian->set(_flags, value); }342343u4 resource_count(Endian* endian) const { return endian->get(_resource_count); }344void set_resource_count(Endian* endian, u4 count) { return endian->set(_resource_count, count); }345346u4 table_length(Endian* endian) const { return endian->get(_table_length); }347void set_table_length(Endian* endian, u4 count) { return endian->set(_table_length, count); }348349u4 locations_size(Endian* endian) const { return endian->get(_locations_size); }350void set_locations_size(Endian* endian, u4 size) { return endian->set(_locations_size, size); }351352u4 strings_size(Endian* endian) const { return endian->get(_strings_size); }353void set_strings_size(Endian* endian, u4 size) { return endian->set(_strings_size, size); }354};355356// Max path length limit independent of platform. Windows max path is 1024,357// other platforms use 4096. The JCK fails several tests when 1024 is used.358#define IMAGE_MAX_PATH 4096359360class ImageFileReader;361362// Manage a table of open image files. 
This table allows multiple access points363// to share an open image.364class ImageFileReaderTable {365private:366const static u4 _growth = 8; // Growth rate of the table367u4 _count; // Number of entries in the table368u4 _max; // Maximum number of entries allocated369ImageFileReader** _table; // Growable array of entries370371public:372ImageFileReaderTable();373// ~ImageFileReaderTable()374// Bug 8166727375//376// WARNING: Should never close jimage files.377// Threads may still be running during shutdown.378//379380// Return the number of entries.381inline u4 count() { return _count; }382383// Return the ith entry from the table.384inline ImageFileReader* get(u4 i) { return _table[i]; }385386// Add a new image entry to the table.387void add(ImageFileReader* image);388389// Remove an image entry from the table.390void remove(ImageFileReader* image);391392// Determine if image entry is in table.393bool contains(ImageFileReader* image);394};395396// Manage the image file.397// ImageFileReader manages the content of an image file.398// Initially, the header of the image file is read for validation. If valid,399// values in the header are used calculate the size of the image index. The400// index is then memory mapped to allow load on demand and sharing. The401// -XX:+MemoryMapImage flag determines if the entire file is loaded (server use.)402// An image can be used by Hotspot and multiple reference points in the JDK, thus403// it is desirable to share a reader. To accomodate sharing, a share table is404// defined (see ImageFileReaderTable in imageFile.cpp) To track the number of405// uses, ImageFileReader keeps a use count (_use). Use is incremented when406// 'opened' by reference point and decremented when 'closed'. Use of zero407// leads the ImageFileReader to be actually closed and discarded.408class ImageFileReader {409friend class ImageFileReaderTable;410private:411// Manage a number of image files such that an image can be shared across412// multiple uses (ex. 
loader.)413static ImageFileReaderTable _reader_table;414415// true if image should be fully memory mapped.416static bool memory_map_image;417418char* _name; // Name of image419s4 _use; // Use count420int _fd; // File descriptor421Endian* _endian; // Endian handler422u8 _file_size; // File size in bytes423ImageHeader _header; // Image header424size_t _index_size; // Total size of index425u1* _index_data; // Raw index data426s4* _redirect_table; // Perfect hash redirect table427u4* _offsets_table; // Location offset table428u1* _location_bytes; // Location attributes429u1* _string_bytes; // String table430ImageModuleData *_module_data; // The ImageModuleData for this image431432ImageFileReader(const char* name, bool big_endian);433~ImageFileReader();434435// Compute number of bytes in image file index.436inline size_t index_size() {437return sizeof(ImageHeader) +438table_length() * sizeof(u4) * 2 + locations_size() + strings_size();439}440441public:442enum {443// Image file marker.444IMAGE_MAGIC = 0xCAFEDADA,445// Endian inverted Image file marker.446IMAGE_MAGIC_INVERT = 0xDADAFECA,447// Image file major version number.448MAJOR_VERSION = 1,449// Image file minor version number.450MINOR_VERSION = 0451};452453// Locate an image if file already open.454static ImageFileReader* find_image(const char* name);455456// Open an image file, reuse structure if file already open.457static ImageFileReader* open(const char* name, bool big_endian = Endian::is_big_endian());458459// Close an image file if the file is not in use elsewhere.460static void close(ImageFileReader *reader);461462// Return an id for the specifed ImageFileReader.463static u8 reader_to_ID(ImageFileReader *reader);464465// Validate the image id.466static bool id_check(u8 id);467468// Return an id for the specifed ImageFileReader.469static ImageFileReader* id_to_reader(u8 id);470471// Open image file for read access.472bool open();473474// Close image file.475void close();476477// Read directly from the 
file.478bool read_at(u1* data, u8 size, u8 offset) const;479480inline Endian* endian() const { return _endian; }481482// Retrieve name of image file.483inline const char* name() const {484return _name;485}486487// Retrieve size of image file.488inline u8 file_size() const {489return _file_size;490}491492// Retrieve the size of the mapped image.493inline u8 map_size() const {494return (u8)(memory_map_image ? _file_size : _index_size);495}496497// Return first address of index data.498inline u1* get_index_address() const {499return _index_data;500}501502// Return first address of resource data.503inline u1* get_data_address() const {504return _index_data + _index_size;505}506507// Get the size of the index data.508size_t get_index_size() const {509return _index_size;510}511512inline u4 table_length() const {513return _header.table_length(_endian);514}515516inline u4 locations_size() const {517return _header.locations_size(_endian);518}519520inline u4 strings_size()const {521return _header.strings_size(_endian);522}523524inline u4* offsets_table() const {525return _offsets_table;526}527528// Increment use count.529inline void inc_use() {530_use++;531}532533// Decrement use count.534inline bool dec_use() {535return --_use == 0;536}537538// Return a string table accessor.539inline const ImageStrings get_strings() const {540return ImageStrings(_string_bytes, _header.strings_size(_endian));541}542543// Return location attribute stream at offset.544inline u1* get_location_offset_data(u4 offset) const {545assert((u4)offset < _header.locations_size(_endian) &&546"offset exceeds location attributes size");547return offset != 0 ? 
_location_bytes + offset : NULL;548}549550// Return location attribute stream for location i.551inline u1* get_location_data(u4 index) const {552return get_location_offset_data(get_location_offset(index));553}554555// Return the location offset for index.556inline u4 get_location_offset(u4 index) const {557assert((u4)index < _header.table_length(_endian) &&558"index exceeds location count");559return _endian->get(_offsets_table[index]);560}561562// Find the location attributes associated with the path. Returns true if563// the location is found, false otherwise.564bool find_location(const char* path, ImageLocation& location) const;565566// Find the location index and size associated with the path.567// Returns the location index and size if the location is found,568// ImageFileReader::NOT_FOUND otherwise.569u4 find_location_index(const char* path, u8 *size) const;570571// Verify that a found location matches the supplied path.572bool verify_location(ImageLocation& location, const char* path) const;573574// Return the resource for the supplied location index.575void get_resource(u4 index, u1* uncompressed_data) const;576577// Return the resource for the supplied path.578void get_resource(ImageLocation& location, u1* uncompressed_data) const;579580// Return the ImageModuleData for this image581ImageModuleData * get_image_module_data();582583};584#endif // LIBJIMAGE_IMAGEFILE_HPP585586587