/*1* Copyright (c) 2007 Mans Rullgard2*3* This file is part of FFmpeg.4*5* FFmpeg is free software; you can redistribute it and/or6* modify it under the terms of the GNU Lesser General Public7* License as published by the Free Software Foundation; either8* version 2.1 of the License, or (at your option) any later version.9*10* FFmpeg is distributed in the hope that it will be useful,11* but WITHOUT ANY WARRANTY; without even the implied warranty of12* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU13* Lesser General Public License for more details.14*15* You should have received a copy of the GNU Lesser General Public16* License along with FFmpeg; if not, write to the Free Software17* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA18*/1920#ifndef AVUTIL_AVSTRING_H21#define AVUTIL_AVSTRING_H2223#include <stddef.h>24#include <stdint.h>25#include "attributes.h"2627/**28* @addtogroup lavu_string29* @{30*/3132/**33* Return non-zero if pfx is a prefix of str. If it is, *ptr is set to34* the address of the first character in str after the prefix.35*36* @param str input string37* @param pfx prefix to test38* @param ptr updated if the prefix is matched inside str39* @return non-zero if the prefix matches, zero otherwise40*/41int av_strstart(const char *str, const char *pfx, const char **ptr);4243/**44* Return non-zero if pfx is a prefix of str independent of case. If45* it is, *ptr is set to the address of the first character in str46* after the prefix.47*48* @param str input string49* @param pfx prefix to test50* @param ptr updated if the prefix is matched inside str51* @return non-zero if the prefix matches, zero otherwise52*/53int av_stristart(const char *str, const char *pfx, const char **ptr);5455/**56* Locate the first case-independent occurrence in the string haystack57* of the string needle. A zero-length string needle is considered to58* match at the start of haystack.59*60* This function is a case-insensitive version of the standard strstr().61*62* @param haystack string to search in63* @param needle string to search for64* @return pointer to the located match within haystack65* or a null pointer if no match66*/67char *av_stristr(const char *haystack, const char *needle);6869/**70* Locate the first occurrence of the string needle in the string haystack71* where not more than hay_length characters are searched. A zero-length72* string needle is considered to match at the start of haystack.73*74* This function is a length-limited version of the standard strstr().75*76* @param haystack string to search in77* @param needle string to search for78* @param hay_length length of string to search in79* @return pointer to the located match within haystack80* or a null pointer if no match81*/82char *av_strnstr(const char *haystack, const char *needle, size_t hay_length);8384/**85* Copy the string src to dst, but no more than size - 1 bytes, and86* null-terminate dst.87*88* This function is the same as BSD strlcpy().89*90* @param dst destination buffer91* @param src source string92* @param size size of destination buffer93* @return the length of src94*95* @warning since the return value is the length of src, src absolutely96* _must_ be a properly 0-terminated string, otherwise this will read beyond97* the end of the buffer and possibly crash.98*/99size_t av_strlcpy(char *dst, const char *src, size_t size);100101/**102* Append the string src to the string dst, but to a total length of103* no more than size - 1 bytes, and null-terminate dst.104*105* This function is similar to BSD strlcat(), but differs when106* size <= strlen(dst).107*108* @param dst destination buffer109* @param src source string110* @param size size of destination buffer111* @return the total length of src and dst112*113* @warning since the return value use the length of src and dst, these114* absolutely _must_ be a properly 0-terminated strings, otherwise this115* will read beyond the end of the buffer and possibly crash.116*/117size_t av_strlcat(char *dst, const char *src, size_t size);118119/**120* Append output to a string, according to a format. Never write out of121* the destination buffer, and always put a terminating 0 within122* the buffer.123* @param dst destination buffer (string to which the output is124* appended)125* @param size total size of the destination buffer126* @param fmt printf-compatible format string, specifying how the127* following parameters are used128* @return the length of the string that would have been generated129* if enough space had been available130*/131size_t av_strlcatf(char *dst, size_t size, const char *fmt, ...) av_printf_format(3, 4);132133/**134* Get the count of continuous non zero chars starting from the beginning.135*136* @param len maximum number of characters to check in the string, that137* is the maximum value which is returned by the function138*/139static inline size_t av_strnlen(const char *s, size_t len)140{141size_t i;142for (i = 0; i < len && s[i]; i++)143;144return i;145}146147/**148* Print arguments following specified format into a large enough auto149* allocated buffer. It is similar to GNU asprintf().150* @param fmt printf-compatible format string, specifying how the151* following parameters are used.152* @return the allocated string153* @note You have to free the string yourself with av_free().154*/155char *av_asprintf(const char *fmt, ...) av_printf_format(1, 2);156157/**158* Convert a number to a av_malloced string.159*/160char *av_d2str(double d);161162/**163* Unescape the given string until a non escaped terminating char,164* and return the token corresponding to the unescaped string.165*166* The normal \ and ' escaping is supported. Leading and trailing167* whitespaces are removed, unless they are escaped with '\' or are168* enclosed between ''.169*170* @param buf the buffer to parse, buf will be updated to point to the171* terminating char172* @param term a 0-terminated list of terminating chars173* @return the malloced unescaped string, which must be av_freed by174* the user, NULL in case of allocation failure175*/176char *av_get_token(const char **buf, const char *term);177178/**179* Split the string into several tokens which can be accessed by180* successive calls to av_strtok().181*182* A token is defined as a sequence of characters not belonging to the183* set specified in delim.184*185* On the first call to av_strtok(), s should point to the string to186* parse, and the value of saveptr is ignored. In subsequent calls, s187* should be NULL, and saveptr should be unchanged since the previous188* call.189*190* This function is similar to strtok_r() defined in POSIX.1.191*192* @param s the string to parse, may be NULL193* @param delim 0-terminated list of token delimiters, must be non-NULL194* @param saveptr user-provided pointer which points to stored195* information necessary for av_strtok() to continue scanning the same196* string. saveptr is updated to point to the next character after the197* first delimiter found, or to NULL if the string was terminated198* @return the found token, or NULL when no token is found199*/200char *av_strtok(char *s, const char *delim, char **saveptr);201202/**203* Locale-independent conversion of ASCII isdigit.204*/205static inline av_const int av_isdigit(int c)206{207return c >= '0' && c <= '9';208}209210/**211* Locale-independent conversion of ASCII isgraph.212*/213static inline av_const int av_isgraph(int c)214{215return c > 32 && c < 127;216}217218/**219* Locale-independent conversion of ASCII isspace.220*/221static inline av_const int av_isspace(int c)222{223return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' ||224c == '\v';225}226227/**228* Locale-independent conversion of ASCII characters to uppercase.229*/230static inline av_const int av_toupper(int c)231{232if (c >= 'a' && c <= 'z')233c ^= 0x20;234return c;235}236237/**238* Locale-independent conversion of ASCII characters to lowercase.239*/240static inline av_const int av_tolower(int c)241{242if (c >= 'A' && c <= 'Z')243c ^= 0x20;244return c;245}246247/**248* Locale-independent conversion of ASCII isxdigit.249*/250static inline av_const int av_isxdigit(int c)251{252c = av_tolower(c);253return av_isdigit(c) || (c >= 'a' && c <= 'f');254}255256/**257* Locale-independent case-insensitive compare.258* @note This means only ASCII-range characters are case-insensitive259*/260int av_strcasecmp(const char *a, const char *b);261262/**263* Locale-independent case-insensitive compare.264* @note This means only ASCII-range characters are case-insensitive265*/266int av_strncasecmp(const char *a, const char *b, size_t n);267268269/**270* Thread safe basename.271* @param path the path, on DOS both \ and / are considered separators.272* @return pointer to the basename substring.273*/274const char *av_basename(const char *path);275276/**277* Thread safe dirname.278* @param path the path, on DOS both \ and / are considered separators.279* @return the path with the separator replaced by the string terminator or ".".280* @note the function may change the input string.281*/282const char *av_dirname(char *path);283284/**285* Match instances of a name in a comma-separated list of names.286* List entries are checked from the start to the end of the names list,287* the first match ends further processing. If an entry prefixed with '-'288* matches, then 0 is returned. The "ALL" list entry is considered to289* match all names.290*291* @param name Name to look for.292* @param names List of names.293* @return 1 on match, 0 otherwise.294*/295int av_match_name(const char *name, const char *names);296297/**298* Append path component to the existing path.299* Path separator '/' is placed between when needed.300* Resulting string have to be freed with av_free().301* @param path base path302* @param component component to be appended303* @return new path or NULL on error.304*/305char *av_append_path_component(const char *path, const char *component);306307enum AVEscapeMode {308AV_ESCAPE_MODE_AUTO, ///< Use auto-selected escaping mode.309AV_ESCAPE_MODE_BACKSLASH, ///< Use backslash escaping.310AV_ESCAPE_MODE_QUOTE, ///< Use single-quote escaping.311};312313/**314* Consider spaces special and escape them even in the middle of the315* string.316*317* This is equivalent to adding the whitespace characters to the special318* characters lists, except it is guaranteed to use the exact same list319* of whitespace characters as the rest of libavutil.320*/321#define AV_ESCAPE_FLAG_WHITESPACE (1 << 0)322323/**324* Escape only specified special characters.325* Without this flag, escape also any characters that may be considered326* special by av_get_token(), such as the single quote.327*/328#define AV_ESCAPE_FLAG_STRICT (1 << 1)329330/**331* Escape string in src, and put the escaped string in an allocated332* string in *dst, which must be freed with av_free().333*334* @param dst pointer where an allocated string is put335* @param src string to escape, must be non-NULL336* @param special_chars string containing the special characters which337* need to be escaped, can be NULL338* @param mode escape mode to employ, see AV_ESCAPE_MODE_* macros.339* Any unknown value for mode will be considered equivalent to340* AV_ESCAPE_MODE_BACKSLASH, but this behaviour can change without341* notice.342* @param flags flags which control how to escape, see AV_ESCAPE_FLAG_ macros343* @return the length of the allocated string, or a negative error code in case of error344* @see av_bprint_escape()345*/346av_warn_unused_result347int av_escape(char **dst, const char *src, const char *special_chars,348enum AVEscapeMode mode, int flags);349350#define AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES 1 ///< accept codepoints over 0x10FFFF351#define AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS 2 ///< accept non-characters - 0xFFFE and 0xFFFF352#define AV_UTF8_FLAG_ACCEPT_SURROGATES 4 ///< accept UTF-16 surrogates codes353#define AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES 8 ///< exclude control codes not accepted by XML354355#define AV_UTF8_FLAG_ACCEPT_ALL \356AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES|AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS|AV_UTF8_FLAG_ACCEPT_SURROGATES357358/**359* Read and decode a single UTF-8 code point (character) from the360* buffer in *buf, and update *buf to point to the next byte to361* decode.362*363* In case of an invalid byte sequence, the pointer will be updated to364* the next byte after the invalid sequence and the function will365* return an error code.366*367* Depending on the specified flags, the function will also fail in368* case the decoded code point does not belong to a valid range.369*370* @note For speed-relevant code a carefully implemented use of371* GET_UTF8() may be preferred.372*373* @param codep pointer used to return the parsed code in case of success.374* The value in *codep is set even in case the range check fails.375* @param bufp pointer to the address the first byte of the sequence376* to decode, updated by the function to point to the377* byte next after the decoded sequence378* @param buf_end pointer to the end of the buffer, points to the next379* byte past the last in the buffer. This is used to380* avoid buffer overreads (in case of an unfinished381* UTF-8 sequence towards the end of the buffer).382* @param flags a collection of AV_UTF8_FLAG_* flags383* @return >= 0 in case a sequence was successfully read, a negative384* value in case of invalid sequence385*/386av_warn_unused_result387int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end,388unsigned int flags);389390/**391* Check if a name is in a list.392* @returns 0 if not found, or the 1 based index where it has been found in the393* list.394*/395int av_match_list(const char *name, const char *list, char separator);396397/**398* @}399*/400401#endif /* AVUTIL_AVSTRING_H */402403404