/*
 * Copyright 2008-2013 Various Authors
 * Copyright 2004-2005 Timo Hirvonen
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef CMUS_UCHAR_H
#define CMUS_UCHAR_H

#include <stddef.h> /* size_t */
#include <stdbool.h>

typedef unsigned int uchar;

extern const char hex_tab[16];

/*
 * Invalid bytes are or'ed with this
 * for example 0xff -> 0x100000ff
 */
#define U_INVALID_MASK 0x10000000U

/*
 * @uch  potential unicode character
 *
 * Returns 1 if @uch is valid unicode character, 0 otherwise
 */
static inline int u_is_unicode(uchar uch)
{
	return uch <= 0x0010ffffU;
}

/*
 * Returns size of @uch in bytes
 *
 * Values above 0x10ffff (for example a byte or'ed with U_INVALID_MASK)
 * are reported as 1 byte.
 */
static inline int u_char_size(uchar uch)
{
	if (uch <= 0x0000007fU) {
		return 1;
	} else if (uch <= 0x000007ffU) {
		return 2;
	} else if (uch <= 0x0000ffffU) {
		return 3;
	} else if (uch <= 0x0010ffffU) {
		return 4;
	} else {
		return 1;
	}
}

/*
 * Returns width of @uch (normally 1 or 2, 4 for invalid chars (<xx>))
 */
int u_char_width(uchar uch);

/*
 * @str  any null-terminated string
 *
 * Returns 1 if @str is valid UTF-8 string, 0 otherwise.
 */
int u_is_valid(const char *str);

/*
 * @str  valid, null-terminated UTF-8 string
 *
 * Returns position of next unicode character in @str.
 */
extern const char * const utf8_skip;
static inline char *u_next_char(const char *str)
{
	/* utf8_skip is indexed by the first byte of the current character */
	return (char *) (str + utf8_skip[*((const unsigned char *) str)]);
}

/*
 * @str  valid, null-terminated UTF-8 string
 *
 * Returns length of @str in UTF-8 characters.
 */
size_t u_strlen(const char *str);

/*
 * @str  null-terminated UTF-8 string
 *
 * Returns length of @str in UTF-8 characters.
 * Invalid chars are counted as single characters.
 */
size_t u_strlen_safe(const char *str);

/*
 * @str  null-terminated UTF-8 string
 *
 * Returns width of @str.
 */
int u_str_width(const char *str);

/*
 * @uch  unicode character
 *
 * Returns size of @uch if it were printed.
 */
int u_print_size(uchar uch);

/*
 * @str  null-terminated UTF-8 string
 *
 * Returns size of @str if it were printed.
 */
int u_str_print_size(const char *str);

/*
 * @str  null-terminated UTF-8 string
 * @len  number of characters to measure
 *
 * Returns width of the first @len characters in @str.
 */
int u_str_nwidth(const char *str, int len);

/*
 * @str  null-terminated UTF-8 string
 * @uch  unicode character
 *
 * Returns a pointer to the first occurrence of @uch in the @str.
 */
char *u_strchr(const char *str, uchar uch);

/*
 * NOTE(review): undocumented in this header. Judging by the name it
 * presumably moves *@idx back to the start of the previous character
 * in @str -- confirm against the implementation.
 */
void u_prev_char_pos(const char *str, int *idx);

/*
 * @str  null-terminated UTF-8 string
 * @idx  pointer to byte index in @str (not UTF-8 character index!) or NULL
 *
 * Returns unicode character at @str[*@idx] or @str[0] if @idx is NULL.
 * Stores byte index of the next char back to @idx if set.
 */
uchar u_get_char(const char *str, int *idx);

/*
 * @str  destination buffer
 * @idx  pointer to byte index in @str (not UTF-8 character index!)
 * @uch  unicode character
 */
void u_set_char_raw(char *str, int *idx, uchar uch);
void u_set_char(char *str, size_t *idx, uchar uch);

/*
 * @dst    destination buffer
 * @src    null-terminated UTF-8 string
 * @width  how much to copy (at most)
 *
 * Copies at most @width columns, less if null byte was hit.
 * Null byte is _never_ copied.
 * Remaining width is stored to @width.
 *
 * Returns number of _bytes_ copied.
 */
size_t u_copy_chars(char *dst, const char *src, int *width);

/*
 * @dst  destination buffer
 * @src  null-terminated UTF-8 string
 * @len  how many bytes are available in @dst
 *
 * Copies at most @len bytes, less if null byte was hit. Replaces every
 * non-ascii character by '?'. Null byte is _never_ copied.
 *
 * Returns number of bytes written to @dst.
 */
int u_to_ascii(char *dst, const char *src, int len);

/*
 * @dst  destination buffer
 * @src  null-terminated string
 *
 * Copies src into dst, changing all invalid utf8 bytes into <xx>,
 * where xx is the value of the byte in hex.
 *
 * Expects dst to be large enough to fit src + the conversions.
 */
void u_to_utf8(char *dst, const char *src);

/*
 * @str       null-terminated UTF-8 string, must be long enough
 * @width     how much to skip
 * @overskip  skip a final wide character even when it overshoots @width
 *
 * Skips @width columns in a UTF-8 string.
 * Underskip (positive) or overskip (negative) is stored to @width.
 *
 * Returns number of _bytes_ skipped.
 */
int u_skip_chars(const char *str, int *width, bool overskip);

/*
 * @str  valid null-terminated UTF-8 string
 *
 * Converts a string into a form that is independent of case.
 *
 * Returns a newly allocated string
 */
char *u_casefold(const char *str);

/*
 * @str1  valid, normalized, null-terminated UTF-8 string
 * @str2  valid, normalized, null-terminated UTF-8 string
 *
 * Returns 1 if @str1 is equal to @str2, ignoring the case of the characters.
 */
int u_strcase_equal(const char *str1, const char *str2);

/*
 * @str1  valid, normalized, null-terminated UTF-8 string
 * @str2  valid, normalized, null-terminated UTF-8 string
 * @len   number of characters to consider for comparison
 *
 * Returns 1 if the first @len characters of @str1 and @str2 are equal,
 * ignoring the case of the characters (0 otherwise).
 */
int u_strncase_equal(const char *str1, const char *str2, size_t len);

/*
 * @str1  valid, normalized, null-terminated UTF-8 string
 * @str2  valid, normalized, null-terminated UTF-8 string
 * @len   number of characters to consider for comparison
 *
 * Like u_strncase_equal(), but uses only base characters for comparison
 * (e.g. "Trentemöller" matches "Trentemøller")
 */
int u_strncase_equal_base(const char *str1, const char *str2, size_t len);

/*
 * @haystack  valid, normalized, null-terminated UTF-8 string
 * @needle    valid, normalized, null-terminated UTF-8 string
 *
 * Returns position of @needle in @haystack (case insensitive comparison).
 */
char *u_strcasestr(const char *haystack, const char *needle);

/*
 * @haystack  valid, normalized, null-terminated UTF-8 string
 * @needle    valid, normalized, null-terminated UTF-8 string
 *
 * Like u_strcasestr(), but uses only base characters for comparison
 * (e.g. "Trentemöller" matches "Trentemøller")
 */
char *u_strcasestr_base(const char *haystack, const char *needle);

/*
 * @haystack  null-terminated string in local encoding
 * @needle    valid, normalized, null-terminated UTF-8 string
 *
 * Like u_strcasestr_base(), but converts @haystack to UTF-8 if necessary.
 */
char *u_strcasestr_filename(const char *haystack, const char *needle);

#endif