// icucommon.h is autogenerated and merged from the ICU header files.
// Code unused or not supported in the Windows ICU SDK has been removed.
#if (NTDDI_VERSION >= NTDDI_WIN10_RS2)
#if !defined(SUPPRESS_LEGACY_ICU_HEADER_WARNINGS)
// Build-time notice for users of the legacy wchar_t-based ICU headers.
// Background on the ICU breaking change to use char16_t by default:
// https://go.microsoft.com/fwlink/?linkid=851033
// Defining SUPPRESS_LEGACY_ICU_HEADER_WARNINGS before this header is included skips the notice.
#pragma message("The wchar_t versions of the ICU headers are no longer being updated, please use the char16_t based header icu.h instead; see https://go.microsoft.com/fwlink/?linkid=851033 for more info. To suppress this warning, define the macro SUPPRESS_LEGACY_ICU_HEADER_WARNINGS before including this header.")
#endif /* !defined(SUPPRESS_LEGACY_ICU_HEADER_WARNINGS) */
// Default Windows SDK ICU configuration options.
// Alternate selections are not supported in the Windows SDK.
// These are unconditional #defines placed ahead of the merged uconfig.h
// section, so uconfig.h's "#ifndef X / #define X <default>" fallbacks for the
// same names are skipped (for example U_DEFAULT_SHOW_DRAFT stays 0 here
// instead of taking the uconfig.h default of 1).
// Because they are unconditional, pre-defining any of these names with a
// different value before including this header is a macro redefinition.
// Disables renaming (see the \def U_DISABLE_RENAMING note in the uconfig.h section).
#define U_DISABLE_RENAMING 1
// NOTE(review): presumably hides the C++ API so only C declarations are exposed -- confirm.
#define U_SHOW_CPLUSPLUS_API 0
// Draft APIs are not offered to users by default (see \def U_DEFAULT_SHOW_DRAFT).
#define U_DEFAULT_SHOW_DRAFT 0
// NOTE(review): the four U_HIDE_* switches presumably remove declarations tagged
// @draft / @deprecated / @obsolete / @internal; the guarded declarations are not
// visible in this part of the header -- confirm.
#define U_HIDE_DRAFT_API 1
#define U_HIDE_DEPRECATED_API 1
#define U_HIDE_OBSOLETE_API 1
#define U_HIDE_INTERNAL_API 1
// NOTE(review): presumably turns off std::string support -- confirm.
#define U_HAVE_STD_STRING 0
// Non-zero: utf.h, utf8.h, utf16.h and utf_old.h are not pulled in by utypes.h
// and must be included explicitly (see \def U_NO_DEFAULT_INCLUDE_UTF_HEADERS).
#define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 1
// appendable.h
// No supported content
// brkiter.h
// No supported content
// bytestream.h
// No supported content
// bytestrie.h
// No supported content
// bytestriebuilder.h
// No supported content
// chariter.h
// No supported content
// dbbi.h
// No supported content
// docmain.h
// No supported content
// dtintrv.h
// No supported content
// enumset.h
// No supported content
// errorcode.h
// No supported content
// filteredbrk.h
// No supported content
// icuplug.h
// No supported content
// idna.h
// No supported content
// listformatter.h
// No supported content
// localpointer.h
// No supported content
// locdspnm.h
// No supported content
// locid.h
// No supported content
// normalizer2.h
// No supported content
// normlzr.h
// No supported content
// parsepos.h
// No supported content
// rbbi.h
// No supported content
// rep.h
// No supported content
// resbund.h
// No supported content
// schriter.h
// No supported content
// simpleformatter.h
// No supported content
// std_string.h
// No supported content
// strenum.h
// No supported content
// stringpiece.h
// No supported content
// symtable.h
// No supported content
// ucharstrie.h
// No supported content
// ucharstriebuilder.h
// No supported content
// uchriter.h
// No supported content
// uconfig.h
/*
**********************************************************************
* Copyright (C) 2002-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: uconfig.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002sep19
* created by: Markus W. Scherer
*/
#ifndef __UCONFIG_H__
#define __UCONFIG_H__
/*!
* \file
* \brief User-configurable settings
*
* Miscellaneous switches:
*
* A number of macros affect a variety of minor aspects of ICU.
* Most of them used to be defined elsewhere (e.g., in utypes.h or platform.h)
* and moved here to make them easier to find.
*
* Switches for excluding parts of ICU library code modules:
*
* Changing these macros allows building partial, smaller libraries for special purposes.
* By default, all modules are built.
* The switches are fairly coarse, controlling large modules.
* Basic services cannot be turned off.
*
* Building with any of these options does not guarantee that the
* ICU build process will completely work. It is recommended that
* the ICU libraries and data be built using the normal build.
* At that time you should remove the data used by those services.
* After building the ICU data library, you should rebuild the ICU
* libraries with these switches customized to your needs.
*
* @stable ICU 2.4
*/
/**
* \def UCONFIG_USE_LOCAL
* Optional local override hook: when this macro is defined, the header
* "uconfig_local.h" is included at this point, before any of the default
* settings for the uconfig variables are determined.
*
* @internal ICU 4.0
*/
#ifdef UCONFIG_USE_LOCAL
# include "uconfig_local.h"
#endif /* UCONFIG_USE_LOCAL */
/**
* \def U_DEBUG
* Selects whether debugging code is included (1) or not (0).
* A value defined before this point is kept unchanged. Otherwise the value
* follows _DEBUG, which is set automatically on Windows; most other compilers
* have no related predefined macro, so the result there is 0.
* @internal
*/
#if !defined(U_DEBUG)
# if defined(_DEBUG)
/*
* _DEBUG is defined by Visual Studio debug compilation.
* NDEBUG is deliberately *not* tested: it is an orthogonal macro
* which only disables assert().
*/
# define U_DEBUG 1
# else
# define U_DEBUG 0
# endif
#endif /* U_DEBUG */
/**
* \def UCLN_NO_AUTO_CLEANUP
* Determines whether to enable auto cleanup of libraries.
* Defaults to 1 unless a value was defined earlier.
* NOTE(review): going by the macro name, 1 presumably means auto cleanup is off -- confirm.
* @internal
*/
#if !defined(UCLN_NO_AUTO_CLEANUP)
# define UCLN_NO_AUTO_CLEANUP 1
#endif /* UCLN_NO_AUTO_CLEANUP */
/**
* \def U_DISABLE_RENAMING
* Determines whether to disable renaming or not.
* No default is supplied at this point: the conditional below is empty, so the
* macro keeps whatever value it already has (this header defines it to 1 in its
* Windows SDK configuration block) and stays undefined otherwise.
* @internal
*/
#if !defined(U_DISABLE_RENAMING)
/* nothing to define here */
#endif /* U_DISABLE_RENAMING */
/**
* \def U_NO_DEFAULT_INCLUDE_UTF_HEADERS
* Determines whether utypes.h includes utf.h, utf8.h, utf16.h and utf_old.h.
* utypes.h includes those headers if this macro is defined to 0.
* Otherwise, each of those headers must be included explicitly when using one of their macros.
* Defaults to 0 for backward compatibility, except inside ICU.
* @stable ICU 49
*/
/**
* \def U_OVERRIDE_CXX_ALLOCATION
* Selects whether operators new and delete are overridden.
* ICU is normally built (default value 1) so that all of its C++ classes,
* through their UMemory base, override new and delete to use ICU's internal,
* customizable, non-exception-throwing memory allocation functions.
*
* This matters most when the application and its libraries use multiple heaps.
* On Windows, for example, it allows the ICU DLL to be used by applications
* that statically link the C Runtime library.
*
* @stable ICU 2.2
*/
#if !defined(U_OVERRIDE_CXX_ALLOCATION)
# define U_OVERRIDE_CXX_ALLOCATION 1
#endif /* U_OVERRIDE_CXX_ALLOCATION */
/**
* \def U_ENABLE_TRACING
* Determines whether tracing is enabled.
* Defaults to 0 unless a value was defined earlier.
* @internal
*/
#if !defined(U_ENABLE_TRACING)
# define U_ENABLE_TRACING 0
#endif /* U_ENABLE_TRACING */
/**
* \def UCONFIG_ENABLE_PLUGINS
* Determines whether ICU plugins are enabled.
* Defaults to 0 unless a value was defined earlier.
* @internal
*/
#if !defined(UCONFIG_ENABLE_PLUGINS)
# define UCONFIG_ENABLE_PLUGINS 0
#endif /* UCONFIG_ENABLE_PLUGINS */
/**
* \def U_ENABLE_DYLOAD
* Whether Dynamic loading is enabled in ICU.
* Defaults to 1 unless a value was defined earlier.
* @internal
*/
#if !defined(U_ENABLE_DYLOAD)
# define U_ENABLE_DYLOAD 1
#endif /* U_ENABLE_DYLOAD */
/**
* \def U_CHECK_DYLOAD
* Whether Dynamic loading is tested as an OS capability.
* Defaults to 1 unless a value was defined earlier.
* @internal
*/
#if !defined(U_CHECK_DYLOAD)
# define U_CHECK_DYLOAD 1
#endif /* U_CHECK_DYLOAD */
/**
* \def U_DEFAULT_SHOW_DRAFT
* Are ICU users allowed to use the draft APIs by default?
* Falls back to 1 only when no value was defined earlier.
* @internal
*/
#if !defined(U_DEFAULT_SHOW_DRAFT)
# define U_DEFAULT_SHOW_DRAFT 1
#endif /* U_DEFAULT_SHOW_DRAFT */
/*===========================================================================*/
/* Custom icu entry point renaming */
/*===========================================================================*/
/**
* \def U_HAVE_LIB_SUFFIX
* 1 if a custom library suffix is set.
* A predefined value is used as-is. Otherwise the macro is set to 1 when
* U_LIB_SUFFIX_C_NAME is defined, and is left undefined when it is not.
* @internal
*/
#if !defined(U_HAVE_LIB_SUFFIX) && defined(U_LIB_SUFFIX_C_NAME)
# define U_HAVE_LIB_SUFFIX 1
#endif
/**
* \def U_LIB_SUFFIX_C_NAME_STRING
* Defines the library suffix as a string with C syntax.
* - A predefined value is used as-is.
* - Otherwise, when U_LIB_SUFFIX_C_NAME is defined, its expansion is turned
*   into a string literal.
* - Otherwise the suffix is the empty string "".
* @internal
*/
#ifdef U_LIB_SUFFIX_C_NAME_STRING
/* Use the predefined value. */
#elif defined(U_LIB_SUFFIX_C_NAME)
# define CONVERT_TO_STRING(s) #s
/*
* The # operator does not macro-expand its operand, so applying
* CONVERT_TO_STRING directly to U_LIB_SUFFIX_C_NAME would produce the literal
* "U_LIB_SUFFIX_C_NAME". Passing the name through one more function-like macro
* expands it to the actual suffix before it is stringized.
*/
# define U_LIB_SUFFIX_EXPAND_TO_STRING(s) CONVERT_TO_STRING(s)
# define U_LIB_SUFFIX_C_NAME_STRING U_LIB_SUFFIX_EXPAND_TO_STRING(U_LIB_SUFFIX_C_NAME)
#else
# define U_LIB_SUFFIX_C_NAME_STRING ""
#endif
/* common/i18n library switches --------------------------------------------- */
/**
* \def UCONFIG_ONLY_COLLATION
* When set to 1, turns off the modules that are not needed for collation.
*
* Legacy conversion is left on because ICU needs it to work on EBCDIC
* platforms (for the default converter).
* If you want "only collation" and do not build for EBCDIC,
* then you can define UCONFIG_NO_CONVERSION or UCONFIG_NO_LEGACY_CONVERSION to 1 as well.
*
* Defaults to 0. Combining it with UCONFIG_NO_COLLATION is rejected at compile time.
*
* @stable ICU 2.4
*/
#if !defined(UCONFIG_ONLY_COLLATION)
# define UCONFIG_ONLY_COLLATION 0
#endif
#if UCONFIG_ONLY_COLLATION != 0
/* "only collation" and "no collation" cannot both be requested */
# if UCONFIG_NO_COLLATION
# error Contradictory collation switches in uconfig.h.
# endif
/* common library: switched off */
# define UCONFIG_NO_BREAK_ITERATION 1
# define UCONFIG_NO_IDNA 1
/* i18n library: switched off */
# define UCONFIG_NO_FORMATTING 1
# define UCONFIG_NO_TRANSLITERATION 1
# define UCONFIG_NO_REGULAR_EXPRESSIONS 1
#endif /* UCONFIG_ONLY_COLLATION */
/* common library switches -------------------------------------------------- */
/**
* \def UCONFIG_NO_FILE_IO
* When set to 1, turns off all file access in the common library
* where file access is only used for data loading.
* ICU data must then be supplied either as a data DLL (or an equivalent way
* of linking to data residing in an executable, as in building a combined
* library with both the common library's code and the data), or via
* udata_setCommonData().
* Application data must be supplied via udata_setAppData() or through
* "open" functions that take pointers to data, for example ucol_openBinary().
*
* File access is not used at all in the i18n library.
*
* File access cannot be turned off for the icuio library or for the ICU
* test suites and ICU tools.
*
* Defaults to 0. Setting it together with U_TIMEZONE_FILES_DIR is rejected
* at compile time.
*
* @stable ICU 3.6
*/
#if !defined(UCONFIG_NO_FILE_IO)
# define UCONFIG_NO_FILE_IO 0
#endif
#if defined(U_TIMEZONE_FILES_DIR) && UCONFIG_NO_FILE_IO
# error Contradictory file io switches in uconfig.h.
#endif
/**
* \def UCONFIG_NO_CONVERSION
* When set to 1, turns off all converters.
* ICU will not completely build with this switch turned on.
* Turning it on also defines UCONFIG_NO_LEGACY_CONVERSION to 1.
*
* You may want to use this together with U_CHARSET_IS_UTF8 defined to 1
* in utypes.h if char* strings in your environment are always in UTF-8.
*
* Defaults to 0.
*
* @stable ICU 3.2
* @see U_CHARSET_IS_UTF8
*/
#if !defined(UCONFIG_NO_CONVERSION)
# define UCONFIG_NO_CONVERSION 0
#endif
#if UCONFIG_NO_CONVERSION != 0
# define UCONFIG_NO_LEGACY_CONVERSION 1
#endif /* UCONFIG_NO_CONVERSION */
/**
* \def UCONFIG_ONLY_HTML_CONVERSION
* When set to 1, turns off every converter that is NOT listed in
* the HTML encoding standard:
* http://www.w3.org/TR/encoding/#names-and-labels
*
* Not possible on EBCDIC platforms,
* because they need ibm-37 or ibm-1047 default converters.
*
* Defaults to 0.
*
* @stable ICU 55
*/
#if !defined(UCONFIG_ONLY_HTML_CONVERSION)
# define UCONFIG_ONLY_HTML_CONVERSION 0
#endif /* UCONFIG_ONLY_HTML_CONVERSION */
/**
* \def UCONFIG_NO_LEGACY_CONVERSION
* When set to 1, turns off all converters except for
* - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
* - US-ASCII
* - ISO-8859-1
*
* Not possible on EBCDIC platforms,
* because they need ibm-37 or ibm-1047 default converters.
*
* Defaults to 0 unless a value was defined earlier.
*
* @stable ICU 2.4
*/
#if !defined(UCONFIG_NO_LEGACY_CONVERSION)
# define UCONFIG_NO_LEGACY_CONVERSION 0
#endif /* UCONFIG_NO_LEGACY_CONVERSION */
/**
* \def UCONFIG_NO_NORMALIZATION
* When set to 1, turns off normalization.
* This implies turning off several other services as well: break iteration,
* IDNA, collation and transliteration are all defined as switched off below.
* Combining it with UCONFIG_ONLY_COLLATION is rejected at compile time.
*
* Defaults to 0.
*
* @stable ICU 2.6
*/
#if !defined(UCONFIG_NO_NORMALIZATION)
# define UCONFIG_NO_NORMALIZATION 0
#endif
#if UCONFIG_NO_NORMALIZATION
/* collation cannot be the only service when it has to be switched off */
# if UCONFIG_ONLY_COLLATION
# error Contradictory collation switches in uconfig.h.
# endif
/* common library */
/* ICU 50 CJK dictionary BreakIterator uses normalization */
# define UCONFIG_NO_BREAK_ITERATION 1
/* IDNA (UTS #46) is implemented via normalization */
# define UCONFIG_NO_IDNA 1
/* i18n library */
# define UCONFIG_NO_COLLATION 1
# define UCONFIG_NO_TRANSLITERATION 1
#endif /* UCONFIG_NO_NORMALIZATION */
/**
* \def UCONFIG_NO_BREAK_ITERATION
* When set to 1, turns off break iteration.
* Defaults to 0 unless a value was defined earlier.
*
* @stable ICU 2.4
*/
#if !defined(UCONFIG_NO_BREAK_ITERATION)
# define UCONFIG_NO_BREAK_ITERATION 0
#endif /* UCONFIG_NO_BREAK_ITERATION */
/**
* \def UCONFIG_NO_IDNA
* When set to 1, turns off IDNA.
* Defaults to 0 unless a value was defined earlier.
*
* @stable ICU 2.6
*/
#if !defined(UCONFIG_NO_IDNA)
# define UCONFIG_NO_IDNA 0
#endif /* UCONFIG_NO_IDNA */
/**
* \def UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
* Selects the default UMessagePatternApostropheMode;
* see the documentation for that enum.
* Defaults to UMSGPAT_APOS_DOUBLE_OPTIONAL unless a value was defined earlier.
*
* @stable ICU 4.8
*/
#if !defined(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE)
# define UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE UMSGPAT_APOS_DOUBLE_OPTIONAL
#endif /* UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE */
/* i18n library switches ---------------------------------------------------- */
/**
* \def UCONFIG_NO_COLLATION
* When set to 1, turns off collation and collation-based string search.
* Defaults to 0 unless a value was defined earlier.
*
* @stable ICU 2.4
*/
#if !defined(UCONFIG_NO_COLLATION)
# define UCONFIG_NO_COLLATION 0
#endif /* UCONFIG_NO_COLLATION */
/**
* \def UCONFIG_NO_FORMATTING
* When set to 1, turns off formatting and calendar/timezone services.
* Defaults to 0 unless a value was defined earlier.
*
* @stable ICU 2.4
*/
#if !defined(UCONFIG_NO_FORMATTING)
# define UCONFIG_NO_FORMATTING 0
#endif /* UCONFIG_NO_FORMATTING */
/**
* \def UCONFIG_NO_TRANSLITERATION
* When set to 1, turns off transliteration.
* Defaults to 0 unless a value was defined earlier.
*
* @stable ICU 2.4
*/
#if !defined(UCONFIG_NO_TRANSLITERATION)
# define UCONFIG_NO_TRANSLITERATION 0
#endif /* UCONFIG_NO_TRANSLITERATION */
/**
* \def UCONFIG_NO_REGULAR_EXPRESSIONS
* When set to 1, turns off regular expressions.
* Defaults to 0 unless a value was defined earlier.
*
* @stable ICU 2.4
*/
#if !defined(UCONFIG_NO_REGULAR_EXPRESSIONS)
# define UCONFIG_NO_REGULAR_EXPRESSIONS 0
#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
/**
* \def UCONFIG_NO_SERVICE
* When set to 1, turns off service registration.
* Defaults to 0 unless a value was defined earlier.
*
* @stable ICU 3.2
*/
#if !defined(UCONFIG_NO_SERVICE)
# define UCONFIG_NO_SERVICE 0
#endif /* UCONFIG_NO_SERVICE */
/**
* \def UCONFIG_HAVE_PARSEALLINPUT
* Turns on the "parse all input" attribute. Binary incompatible.
* Defaults to 1 unless a value was defined earlier.
*
* @internal
*/
#if !defined(UCONFIG_HAVE_PARSEALLINPUT)
# define UCONFIG_HAVE_PARSEALLINPUT 1
#endif /* UCONFIG_HAVE_PARSEALLINPUT */
/**
* \def UCONFIG_FORMAT_FASTPATHS_49
* Turns on other formatting fastpaths.
* Binary incompatible in object DecimalFormat and DecimalFormatSymbols.
* Defaults to 1 unless a value was defined earlier.
*
* @internal
*/
#if !defined(UCONFIG_FORMAT_FASTPATHS_49)
# define UCONFIG_FORMAT_FASTPATHS_49 1
#endif /* UCONFIG_FORMAT_FASTPATHS_49 */
/**
* \def UCONFIG_NO_FILTERED_BREAK_ITERATION
* When set to 1, turns off filtered break iteration code.
* Defaults to 0 unless a value was defined earlier.
*
* @internal
*/
#if !defined(UCONFIG_NO_FILTERED_BREAK_ITERATION)
# define UCONFIG_NO_FILTERED_BREAK_ITERATION 0
#endif /* UCONFIG_NO_FILTERED_BREAK_ITERATION */
#endif
// udata.h
// No supported content
// unifilt.h
// No supported content
// unifunct.h
// No supported content
// unimatch.h
// No supported content
// uniset.h
// No supported content
// unorm.h
// No supported content
// urename.h
// No supported content
// usetiter.h
// No supported content
// utf32.h
// No supported content
// utf_old.h
// No supported content
// uvernum.h
// No supported content
// platform.h
/*
******************************************************************************
*
* Copyright (C) 1997-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* FILE NAME : platform.h
*
* Date Name Description
* 05/13/98 nos Creation (content moved here from ptypes.h).
* 03/02/99 stephen Added AS400 support.
* 03/30/99 stephen Added Linux support.
* 04/13/99 stephen Reworked for autoconf.
******************************************************************************
*/
#ifndef _PLATFORM_H
#define _PLATFORM_H
/**
* \file
* \brief Basic types for the platform.
*
* This file used to be generated by autoconf/configure.
* Starting with ICU 49, platform.h is a normal source file,
* to simplify cross-compiling and working with non-autoconf/make build systems.
*
* When a value in this file does not work on a platform, then please
* try to derive it from the U_PLATFORM value
* (for which we might need a new value constant in rare cases)
* and/or from other macros that are predefined by the compiler
* or defined in standard (POSIX or platform or compiler) headers.
*
* As a temporary workaround, you can add an explicit
* [NOTE(review): the platform.h description is cut off at this point in this header.]
*/
/*
* NOTE(review): the fragment below describes "invariant characters". It belongs
* neither to the platform.h description above nor to the declaration below, and
* both of its lists are missing; it was presumably spliced in when the ICU
* headers were merged -- confirm against the upstream ICU headers.
*
* These definitions allow to specify the encoding of text
* in the char data type as defined by the platform and the compiler.
* It is enough to determine the code point values of "invariant characters",
* which are the ones shared by all encodings that are in use
* on a given platform.
*
* Those "invariant characters" should be all the uppercase and lowercase
* latin letters, the digits, the space, and "basic punctuation".
* Also, '\\n', '\\r', '\\t' should be available.
*
* The list of "invariant characters" is:
* [list not present in this header]
*
* This matches the IBM Syntactic Character Set (CS 640).
*
* In other words, all the graphic characters in 7-bit ASCII should
* be safely accessible except the following:
* [list not present in this header]
*/
/**
* Sets the ICU tracing callback functions and the context pointer that is
* handed to them.
* NOTE(review): this summary was reconstructed from the parameter descriptions
* below; the original opening of this comment is missing from this header --
* confirm against upstream utrace.h.
*
* Tracing and Threads: Tracing functions are global to a process, and
* will be called in response to ICU operations performed by any
* thread. If tracing of an individual thread is desired, the
* tracing functions must themselves filter by checking that the
* current thread is the desired thread.
*
* @param context an uninterpreted pointer. Whatever is passed in
* here will in turn be passed to each of the tracing
* functions UTraceEntry, UTraceExit and UTraceData.
* ICU does not use or alter this pointer.
* @param e Callback function to be called on entry to
* a traced ICU function.
* @param x Callback function to be called on exit from a
* traced ICU function.
* @param d Callback function to be called from within a
* traced ICU function, for the purpose of providing
* data to the trace.
*
* @stable ICU 2.8
*/
U_STABLE void U_EXPORT2
utrace_setFunctions(const void *context,
UTraceEntry *e, UTraceExit *x, UTraceData *d);
/**
* Get the currently installed ICU tracing functions. Note that a null function
* pointer will be returned if no trace function has been set.
* All four parameters are output parameters: each is a pointer to the
* variable that receives the corresponding value.
*
* @param context The currently installed tracing context.
* @param e The currently installed UTraceEntry function.
* @param x The currently installed UTraceExit function.
* @param d The currently installed UTraceData function.
* @stable ICU 2.8
*/
U_STABLE void U_EXPORT2
utrace_getFunctions(const void **context,
UTraceEntry **e, UTraceExit **x, UTraceData **d);
/*
*
* ICU trace format string syntax
*
* Format Strings are passed to UTraceData functions, and define the
* number and types of the trace data being passed on each call.
*
* The UTraceData function, which is supplied by the application,
* not by ICU, can either forward the trace data (passed via
* varargs) and the format string back to ICU for formatting into
* a displayable string, or it can interpret the format itself,
* and do as it wishes with the trace data.
*
*
* Goals for the format string
* - basic data output
* - easy to use for trace programmer
* - sufficient provision for data types for trace output readability
* - well-defined types and binary portable APIs
*
* Non-goals
* - printf compatibility
* - fancy formatting
* - argument reordering and other internationalization features
*
* ICU trace format strings contain plain text with argument inserts,
* much like standard printf format strings.
* Each insert begins with a '%', then optionally contains a 'v',
* then exactly one type character.
* Two '%' in a row represent a '%' instead of an insert.
* The trace format strings need not have \n at the end.
*
*
* Types
* -----
*
* Type characters:
* - c A char character in the default codepage.
* - s A NUL-terminated char * string in the default codepage.
* - S A UChar * string. Requires two params, (ptr, length). Length=-1 for nul term.
* - b A byte (8-bit integer).
* - h A 16-bit integer. Also a 16 bit Unicode code unit.
* - d A 32-bit integer. Also a 21 bit Unicode code point value.
* - l A 64-bit integer.
* - p A data pointer.
*
* Vectors
* -------
*
* If the 'v' is not specified, then one item of the specified type
* is passed in.
* If the 'v' (for "vector") is specified, then a vector of items of the
* specified type is passed in, via a pointer to the first item
* and an int32_t value for the length of the vector.
* Length==-1 means zero or NUL termination. Works for vectors of all types.
*
* Note: %vS is a vector of (UChar *) strings. The strings must
* be nul terminated as there is no way to provide a
* separate length parameter for each string. The length
* parameter (required for all vectors) is the number of
* strings, not the length of the strings.
*
* Examples
* --------
*
* These examples show the parameters that will be passed to an application's
* UTraceData() function for various formats.
*
* - the precise formatting is up to the application!
* - the examples use type casts for arguments only to _show_ the types of
* arguments without needing variable declarations in the examples;
* the type casts will not be necessary in actual code
*
* UTraceDataFunc(context, fnNumber, level,
* "There is a character %c in the string %s.", // Format String
* (char)c, (const char *)s); // varargs parameters
* -> There is a character 0x42 'B' in the string "Bravo".
*
* UTraceDataFunc(context, fnNumber, level,
* "Vector of bytes %vb vector of chars %vc",
* (const uint8_t *)bytes, (int32_t)bytesLength,
* (const char *)chars, (int32_t)charsLength);
* -> Vector of bytes
* 42 63 64 3f [4]
* vector of chars
* "Bcd?"[4]
*
* UTraceDataFunc(context, fnNumber, level,
* "An int32_t %d and a whole bunch of them %vd",
* (int32_t)-5, (const int32_t *)ints, (int32_t)intsLength);
* -> An int32_t 0xfffffffb and a whole bunch of them
* fffffffb 00000005 0000010a [3]
*
*/
/**
* Trace output Formatter. An application's UTraceData tracing functions may call
* back to this function to format the trace output in a
* human readable form. Note that a UTraceData function may choose
* to not format the data; it could, for example, save it
* in the raw form it was received (more compact), leaving
* formatting for a later trace analysis tool.
* @param outBuf pointer to a buffer to receive the formatted output. Output
* will be nul terminated if there is space in the buffer -
* if the length of the requested output < the output buffer size.
* @param capacity Length of the output buffer.
* @param indent Number of spaces to indent the output. Intended to allow
* data displayed from nested functions to be indented for readability.
* @param fmt Format specification for the data to output
* @param args Data to be formatted.
* @return Length of formatted output, including the terminating NUL.
* If buffer capacity is insufficient, the required capacity is returned.
* @stable ICU 2.8
*/
U_STABLE int32_t U_EXPORT2
utrace_vformat(char *outBuf, int32_t capacity,
int32_t indent, const char *fmt, va_list args);
/**
* Trace output Formatter. An application's UTraceData tracing functions may call
* this function to format any additional trace data, beyond that
* provided by default, in human readable form with the same
* formatting conventions used by utrace_vformat().
* Unlike utrace_vformat(), which takes a va_list, this function takes the
* data to be formatted as variable arguments.
* @param outBuf pointer to a buffer to receive the formatted output. Output
* will be nul terminated if there is space in the buffer -
* if the length of the requested output < the output buffer size.
* @param capacity Length of the output buffer.
* @param indent Number of spaces to indent the output. Intended to allow
* data displayed from nested functions to be indented for readability.
* @param fmt Format specification for the data to output
* @param ... Data to be formatted.
* @return Length of formatted output, including the terminating NUL.
* If buffer capacity is insufficient, the required capacity is returned.
* @stable ICU 2.8
*/
U_STABLE int32_t U_EXPORT2
utrace_format(char *outBuf, int32_t capacity,
int32_t indent, const char *fmt, ...);
/* Trace function numbers --------------------------------------------------- */
/**
* Get the name of a function from its trace function number.
*
* @param fnNumber The trace number for an ICU function.
* @return The name string for the function.
* NOTE(review): ownership and lifetime of the returned string are not
* stated here -- presumably it must not be freed by the caller; confirm.
*
* @see UTraceFunctionNumber
* @stable ICU 2.8
*/
U_STABLE const char * U_EXPORT2
utrace_functionName(int32_t fnNumber);
U_CDECL_END
#endif
// ustringtrie.h
/*
*******************************************************************************
* Copyright (C) 2010-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: ustringtrie.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010dec17
* created by: Markus W. Scherer
*/
#ifndef __USTRINGTRIE_H__
#define __USTRINGTRIE_H__
/**
* \file
* \brief C API: Helper definitions for dictionary trie APIs.
*/
/**
* Result codes returned by BytesTrie::next(), UCharsTrie::next() and similar methods.
* The numeric values are significant: USTRINGTRIE_HAS_VALUE and
* USTRINGTRIE_HAS_NEXT test them arithmetically, so they are spelled out here.
* @see USTRINGTRIE_MATCHES
* @see USTRINGTRIE_HAS_VALUE
* @see USTRINGTRIE_HAS_NEXT
* @stable ICU 4.8
*/
enum UStringTrieResult {
    /**
    * No match: the input unit(s) did not continue a matching string.
    * After current()/next() return USTRINGTRIE_NO_MATCH, every further call to
    * current()/next() returns USTRINGTRIE_NO_MATCH as well,
    * until the trie is reset to its original state or to a saved state.
    * @stable ICU 4.8
    */
    USTRINGTRIE_NO_MATCH = 0,
    /**
    * Match without a value: the input unit(s) continued a matching string,
    * but the string so far has no value.
    * (It is a prefix of a longer string.)
    * @stable ICU 4.8
    */
    USTRINGTRIE_NO_VALUE = 1,
    /**
    * Match with a final value: the input unit(s) continued a matching string
    * and the string so far has a value, which getValue() will return.
    * No further input byte/unit can continue a matching string.
    * @stable ICU 4.8
    */
    USTRINGTRIE_FINAL_VALUE = 2,
    /**
    * Match with an intermediate value: the input unit(s) continued a matching
    * string and the string so far has a value, which getValue() will return.
    * Another input byte/unit can continue a matching string.
    * @stable ICU 4.8
    */
    USTRINGTRIE_INTERMEDIATE_VALUE = 3
};
/**
* Tests whether a trie result is anything other than USTRINGTRIE_NO_MATCH,
* i.e. the same as (result!=USTRINGTRIE_NO_MATCH).
* @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
* @return true if the input bytes/units so far are part of a matching string/byte sequence.
* @stable ICU 4.8
*/
#define USTRINGTRIE_MATCHES(result) (USTRINGTRIE_NO_MATCH!=(result))
/**
* Tests whether a trie result carries a value.
* Gives the same answer as
* (result==USTRINGTRIE_INTERMEDIATE_VALUE || result==USTRINGTRIE_FINAL_VALUE),
* but evaluates result exactly once.
* @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
* @return true if there is a value for the input bytes/units so far.
* @see BytesTrie::getValue
* @see UCharsTrie::getValue
* @stable ICU 4.8
*/
#define USTRINGTRIE_HAS_VALUE(result) (USTRINGTRIE_FINAL_VALUE<=(result))
/**
* Tests whether further input can continue a match.
* Gives the same answer as
* (result==USTRINGTRIE_NO_VALUE || result==USTRINGTRIE_INTERMEDIATE_VALUE),
* but evaluates result exactly once (it tests the lowest bit of result).
* @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
* @return true if another input byte/unit can continue a matching string.
* @stable ICU 4.8
*/
#define USTRINGTRIE_HAS_NEXT(result) (1&(result))
#endif /* __USTRINGTRIE_H__ */
// ushape.h
/*
******************************************************************************
*
* Copyright (C) 2000-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: ushape.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2000jun29
* created by: Markus W. Scherer
*/
#ifndef __USHAPE_H__
#define __USHAPE_H__
/**
* \file
* \brief C API: Arabic shaping
*
*/
/*
* NOTE(review): the text below is the start of the ushape.h description of
* Arabic shaping. It stops mid-sentence and the declaration it documented is
* not present at this point of the header; presumably it was lost when the ICU
* headers were merged -- confirm against upstream ushape.h.
*
* Shape Arabic text on a character basis.
*
* This function performs basic operations for "shaping" Arabic text. It is most
* useful for use with legacy data formats and legacy display technology
* (simple terminals). All operations are performed on Unicode characters.
*
* Text-based shaping means that some character code points in the text are
* replaced by others depending on the context. It transforms one kind of text
* into another. In comparison, modern displays for Arabic text select
* appropriate, context-dependent font glyphs for each text element, which means
* that they transform text into a glyph vector.
*
* Text transformations are necessary when modern display technology is not
* available or when text needs to be transformed to or from legacy formats that
* use "shaped" characters. Since the Arabic script is cursive, connecting
* adjacent letters to each other, computers select images for each letter based
* on the surrounding letters. This usually results in four images per Arabic
* letter: initial, middle, final, and isolated forms. In Unicode, on the other
* hand, letters are normally stored abstract, and a display system is expected
* to select the necessary glyphs. (This makes searching and other text
* processing easier because the same letter has only one code.) It is possible
* to mimic this with text transformations because there are characters in
* Unicode that are rendered as letters with a specific shape
* (or cursive connectivity). They were included for interoperability with
* legacy systems and codepages, and for unsophisticated display systems.
*
* A second kind of text transformations is supported for Arabic digits:
* For compatibility with legacy codepages that only include European digits,
* it is possible to replace one set of digits by another, changing the
* character code points. These operations can be performed for either
* Arabic-Indic Digits (U+0660...U+0669) or Eastern (Extended) Arabic-Indic
* digits (U+06f0...U+06f9).
*
* Some replacements may result in more or fewer characters (code points).
* By default, this means that the destination buffer may receive text with a
* length different from the source length. Some legacy systems rely on the
* length of the text to be constant. They expect extra spaces to be added
* or consumed either next to the affected character or at the end of the
* text.
*
* For details about the available operations, see the description of the
* [text ends here in this header]
*/
/**
* Fills in the script code(s) for a script name, ISO 15924 code, or locale.
* NOTE(review): this summary was reconstructed from the parameter descriptions
* below; the original opening of this comment is missing from this header --
* confirm against upstream uscript.h.
*
* Note: To search by short or long script alias only, use
* u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. That does
* a fast lookup with no access of the locale data.
*
* @param nameOrAbbrOrLocale name of the script, as given in
* PropertyValueAliases.txt, or ISO 15924 code or locale
* @param fillIn the UScriptCode buffer to fill in the script code
* @param capacity the capacity (size) of UScriptCode buffer passed in.
* @param err the error status code.
* @return The number of script codes filled in the buffer passed in
* @stable ICU 2.4
*/
U_STABLE int32_t U_EXPORT2
uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err);
/**
* Returns the long Unicode script name, if there is one.
* Otherwise returns the 4-letter ISO 15924 script code.
* Example: returns "Malayalam" given USCRIPT_MALAYALAM.
*
* @param scriptCode UScriptCode enum
* @return long script name as given in PropertyValueAliases.txt, or the 4-letter code,
* or NULL if scriptCode is invalid
* @stable ICU 2.4
*/
U_STABLE const char* U_EXPORT2
uscript_getName(UScriptCode scriptCode);
/**
* Returns the 4-letter ISO 15924 script code,
* which is the same as the short Unicode script name if Unicode has names for the script.
* Example: returns "Mlym" given USCRIPT_MALAYALAM.
*
* @param scriptCode UScriptCode enum
* @return short script name (4-letter code), or NULL if scriptCode is invalid
* @stable ICU 2.4
*/
U_STABLE const char* U_EXPORT2
uscript_getShortName(UScriptCode scriptCode);
/**
* Gets the script code associated with the given codepoint.
* Example: returns USCRIPT_MALAYALAM given 0x0D02.
*
* @param codepoint UChar32 codepoint
* @param err the error status code.
* @return The UScriptCode, or 0 if codepoint is invalid
* @stable ICU 2.4
*/
U_STABLE UScriptCode U_EXPORT2
uscript_getScript(UChar32 codepoint, UErrorCode *err);
/**
* Do the Script_Extensions of code point c contain script sc?
* If c does not have explicit Script_Extensions, then this tests whether
* c has the Script property value sc.
*
* Some characters are commonly used in multiple scripts.
* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
*
* The Script_Extensions property is provisional. It may be modified or removed
* in future versions of the Unicode Standard, and thus in ICU.
*
* @param c code point
* @param sc script code
* @return TRUE if sc is in Script_Extensions(c)
* @stable ICU 49
*/
U_STABLE UBool U_EXPORT2
uscript_hasScript(UChar32 c, UScriptCode sc);
/**
* Writes code point c's Script_Extensions as a list of UScriptCode values
* to the output scripts array and returns the number of script codes.
* - If c does have Script_Extensions, then the Script property value
*   (normally Common or Inherited) is not included.
* - If c does not have Script_Extensions, then the one Script code is written to the output array.
* - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written.
*
* In other words, if the return value is 1,
* then the output array contains exactly c's single Script code.
* If the return value is n>=2, then the output array contains c's n Script_Extensions script codes.
*
* Some characters are commonly used in multiple scripts.
* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
*
* If there are more than capacity script codes to be written, then
* U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned.
* (Usual ICU buffer handling behavior.)
*
* The Script_Extensions property is provisional. It may be modified or removed
* in future versions of the Unicode Standard, and thus in ICU.
*
* @param c code point
* @param scripts output script code array
* @param capacity capacity of the scripts array
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return number of script codes in c's Script_Extensions, or 1 for the single Script value,
* written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
* @stable ICU 49
*/
U_STABLE int32_t U_EXPORT2
uscript_getScriptExtensions(UChar32 c,
UScriptCode *scripts, int32_t capacity,
UErrorCode *errorCode);
/**
* Script usage constants, as classified by
* UAX #31 Unicode Identifier and Pattern Syntax:
* http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
*
* The enumerators are numbered consecutively from 0 in declaration order.
*
* @stable ICU 51
*/
typedef enum UScriptUsage {
    /** The script is not encoded in Unicode. @stable ICU 51 */
    USCRIPT_USAGE_NOT_ENCODED = 0,
    /** The script usage is unknown. @stable ICU 51 */
    USCRIPT_USAGE_UNKNOWN = 1,
    /** Candidate for Exclusion from Identifiers. @stable ICU 51 */
    USCRIPT_USAGE_EXCLUDED = 2,
    /** Limited Use script. @stable ICU 51 */
    USCRIPT_USAGE_LIMITED_USE = 3,
    /** Aspirational Use script. @stable ICU 51 */
    USCRIPT_USAGE_ASPIRATIONAL = 4,
    /** Recommended script. @stable ICU 51 */
    USCRIPT_USAGE_RECOMMENDED = 5
} UScriptUsage;
/**
* Writes the script sample character string to dest.
* This string normally consists of one code point but might be longer.
* The string is empty if the script is not encoded.
*
* @param script script code
* @param dest output string array
* @param capacity number of UChars in the dest array
* @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input
* @return the string length, even if U_BUFFER_OVERFLOW_ERROR
* @stable ICU 51
*/
U_STABLE int32_t U_EXPORT2
uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode);
/**
* Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
* Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.
*
* @param script script code
* @return script usage, one of the UScriptUsage constants
* @see UScriptUsage
* @stable ICU 51
*/
U_STABLE UScriptUsage U_EXPORT2
uscript_getUsage(UScriptCode script);
/**
* Returns TRUE if the script is written right-to-left.
* Examples of such scripts: Arab and Hebr.
*
* @param script script code
* @return TRUE if the script is right-to-left
* @stable ICU 51
*/
U_STABLE UBool U_EXPORT2
uscript_isRightToLeft(UScriptCode script);
/**
* Returns TRUE if the script allows line breaks between letters (excluding hyphenation).
* Such a script typically requires dictionary-based line breaking.
* Examples of such scripts: Hani and Thai.
*
* @param script script code
* @return TRUE if the script allows line breaks between letters
* @stable ICU 51
*/
U_STABLE UBool U_EXPORT2
uscript_breaksBetweenLetters(UScriptCode script);
/**
* Returns TRUE if in modern (or most recent) usage of the script case distinctions are customary.
* Examples of such scripts: Latn and Cyrl.
*
* @param script script code
* @return TRUE if the script is cased
* @stable ICU 51
*/
U_STABLE UBool U_EXPORT2
uscript_isCased(UScriptCode script);
#endif
// urep.h
/*
******************************************************************************
* Copyright (C) 1997-2010, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* Date Name Description
* 06/23/00 aliu Creation.
******************************************************************************
*/
#ifndef __UREP_H
#define __UREP_H
U_CDECL_BEGIN
/********************************************************************
* General Notes
********************************************************************
* TODO
* Add usage scenario
* Add test code
* Talk about pinning
* Talk about "can truncate result if out of memory"
*/
/********************************************************************
* Data Structures
********************************************************************/
/**
* \file
* \brief C API: Callbacks for UReplaceable
*/
/**
 * Opaque handle for a replaceable text object.
 * It is manipulated only through the caller-supplied UReplaceableFunctor
 * struct and is related to the C++ class Replaceable.
 * Currently it is used only by the Transliterator C API, see utrans.h .
 * @stable ICU 2.0
 */
typedef void *UReplaceable;
/**
 * Table of function pointers that transliterators use to manipulate a
 * UReplaceable. The caller supplies the functions required to manipulate
 * its own text appropriately. Related to the C++ class Replaceable.
 * @stable ICU 2.0
 */
typedef struct UReplaceableCallbacks {
    /**
     * Returns the number of UChar code units in this text.
     *
     * @param rep A pointer to "this" UReplaceable object.
     * @return The length of the text.
     * @stable ICU 2.0
     */
    int32_t (*length)(const UReplaceable* rep);

    /**
     * Returns the UChar code unit at the given offset into this text;
     * 0 <= offset < n, where n is the value returned by (*length)(rep).
     * See unistr.h for a description of charAt() vs. char32At().
     *
     * @param rep    A pointer to "this" UReplaceable object.
     * @param offset The index at which to fetch the UChar (code unit).
     * @return The UChar (code unit) at offset, or U+FFFF if the offset is out of bounds.
     * @stable ICU 2.0
     */
    UChar (*charAt)(const UReplaceable* rep, int32_t offset);

    /**
     * Returns the UChar32 code point at the given offset into this text.
     * See unistr.h for a description of charAt() vs. char32At().
     *
     * @param rep    A pointer to "this" UReplaceable object.
     * @param offset The index at which to fetch the UChar32 (code point).
     * @return The UChar32 (code point) at offset, or U+FFFF if the offset is out of bounds.
     * @stable ICU 2.0
     */
    UChar32 (*char32At)(const UReplaceable* rep, int32_t offset);

    /**
     * Replaces the text between start and limit in this text with the
     * given text. Attributes (out of band info) should be retained.
     *
     * @param rep        A pointer to "this" UReplaceable object.
     * @param start      the starting index of the text to be replaced, inclusive.
     * @param limit      the ending index of the text to be replaced, exclusive.
     * @param text       the new text to replace the UChars from start..limit-1.
     * @param textLength the number of UChars at text, or -1 if text is
     *                   null-terminated.
     * @stable ICU 2.0
     */
    void (*replace)(UReplaceable* rep,
                    int32_t start,
                    int32_t limit,
                    const UChar* text,
                    int32_t textLength);

    /**
     * Copies the characters in the range [start, limit) into the array dst.
     *
     * @param rep   A pointer to "this" UReplaceable object.
     * @param start offset of the first character which will be copied into
     *              the array
     * @param limit offset immediately following the last character to be
     *              copied
     * @param dst   array in which to copy characters. The length of dst
     *              must be at least (limit - start).
     * @stable ICU 2.1
     */
    void (*extract)(UReplaceable* rep,
                    int32_t start,
                    int32_t limit,
                    UChar* dst);

    /**
     * Copies the text between start and limit in this text to another
     * index in the text. Attributes (out of band info) should be retained.
     * After this call, there will be (at least) two copies of the
     * characters originally located at start..limit-1.
     *
     * @param rep   A pointer to "this" UReplaceable object.
     * @param start the starting index of the text to be copied, inclusive.
     * @param limit the ending index of the text to be copied, exclusive.
     * @param dest  the index at which the copy of the UChars should be
     *              inserted.
     * @stable ICU 2.0
     */
    void (*copy)(UReplaceable* rep,
                 int32_t start,
                 int32_t limit,
                 int32_t dest);
} UReplaceableCallbacks;
U_CDECL_END
#endif
// uobject.h
/*
******************************************************************************
*
* Copyright (C) 2002-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: uobject.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002jun26
* created by: Markus W. Scherer
*/
#ifndef __UOBJECT_H__
#define __UOBJECT_H__
/**
* \file
* \brief C++ API: Common ICU base class UObject.
*/
/**
* @{
* \def U_NO_THROW
* Define this to define the throw() specification so
* certain functions do not throw any exceptions
*
* UMemory operator new methods should have the throw() specification
* appended to them, so that the compiler adds the additional NULL check
* before calling constructors. Without, if
* You create a
* The second option includes an additional ISO Country
* Code. These codes are the upper-case two-letter codes
* as defined by ISO-3166.
* You can find a full list of these codes at a number of sites, such as:
*
* The third option requires another additional information--the
* Variant.
* The Variant codes are vendor and browser-specific.
* For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX.
* Where there are two variants, separate them with an underscore, and
* put the most important one first. For
* example, a Traditional Spanish collation might be referenced, with
* "ES", "ES", "Traditional_WIN".
*
*
* Because a
* The
* Once you've specified a locale you can query it for information about
* itself. Use
* The ICU provides a number of services that perform locale-sensitive
* operations. For example, the
* Each international service that performs locale-sensitive operations
* allows you
* to get all the available objects of that type. You can sift
* through these objects by language, country, or variant,
* and use the display names to present a menu to the user.
* For example, you can create a menu of all the collation objects
* suitable for a given language. Such classes implement these
* three class methods:
* \htmlonly
* Concerning POSIX/RFC1766 Locale IDs,
* the getLanguage/getCountry/getVariant/getName functions do understand
* the POSIX type form of language_COUNTRY.ENCODING\@VARIANT
* and if there is not an ICU-style variant, uloc_getVariant() for example
* will return the one listed after the \@ sign. As well, the hyphen
* "-" is recognized as a country/variant separator similarly to RFC1766.
* So for example, "en-us" will be interpreted as en_US.
* As a result, uloc_getName() is far from a no-op, and will have the
* effect of converting POSIX/RFC1766 IDs into ICU form, although it does
* NOT map any of the actual codes (i.e. russian->ru) in any way.
* Applications should call uloc_getName() at the point where a locale ID
* is coming from an external source (user entry, OS, web browser)
* and pass the resulting string to other ICU functions. For example,
* don't use de-de\@EURO as an argument to resourcebundle.
*
* @see UResourceBundle
*/
/* ---- Language-only locale IDs ---- */

/** Locale ID constant for the Chinese language. @stable ICU 2.0 */
#define ULOC_CHINESE             "zh"
/** Locale ID constant for the English language. @stable ICU 2.0 */
#define ULOC_ENGLISH             "en"
/** Locale ID constant for the French language. @stable ICU 2.0 */
#define ULOC_FRENCH              "fr"
/** Locale ID constant for the German language. @stable ICU 2.0 */
#define ULOC_GERMAN              "de"
/** Locale ID constant for the Italian language. @stable ICU 2.0 */
#define ULOC_ITALIAN             "it"
/** Locale ID constant for the Japanese language. @stable ICU 2.0 */
#define ULOC_JAPANESE            "ja"
/** Locale ID constant for the Korean language. @stable ICU 2.0 */
#define ULOC_KOREAN              "ko"
/** Locale ID constant for Simplified Chinese. @stable ICU 2.0 */
#define ULOC_SIMPLIFIED_CHINESE  "zh_CN"
/** Locale ID constant for Traditional Chinese. @stable ICU 2.0 */
#define ULOC_TRADITIONAL_CHINESE "zh_TW"

/* ---- Country/region locale IDs ---- */

/** Locale ID constant for the country/region Canada. @stable ICU 2.0 */
#define ULOC_CANADA              "en_CA"
/** Locale ID constant for the country/region Canada, French language. @stable ICU 2.0 */
#define ULOC_CANADA_FRENCH       "fr_CA"
/** Locale ID constant for the country/region China. @stable ICU 2.0 */
#define ULOC_CHINA               "zh_CN"
/** Locale ID constant for the country/region PRC; same value as ULOC_CHINA. @stable ICU 2.0 */
#define ULOC_PRC                 "zh_CN"
/** Locale ID constant for the country/region France. @stable ICU 2.0 */
#define ULOC_FRANCE              "fr_FR"
/** Locale ID constant for the country/region Germany. @stable ICU 2.0 */
#define ULOC_GERMANY             "de_DE"
/** Locale ID constant for the country/region Italy. @stable ICU 2.0 */
#define ULOC_ITALY               "it_IT"
/** Locale ID constant for the country/region Japan. @stable ICU 2.0 */
#define ULOC_JAPAN               "ja_JP"
/** Locale ID constant for the country/region Korea. @stable ICU 2.0 */
#define ULOC_KOREA               "ko_KR"
/** Locale ID constant for the country/region Taiwan. @stable ICU 2.0 */
#define ULOC_TAIWAN              "zh_TW"
/** Locale ID constant for the country/region UK. @stable ICU 2.0 */
#define ULOC_UK                  "en_GB"
/** Locale ID constant for the country/region US. @stable ICU 2.0 */
#define ULOC_US                  "en_US"
/**
 * Maximum size of the language part of a locale ID
 * (including the terminating NULL).
 * @stable ICU 2.0
 */
#define ULOC_LANG_CAPACITY                  12

/**
 * Maximum size of the country part of a locale ID
 * (including the terminating NULL).
 * @stable ICU 2.0
 */
#define ULOC_COUNTRY_CAPACITY               4

/**
 * Maximum size of the whole locale ID
 * (including the terminating NULL and all keywords).
 * @stable ICU 2.0
 */
#define ULOC_FULLNAME_CAPACITY              157

/**
 * Maximum size of the script part of a locale ID
 * (including the terminating NULL).
 * @stable ICU 2.8
 */
#define ULOC_SCRIPT_CAPACITY                6

/**
 * Maximum size of keywords in a locale.
 * @stable ICU 2.8
 */
#define ULOC_KEYWORDS_CAPACITY              96

/**
 * Maximum total size of keywords and their values in a locale.
 * @stable ICU 2.8
 */
#define ULOC_KEYWORD_AND_VALUES_CAPACITY    100

/**
 * Invariant character that separates keywords from the locale string.
 * @stable ICU 2.8
 */
#define ULOC_KEYWORD_SEPARATOR              '@'

/**
 * Unicode code point for '@', which separates keywords from the locale string.
 * @see ULOC_KEYWORD_SEPARATOR
 * @stable ICU 4.6
 */
#define ULOC_KEYWORD_SEPARATOR_UNICODE      0x40

/**
 * Invariant character that assigns a value to a keyword.
 * @stable ICU 2.8
 */
#define ULOC_KEYWORD_ASSIGN                 '='

/**
 * Unicode code point for '=', which assigns a value to a keyword.
 * @see ULOC_KEYWORD_ASSIGN
 * @stable ICU 4.6
 */
#define ULOC_KEYWORD_ASSIGN_UNICODE         0x3D

/**
 * Invariant character that separates keywords from each other.
 * @stable ICU 2.8
 */
#define ULOC_KEYWORD_ITEM_SEPARATOR         ';'

/**
 * Unicode code point for ';', which separates keywords from each other.
 * @see ULOC_KEYWORD_ITEM_SEPARATOR
 * @stable ICU 4.6
 */
#define ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE 0x3B
/**
 * Selector constants for the *_getLocale() functions.
 * They let the user choose whether she wants information on the
 * requested, valid or actual locale.
 * For example, a collator for "en_US_CALIFORNIA" was requested.
 * In the current state of ICU (2.0),
 * the requested locale is "en_US_CALIFORNIA",
 * the valid locale is "en_US" (most specific locale supported by ICU)
 * and the actual locale is "root" (the collation data comes unmodified
 * from the UCA).
 * A locale is considered supported by ICU if there is a core ICU bundle
 * for that locale (although it may be empty).
 * @stable ICU 2.1
 */
typedef enum {
    /**
     * The locale the data actually comes from.
     * @stable ICU 2.1
     */
    ULOC_ACTUAL_LOCALE          = 0,
    /**
     * The most specific locale supported by ICU.
     * @stable ICU 2.1
     */
    ULOC_VALID_LOCALE           = 1,
    /* Limit value of this enum; no constant with value 2 is declared here. */
    ULOC_DATA_LOCALE_TYPE_LIMIT = 3
} ULocDataLocaleType;
#ifndef U_HIDE_SYSTEM_API
/**
 * Returns ICU's default locale.
 * The returned string is a snapshot in time: it stays valid and unchanged
 * even when uloc_setDefault() is called afterwards.
 * The storage is owned by ICU; the caller must not alter or delete it.
 *
 * @return the ICU default locale
 * @system
 * @stable ICU 2.0
 */
U_STABLE const char* U_EXPORT2 uloc_getDefault(void);
/**
 * Sets ICU's default locale.
 * If this function is never called, ICU's default locale is based on
 * information obtained from the underlying system environment.
 *
 * Changes to ICU's default locale do not propagate back to the
 * system environment.
 *
 * Changes to ICU's default locale do not affect any ICU services that
 * may already be open based on the previous default locale value.
 *
 * @param localeID the new ICU default locale. A value of NULL will try to get
 *                 the system's default locale.
 * @param status   the error information if the setting of default locale fails
 * @system
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2
uloc_setDefault(const char* localeID, UErrorCode* status);
#endif /* U_HIDE_SYSTEM_API */
/**
 * Extracts the language code of the specified locale.
 *
 * @param localeID         the locale to get the ISO language code with
 * @param language         buffer receiving the language code for localeID
 * @param languageCapacity the size of the language buffer
 * @param err              error information if retrieving the language code failed
 * @return the actual buffer size needed for the language code. If it's greater
 *         than languageCapacity, the returned language code will be truncated.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
uloc_getLanguage(const char* localeID,
                 char* language,
                 int32_t languageCapacity,
                 UErrorCode* err);
/**
 * Extracts the script code of the specified locale.
 *
 * @param localeID       the locale to get the script code with
 * @param script         buffer receiving the script code for localeID
 * @param scriptCapacity the size of the script buffer
 * @param err            error information if retrieving the script code failed
 * @return the actual buffer size needed for the script code. If it's greater
 *         than scriptCapacity, the returned script code will be truncated.
 * @stable ICU 2.8
 */
U_STABLE int32_t U_EXPORT2
uloc_getScript(const char* localeID,
               char* script,
               int32_t scriptCapacity,
               UErrorCode* err);
/**
 * Extracts the country code of the specified locale.
 *
 * @param localeID        the locale to get the country code with
 * @param country         buffer receiving the country code for localeID
 * @param countryCapacity the size of the country buffer
 * @param err             error information if retrieving the country code failed
 * @return the actual buffer size needed for the country code. If it's greater
 *         than countryCapacity, the returned country code will be truncated.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
uloc_getCountry(const char* localeID,
                char* country,
                int32_t countryCapacity,
                UErrorCode* err);
/**
 * Extracts the variant code of the specified locale.
 *
 * @param localeID        the locale to get the variant code with
 * @param variant         buffer receiving the variant code for localeID
 * @param variantCapacity the size of the variant buffer
 * @param err             error information if retrieving the variant code failed
 * @return the actual buffer size needed for the variant code. If it's greater
 *         than variantCapacity, the returned variant code will be truncated.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
uloc_getVariant(const char* localeID,
                char* variant,
                int32_t variantCapacity,
                UErrorCode* err);
/**
 * Produces the full name for the specified locale.
 * Note: This has the effect of 'canonicalizing' the ICU locale ID to
 * a certain extent. Upper and lower case are set as needed.
 * It does NOT map aliased names in any way.
 * See the top of this header file.
 * This API supports preflighting.
 *
 * @param localeID     the locale to get the full name with
 * @param name         fill-in buffer for the name
 * @param nameCapacity capacity of the fill-in buffer
 * @param err          error information if retrieving the full name failed
 * @return the actual buffer size needed for the full name. If it's greater
 *         than nameCapacity, the returned full name will be truncated.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
uloc_getName(const char* localeID,
             char* name,
             int32_t nameCapacity,
             UErrorCode* err);
/**
 * Produces the full name for the specified locale.
 * Note: This has the effect of 'canonicalizing' the string to
 * a certain extent. Upper and lower case are set as needed,
 * and components given in 'POSIX' format are changed to ICU format.
 * It does NOT map aliased names in any way.
 * See the top of this header file.
 *
 * @param localeID     the locale to get the full name with
 * @param name         buffer receiving the full name for localeID
 * @param nameCapacity the size of the name buffer
 * @param err          error information if retrieving the full name failed
 * @return the actual buffer size needed for the full name. If it's greater
 *         than nameCapacity, the returned full name will be truncated.
 * @stable ICU 2.8
 */
U_STABLE int32_t U_EXPORT2
uloc_canonicalize(const char* localeID,
                  char* name,
                  int32_t nameCapacity,
                  UErrorCode* err);
/**
 * Returns the ISO language code for the specified locale.
 *
 * @param localeID the locale to get the ISO language code with
 * @return the ISO language code for localeID
 * @stable ICU 2.0
 */
U_STABLE const char* U_EXPORT2 uloc_getISO3Language(const char* localeID);
/**
 * Returns the ISO country code for the specified locale.
 *
 * @param localeID the locale to get the ISO country code with
 * @return the ISO country code for localeID
 * @stable ICU 2.0
 */
U_STABLE const char* U_EXPORT2 uloc_getISO3Country(const char* localeID);
/**
 * Returns the Win32 LCID value for the specified locale.
 * 0 is returned if the ICU locale is not recognized by Windows.
 *
 * LCIDs were deprecated with Windows Vista and Microsoft recommends
 * developers to use BCP47 style tags instead (uloc_toLanguageTag.)
 *
 * @param localeID the locale to get the Win32 LCID value with
 * @return the Win32 LCID for localeID
 * @stable ICU 2.0
 */
U_STABLE uint32_t U_EXPORT2 uloc_getLCID(const char* localeID);
/**
 * Produces the language name of the specified locale in a form suitable
 * for display.
 *
 * @param locale           the locale whose language is to be displayed
 * @param displayLocale    the locale to be used to display the name. In other
 *                         words, if the locale's language code is "en", passing
 *                         a French displayLocale would result in "Anglais",
 *                         while passing a German displayLocale would result in
 *                         "Englisch".
 * @param language         buffer receiving the displayable language name
 * @param languageCapacity the size of the language buffer
 * @param status           error information if retrieving the displayable
 *                         language name failed
 * @return the actual buffer size needed for the displayable language name.
 *         If it's greater than languageCapacity, the returned name will be
 *         truncated.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
uloc_getDisplayLanguage(const char* locale,
                        const char* displayLocale,
                        UChar* language,
                        int32_t languageCapacity,
                        UErrorCode* status);
/**
 * Produces the script name of the specified locale in a form suitable
 * for display.
 *
 * @param locale         the locale whose script is to be displayed.
 *                       NULL may be used to specify the default.
 * @param displayLocale  the locale to be used to display the name.
 *                       NULL may be used to specify the default.
 * @param script         buffer receiving the displayable script name
 * @param scriptCapacity the size of the script buffer
 * @param status         error information if retrieving the displayable
 *                       script name failed
 * @return the actual buffer size needed for the displayable script name.
 *         If it's greater than scriptCapacity, the returned name will be
 *         truncated.
 * @stable ICU 2.8
 */
U_STABLE int32_t U_EXPORT2
uloc_getDisplayScript(const char* locale,
                      const char* displayLocale,
                      UChar* script,
                      int32_t scriptCapacity,
                      UErrorCode* status);
/**
 * Produces the country name of the specified locale in a form suitable
 * for display.
 * Warning: this is for the region part of a valid locale ID; it cannot
 * just be the region code (like "FR").
 * To get the display name for a region alone, or for other options, use
 * ULocaleDisplayNames instead.
 *
 * @param locale          the locale whose country is to be displayed.
 *                        NULL may be used to specify the default.
 * @param displayLocale   the locale to be used to display the name.
 *                        NULL may be used to specify the default.
 * @param country         buffer receiving the displayable country name
 * @param countryCapacity the size of the country buffer
 * @param status          error information if retrieving the displayable
 *                        country name failed
 * @return the actual buffer size needed for the displayable country name.
 *         If it's greater than countryCapacity, the returned name will be
 *         truncated.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
uloc_getDisplayCountry(const char* locale,
                       const char* displayLocale,
                       UChar* country,
                       int32_t countryCapacity,
                       UErrorCode* status);
/**
 * Produces the variant name of the specified locale in a form suitable
 * for display.
 *
 * @param locale          the locale whose variant is to be displayed.
 *                        NULL may be used to specify the default.
 * @param displayLocale   the locale to be used to display the name.
 *                        NULL may be used to specify the default.
 * @param variant         buffer receiving the displayable variant name
 * @param variantCapacity the size of the variant buffer
 * @param status          error information if retrieving the displayable
 *                        variant name failed
 * @return the actual buffer size needed for the displayable variant name.
 *         If it's greater than variantCapacity, the returned name will be
 *         truncated.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
uloc_getDisplayVariant(const char* locale,
                       const char* displayLocale,
                       UChar* variant,
                       int32_t variantCapacity,
                       UErrorCode* status);
/**
* Gets the keyword name suitable for display for the specified locale.
* E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display
* string for the keyword collation.
* Usage:
*
* This implements the 'Language-Tag' production of BCP47, and so
* supports grandfathered (regular and irregular) as well as private
* use language tags. Private use tags are represented as 'x-whatever',
* and grandfathered tags are converted to their canonical replacements
* where they exist. Note that a few grandfathered tags have no modern
* replacement, these will be converted using the fallback described in
* the first paragraph, so some information might be lost.
* @param langtag the input BCP47 language tag.
* @param localeID the output buffer receiving a locale ID for the
* specified BCP47 language tag.
* @param localeIDCapacity the size of the locale ID output buffer.
* @param parsedLength if not NULL, successfully parsed length
* for the input language tag is set.
* @param err error information if receiving the locale ID
* failed.
* @return the length of the locale ID.
* @stable ICU 4.2
*/
U_STABLE int32_t U_EXPORT2
uloc_forLanguageTag(const char* langtag,
                    char* localeID,
                    int32_t localeIDCapacity,
                    int32_t* parsedLength,
                    UErrorCode* err);
/**
* Returns a well-formed language tag for this locale ID.
*
* Note: When
* When the specified keyword is unknown, but satisfies the BCP syntax,
* then the pointer to the input keyword itself will be returned.
* For example,
*
* When the specified keyword is not recognized, but the specified value
* satisfies the syntax of the BCP 47 Unicode locale extension type,
* or when the specified keyword allows 'variable' type and the specified
* value satisfies the syntax, then the pointer to the input type value itself
* will be returned.
* For example,
*
* When the specified keyword is not recognized, but the specified value
* satisfies the syntax of legacy key, or when the specified keyword
* allows 'variable' type and the specified value satisfies the syntax,
* then the pointer to the input type value itself will be returned.
* For example,
*
* Resource bundles in ICU4C are currently defined using text files which conform to the following
* BNF definition.
* More on resource bundle concepts and syntax can be found in the
* Users Guide.
*
*/
/**
 * UResourceBundle is an opaque type for handles for resource bundles in
 * C APIs. Only the struct tag is declared here, so the type can be used
 * through pointers only.
 * @stable ICU 2.0
 */
typedef struct UResourceBundle UResourceBundle;
/**
 * Numeric constants identifying the types of resource items.
 * @see ures_getType
 * @stable ICU 2.0
 */
typedef enum {
    /** Resource type constant for "no resource". @stable ICU 2.6 */
    URES_NONE       = -1,

    /** Resource type constant for 16-bit Unicode strings. @stable ICU 2.6 */
    URES_STRING     = 0,

    /** Resource type constant for binary data. @stable ICU 2.6 */
    URES_BINARY     = 1,

    /** Resource type constant for tables of key-value pairs. @stable ICU 2.6 */
    URES_TABLE      = 2,

    /**
     * Resource type constant for aliases.
     * An alias internally stores a string which identifies the actual
     * resource storing the data (can be in a different resource bundle).
     * It is resolved internally before the actual resource is delivered
     * through the API.
     * @stable ICU 2.6
     */
    URES_ALIAS      = 3,

    /**
     * Resource type constant for a single 28-bit integer, interpreted as
     * signed or unsigned by the ures_getInt() or ures_getUInt() function.
     * @see ures_getInt
     * @see ures_getUInt
     * @stable ICU 2.6
     */
    URES_INT        = 7,

    /** Resource type constant for arrays of resources. @stable ICU 2.6 */
    URES_ARRAY      = 8,

    /**
     * Resource type constant for vectors of 32-bit integers.
     * @see ures_getIntVector
     * @stable ICU 2.6
     */
    URES_INT_VECTOR = 14,

    /* Limit value of this enum. */
    URES_LIMIT      = 16
} UResType;
/*
* Functions to create and destroy resource bundles.
*/
/**
* Opens a UResourceBundle, from which users can extract strings by using
* their corresponding keys.
* Note that the caller is responsible for calling ures_close on each successfully
* opened resource bundle.
* @param packageName The packageName and locale together point to an ICU udata object,
* as defined by Note: If the ISO 4217 numeric code is not assigned for the currency or
* the currency is unknown, this function returns 0.
*
* @param currency null-terminated 3-letter ISO 4217 code
* @return The ISO 4217 numeric code of the currency
* @stable ICU 49
*/
U_STABLE int32_t U_EXPORT2 ucurr_getNumericCode(const UChar* currency);
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif
// ucnv_err.h
/*
**********************************************************************
* Copyright (C) 1999-2009, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
*
* ucnv_err.h:
*/
/**
* \file
* \brief C UConverter predefined error callbacks
*
* This API is used to convert codepage or character encoded data to and
* from UTF-16. You can open a converter with {@link ucnv_open() }. With that
* converter, you can get its properties, set options, convert your data and
* close the converter. Since many software programs recognize different converter names for
* different types of converters, there are other functions in this API to
* iterate over the converter aliases. The functions {@link ucnv_getAvailableName() },
* {@link ucnv_getAlias() } and {@link ucnv_getStandardName() } are some of the
* more frequently used alias functions to get this information. When a converter encounters an illegal, irregular, invalid or unmappable character
* its default behavior is to use a substitution character to replace the
* bad byte sequence. This behavior can be changed by using {@link ucnv_setFromUCallBack() }
* or {@link ucnv_setToUCallBack() } on the converter. The header ucnv_err.h defines
* many other callback actions that can be used instead of a character substitution. More information about this API can be found in our
* User's
* Guide. A converter name for ICU 1.5 and above may contain options
* like a locale specification to control the specific behavior of
* the newly instantiated converter.
* The meaning of the options depends on the particular converter.
* If an option is not defined for or recognized by a given converter, then it is ignored. Options are appended to the converter name string, with a
* If the alias is ambiguous, then the preferred converter is used
* and the status is set to U_AMBIGUOUS_ALIAS_WARNING. The conversion behavior and names can vary between platforms. ICU may
* convert some characters differently from other platforms. Details on this topic
* are in the User's
* Guide. Aliases starting with a "cp" prefix have no specific meaning
* other than that it is an alias starting with the letters "cp". Please do not
* associate any meaning to these aliases. See ucnv_open for the complete details Creates a UConverter object specified from a packageName and a converterName. The packageName and converterName must point to an ICU udata object, as defined by
* The name will NOT be looked up in the alias mechanism, nor will the converter be
* stored in the converter cache or the alias table. The only way to open further converters
* is call this function multiple times, or use the ucnv_safeClone() function to clone a
* 'master' converter. A future version of ICU may add alias table lookups and/or caching
* to this function. Example Use:
* Handling of surrogate pairs and supplementary-plane code points:#define for some macros
* before it is first tested, or add an equivalent -D macro definition
* to the compiler's command line.
*
* Note: Some compilers provide ways to show the predefined macros.
* For example, with gcc you can compile an empty .c file and have the compiler
* print the predefined macros with
* \code
* gcc -E -dM -x c /dev/null | sort
* \endcode
* (You can provide an actual empty .c file rather than /dev/null.
* -x c++ is for C++.)
*/
/**
* Define some things so that they can be documented.
* @internal
*/
#ifdef U_IN_DOXYGEN
/*
* Problem: "platform.h:335: warning: documentation for unknown define U_HAVE_STD_STRING found." means that U_HAVE_STD_STRING is not documented.
* Solution: #define any defines for non @internal API here, so that they are visible in the docs. If you just set PREDEFINED in Doxyfile.in, they won't be documented.
*/
/* None for now. */
#endif
/**
* \def U_PLATFORM
* The U_PLATFORM macro defines the platform we're on.
*
* We used to define one different, value-less macro per platform.
* That made it hard to know the set of relevant platforms and macros,
* and hard to deal with variants of platforms.
*
* Starting with ICU 49, we define platforms as numeric macros,
* with ranges of values for related platforms and their variants.
* The U_PLATFORM macro is set to one of these values.
*
* Historical note from the Solaris Wikipedia article:
* AT&T and Sun collaborated on a project to merge the most popular Unix variants
* on the market at that time: BSD, System V, and Xenix.
* This became Unix System V Release 4 (SVR4).
*
* @internal
*/
/** Platform is unknown. @internal */
#define U_PF_UNKNOWN               0

/** Windows. @internal */
#define U_PF_WINDOWS               1000
/** MinGW: Windows, calls to the Win32 API, but built with GNU gcc and binutils. @internal */
#define U_PF_MINGW                 1800
/**
 * Cygwin: Windows, calls to cygwin1.dll for Posix functions,
 * built with MSVC or GNU gcc and binutils.
 * @internal
 */
#define U_PF_CYGWIN                1900

/* Reserve 2000 for U_PF_UNIX? */
/** HP-UX, which is based on UNIX System V. @internal */
#define U_PF_HPUX                  2100
/** Solaris, a Unix operating system based on SVR4. @internal */
#define U_PF_SOLARIS               2600
/** BSD, a UNIX operating system derivative. @internal */
#define U_PF_BSD                   3000
/** AIX, which is based on UNIX System V Releases and 4.3 BSD. @internal */
#define U_PF_AIX                   3100
/** IRIX, which is based on UNIX System V with BSD extensions. @internal */
#define U_PF_IRIX                  3200
/**
 * Darwin, a POSIX-compliant operating system composed of code developed by
 * Apple as well as code derived from NeXTSTEP, BSD, and other projects,
 * built around the Mach kernel.
 * Darwin forms the core set of components upon which Mac OS X, Apple TV,
 * and iOS are based.
 * (Original description modified from WikiPedia.)
 * @internal
 */
#define U_PF_DARWIN                3500
/** iPhone OS (iOS), a derivative of Mac OS X. @internal */
#define U_PF_IPHONE                3550
/** QNX, a commercial Unix-like real-time operating system related to BSD. @internal */
#define U_PF_QNX                   3700

/** Linux, a Unix-like operating system. @internal */
#define U_PF_LINUX                 4000
/**
 * Native Client, which is pretty close to Linux.
 * See https://developer.chrome.com/native-client and
 * http://www.chromium.org/nativeclient
 * @internal
 */
#define U_PF_BROWSER_NATIVE_CLIENT 4020
/** Android, which is based on Linux. @internal */
#define U_PF_ANDROID               4050
/* Maximum value for Linux-based platform is 4499 */

/** z/OS, the successor to OS/390 which was the successor to MVS. @internal */
#define U_PF_OS390                 9000
/** "IBM i", the current name of what used to be i5/OS and earlier OS/400. @internal */
#define U_PF_OS400                 9400
#ifdef U_PLATFORM
/* Use the predefined value. */
#elif defined(__MINGW32__)
# define U_PLATFORM U_PF_MINGW
#elif defined(__CYGWIN__)
# define U_PLATFORM U_PF_CYGWIN
#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
# define U_PLATFORM U_PF_WINDOWS
#elif defined(__ANDROID__)
# define U_PLATFORM U_PF_ANDROID
/* Android wchar_t support depends on the API level. */
# include
* \code
* A-Z a-z 0-9 SPACE " % & ' ( ) * + , - . / : ; < = > ? _
* \endcode
*
* (52 letters + 10 numbers + 20 punc/sym/space = 82 total) Basic types and constants for UTF
* This file defines basic types and constants for utf.h to be
* platform-independent. umachine.h and utf.h are included into
* utypes.h to provide all the general definitions for ICU.
* All of these definitions used to be in utypes.h before
* the UTF-handling macros made this unmaintainable.
*
*/
/*==========================================================================*/
/* Include platform-dependent definitions */
/* which are contained in the platform-specific file platform.h */
/*==========================================================================*/
/*
* ANSI C headers:
* stddef.h defines wchar_t
*/
#include while(i
U_SHAPE_... options.source.
*
* @param dest The destination buffer that will receive the results of the
* requested operations. It may be NULL only if
* destSize is 0. The source and destination must not
* overlap.
*
* @param destSize The size (capacity) of the destination buffer in UChars.
* If destSize is 0, then no output is produced,
* but the necessary buffer size is returned ("preflighting").
*
* @param options This is a 32-bit set of flags that specify the operations
* that are performed on the input text. If no error occurs,
* then the result will always be written to the destination
* buffer.
*
* @param pErrorCode must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @return The number of UChars written to the destination buffer.
* If an error occurred, then no output was written, or it may be
* incomplete. If U_BUFFER_OVERFLOW_ERROR is set, then
* the return value indicates the necessary destination buffer size.
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
u_shapeArabic(const UChar *source,
              int32_t sourceLength,
              UChar *dest,
              int32_t destSize,
              uint32_t options,
              UErrorCode *pErrorCode);
/**
 * Memory option: the result is allowed to differ in length from the source.
 * Affects: LamAlef options
 * @stable ICU 2.0
 */
#define U_SHAPE_LENGTH_GROW_SHRINK                 0
/**
 * Memory option: the result is allowed to differ in length from the source.
 * Affects: LamAlef options
 * Alias of U_SHAPE_LENGTH_GROW_SHRINK.
 * @stable ICU 4.2
 */
#define U_SHAPE_LAMALEF_RESIZE                     0
/**
 * Memory option: the result keeps the length of the source.
 * When more room is needed, an attempt is made to consume spaces next to modified characters.
 * @stable ICU 2.0
 */
#define U_SHAPE_LENGTH_FIXED_SPACES_NEAR           1
/**
 * Memory option: the result keeps the length of the source.
 * When more room is needed, an attempt is made to consume spaces next to modified characters.
 * Affects: LamAlef options
 * Alias of U_SHAPE_LENGTH_FIXED_SPACES_NEAR.
 * @stable ICU 4.2
 */
#define U_SHAPE_LAMALEF_NEAR                       1
/**
 * Memory option: the result keeps the length of the source.
 * When more room is needed, an attempt is made to consume spaces at the end of the text.
 * @stable ICU 2.0
 */
#define U_SHAPE_LENGTH_FIXED_SPACES_AT_END         2
/**
 * Memory option: the result keeps the length of the source.
 * When more room is needed, an attempt is made to consume spaces at the end of the text.
 * Affects: LamAlef options
 * Alias of U_SHAPE_LENGTH_FIXED_SPACES_AT_END.
 * @stable ICU 4.2
 */
#define U_SHAPE_LAMALEF_END                        2
/**
 * Memory option: the result keeps the length of the source.
 * When more room is needed, an attempt is made to consume spaces at the beginning of the text.
 * @stable ICU 2.0
 */
#define U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING   3
/**
 * Memory option: the result keeps the length of the source.
 * When more room is needed, an attempt is made to consume spaces at the beginning of the text.
 * Affects: LamAlef options
 * Alias of U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING.
 * @stable ICU 4.2
 */
#define U_SHAPE_LAMALEF_BEGIN                      3
/**
 * Memory option: the result keeps the length of the source.
 * Shaping Mode: every LAMALEF character found is expanded using space at the end.
 * When no space is left at the end, spaces at the beginning of the buffer are used.
 * When no space is left at the beginning either, the near space is used
 * (i.e. the space after the LAMALEF character).
 * When no spaces are found at all, the error U_NO_SPACE_AVAILABLE (as defined in utypes.h)
 * is set in pErrorCode.
 *
 * Deshaping Mode: behaves the same as U_SHAPE_LAMALEF_END.
 * Affects: LamAlef options
 * @stable ICU 4.2
 */
#define U_SHAPE_LAMALEF_AUTO                       0x10000
/** Bit mask covering the memory options. @stable ICU 2.0 */
#define U_SHAPE_LENGTH_MASK                        0x10003 /* old value was 3 */
/**
 * Bit mask covering the LamAlef memory options.
 * @stable ICU 4.2
 */
#define U_SHAPE_LAMALEF_MASK                       0x10003 /* updated */
/** Direction indicator: the source is stored in logical (keyboard) order. @stable ICU 2.0 */
#define U_SHAPE_TEXT_DIRECTION_LOGICAL      0
/**
 * Direction indicator:
 * the source is stored in visual RTL order,
 * with the rightmost displayed character first.
 * Alias of U_SHAPE_TEXT_DIRECTION_LOGICAL.
 * @stable ICU 4.2
 */
#define U_SHAPE_TEXT_DIRECTION_VISUAL_RTL   0
/**
 * Direction indicator:
 * the source is stored in visual LTR order,
 * with the leftmost displayed character first.
 * @stable ICU 2.0
 */
#define U_SHAPE_TEXT_DIRECTION_VISUAL_LTR   4
/** Bit mask covering the direction indicators. @stable ICU 2.0 */
#define U_SHAPE_TEXT_DIRECTION_MASK         4
/** Letter shaping option: leave letters untouched (no letter shaping). @stable ICU 2.0 */
#define U_SHAPE_LETTERS_NOOP                      0
/** Letter shaping option: abstract letter characters are replaced by "shaped" ones. @stable ICU 2.0 */
#define U_SHAPE_LETTERS_SHAPE                     8
/** Letter shaping option: "shaped" letter characters are replaced by abstract ones. @stable ICU 2.0 */
#define U_SHAPE_LETTERS_UNSHAPE                   0x10
/**
 * Letter shaping option: abstract letter characters are replaced by "shaped" ones.
 * This differs from U_SHAPE_LETTERS_SHAPE only in that Tashkeel letters
 * are always "shaped" into the isolated form rather than the medial form
 * (selecting code points from the Arabic Presentation Forms-B block).
 * @stable ICU 2.0
 */
#define U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED   0x18
/** Bit mask covering the letter shaping options. @stable ICU 2.0 */
#define U_SHAPE_LETTERS_MASK                      0x18
/** Digit shaping option: leave digits untouched (no digit shaping). @stable ICU 2.0 */
#define U_SHAPE_DIGITS_NOOP              0
/**
 * Digit shaping option:
 * European digits (U+0030...) are replaced by Arabic-Indic digits.
 * @stable ICU 2.0
 */
#define U_SHAPE_DIGITS_EN2AN             0x20
/**
 * Digit shaping option:
 * Arabic-Indic digits are replaced by European digits (U+0030...).
 * @stable ICU 2.0
 */
#define U_SHAPE_DIGITS_AN2EN             0x40
/**
 * Digit shaping option:
 * European digits (U+0030...) are replaced by Arabic-Indic digits when the most recent
 * strongly directional character is an Arabic letter
 * (u_charDirection() result U_RIGHT_TO_LEFT_ARABIC [AL]).
 * The direction of "preceding" depends on the direction indicator option.
 * For the first characters, the preceding strongly directional character
 * (initial state) is assumed not to be an Arabic letter
 * (it is U_LEFT_TO_RIGHT [L] or U_RIGHT_TO_LEFT [R]).
 * @stable ICU 2.0
 */
#define U_SHAPE_DIGITS_ALEN2AN_INIT_LR   0x60
/**
 * Digit shaping option:
 * European digits (U+0030...) are replaced by Arabic-Indic digits when the most recent
 * strongly directional character is an Arabic letter
 * (u_charDirection() result U_RIGHT_TO_LEFT_ARABIC [AL]).
 * The direction of "preceding" depends on the direction indicator option.
 * For the first characters, the preceding strongly directional character
 * (initial state) is assumed to be an Arabic letter.
 * @stable ICU 2.0
 */
#define U_SHAPE_DIGITS_ALEN2AN_INIT_AL   0x80
/** Not a valid option value; may be replaced by a new option. @stable ICU 2.0 */
#define U_SHAPE_DIGITS_RESERVED          0xa0
/** Bit mask covering the digit shaping options. @stable ICU 2.0 */
#define U_SHAPE_DIGITS_MASK              0xe0
/** Digit type option: Arabic-Indic digits (U+0660...U+0669) are used. @stable ICU 2.0 */
#define U_SHAPE_DIGIT_TYPE_AN            0
/** Digit type option: Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9) are used. @stable ICU 2.0 */
#define U_SHAPE_DIGIT_TYPE_AN_EXTENDED   0x100
/** Not a valid option value; may be replaced by a new option. @stable ICU 2.0 */
#define U_SHAPE_DIGIT_TYPE_RESERVED      0x200
/** Bit mask covering the digit type options. @stable ICU 2.0 */
#define U_SHAPE_DIGIT_TYPE_MASK          0x300 /* narrowed from 0x3f00 to 0x300 */
/**
 * Tashkeel aggregation option:
 * Any combination of U+0651 with one of
 * U+064C, U+064D, U+064E, U+064F, U+0650 is replaced by
 * U+FC5E, U+FC5F, U+FC60, U+FC61, U+FC62 respectively.
 * @stable ICU 3.6
 */
#define U_SHAPE_AGGREGATE_TASHKEEL        0x4000
/** Tashkeel aggregation option: tashkeels are not aggregated. @stable ICU 3.6 */
#define U_SHAPE_AGGREGATE_TASHKEEL_NOOP   0
/** Bit mask covering tashkeel aggregation. @stable ICU 3.6 */
#define U_SHAPE_AGGREGATE_TASHKEEL_MASK   0x4000
/**
 * Presentation form option:
 * Arabic Presentation Forms-A and Arabic Presentation Forms-B characters
 * are not replaced with U+06xx characters before shaping.
 * @stable ICU 3.6
 */
#define U_SHAPE_PRESERVE_PRESENTATION        0x8000
/**
 * Presentation form option:
 * Arabic Presentation Forms-A and Arabic Presentation Forms-B characters
 * are replaced with their unshaped counterparts in the range U+06xx before shaping.
 * @stable ICU 3.6
 */
#define U_SHAPE_PRESERVE_PRESENTATION_NOOP   0
/** Bit mask covering the preserve-presentation-form option. @stable ICU 3.6 */
#define U_SHAPE_PRESERVE_PRESENTATION_MASK   0x8000
/* Seen Tail option */
/**
 * Memory option: the result keeps the length of the source.
 * Shaping mode: a SEEN family character is expanded into two characters using the space near
 * the SEEN family character (i.e. the space after the character).
 * When no spaces are found, the error U_NO_SPACE_AVAILABLE (as defined in utypes.h)
 * is set in pErrorCode.
 *
 * De-shaping mode: any Seen character followed by a Tail character is
 * replaced by a one-cell Seen, and a space replaces the Tail.
 * Affects: Seen options
 * @stable ICU 4.2
 */
#define U_SHAPE_SEEN_TWOCELL_NEAR   0x200000
/**
 * Bit mask covering the Seen memory options.
 * @stable ICU 4.2
 */
#define U_SHAPE_SEEN_MASK           0x700000
/* YehHamza option */
/**
 * Memory option: the result keeps the length of the source.
 * Shaping mode: the YEHHAMZA character is expanded into two characters using the space near it
 * (i.e. the space after the character).
 * When no spaces are found, the error U_NO_SPACE_AVAILABLE (as defined in utypes.h)
 * is set in pErrorCode.
 *
 * De-shaping mode: any Yeh (final or isolated) character followed by a Hamza character is
 * replaced by a one-cell YehHamza, and a space replaces the Hamza.
 * Affects: YehHamza options
 * @stable ICU 4.2
 */
#define U_SHAPE_YEHHAMZA_TWOCELL_NEAR   0x1000000
/**
 * Bit mask covering the YehHamza memory options.
 * @stable ICU 4.2
 */
#define U_SHAPE_YEHHAMZA_MASK           0x3800000
/* New Tashkeel options */
/**
 * Memory option: the result keeps the length of the source.
 * Shaping mode: Tashkeel characters are replaced by spaces.
 * The spaces are placed at the beginning of the buffer.
 *
 * De-shaping mode: N/A
 * Affects: Tashkeel options
 * @stable ICU 4.2
 */
#define U_SHAPE_TASHKEEL_BEGIN                0x40000
/**
 * Memory option: the result keeps the length of the source.
 * Shaping mode: Tashkeel characters are replaced by spaces.
 * The spaces are placed at the end of the buffer.
 *
 * De-shaping mode: N/A
 * Affects: Tashkeel options
 * @stable ICU 4.2
 */
#define U_SHAPE_TASHKEEL_END                  0x60000
/**
 * Memory option: the result is allowed to differ in length from the source.
 * Shaping mode: Tashkeel characters are removed and the buffer length shrinks.
 * De-shaping mode: N/A
 *
 * Affects: Tashkeel options
 * @stable ICU 4.2
 */
#define U_SHAPE_TASHKEEL_RESIZE               0x80000
/**
 * Memory option: the result keeps the length of the source.
 * Shaping mode: a Tashkeel character is replaced by Tatweel when it is connected to adjacent
 * characters (i.e. shaped on Tatweel), or by a space when it is not connected.
 *
 * De-shaping mode: N/A
 * Affects: Tashkeel options
 * @stable ICU 4.2
 */
#define U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL   0xC0000
/**
 * Bit mask covering the memory options that replace Tashkeel with Space or Tatweel.
 * @stable ICU 4.2
 */
#define U_SHAPE_TASHKEEL_MASK                 0xE0000
/* Space location control options */
/**
 * This option affects the meaning of the BEGIN and END options. When it is not used,
 * BEGIN and END default to the following (for Visual LTR, Visual RTL and Logical text alike):
 * 1. BEGIN always refers to the start address of physical memory.
 * 2. END always refers to the end address of physical memory.
 *
 * When it is used, the meaning of BEGIN and END is swapped, but only for Visual LTR text.
 *
 * The effect on the BEGIN and END memory options is then as follows:
 * A. BEGIN for Visual LTR text: the beginning (right side) of the visual text,
 *    corresponding to the physical memory address end for Visual LTR text
 *    (same as END in the default behavior).
 * B. BEGIN for Logical text: same as BEGIN in the default behavior.
 * C. END for Visual LTR text: the end (left side) of the visual text,
 *    corresponding to the physical memory address beginning for Visual LTR text
 *    (same as BEGIN in the default behavior).
 * D. END for Logical text: same as END in the default behavior.
 * Affects: All LamAlef BEGIN, END and AUTO options.
 * @stable ICU 4.2
 */
#define U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END   0x4000000
/**
 * Bit mask covering the option that swaps BEGIN and END for Visual LTR text.
 * @stable ICU 4.2
 */
#define U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK        0x4000000
/**
 * With this option, shaping uses the new Unicode code point for TAIL (i.e. 0xFE73).
 * Without it (the default), the old unofficial Unicode TAIL code point is used (i.e. 0x200B).
 * De-shaping ignores this option: it always searches for both the new Unicode code point for the
 * TAIL (i.e. 0xFE73) and the old unofficial Unicode TAIL code point (i.e. 0x200B) and de-shapes the
 * Seen-Family letter accordingly.
 *
 * Shaping Mode: Only shaping.
 * De-shaping Mode: N/A.
 * Affects: All Seen options
 * @stable ICU 4.8
 */
#define U_SHAPE_TAIL_NEW_UNICODE   0x8000000
/**
 * Bit mask covering the new Unicode Tail option.
 * @stable ICU 4.8
 */
#define U_SHAPE_TAIL_TYPE_MASK     0x8000000
#endif
// uscript.h
/*
**********************************************************************
* Copyright (C) 1997-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* File USCRIPT.H
*
* Modification History:
*
* Date Name Description
* 07/06/2001 Ram Creation.
******************************************************************************
*/
#ifndef USCRIPT_H
#define USCRIPT_H
/**
* \file
* \brief C API: Unicode Script Information
*/
/**
* Constants for ISO 15924 script codes.
*
* The current set of script code constants supports at least all scripts
* that are encoded in the version of Unicode which ICU currently supports.
* The names of the constants are usually derived from the
* Unicode script property value aliases.
* See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/)
* and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .
*
* Starting with ICU 3.6, constants for most ISO 15924 script codes
* are included, for use with language tags, CLDR data, and similar.
* Some of those codes are not used in the Unicode Character Database (UCD).
* For example, there are no characters that have a UCD script property value of
* Hans or Hant. All Han ideographs have the Hani script property value in Unicode.
*
* Private-use codes Qaaa..Qabx are not included.
*
* Starting with ICU 55, script codes are only added when their scripts
* have been or will certainly be encoded in Unicode,
* and have been assigned Unicode script property value aliases,
* to ensure that their script names are stable and match the names of the constants.
* Script codes like Latf and Aran that are not subject to separate encoding
* may be added at any time.
*
* @stable ICU 2.2
*/
typedef enum UScriptCode {
/*
* Note: UScriptCode constants and their ISO script code comments
* are parsed by preparseucd.py.
* It matches lines like
* USCRIPT_operator new returns NULL the
* constructor is still called, and if the constructor references member
* data, (which it typically does), the result is a segmentation violation.
*
* @stable ICU 4.2
*/
/* Keep a U_NO_THROW that was defined earlier; otherwise default it to the empty throw() specification. */
#ifndef U_NO_THROW
#define U_NO_THROW throw()
#endif
/** @} */
/*===========================================================================*/
/* UClassID-based RTTI */
/*===========================================================================*/
/**
 * UClassID identifies classes without relying on the compiler's RTTI.
 * It dates from the time before C++ compilers consistently supported RTTI.
 * ICU 4.6 requires compiler RTTI to be turned on.
 *
 * Every class hierarchy that needs to implement polymorphic clone() or
 * operator==() defines two methods, described in detail below.
 * UClassID values can be compared with operator==().
 * Nothing else should be done with them.
 *
 * \par
 * In class hierarchies that implement "poor man's RTTI",
 * every concrete subclass implements getDynamicClassID() the same way:
 *
 * \code
 *      class Derived {
 *      public:
 *          virtual UClassID getDynamicClassID() const
 *            { return Derived::getStaticClassID(); }
 *      };
 * \endcode
 *
 * Every concrete class implements getStaticClassID() as well, so that
 * clients can test for a specific type.
 *
 * \code
 *      class Derived {
 *      public:
 *          static UClassID U_EXPORT2 getStaticClassID();
 *      private:
 *          static char fgClassID;
 *      };
 *
 *      // In Derived.cpp:
 *      UClassID Derived::getStaticClassID()
 *        { return (UClassID)&Derived::fgClassID; }
 *      char Derived::fgClassID = 0; // Value is irrelevant
 * \endcode
 * @stable ICU 2.0
 */
typedef void *UClassID;
#endif
// umisc.h
/*
**********************************************************************
* Copyright (C) 1999-2006, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: umisc.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999oct15
* created by: Markus W. Scherer
*/
#ifndef UMISC_H
#define UMISC_H
/**
* \file
* \brief C API:misc definitions
*
* This file contains miscellaneous definitions for the C APIs.
*/
U_CDECL_BEGIN
/**
 * Describes a range of text that contains a specific field.
 * @stable ICU 2.0
 */
typedef struct UFieldPosition {
    int32_t field;      /**< The field. @stable ICU 2.0 */
    int32_t beginIndex; /**< The start of the text range containing field. @stable ICU 2.0 */
    int32_t endIndex;   /**< The limit of the text range containing field. @stable ICU 2.0 */
} UFieldPosition;
#if !UCONFIG_NO_SERVICE
/**
 * Opaque key type used for service registration; it is what
 * registerInstance, registerFactory and unregister return.
 * @stable ICU 2.6
 */
typedef const void *URegistryKey;
#endif
U_CDECL_END
#endif
// ulistformatter.h
/*
*****************************************************************************************
* Copyright (C) 2015-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*****************************************************************************************
*/
#ifndef ULISTFORMATTER_H
#define ULISTFORMATTER_H
#if !UCONFIG_NO_FORMATTING
/**
* \file
* \brief C API: Format a list in a locale-appropriate way.
*
* A UListFormatter is used to format a list of items in a locale-appropriate way,
* using data from CLDR.
* Example: Input data ["Alice", "Bob", "Charlie", "Delta"] will be formatted
* as "Alice, Bob, Charlie, and Delta" in English.
*/
/**
 * UListFormatter object, opaque to C callers.
 * @stable ICU 55
 */
struct UListFormatter;
/** C typedef for struct UListFormatter. @stable ICU 55 */
typedef struct UListFormatter UListFormatter;
/**
 * Creates a new UListFormatter object that follows the rules of a given locale.
 * @param locale
 *            The locale whose rules should be used; NULL selects the
 *            default locale.
 * @param status
 *            A pointer to a standard ICU UErrorCode (input/output parameter).
 *            The input value must pass the U_SUCCESS() test, otherwise the
 *            function returns immediately. The caller should check the output
 *            value with U_FAILURE(), or use it with function chaining (see User
 *            Guide for details).
 * @return
 *            A pointer to a UListFormatter object for the specified locale,
 *            or NULL if an error occurred.
 * @stable ICU 55
 */
U_STABLE UListFormatter* U_EXPORT2
ulistfmt_open(const char *locale, UErrorCode *status);
/**
 * Closes a UListFormatter object. After it has been closed, the object
 * may no longer be used.
 * @param listfmt
 *            The UListFormatter object to close.
 * @stable ICU 55
 */
U_STABLE void U_EXPORT2 ulistfmt_close(UListFormatter *listfmt);
/**
* Formats a list of strings using the conventions established for the
* UListFormatter object.
* @param listfmt
* The UListFormatter object specifying the list conventions.
* @param strings
* An array of pointers to UChar strings; the array length is
* specified by stringCount. Must be non-NULL if stringCount > 0.
* @param stringLengths
* An array of string lengths corresponding to the strings[]
* parameter; any individual length value may be negative to indicate
* that the corresponding strings[] entry is 0-terminated, or
* stringLengths itself may be NULL if all of the strings are
* 0-terminated. If non-NULL, the stringLengths array must have
* stringCount entries.
* @param stringCount
* the number of entries in strings[], and the number of entries
* in the stringLengths array if it is not NULL. Must be >= 0.
* @param result
* A pointer to a buffer to receive the formatted list.
* @param resultCapacity
* The maximum size of result.
* @param status
* A pointer to a standard ICU UErrorCode (input/output parameter).
* Its input value must pass the U_SUCCESS() test, or else the
* function returns immediately. The caller should check its output
* value with U_FAILURE(), or use with function chaining (see User
* Guide for details).
* @return
* The total buffer size needed; if greater than resultLength, the
* output was truncated. May be <=0 if unable to determine the
* total buffer size needed (e.g. for illegal arguments).
* @stable ICU 55
*/
U_DRAFT int32_t U_EXPORT2
ulistfmt_format(const UListFormatter* listfmt,
const UChar* const strings[],
const int32_t * stringLengths,
int32_t stringCount,
UChar* result,
int32_t resultCapacity,
UErrorCode* status);
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif
// uiter.h
/*
*******************************************************************************
*
* Copyright (C) 2002-2011 International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: uiter.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002jan18
* created by: Markus W. Scherer
*/
#ifndef __UITER_H__
#define __UITER_H__
/**
* \file
* \brief C API: Unicode Character Iteration
*
* @see UCharIterator
*/
U_CDECL_BEGIN
struct UCharIterator;
/** C typedef for struct UCharIterator. @stable ICU 2.1 */
typedef struct UCharIterator UCharIterator;
/**
 * Origin constants used with UCharIterator.getIndex() and UCharIterator.move().
 * @see UCharIteratorMove
 * @see UCharIterator
 * @stable ICU 2.1
 */
typedef enum UCharIteratorOrigin {
    UITER_START,
    UITER_CURRENT,
    UITER_LIMIT,
    UITER_ZERO,
    UITER_LENGTH
} UCharIteratorOrigin;
/** Constants for UCharIterator. @stable ICU 2.6 */
enum {
    /**
     * Value that UCharIteratorMove may return to signal that the move
     * succeeded but the final UTF-16 index is not known.
     * This can happen when moving relative to limit or length, or
     * when moving relative to the current index after a setState()
     * while the current UTF-16 index is not known.
     *
     * Counting from the beginning of the text just to obtain the
     * current/limit/length index after moving relative to it would be very inefficient.
     * The actual index can be obtained with getIndex(UITER_CURRENT),
     * which counts the UChars if necessary.
     *
     * @stable ICU 2.6
     */
    UITER_UNKNOWN_INDEX = -2
};
/**
 * UCharIterator getState() value that signals an error or
 * an unknown state.
 * uiter_getState()/UCharIteratorGetState return it
 * when an error occurs.
 * In addition, some UCharIterator implementations may be unable to return
 * a valid state for every position. This will be clearly documented
 * for each such iterator (none of the public ones here).
 *
 * @stable ICU 2.6
 */
#define UITER_NO_STATE   ((uint32_t)0xffffffff)
/**
 * Function type for UCharIterator.getIndex().
 *
 * Returns the current position, or the start or limit of the
 * iteration range.
 *
 * The call may be slow for UITER_CURRENT after setState() was called,
 * or for UITER_LENGTH, since an iterator implementation may need to count
 * UChars when the underlying storage is not UTF-16.
 *
 * @param iter the UCharIterator structure ("this pointer")
 * @param origin get the 0, start, limit, length, or current index
 * @return the requested index, or U_SENTINEL in an error condition
 *
 * @see UCharIteratorOrigin
 * @see UCharIterator
 * @stable ICU 2.1
 */
typedef int32_t U_CALLCONV UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
/**
 * Function type for UCharIterator.move().
 *
 * iter->move(iter, index, UITER_ZERO) can be used like CharacterIterator::setIndex(index).
 *
 * Moves the current position relative to the start or limit of the
 * iteration range, or relative to the current position itself.
 * The distance is given as a number of code units; a positive delta moves
 * forward and a negative delta moves backward.
 * Out of bounds movement is pinned to the start or limit.
 *
 * The call may be slow when moving relative to UITER_LENGTH,
 * since an iterator implementation may need to count the rest of the
 * UChars when the native storage is not UTF-16.
 *
 * When moving relative to the limit or length, or
 * relative to the current position after setState() was called,
 * move() may return UITER_UNKNOWN_INDEX (-2) in order to avoid an inefficient
 * determination of the actual UTF-16 index.
 * The actual index can be obtained with getIndex(UITER_CURRENT),
 * which counts the UChars if necessary.
 * See UITER_UNKNOWN_INDEX for details.
 *
 * @param iter the UCharIterator structure ("this pointer")
 * @param delta can be positive, zero, or negative
 * @param origin move relative to the 0, start, limit, length, or current index
 * @return the new index, or U_SENTINEL on an error condition,
 *         or UITER_UNKNOWN_INDEX when the index is not known.
 *
 * @see UCharIteratorOrigin
 * @see UCharIterator
 * @see UITER_UNKNOWN_INDEX
 * @stable ICU 2.1
 */
typedef int32_t U_CALLCONV UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
/**
 * Function type for UCharIterator.hasNext().
 *
 * Tells whether current() and next() can still
 * return another code unit.
 *
 * @param iter the UCharIterator structure ("this pointer")
 * @return boolean value for whether current() and next() can still return another code unit
 *
 * @see UCharIterator
 * @stable ICU 2.1
 */
typedef UBool U_CALLCONV UCharIteratorHasNext(UCharIterator *iter);
/**
 * Function type for UCharIterator.hasPrevious().
 *
 * Tells whether previous() can still return another code unit.
 *
 * @param iter the UCharIterator structure ("this pointer")
 * @return boolean value for whether previous() can still return another code unit
 *
 * @see UCharIterator
 * @stable ICU 2.1
 */
typedef UBool U_CALLCONV UCharIteratorHasPrevious(UCharIterator *iter);
/**
 * Function type for UCharIterator.current().
 *
 * Returns the code unit at the current position,
 * or U_SENTINEL when there is none (index is at the limit).
 *
 * @param iter the UCharIterator structure ("this pointer")
 * @return the current code unit
 *
 * @see UCharIterator
 * @stable ICU 2.1
 */
typedef UChar32 U_CALLCONV UCharIteratorCurrent(UCharIterator *iter);
/**
 * Function type for UCharIterator.next().
 *
 * Returns the code unit at the current index and then increments
 * the index (post-increment, like s[i++]),
 * or returns U_SENTINEL when there is none (index is at the limit).
 *
 * @param iter the UCharIterator structure ("this pointer")
 * @return the current code unit (and post-increment the current index)
 *
 * @see UCharIterator
 * @stable ICU 2.1
 */
typedef UChar32 U_CALLCONV UCharIteratorNext(UCharIterator *iter);
/**
 * Function type for UCharIterator.previous().
 *
 * Decrements the index and returns the code unit found there
 * (pre-decrement, like s[--i]),
 * or returns U_SENTINEL when there is none (index is at the start).
 *
 * @param iter the UCharIterator structure ("this pointer")
 * @return the previous code unit (after pre-decrementing the current index)
 *
 * @see UCharIterator
 * @stable ICU 2.1
 */
typedef UChar32 U_CALLCONV UCharIteratorPrevious(UCharIterator *iter);
/**
 * Function type for UCharIterator.reservedFn().
 * Set aside for future use.
 *
 * @param iter the UCharIterator structure ("this pointer")
 * @param something some integer argument
 * @return some integer
 *
 * @see UCharIterator
 * @stable ICU 2.1
 */
typedef int32_t U_CALLCONV UCharIteratorReserved(UCharIterator *iter, int32_t something);
/**
 * Function type for UCharIterator.getState().
 *
 * Returns the "state" of the iterator as a single 32-bit word.
 * It is recommended that the state value be calculated to be as small as
 * is feasible. For strings with limited lengths, fewer than 32 bits may
 * be sufficient.
 *
 * Together with setState()/UCharIteratorSetState this allows the
 * iterator position to be saved and restored more efficiently than with
 * getIndex()/move().
 *
 * The iterator state is a uint32_t value because it is designed
 * for use in ucol_nextSortKeyPart(), which provides 32 bits to store the state
 * of the character iterator.
 *
 * With some UCharIterator implementations (e.g., UTF-8),
 * getting and setting the UTF-16 index with existing functions
 * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
 * relatively slow, because the iterator has to "walk" from a known index
 * to the requested one.
 * The farther it needs to go, the more time this takes.
 *
 * An opaque state value lets an iterator implementation provide
 * an internal index (UTF-8: the source byte array index) for
 * fast, constant-time restoration.
 *
 * After a call to setState(), getIndex(UITER_CURRENT) may be slow because
 * the UTF-16 index may not be restored as well, but the iterator can deliver
 * the correct text contents and move relative to the current position
 * without performance degradation.
 *
 * Some UCharIterator implementations may be unable to return
 * a valid state for every position, in which case they return UITER_NO_STATE instead.
 * This will be clearly documented for each such iterator (none of the public ones here).
 *
 * @param iter the UCharIterator structure ("this pointer")
 * @return the state word
 *
 * @see UCharIterator
 * @see UCharIteratorSetState
 * @see UITER_NO_STATE
 * @stable ICU 2.6
 */
typedef uint32_t U_CALLCONV UCharIteratorGetState(const UCharIterator *iter);
/**
* Function type declaration for UCharIterator.setState().
*
* Restore the "state" of the iterator using a state word from a getState() call.
* The iterator object need not be the same one as for which getState() was called,
* but it must be of the same type (set up using the same uiter_setXYZ function)
* and it must iterate over the same string
* (binary identical regardless of memory address).
* For more about the state word see UCharIteratorGetState.
*
* After calling setState(), a getIndex(UITER_CURRENT) may be slow because
* the UTF-16 index may not be restored as well, but the iterator can deliver
* the correct text contents and move relative to the current position
* without performance degradation.
*
* @param iter the UCharIterator structure ("this pointer")
* @param state the state word from a getState() call
* on a same-type, same-string iterator
* @param pErrorCode Must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @see UCharIterator
* @see UCharIteratorGetState
* @see uiter_setState
* @stable ICU 2.6
*/
typedef void U_CALLCONV
UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
/**
* C API for code unit iteration.
* This can be used as a C wrapper around
* CharacterIterator, Replaceable, or implemented using simple strings, etc.
*
* There are two roles for using UCharIterator:
*
* A "provider" sets the necessary function pointers and controls the "protected"
* fields of the UCharIterator structure. A "provider" passes a UCharIterator
* into C APIs that need a UCharIterator as an abstract, flexible string interface.
*
* Implementations of such C APIs are "callers" of UCharIterator functions;
* they only use the "public" function pointers and never access the "protected"
* fields directly.
*
* The current() and next() functions only check the current index against the
* limit, and previous() only checks the current index against the start,
* to see if the iterator already reached the end of the iteration range.
*
* The assumption - in all iterators - is that the index is moved via the API,
* which means it won't go out of bounds, or the index is modified by
* user code that knows enough about the iterator implementation to set valid
* index values.
*
* UCharIterator functions return code unit values 0..0xffff,
* or U_SENTINEL if the iteration bounds are reached.
*
* @stable ICU 2.1
*/
struct UCharIterator {
/**
* (protected) Pointer to string or wrapped object or similar.
* Not used by caller.
* @stable ICU 2.1
*/
const void *context;
/**
* (protected) Length of string or similar.
* Not used by caller.
* @stable ICU 2.1
*/
int32_t length;
/**
* (protected) Start index or similar.
* Not used by caller.
* @stable ICU 2.1
*/
int32_t start;
/**
* (protected) Current index or similar.
* Not used by caller.
* @stable ICU 2.1
*/
int32_t index;
/**
* (protected) Limit index or similar.
* Not used by caller.
* @stable ICU 2.1
*/
int32_t limit;
/**
* (protected) Used by UTF-8 iterators and possibly others.
* @stable ICU 2.1
*/
int32_t reservedField;
/**
* (public) Returns the current position or the
* start or limit index of the iteration range.
*
* @see UCharIteratorGetIndex
* @stable ICU 2.1
*/
UCharIteratorGetIndex *getIndex;
/**
* (public) Moves the current position relative to the start or limit of the
* iteration range, or relative to the current position itself.
* The movement is expressed in numbers of code units forward
* or backward by specifying a positive or negative delta.
*
* @see UCharIteratorMove
* @stable ICU 2.1
*/
UCharIteratorMove *move;
/**
* (public) Check if current() and next() can still
* return another code unit.
*
* @see UCharIteratorHasNext
* @stable ICU 2.1
*/
UCharIteratorHasNext *hasNext;
/**
* (public) Check if previous() can still return another code unit.
*
* @see UCharIteratorHasPrevious
* @stable ICU 2.1
*/
UCharIteratorHasPrevious *hasPrevious;
/**
* (public) Return the code unit at the current position,
* or U_SENTINEL if there is none (index is at the limit).
*
* @see UCharIteratorCurrent
* @stable ICU 2.1
*/
UCharIteratorCurrent *current;
/**
* (public) Return the code unit at the current index and increment
* the index (post-increment, like s[i++]),
* or return U_SENTINEL if there is none (index is at the limit).
*
* @see UCharIteratorNext
* @stable ICU 2.1
*/
UCharIteratorNext *next;
/**
* (public) Decrement the index and return the code unit from there
* (pre-decrement, like s[--i]),
* or return U_SENTINEL if there is none (index is at the start).
*
* @see UCharIteratorPrevious
* @stable ICU 2.1
*/
UCharIteratorPrevious *previous;
/**
* (public) Reserved for future use. Currently NULL.
*
* @see UCharIteratorReserved
* @stable ICU 2.1
*/
UCharIteratorReserved *reservedFn;
/**
* (public) Return the state of the iterator, to be restored later with setState().
* This function pointer is NULL if the iterator does not implement it.
*
* @see UCharIteratorGetState
* @stable ICU 2.6
*/
UCharIteratorGetState *getState;
/**
* (public) Restore the iterator state from the state word from a call
* to getState().
* This function pointer is NULL if the iterator does not implement it.
*
* @see UCharIteratorSetState
* @stable ICU 2.6
*/
UCharIteratorSetState *setState;
};
/**
* Helper function for UCharIterator to get the code point
* at the current index.
*
* Return the code point that includes the code unit at the current position,
* or U_SENTINEL if there is none (index is at the limit).
* If the current code unit is a lead or trail surrogate,
* then the following or preceding surrogate is used to form
* the code point value.
*
* @param iter the UCharIterator structure ("this pointer")
* @return the code point that includes the current code unit,
*         or U_SENTINEL if the index is at the limit
*
* @see UCharIterator
* @see U16_GET
* @see UnicodeString::char32At()
* @stable ICU 2.1
*/
U_STABLE UChar32 U_EXPORT2
uiter_current32(UCharIterator *iter);
/**
* Helper function for UCharIterator to get the next code point.
*
* Return the code point at the current index and increment
* the index (post-increment, like s[i++]),
* or return U_SENTINEL if there is none (index is at the limit).
*
* @param iter the UCharIterator structure ("this pointer")
* @return the code point at the index before the increment,
*         or U_SENTINEL if the index is at the limit
*
* @see UCharIterator
* @see U16_NEXT
* @stable ICU 2.1
*/
U_STABLE UChar32 U_EXPORT2
uiter_next32(UCharIterator *iter);
/**
* Helper function for UCharIterator to get the previous code point.
*
* Decrement the index and return the code point from there
* (pre-decrement, like s[--i]),
* or return U_SENTINEL if there is none (index is at the start).
*
* @param iter the UCharIterator structure ("this pointer")
* @return the code point at the decremented index,
*         or U_SENTINEL if the index is at the start
*
* @see UCharIterator
* @see U16_PREV
* @stable ICU 2.1
*/
U_STABLE UChar32 U_EXPORT2
uiter_previous32(UCharIterator *iter);
/**
* Get the "state" of the iterator in the form of a single 32-bit word.
* This is a convenience function that calls iter->getState(iter)
* if iter->getState is not NULL;
* if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
*
* Some UCharIterator implementations may not be able to return
* a valid state for each position, in which case they return UITER_NO_STATE instead.
* This will be clearly documented for each such iterator (none of the public ones here).
*
* @param iter the UCharIterator structure ("this pointer")
* @return the state word, or UITER_NO_STATE if iter->getState is NULL
*         or any other error occurs
*
* @see UCharIterator
* @see UCharIteratorGetState
* @see UITER_NO_STATE
* @stable ICU 2.6
*/
U_STABLE uint32_t U_EXPORT2
uiter_getState(const UCharIterator *iter);
/**
* Restore the "state" of the iterator using a state word from a getState() call.
* This is a convenience function that calls iter->setState(iter, state, pErrorCode)
* if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
*
* @param iter the UCharIterator structure ("this pointer")
* @param state the state word from a getState() call
* on a same-type, same-string iterator
* @param pErrorCode Must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
* Set to U_UNSUPPORTED_ERROR if iter->setState is NULL.
*
* @see UCharIterator
* @see UCharIteratorSetState
* @stable ICU 2.6
*/
U_STABLE void U_EXPORT2
uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
/**
* Set up a UCharIterator to iterate over a string.
*
* Sets the UCharIterator function pointers for iteration over the string s
* with iteration boundaries start=index=0 and length=limit=string length.
* The "provider" may set the start, index, and limit values at any time
* within the range 0..length.
* The length field will be ignored.
*
* The string pointer s is set into UCharIterator.context without copying
* or reallocating the string contents.
*
* getState() simply returns the current index.
* move() will always return the final index.
*
* @param iter UCharIterator structure to be set for iteration
* @param s String to iterate over; the pointer is stored, the contents are not copied
* @param length Length of s, or -1 if NUL-terminated
*
* @see UCharIterator
* @see uiter_setUTF16BE
* @see uiter_setUTF8
* @stable ICU 2.1
*/
U_STABLE void U_EXPORT2
uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
/**
* Set up a UCharIterator to iterate over a UTF-16BE string
* (byte vector with a big-endian pair of bytes per UChar).
*
* Everything works just like with a normal UChar iterator (uiter_setString),
* except that UChars are assembled from byte pairs,
* and that the length argument here indicates an even number of bytes.
*
* getState() simply returns the current index.
* move() will always return the final index.
*
* @param iter UCharIterator structure to be set for iteration
* @param s UTF-16BE string (byte vector) to iterate over
* @param length Length of s as an even number of bytes, or -1 if NUL-terminated
* (NUL means a pair of 0 bytes at an even index from s)
*
* @see UCharIterator
* @see uiter_setString
* @stable ICU 2.6
*/
U_STABLE void U_EXPORT2
uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
/**
* Set up a UCharIterator to iterate over a UTF-8 string.
*
* Sets the UCharIterator function pointers for iteration over the UTF-8 string s
* with UTF-8 iteration boundaries 0 and length.
* The implementation counts the UTF-16 index on the fly and
* lazily evaluates the UTF-16 length of the text.
*
* The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
* When the reservedField is not 0, then it contains a supplementary code point
* and the UTF-16 index is between the two corresponding surrogates.
* At that point, the UTF-8 index is behind that code point.
*
* The UTF-8 string pointer s is set into UCharIterator.context without copying
* or reallocating the string contents.
*
* getState() returns a state value consisting of
* - the current UTF-8 source byte index (bits 31..1)
* - a flag (bit 0) that indicates whether the UChar position is in the middle
*   of a surrogate pair
*   (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
*
* getState() cannot also encode the UTF-16 index in the state value.
* move(relative to limit or length), or
* move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
*
* @param iter UCharIterator structure to be set for iteration
* @param s UTF-8 string to iterate over; the pointer is stored, the contents are not copied
* @param length Length of s in bytes, or -1 if NUL-terminated
*
* @see UCharIterator
* @see uiter_setString
* @stable ICU 2.6
*/
U_STABLE void U_EXPORT2
uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
U_CDECL_END
#endif
// uenum.h
/*
*******************************************************************************
*
* Copyright (C) 2002-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: uenum.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:2
*
* created on: 2002jul08
* created by: Vladimir Weinstein
*/
#ifndef __UENUM_H
#define __UENUM_H
/**
* \file
* \brief C API: String Enumeration
*/
/**
* An enumeration object.
* For usage in C programs.
* Only forward-declared here; callers handle it through the pointers
* passed to and returned from the uenum_... functions.
* @stable ICU 2.2
*/
struct UEnumeration;
/** structure representing an enumeration object instance @stable ICU 2.2 */
typedef struct UEnumeration UEnumeration;
/**
* Disposes of resources in use by the iterator. If en is NULL,
* does nothing. After this call, any char* or UChar* pointer
* returned by uenum_unext() or uenum_next() is invalid.
* @param en UEnumeration structure pointer; may be NULL, in which case
* the call does nothing
* @stable ICU 2.2
*/
U_STABLE void U_EXPORT2
uenum_close(UEnumeration* en);
/**
* Returns the number of elements that the iterator traverses. If
* the iterator is out-of-sync with its service, status is set to
* U_ENUM_OUT_OF_SYNC_ERROR.
* This is a convenience function. It can end up being very
* expensive as all the items might have to be pre-fetched (depending
* on the type of data being traversed). Use with caution and only
* when necessary.
* @param en UEnumeration structure pointer
* @param status error code, set to U_ENUM_OUT_OF_SYNC_ERROR if the
* iterator is out of sync with its service.
* @return number of elements in the iterator
* @stable ICU 2.2
*/
U_STABLE int32_t U_EXPORT2
uenum_count(UEnumeration* en, UErrorCode* status);
/**
* Returns the next element in the iterator's list as a UChar* string.
* If there are no more elements, returns NULL. If the iterator is
* out-of-sync with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR
* and NULL is returned. If the native service string is a char* string,
* it is converted to UChar* with the invariant converter.
* The result is terminated by (UChar)0.
* @param en the iterator object
* @param resultLength pointer to receive the length of the result
* (not including the terminating \\0).
* If the pointer is NULL it is ignored.
* @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
* the iterator is out of sync with its service.
* @return a pointer to the string. The string will be
* zero-terminated. The return pointer is owned by this iterator
* and must not be deleted by the caller. The pointer is valid
* until the next call to any uenum_... method, including
* uenum_next() or uenum_unext(). When all strings have been
* traversed, returns NULL.
* @see uenum_next
* @stable ICU 2.2
*/
U_STABLE const UChar* U_EXPORT2
uenum_unext(UEnumeration* en,
int32_t* resultLength,
UErrorCode* status);
/**
* Returns the next element in the iterator's list as a char* string.
* If there are no more elements, returns NULL. If the iterator is
* out-of-sync with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR
* and NULL is returned. If the native service string is a UChar*
* string, it is converted to char* with the invariant converter.
* The result is terminated by (char)0. If the conversion fails
* (because a character cannot be converted) then status is set to
* U_INVARIANT_CONVERSION_ERROR and the return value is undefined
* (but non-NULL).
* @param en the iterator object
* @param resultLength pointer to receive the length of the result
* (not including the terminating \\0).
* If the pointer is NULL it is ignored.
* @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
* the iterator is out of sync with its service. Set to
* U_INVARIANT_CONVERSION_ERROR if the underlying native string is
* UChar* and conversion to char* with the invariant converter
* fails. This error pertains only to the current string, so iteration
* might be able to continue successfully.
* @return a pointer to the string. The string will be
* zero-terminated. The return pointer is owned by this iterator
* and must not be deleted by the caller. The pointer is valid
* until the next call to any uenum_... method, including
* uenum_next() or uenum_unext(). When all strings have been
* traversed, returns NULL.
* @see uenum_unext
* @stable ICU 2.2
*/
U_STABLE const char* U_EXPORT2
uenum_next(UEnumeration* en,
int32_t* resultLength,
UErrorCode* status);
/**
* Resets the iterator to the current list of service IDs. This
* re-establishes sync with the service and rewinds the iterator
* to start at the first element.
* @param en the iterator object (UEnumeration structure pointer)
* @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
* the iterator is out of sync with its service.
* @stable ICU 2.2
*/
U_STABLE void U_EXPORT2
uenum_reset(UEnumeration* en, UErrorCode* status);
/**
* Given an array of const UChar* strings, return a UEnumeration.
* String pointers from 0..count-1 must not be null.
* Do not free or modify either the string array or the characters it points to
* until this object has been destroyed with uenum_close.
* \snippet test/cintltst/uenumtst.c uenum_openUCharStringsEnumeration
* @param strings array of const UChar* strings (each null terminated).
* All storage is owned by the caller.
* @param count length of the array
* @param ec error code
* @return the new UEnumeration object. Caller is responsible for calling
* uenum_close to free memory.
* @see uenum_close
* @stable ICU 50
*/
U_STABLE UEnumeration* U_EXPORT2
uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count,
UErrorCode* ec);
/* Note: next function is not hidden as draft, as it is used internally (it was formerly an internal function). */
/**
* Given an array of const char* strings (invariant chars only), return a UEnumeration.
* String pointers from 0..count-1 must not be null.
* Do not free or modify either the string array or the characters it points to
* until this object has been destroyed with uenum_close.
* \snippet test/cintltst/uenumtst.c uenum_openCharStringsEnumeration
* @param strings array of const char* strings (each null terminated).
* All storage is owned by the caller.
* @param count length of the array
* @param ec error code
* @return the new UEnumeration object. Caller is responsible for calling
* uenum_close to free memory.
* @see uenum_close
* @stable ICU 50
*/
U_STABLE UEnumeration* U_EXPORT2
uenum_openCharStringsEnumeration(const char* const strings[], int32_t count,
UErrorCode* ec);
#endif
// uloc.h
/*
**********************************************************************
* Copyright (C) 1997-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* File ULOC.H
*
* Modification History:
*
* Date Name Description
* 04/01/97 aliu Creation.
* 08/22/98 stephen JDK 1.2 sync.
* 12/08/98 rtg New C API for Locale
* 03/30/99 damiba overhaul
* 03/31/99 helena Javadoc for uloc functions.
* 04/15/99 Madhu Updated Javadoc
********************************************************************************
*/
#ifndef ULOC_H
#define ULOC_H
/**
* \file
* \brief C API: Locale
*
* ULoc C API for Locale
*
* A Locale represents a specific geographical, political,
* or cultural region. An operation that requires a Locale to perform
* its task is called locale-sensitive and uses the Locale
* to tailor information for the user. For example, displaying a number
* is a locale-sensitive operation--the number should be formatted
* according to the customs/conventions of the user's native country,
* region, or culture. In the C APIs, a locale is simply a const char string.
*
* A Locale is specified with one of the three options listed below.
* Each of the components is separated by '_' in the locale string.
*
* \code
*     newLanguage
*
*     newLanguage + newCountry
*
*     newLanguage + newCountry + newVariant
* \endcode
*
* The first option is a valid ISO
* Language Code. These codes are the lower-case two-letter
* codes as defined by ISO-639.
* You can find a full list of these codes at a number of sites, such as:
* http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt
*
* For ISO 3166 country codes, see for example:
* http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html
*
* Locale is just an identifier for a region,
* no validity check is performed when you specify a Locale.
* If you want to see whether particular resources are available for the
* Locale you asked for, you must query those resources. For
* example, ask the UNumberFormat for the locales it supports
* using its getAvailable method.
*
* Note: When you ask for a resource for a particular
* locale, you get back the best available match, not necessarily
* precisely what you asked for. For more information, look at
* UResourceBundle.
*
* Locale provides a number of convenient constants
* that you can use to specify the commonly used
* locales. For example, the following refers to a locale
* for the United States:
*
* \code
*     ULOC_US
* \endcode
*
* Use uloc_getCountry to get the ISO Country Code and
* uloc_getLanguage to get the ISO Language Code. You can
* use uloc_getDisplayCountry to get the
* name of the country suitable for displaying to the user. Similarly,
* you can use uloc_getDisplayLanguage to get the name of
* the language suitable for displaying to the user. Interestingly,
* the uloc_getDisplayXXX methods are themselves locale-sensitive
* and have two versions: one that uses the default locale and one
* that takes a locale as an argument and displays the name or country in
* a language appropriate to that locale.
*
* The unum_xxx functions format
* numbers, currency, or percentages in a locale-sensitive manner.
* Each of these methods has two variants; one with an explicit locale
* and one without; the latter using the default locale.
*
* Without an explicit locale (NULL is passed):
* \code
*     UErrorCode success = U_ZERO_ERROR;
*     UNumberFormat *nf;
*     const char* myLocale = "fr_FR";
*
*     nf = unum_open( UNUM_DEFAULT, NULL, success );
*     unum_close(nf);
*     nf = unum_open( UNUM_CURRENCY, NULL, success );
*     unum_close(nf);
*     nf = unum_open( UNUM_PERCENT, NULL, success );
*     unum_close(nf);
* \endcode
*
* With an explicit locale:
* \code
*     nf = unum_open( UNUM_DEFAULT, myLocale, success );
*     unum_close(nf);
*     nf = unum_open( UNUM_CURRENCY, myLocale, success );
*     unum_close(nf);
*     nf = unum_open( UNUM_PERCENT, myLocale, success );
*     unum_close(nf);
* \endcode
*
* A Locale is the mechanism for identifying the kind of services
* (UNumberFormat) that you would like to get. The locale is
* just a mechanism for identifying these services.
*
* The following functions enumerate the available locales and
* produce a display name for a locale:
*
* \code
*     const char* uloc_getAvailable(int32_t index);
*     int32_t uloc_countAvailable();
*     int32_t
*     uloc_getDisplayName(const char* localeID,
*                         const char* inLocaleID,
*                         UChar* result,
*                         int32_t maxResultSize,
*                         UErrorCode* err);
* \endcode
*/
/**
* Gets the keyword name suitable for display for the specified locale.
*
* Usage:
* \code
*     UErrorCode status = U_ZERO_ERROR;
*     const char* keyword =NULL;
*     int32_t keywordLen = 0;
*     int32_t keywordCount = 0;
*     UChar displayKeyword[256];
*     int32_t displayKeywordLen = 0;
*     UEnumeration* keywordEnum = uloc_openKeywords("de_DE@collation=PHONEBOOK;calendar=TRADITIONAL", &status);
*     for(keywordCount = uenum_count(keywordEnum, &status); keywordCount > 0 ; keywordCount--){
*         if(U_FAILURE(status)){
*             ...something went wrong so handle the error...
*             break;
*         }
*         // the uenum_next returns NUL terminated string
*         keyword = uenum_next(keywordEnum, &keywordLen, &status);
*         displayKeywordLen = uloc_getDisplayKeyword(keyword, "en_US", displayKeyword, 256, &status);
*         ... do something interesting .....
*     }
*     uenum_close(keywordEnum);
* \endcode
*
* @param keyword The keyword whose display string needs to be returned.
* @param displayLocale Specifies the locale to be used to display the name. In other words,
* if the locale's language code is "en", passing Locale::getFrench() for
* displayLocale would result in "Anglais", while passing Locale::getGerman()
* for displayLocale would result in "Englisch". NULL may be used to specify the default.
* @param dest the buffer to which the displayable keyword should be written.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the
* result without writing any of the result string (pre-flighting).
* @param status error information if retrieving the displayable string failed.
* Should not be NULL and should not indicate failure on entry.
* @return the actual buffer size needed for the displayable keyword.
* @see #uloc_openKeywords
* @stable ICU 2.8
*/
U_STABLE int32_t U_EXPORT2
uloc_getDisplayKeyword(const char* keyword,
const char* displayLocale,
UChar* dest,
int32_t destCapacity,
UErrorCode* status);
/**
* Gets the value of the keyword suitable for display for the specified locale.
* E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display
* string for PHONEBOOK, in the display locale, when "collation" is specified as the keyword.
*
* @param locale The locale to get the displayable keyword value with. NULL may be used to specify the default.
* @param keyword The keyword whose value should be used.
* @param displayLocale Specifies the locale to be used to display the name. In other words,
* if the locale's language code is "en", passing Locale::getFrench() for
* displayLocale would result in "Anglais", while passing Locale::getGerman()
* for displayLocale would result in "Englisch". NULL may be used to specify the default.
* @param dest the buffer to which the displayable keyword value should be written.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the
* result without writing any of the result string (pre-flighting).
* @param status error information if retrieving the displayable string failed.
* Should not be NULL and must not indicate failure on entry.
* @return the actual buffer size needed for the displayable keyword value.
* @see uloc_getDisplayKeyword
* @stable ICU 2.8
*/
U_STABLE int32_t U_EXPORT2
uloc_getDisplayKeywordValue( const char* locale,
const char* keyword,
const char* displayLocale,
UChar* dest,
int32_t destCapacity,
UErrorCode* status);
/**
* Gets the full name suitable for display for the specified locale.
*
* @param localeID the locale to get the displayable name with. NULL may be used to specify the default.
* @param inLocaleID Specifies the locale to be used to display the name. In other words,
* if the locale's language code is "en", passing Locale::getFrench() for
* inLocaleID would result in "Anglais", while passing Locale::getGerman()
* for inLocaleID would result in "Englisch". NULL may be used to specify the default.
* @param result the displayable name for localeID
* @param maxResultSize the size of the name buffer to store the
* displayable full name with
* @param err error information if retrieving the displayable name failed
* @return the actual buffer size needed for the displayable name. If it's greater
* than maxResultSize, the returned displayable name will be truncated.
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
uloc_getDisplayName(const char* localeID,
const char* inLocaleID,
UChar* result,
int32_t maxResultSize,
UErrorCode* err);
/**
* Gets the specified locale from a list of all available locales.
* The return value is a pointer to an item of
* a locale name array. Both this array and the pointers
* it contains are owned by ICU and should not be deleted or written through
* by the caller. The locale name is a NUL-terminated string.
* @param n the specific locale name index of the available locale list
* @return a specified locale name of all available locales
* @see uloc_countAvailable
* @stable ICU 2.0
*/
U_STABLE const char* U_EXPORT2
uloc_getAvailable(int32_t n);
/**
* Gets the size of the list of all available locales.
*
* @return the size of the locale list
* @see uloc_getAvailable
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2 uloc_countAvailable(void);
/**
*
* Gets a list of all available 2-letter language codes defined in ISO 639,
* plus additional 3-letter codes determined to be useful for locale generation as
* defined by Unicode CLDR. The return value is a pointer
* to an array of pointers to arrays of char. All of these pointers are owned
* by ICU-- do not delete them, and do not write through them. The array is
* terminated with a null pointer.
* @return a list of all available language codes
* @stable ICU 2.0
*/
U_STABLE const char* const* U_EXPORT2
uloc_getISOLanguages(void);
/**
*
* Gets a list of all available 2-letter country codes defined in ISO 3166.
* The return value is a pointer to an array of pointers to arrays of char.
* All of these pointers are owned by ICU-- do not delete them, and do not
* write through them. The array is terminated with a null pointer.
* @return a list of all available country codes
* @stable ICU 2.0
*/
U_STABLE const char* const* U_EXPORT2
uloc_getISOCountries(void);
/**
* Truncate the locale ID string to get the parent locale ID.
* Copies the part of the string before the last underscore.
* The parent locale ID will be an empty string if there is no
* underscore, or if there is only one underscore at localeID[0].
*
* @param localeID Input locale ID string.
* @param parent Output string buffer for the parent locale ID.
* @param parentCapacity Size of the output buffer.
* @param err A UErrorCode value.
* @return The length of the parent locale ID
* (0 when the parent is the empty string).
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
uloc_getParent(const char* localeID,
char* parent,
int32_t parentCapacity,
UErrorCode* err);
/**
* Gets the full name for the specified locale, like uloc_getName(),
* but without keywords.
*
* Note: This has the effect of 'canonicalizing' the string to
* a certain extent. Upper and lower case are set as needed,
* and if the components were in 'POSIX' format they are changed to
* ICU format. It does NOT map aliased names in any way.
* See the top of this header file.
*
* This API strips off the keyword part, so "de_DE\@collation=phonebook"
* will become "de_DE".
* This API supports preflighting.
*
* @param localeID the locale to get the full name with
* @param name fill in buffer for the name without keywords.
* @param nameCapacity capacity of the fill in buffer.
* @param err error information if retrieving the full name failed
* @return the actual buffer size needed for the full name. If it's greater
* than nameCapacity, the returned full name will be truncated.
* @see uloc_setKeywordValue
* @stable ICU 2.8
*/
U_STABLE int32_t U_EXPORT2
uloc_getBaseName(const char* localeID,
char* name,
int32_t nameCapacity,
UErrorCode* err);
/**
* Gets an enumeration of keywords for the specified locale. Enumeration
* must get disposed of by the client using uenum_close function.
*
* @param localeID the locale to get the keywords from
* @param status error information if retrieving the keywords failed
* @return enumeration of keywords or NULL if there are no keywords.
* @see uenum_close
* @stable ICU 2.8
*/
U_STABLE UEnumeration* U_EXPORT2
uloc_openKeywords(const char* localeID,
UErrorCode* status);
/**
* Get the value for a keyword. Locale name does not need to be normalized.
*
* @param localeID locale name containing the keyword ("de_DE\@currency=EURO;collation=PHONEBOOK")
* @param keywordName name of the keyword for which we want the value. Case insensitive.
* @param buffer receiving buffer
* @param bufferCapacity capacity of receiving buffer
* @param status containing error code - buffer not big enough.
* @return the length of keyword value
* @see uloc_setKeywordValue
* @stable ICU 2.8
*/
U_STABLE int32_t U_EXPORT2
uloc_getKeywordValue(const char* localeID,
const char* keywordName,
char* buffer, int32_t bufferCapacity,
UErrorCode* status);
/**
* Sets or removes the value of the specified keyword.
*
* For removing all keywords, use uloc_getBaseName().
*
* NOTE: Unlike almost every other ICU function which takes a
* buffer, this function will NOT truncate the output text. If a
* U_BUFFER_OVERFLOW_ERROR is received, it means that the original
* buffer is untouched. This is done to prevent incorrect or possibly
* even malformed locales from being generated and used.
*
* @param keywordName name of the keyword to be set. Case insensitive.
* @param keywordValue value of the keyword to be set. If 0-length or
* NULL, will result in the keyword being removed. No error is given if
* that keyword does not exist.
* @param buffer input buffer containing the locale to be modified; it is
* also the receiving buffer for the result.
* @param bufferCapacity capacity of the receiving buffer
* @param status receives the error code, for example when the buffer is not big enough.
* @return the length needed for the buffer
* @see uloc_getKeywordValue
* @stable ICU 3.2
*/
U_STABLE int32_t U_EXPORT2
uloc_setKeywordValue(const char* keywordName,
const char* keywordValue,
char* buffer, int32_t bufferCapacity,
UErrorCode* status);
/**
* Returns whether the locale's script is written right-to-left.
* If there is no script subtag, then the likely script is used (see uloc_addLikelySubtags()).
* If no likely script is known, then FALSE is returned.
*
* A script is right-to-left according to the CLDR script metadata
* which corresponds to whether the script's letters have Bidi_Class=R or AL.
*
* Examples: returns TRUE for "ar" and "en-Hebr", FALSE for "zh" and "fa-Cyrl".
*
* @param locale input locale ID
* @return TRUE if the locale's script is written right-to-left
* @stable ICU 54
*/
U_STABLE UBool U_EXPORT2
uloc_isRightToLeft(const char *locale);
/**
* Return values of the character-orientation and line-orientation
* functions.
* @stable ICU 4.0
*/
typedef enum {
    /* left-to-right. */
    ULOC_LAYOUT_LTR = 0,
    /* right-to-left. */
    ULOC_LAYOUT_RTL = 1,
    /* top-to-bottom. */
    ULOC_LAYOUT_TTB = 2,
    /* bottom-to-top. */
    ULOC_LAYOUT_BTT = 3,
    /* orientation unknown. */
    ULOC_LAYOUT_UNKNOWN = 4
} ULayoutType;
/**
* Gets the layout character orientation for the specified locale.
*
* @param localeId locale name
* @param status Error status
* @return a ULayoutType value indicating the layout orientation for characters.
* @stable ICU 4.0
*/
U_STABLE ULayoutType U_EXPORT2
uloc_getCharacterOrientation(const char* localeId,
UErrorCode *status);
/**
* Gets the layout line orientation for the specified locale.
*
* @param localeId locale name
* @param status Error status
* @return a ULayoutType value indicating the layout orientation for lines.
* @stable ICU 4.0
*/
U_STABLE ULayoutType U_EXPORT2
uloc_getLineOrientation(const char* localeId,
UErrorCode *status);
/**
* Values reported through the 'outResult' parameter.
* @see uloc_acceptLanguageFromHTTP
* @see uloc_acceptLanguage
* @stable ICU 3.2
*/
typedef enum {
    /* No exact match was found. */
    ULOC_ACCEPT_FAILED = 0,
    /* An exact match was found. */
    ULOC_ACCEPT_VALID = 1,
    /* A fallback was found, for example, the accept list contained 'ja_JP',
       which matched the available locale 'ja'. */
    ULOC_ACCEPT_FALLBACK = 2
} UAcceptResult;
/**
* Based on an HTTP header from a web browser and a list of available locales,
* determine an acceptable locale for the user.
* @param result buffer to accept the result locale
* @param resultAvailable the size of the result buffer.
* @param outResult an out parameter that contains the fallback status (see UAcceptResult)
* @param httpAcceptLanguage "Accept-Language:" header as per HTTP.
* @param availableLocales list of available locales to match
* @param status Error status, may be U_BUFFER_OVERFLOW_ERROR
* @return length needed for the locale.
* @stable ICU 3.2
*/
U_STABLE int32_t U_EXPORT2
uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable,
UAcceptResult *outResult,
const char *httpAcceptLanguage,
UEnumeration* availableLocales,
UErrorCode *status);
/**
* Based on a list of acceptable languages and a list of available locales,
* determine an acceptable locale for the user.
* @param result buffer to accept the result locale
* @param resultAvailable the size of the result buffer.
* @param outResult an out parameter that contains the fallback status (see UAcceptResult)
* @param acceptList list of acceptable languages
* @param acceptListCount count of acceptList items
* @param availableLocales list of available locales to match
* @param status Error status, may be U_BUFFER_OVERFLOW_ERROR
* @return length needed for the locale.
* @stable ICU 3.2
*/
U_STABLE int32_t U_EXPORT2
uloc_acceptLanguage(char *result, int32_t resultAvailable,
UAcceptResult *outResult, const char **acceptList,
int32_t acceptListCount,
UEnumeration* availableLocales,
UErrorCode *status);
/**
* Gets the ICU locale ID for the specified Win32 LCID value.
*
* @param hostID the Win32 LCID to translate
* @param locale the output buffer for the ICU locale ID, which will be NUL-terminated
* if there is room.
* @param localeCapacity the size of the output buffer
* @param status an error is returned if the LCID is unrecognized or the output buffer
* is too small
* @return the actual size of the locale ID, not including NUL-termination
* @stable ICU 3.8
*/
U_STABLE int32_t U_EXPORT2
uloc_getLocaleForLCID(uint32_t hostID, char *locale, int32_t localeCapacity,
UErrorCode *status);
/**
* Add the likely subtags for a provided locale ID, per the algorithm described
* in the following CLDR technical report:
*
* http://www.unicode.org/reports/tr35/#Likely_Subtags
*
* If localeID is already in the maximal form, or there is no data available
* for maximization, it will be copied to the output buffer. For example,
* "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
*
* Examples:
*
* "en" maximizes to "en_Latn_US"
*
* "de" maximizes to "de_Latn_DE"
*
* "sr" maximizes to "sr_Cyrl_RS"
*
* "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
*
* "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
*
* @param localeID The locale to maximize
* @param maximizedLocaleID The maximized locale
* @param maximizedLocaleIDCapacity The capacity of the maximizedLocaleID buffer
* @param err Error information if maximizing the locale failed. If the length
* of the localeID and the null-terminator is greater than the maximum allowed size,
* or the localeID is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
* @return The actual buffer size needed for the maximized locale. If it's
* greater than maximizedLocaleIDCapacity, the returned ID will be truncated.
* On error, the return value is -1.
* @stable ICU 4.0
*/
U_STABLE int32_t U_EXPORT2
uloc_addLikelySubtags(const char* localeID,
char* maximizedLocaleID,
int32_t maximizedLocaleIDCapacity,
UErrorCode* err);
/**
* Minimize the subtags for a provided locale ID, per the algorithm described
* in the following CLDR technical report:
*
* http://www.unicode.org/reports/tr35/#Likely_Subtags
*
* If localeID is already in the minimal form, or there is no data available
* for minimization, it will be copied to the output buffer. Since the
* minimization algorithm relies on proper maximization, see the comments
* for uloc_addLikelySubtags for reasons why there might not be any data.
*
* Examples:
*
* "en_Latn_US" minimizes to "en"
*
* "de_Latn_DE" minimizes to "de"
*
* "sr_Cyrl_RS" minimizes to "sr"
*
* "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
* script, and minimizing to "zh" would imply "zh_Hans_CN".)
*
* @param localeID The locale to minimize
* @param minimizedLocaleID The minimized locale
* @param minimizedLocaleIDCapacity The capacity of the minimizedLocaleID buffer
* @param err Error information if minimizing the locale failed. If the length
* of the localeID and the null-terminator is greater than the maximum allowed size,
* or the localeID is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
* @return The actual buffer size needed for the minimized locale. If it's
* greater than minimizedLocaleIDCapacity, the returned ID will be truncated.
* On error, the return value is -1.
* @stable ICU 4.0
*/
U_STABLE int32_t U_EXPORT2
uloc_minimizeSubtags(const char* localeID,
char* minimizedLocaleID,
int32_t minimizedLocaleIDCapacity,
UErrorCode* err);
/**
* Returns a well-formed BCP47 language tag for the specified locale ID.
*
* Note: When strict is FALSE, any locale
* fields which do not satisfy the BCP47 syntax requirement will
* be omitted from the result. When strict is
* TRUE, this function sets U_ILLEGAL_ARGUMENT_ERROR to the
* err if any locale fields do not satisfy the
* BCP47 syntax requirement.
* @param localeID the input locale ID
* @param langtag the output buffer receiving BCP47 language
* tag for the locale ID.
* @param langtagCapacity the size of the BCP47 language tag
* output buffer.
* @param strict boolean value indicating if the function returns
* an error for an ill-formed input locale ID.
* @param err error information if receiving the language
* tag failed.
* @return The length of the BCP47 language tag.
* @stable ICU 4.2
*/
U_STABLE int32_t U_EXPORT2
uloc_toLanguageTag(const char* localeID,
char* langtag,
int32_t langtagCapacity,
UBool strict,
UErrorCode* err);
/**
* Converts the specified keyword (legacy key, or BCP 47 Unicode locale
* extension key) to the equivalent BCP 47 Unicode locale extension key.
* For example, the BCP 47 Unicode locale extension key "co" is returned for
* the input keyword "collation", and
* uloc_toUnicodeLocaleKey("ZZ") returns "ZZ".
*
* @param keyword the input locale keyword (either legacy key
* such as "collation" or BCP 47 Unicode locale extension
* key such as "co").
* @return the well-formed BCP 47 Unicode locale extension key,
* or NULL if the specified locale keyword cannot be
* mapped to a well-formed BCP 47 Unicode locale extension
* key.
* @see uloc_toLegacyKey
* @stable ICU 54
*/
U_STABLE const char* U_EXPORT2
uloc_toUnicodeLocaleKey(const char* keyword);
/**
* Converts the specified keyword value (legacy type, or BCP 47
* Unicode locale extension type) to the well-formed BCP 47 Unicode locale
* extension type for the specified keyword (category). For example, the BCP 47
* Unicode locale extension type "phonebk" is returned for the input
* keyword value "phonebook", with the keyword "collation" (or "co").
* Further examples:
* uloc_toUnicodeLocaleType("Foo", "Bar") returns "Bar",
* uloc_toUnicodeLocaleType("variableTop", "00A4") returns "00A4".
*
* @param keyword the locale keyword (either legacy key such as
* "collation" or BCP 47 Unicode locale extension
* key such as "co").
* @param value the locale keyword value (either legacy type
* such as "phonebook" or BCP 47 Unicode locale extension
* type such as "phonebk").
* @return the well-formed BCP 47 Unicode locale extension type,
* or NULL if the locale keyword value cannot be mapped to
* a well-formed BCP 47 Unicode locale extension type.
* @see uloc_toLegacyType
* @stable ICU 54
*/
U_STABLE const char* U_EXPORT2
uloc_toUnicodeLocaleType(const char* keyword, const char* value);
/**
* Converts the specified keyword (BCP 47 Unicode locale extension key, or
* legacy key) to the legacy key. For example, the legacy key "collation" is
* returned for the input BCP 47 Unicode locale extension key "co".
*
* @param keyword the input locale keyword (either BCP 47 Unicode locale
* extension key or legacy key).
* @return the well-formed legacy key, or NULL if the specified
* keyword cannot be mapped to a well-formed legacy key.
* @see uloc_toUnicodeLocaleKey
* @stable ICU 54
*/
U_STABLE const char* U_EXPORT2
uloc_toLegacyKey(const char* keyword);
/**
* Converts the specified keyword value (BCP 47 Unicode locale extension type,
* or legacy type or type alias) to the canonical legacy type. For example,
* the legacy type "phonebook" is returned for the input BCP 47 Unicode
* locale extension type "phonebk" with the keyword "collation" (or "co").
* Further examples:
* uloc_toLegacyType("Foo", "Bar") returns "Bar",
* uloc_toLegacyType("vt", "00A4") returns "00A4".
*
* @param keyword the locale keyword (either legacy keyword such as
* "collation" or BCP 47 Unicode locale extension
* key such as "co").
* @param value the locale keyword value (either BCP 47 Unicode locale
* extension type such as "phonebk" or legacy keyword value
* such as "phonebook").
* @return the well-formed legacy type, or NULL if the specified
* keyword value cannot be mapped to a well-formed legacy
* type.
* @see uloc_toUnicodeLocaleType
* @stable ICU 54
*/
U_STABLE const char* U_EXPORT2
uloc_toLegacyType(const char* keyword, const char* value);
#endif /*_ULOC*/
// ures.h
/*
**********************************************************************
* Copyright (C) 1997-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* File URES.H (formerly CRESBUND.H)
*
* Modification History:
*
* Date Name Description
* 04/01/97 aliu Creation.
* 02/22/99 damiba overhaul.
* 04/04/99 helena Fixed internal header inclusion.
* 04/15/99 Madhu Updated Javadoc
* 06/14/99 stephen Removed functions taking a filename suffix.
* 07/20/99 stephen Language-independent typedef to void*
* 11/09/99 weiv Added ures_getLocale()
* 06/24/02 weiv Added support for resource sharing
******************************************************************************
*/
#ifndef URES_H
#define URES_H
/**
* \file
* \brief C API: Resource Bundle
*
* C API: Resource Bundle
*
* C API representing a collection of resource information pertaining to a given
* locale. A resource bundle provides a way of accessing locale-specific information in
* a data file. You create a resource bundle that manages the resources for a given
* locale and then ask it for individual resources.
*/
/**
* Opens a resource bundle for the given package name and locale.
*
* @param packageName The packageName and locale together point to an ICU udata object,
* as defined by udata_open( packageName, "res", locale, err)
* or equivalent. Typically, packageName will refer to a (.dat) file, or to
* a package registered with udata_setAppData(). Using a full file or directory
* pathname for packageName is deprecated. If NULL, ICU data will be used.
* @param locale specifies the locale for which we want to open the resource
* if NULL, the default locale will be used. If strlen(locale) == 0
* root locale will be used.
*
* @param status fills in the outgoing error code.
* This parameter is used to return status information to the user. To
* check whether the construction succeeded or not, you should check it with
* U_SUCCESS(). If you wish more detailed information, you can check for
* informational status results which still indicate success. U_USING_FALLBACK_WARNING
* indicates that a fall back locale was used. For example, 'de_CH' was requested,
* but nothing was found there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that
* the default locale data or root locale data was used; neither the requested locale
* nor any of its fall back locales could be found. Please see the users guide for more
* information on this topic.
* @return a newly allocated resource bundle.
* @see ures_close
* @stable ICU 2.0
*/
U_STABLE UResourceBundle* U_EXPORT2
ures_open(const char* packageName,
const char* locale,
UErrorCode* status);
/**
* Opens a bundle with the given name, without caring what kind of localeID is passed in.
* The fallback mechanism is disabled for the new bundle. If the requested bundle contains
* an %%ALIAS directive, the results are undefined.
* @param packageName The packageName and locale together point to an ICU udata object,
* as defined by udata_open( packageName, "res", locale, err)
* or equivalent. Typically, packageName will refer to a (.dat) file, or to
* a package registered with udata_setAppData(). Using a full file or directory
* pathname for packageName is deprecated. If NULL, ICU data will be used.
* @param locale specifies the locale for which we want to open the resource
* if NULL, the default locale will be used. If strlen(locale) == 0
* root locale will be used.
*
* @param status fills in the outgoing error code. Either U_ZERO_ERROR or U_MISSING_RESOURCE_ERROR
* @return a newly allocated resource bundle or NULL if it doesn't exist.
* @see ures_close
* @stable ICU 2.0
*/
U_STABLE UResourceBundle* U_EXPORT2
ures_openDirect(const char* packageName,
const char* locale,
UErrorCode* status);
/**
* Same as ures_open() but takes a const UChar *packageName.
* The package name will be converted to char * using the default converter,
* then ures_open() is called.
*
* @param packageName The packageName and locale together point to an ICU udata object,
* as defined by udata_open( packageName, "res", locale, err)
* or equivalent. Typically, packageName will refer to a (.dat) file, or to
* a package registered with udata_setAppData(). Using a full file or directory
* pathname for packageName is deprecated. If NULL, ICU data will be used.
* @param locale specifies the locale for which we want to open the resource
* if NULL, the default locale will be used. If strlen(locale) == 0
* root locale will be used.
* @param status fills in the outgoing error code.
* @return a newly allocated resource bundle.
* @see ures_open
* @stable ICU 2.0
*/
U_STABLE UResourceBundle* U_EXPORT2
ures_openU(const UChar* packageName,
const char* locale,
UErrorCode* status);
/**
* Closes a resource bundle. All pointers returned from the various ures_getXXX calls
* on this particular bundle should be considered invalid from then on.
*
* @param resourceBundle a pointer to a resourceBundle struct. Can be NULL.
* @see ures_open
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ures_close(UResourceBundle* resourceBundle);
/**
* Returns the version number associated with this ResourceBundle as a
* UVersionInfo array.
*
* @param resB The resource bundle for which the version is checked.
* @param versionInfo A UVersionInfo array that is filled with the version number
* as specified in the resource bundle or its parent.
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ures_getVersion(const UResourceBundle* resB,
UVersionInfo versionInfo);
/**
* Returns the name of the Locale associated with this ResourceBundle.
* You can choose between the requested, valid and actual locale.
*
* @param resourceBundle resource bundle in question
* @param type You can choose between requested, valid and actual
* locale. For description see the definition of
* ULocDataLocaleType in uloc.h
* @param status just for catching illegal arguments
* @return A Locale name
* @stable ICU 2.8
*/
U_STABLE const char* U_EXPORT2
ures_getLocaleByType(const UResourceBundle* resourceBundle,
ULocDataLocaleType type,
UErrorCode* status);
/**
* Returns a string from a string resource.
*
* @param resourceBundle a string resource
* @param len fills in the length of the resulting string
* @param status fills in the outgoing error code.
* It could be U_MISSING_RESOURCE_ERROR if the key is not found,
* or a non-failing error,
* e.g.: U_USING_FALLBACK_WARNING, U_USING_DEFAULT_WARNING.
* Always check the value of status. Don't count on NULL being returned.
* @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
* @see ures_getBinary
* @see ures_getIntVector
* @see ures_getInt
* @see ures_getUInt
* @stable ICU 2.0
*/
U_STABLE const UChar* U_EXPORT2
ures_getString(const UResourceBundle* resourceBundle,
int32_t* len,
UErrorCode* status);
/**
* Returns a UTF-8 string from a string resource.
* The UTF-8 string may be returnable directly as a pointer, or
* it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
* or equivalent.
*
* If forceCopy==TRUE, then the string is always written to the dest buffer
* and dest is returned.
*
* If forceCopy==FALSE, then the string is returned as a pointer if possible,
* without needing a dest buffer (it can be NULL). If the string needs to be
* copied or transformed, then it may be placed into dest at an arbitrary offset.
*
* If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
* U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
*
* If the string is transformed from UTF-16, then a conversion error may occur
* if an unpaired surrogate is encountered. If the function is successful, then
* the output UTF-8 string is always well-formed.
*
* @param resB Resource bundle.
* @param dest Destination buffer. Can be NULL only if the capacity
* (*length on input) is 0.
* @param length Input: Capacity of destination buffer.
* Output: Actual length of the UTF-8 string, not counting the
* terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
* Can be NULL, meaning capacity=0 and the string length is not
* returned to the caller.
* @param forceCopy If TRUE, then the output string will always be written to
* dest, with U_BUFFER_OVERFLOW_ERROR and
* U_STRING_NOT_TERMINATED_WARNING set if appropriate.
* If FALSE, then the dest buffer may or may not contain a
* copy of the string. dest may or may not be modified.
* If a copy needs to be written, then the UErrorCode parameter
* indicates overflow etc. as usual.
* @param status Pointer to a standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return The pointer to the UTF-8 string. It may be dest, or at some offset
* from dest (only if !forceCopy), or in unrelated memory.
* Always NUL-terminated unless the string was written to dest and
* length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
*
* @see ures_getString
* @see u_strToUTF8
* @stable ICU 3.6
*/
U_STABLE const char * U_EXPORT2
ures_getUTF8String(const UResourceBundle *resB,
char *dest, int32_t *length,
UBool forceCopy,
UErrorCode *status);
/**
* Returns binary data from a binary resource.
*
* @param resourceBundle a binary resource
* @param len fills in the length of the resulting byte chunk
* @param status fills in the outgoing error code.
* It could be U_MISSING_RESOURCE_ERROR if the key is not found,
* or a non-failing error,
* e.g.: U_USING_FALLBACK_WARNING, U_USING_DEFAULT_WARNING.
* Always check the value of status. Don't count on NULL being returned.
* @return a pointer to a chunk of unsigned bytes which live in a memory mapped/DLL file.
* @see ures_getString
* @see ures_getIntVector
* @see ures_getInt
* @see ures_getUInt
* @stable ICU 2.0
*/
U_STABLE const uint8_t* U_EXPORT2
ures_getBinary(const UResourceBundle* resourceBundle,
int32_t* len,
UErrorCode* status);
/**
* Returns a 32 bit integer array from a resource.
*
* @param resourceBundle an int vector resource
* @param len fills in the length of the resulting integer array
* @param status fills in the outgoing error code.
* It could be U_MISSING_RESOURCE_ERROR if the key is not found,
* or a non-failing error,
* e.g.: U_USING_FALLBACK_WARNING, U_USING_DEFAULT_WARNING.
* Always check the value of status. Don't count on NULL being returned.
* @return a pointer to a chunk of integers which live in a memory mapped/DLL file.
* @see ures_getBinary
* @see ures_getString
* @see ures_getInt
* @see ures_getUInt
* @stable ICU 2.0
*/
U_STABLE const int32_t* U_EXPORT2
ures_getIntVector(const UResourceBundle* resourceBundle,
int32_t* len,
UErrorCode* status);
/**
* Returns an unsigned integer from a resource.
* This integer is originally 28 bits.
*
* @param resourceBundle an integer resource
* @param status fills in the outgoing error code.
* It could be U_MISSING_RESOURCE_ERROR if the key is not found,
* or a non-failing error,
* e.g.: U_USING_FALLBACK_WARNING, U_USING_DEFAULT_WARNING.
* @return an integer value
* @see ures_getInt
* @see ures_getIntVector
* @see ures_getBinary
* @see ures_getString
* @stable ICU 2.0
*/
U_STABLE uint32_t U_EXPORT2
ures_getUInt(const UResourceBundle* resourceBundle,
UErrorCode *status);
/**
* Returns a signed integer from a resource.
* This integer is originally 28 bits and the sign gets propagated.
*
* @param resourceBundle an integer resource
* @param status fills in the outgoing error code.
* It could be U_MISSING_RESOURCE_ERROR if the key is not found,
* or a non-failing error,
* e.g.: U_USING_FALLBACK_WARNING, U_USING_DEFAULT_WARNING.
* @return an integer value
* @see ures_getUInt
* @see ures_getIntVector
* @see ures_getBinary
* @see ures_getString
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ures_getInt(const UResourceBundle* resourceBundle,
UErrorCode *status);
/**
* Returns the size of a resource. The size for scalar types is always 1,
* and for vector/table types it is the number of child resources.
* @warning An integer array is treated as a scalar type. There are no
* APIs to access individual members of an integer array. It
* is always returned as a whole.
* @param resourceBundle a resource
* @return number of resources in a given resource.
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ures_getSize(const UResourceBundle *resourceBundle);
/**
* Returns the type of a resource. The available types are defined in enum UResType.
*
* @param resourceBundle a resource
* @return type of the given resource.
* @see UResType
* @stable ICU 2.0
*/
U_STABLE UResType U_EXPORT2
ures_getType(const UResourceBundle *resourceBundle);
/**
* Returns the key associated with a given resource. Not all resources have a key - only
* those that are members of a table.
*
* @param resourceBundle a resource
* @return the key associated with this resource, or NULL if it doesn't have a key
* @stable ICU 2.0
*/
U_STABLE const char * U_EXPORT2
ures_getKey(const UResourceBundle *resourceBundle);
/* ITERATION API
This API provides means for iterating through a resource
*/
/**
* Resets the internal context of a resource so that iteration starts from the first element.
*
* @param resourceBundle a resource
* @see ures_hasNext
* @see ures_getNextResource
* @see ures_getNextString
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ures_resetIterator(UResourceBundle *resourceBundle);
/**
* Checks whether the given resource has another element to iterate over.
*
* @param resourceBundle a resource
* @return TRUE if there are more elements, FALSE if there are no more elements
* @stable ICU 2.0
*/
U_STABLE UBool U_EXPORT2
ures_hasNext(const UResourceBundle *resourceBundle);
/**
* Returns the next resource in a given resource or NULL if there are no more resources
* to iterate over. Features a fill-in parameter.
*
* @param resourceBundle a resource
* @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
* Alternatively, you can supply a struct to be filled by this function.
* @param status fills in the outgoing error code. You may still get a non-NULL result even if an
* error occurred. Check status instead.
* @return a pointer to a UResourceBundle struct. If the fill-in param was NULL, the caller must close it
* @stable ICU 2.0
*/
U_STABLE UResourceBundle* U_EXPORT2
ures_getNextResource(UResourceBundle *resourceBundle,
UResourceBundle *fillIn,
UErrorCode *status);
/**
* Returns the next string in a given resource or NULL if there are no more resources
* to iterate over.
*
* @param resourceBundle a resource
* @param len fill in length of the string
* @param key fill in for the key associated with this string. NULL if no key
* @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
* count on it. Check status instead!
* @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
* @stable ICU 2.0
*/
U_STABLE const UChar* U_EXPORT2
ures_getNextString(UResourceBundle *resourceBundle,
int32_t* len,
const char ** key,
UErrorCode *status);
/**
* Returns the resource in a given resource at the specified index. Features a fill-in parameter.
*
* @param resourceBundle the resource bundle from which to get a sub-resource
* @param indexR an index to the wanted resource.
* @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
* Alternatively, you can supply a struct to be filled by this function.
* @param status fills in the outgoing error code. Don't count on NULL being returned if an error has
* occurred. Check status instead.
* @return a pointer to a UResourceBundle struct. If the fill-in param was NULL, the caller must close it
* @stable ICU 2.0
*/
U_STABLE UResourceBundle* U_EXPORT2
ures_getByIndex(const UResourceBundle *resourceBundle,
int32_t indexR,
UResourceBundle *fillIn,
UErrorCode *status);
/**
* Returns the string in a given resource at the specified index.
*
* @param resourceBundle a resource
* @param indexS an index to the wanted string.
* @param len fill in length of the string
* @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
* count on it. Check status instead!
* @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
* @stable ICU 2.0
*/
U_STABLE const UChar* U_EXPORT2
ures_getStringByIndex(const UResourceBundle *resourceBundle,
int32_t indexS,
int32_t* len,
UErrorCode *status);
/**
* Returns a UTF-8 string from a resource at the specified index.
* The UTF-8 string may be returnable directly as a pointer, or
* it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
* or equivalent.
*
* If forceCopy==TRUE, then the string is always written to the dest buffer
* and dest is returned.
*
* If forceCopy==FALSE, then the string is returned as a pointer if possible,
* without needing a dest buffer (it can be NULL). If the string needs to be
* copied or transformed, then it may be placed into dest at an arbitrary offset.
*
* If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
* U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
*
* If the string is transformed from UTF-16, then a conversion error may occur
* if an unpaired surrogate is encountered. If the function is successful, then
* the output UTF-8 string is always well-formed.
*
* @param resB Resource bundle.
* @param stringIndex An index to the wanted string.
* @param dest Destination buffer. Can be NULL only if the capacity
* (*pLength on input) is 0.
* @param pLength Input: Capacity of destination buffer.
* Output: Actual length of the UTF-8 string, not counting the
* terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
* Can be NULL, meaning capacity=0 and the string length is not
* returned to the caller.
* @param forceCopy If TRUE, then the output string will always be written to
* dest, with U_BUFFER_OVERFLOW_ERROR and
* U_STRING_NOT_TERMINATED_WARNING set if appropriate.
* If FALSE, then the dest buffer may or may not contain a
* copy of the string. dest may or may not be modified.
* If a copy needs to be written, then the UErrorCode parameter
* indicates overflow etc. as usual.
* @param status Pointer to a standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return The pointer to the UTF-8 string. It may be dest, or at some offset
* from dest (only if !forceCopy), or in unrelated memory.
* Always NUL-terminated unless the string was written to dest and
* the string length equals the capacity (in which case
* U_STRING_NOT_TERMINATED_WARNING is set).
*
* @see ures_getStringByIndex
* @see u_strToUTF8
* @stable ICU 3.6
*/
U_STABLE const char * U_EXPORT2
ures_getUTF8StringByIndex(const UResourceBundle *resB,
int32_t stringIndex,
char *dest, int32_t *pLength,
UBool forceCopy,
UErrorCode *status);
/**
* Returns a resource in a given resource that has a given key. This function works only with table
* resources. Features a fill-in parameter.
*
* @param resourceBundle a resource
* @param key a key associated with the wanted resource
* @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
* Alternatively, you can supply a struct to be filled by this function.
* @param status fills in the outgoing error code.
* @return a pointer to a UResourceBundle struct. If the fill-in param was NULL, the caller must close it
* @stable ICU 2.0
*/
U_STABLE UResourceBundle* U_EXPORT2
ures_getByKey(const UResourceBundle *resourceBundle,
const char* key,
UResourceBundle *fillIn,
UErrorCode *status);
/**
* Returns the string stored under the given key in a table resource.
* This procedure works only with table resources.
*
* @param resB the table resource to search
* @param key the key associated with the wanted string
* @param len fill-in parameter that receives the length of the string
* @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
* count on it. Check status instead!
* @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
* @stable ICU 2.0
*/
U_STABLE const UChar* U_EXPORT2
ures_getStringByKey(const UResourceBundle *resB,
const char* key,
int32_t* len,
UErrorCode *status);
/**
* Returns a UTF-8 string from a resource and a key.
* This function works only with table resources.
*
* The UTF-8 string may be returnable directly as a pointer, or
* it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
* or equivalent.
*
* If forceCopy==TRUE, then the string is always written to the dest buffer
* and dest is returned.
*
* If forceCopy==FALSE, then the string is returned as a pointer if possible,
* without needing a dest buffer (it can be NULL). If the string needs to be
* copied or transformed, then it may be placed into dest at an arbitrary offset.
*
* If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
* U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
*
* If the string is transformed from UTF-16, then a conversion error may occur
* if an unpaired surrogate is encountered. If the function is successful, then
* the output UTF-8 string is always well-formed.
*
* @param resB Resource bundle.
* @param key A key associated with the wanted resource
* @param dest Destination buffer. Can be NULL only if capacity=*pLength==0.
* @param pLength Input: Capacity of destination buffer.
* Output: Actual length of the UTF-8 string, not counting the
* terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
* Can be NULL, meaning capacity=0 and the string length is not
* returned to the caller.
* @param forceCopy If TRUE, then the output string will always be written to
* dest, with U_BUFFER_OVERFLOW_ERROR and
* U_STRING_NOT_TERMINATED_WARNING set if appropriate.
* If FALSE, then the dest buffer may or may not contain a
* copy of the string. dest may or may not be modified.
* If a copy needs to be written, then the UErrorCode parameter
* indicates overflow etc. as usual.
* @param status Pointer to a standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return The pointer to the UTF-8 string. It may be dest, or at some offset
* from dest (only if !forceCopy), or in unrelated memory.
* Always NUL-terminated unless the string was written to dest and
* length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
*
* @see ures_getStringByKey
* @see u_strToUTF8
* @stable ICU 3.6
*/
U_STABLE const char * U_EXPORT2
ures_getUTF8StringByKey(const UResourceBundle *resB,
const char *key,
char *dest, int32_t *pLength,
UBool forceCopy,
UErrorCode *status);
/**
* Create a string enumerator, owned by the caller, of all locales located within
* the specified resource tree.
* This call is similar to uloc_getAvailable().
* @param packageName name of the tree, such as (NULL) or U_ICUDATA_ALIAS or "ICUDATA-coll"
* @param status error code
* @return a string enumerator over the locales in the tree; it is owned by the caller
* @stable ICU 3.2
*/
U_STABLE UEnumeration* U_EXPORT2
ures_openAvailableLocales(const char *packageName, UErrorCode *status);
#endif /*_URES*/
/*eof*/
// udisplaycontext.h
/*
*****************************************************************************************
* Copyright (C) 2014-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*****************************************************************************************
*/
#ifndef UDISPLAYCONTEXT_H
#define UDISPLAYCONTEXT_H
#if !UCONFIG_NO_FORMATTING
/**
* \file
* \brief C API: Display context types (enum values)
*/
/**
* Display context types, for getting values of a particular setting.
* A type is passed to uldn_getContext() to select which setting to read.
* Note, the specific numeric values are internal and may change.
* @stable ICU 51
*/
enum UDisplayContextType {
/**
* Type to retrieve the dialect handling setting, e.g.
* UDISPCTX_STANDARD_NAMES or UDISPCTX_DIALECT_NAMES.
* @stable ICU 51
*/
UDISPCTX_TYPE_DIALECT_HANDLING = 0,
/**
* Type to retrieve the capitalization context setting, e.g.
* UDISPCTX_CAPITALIZATION_NONE, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE,
* UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, etc.
* @stable ICU 51
*/
UDISPCTX_TYPE_CAPITALIZATION = 1,
/**
* Type to retrieve the display length setting, e.g.
* UDISPCTX_LENGTH_FULL, UDISPCTX_LENGTH_SHORT.
* @stable ICU 54
*/
UDISPCTX_TYPE_DISPLAY_LENGTH = 2
};
/**
* C typedef for enum UDisplayContextType.
* @stable ICU 51
*/
typedef enum UDisplayContextType UDisplayContextType;
/**
* Display context settings.
* Each value below is written as (UDisplayContextType << 8) + n, which groups
* the settings by the type they belong to.
* Note, the specific numeric values are internal and may change.
* @stable ICU 51
*/
enum UDisplayContext {
/**
* ================================
* DIALECT_HANDLING can be set to one of UDISPCTX_STANDARD_NAMES or
* UDISPCTX_DIALECT_NAMES. Use UDisplayContextType UDISPCTX_TYPE_DIALECT_HANDLING
* to get the value.
*/
/**
* A possible setting for DIALECT_HANDLING:
* use standard names when generating a locale name,
* e.g. en_GB displays as 'English (United Kingdom)'.
* @stable ICU 51
*/
UDISPCTX_STANDARD_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 0,
/**
* A possible setting for DIALECT_HANDLING:
* use dialect names when generating a locale name,
* e.g. en_GB displays as 'British English'.
* @stable ICU 51
*/
UDISPCTX_DIALECT_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 1,
/**
* ================================
* CAPITALIZATION can be set to one of UDISPCTX_CAPITALIZATION_NONE,
* UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE,
* UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE,
* UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU, or
* UDISPCTX_CAPITALIZATION_FOR_STANDALONE.
* Use UDisplayContextType UDISPCTX_TYPE_CAPITALIZATION to get the value.
*/
/**
* The capitalization context to be used is unknown (this is the default value).
* @stable ICU 51
*/
UDISPCTX_CAPITALIZATION_NONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 0,
/**
* The capitalization context if a date, date symbol or display name is to be
* formatted with capitalization appropriate for the middle of a sentence.
* @stable ICU 51
*/
UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 1,
/**
* The capitalization context if a date, date symbol or display name is to be
* formatted with capitalization appropriate for the beginning of a sentence.
* @stable ICU 51
*/
UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 2,
/**
* The capitalization context if a date, date symbol or display name is to be
* formatted with capitalization appropriate for a user-interface list or menu item.
* @stable ICU 51
*/
UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 3,
/**
* The capitalization context if a date, date symbol or display name is to be
* formatted with capitalization appropriate for stand-alone usage such as an
* isolated name on a calendar page.
* @stable ICU 51
*/
UDISPCTX_CAPITALIZATION_FOR_STANDALONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 4,
/**
* ================================
* DISPLAY_LENGTH can be set to one of UDISPCTX_LENGTH_FULL or
* UDISPCTX_LENGTH_SHORT. Use UDisplayContextType UDISPCTX_TYPE_DISPLAY_LENGTH
* to get the value.
*/
/**
* A possible setting for DISPLAY_LENGTH:
* use full names when generating a locale name,
* e.g. "United States" for US.
* @stable ICU 54
*/
UDISPCTX_LENGTH_FULL = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 0,
/**
* A possible setting for DISPLAY_LENGTH:
* use short names when generating a locale name,
* e.g. "U.S." for US.
* @stable ICU 54
*/
UDISPCTX_LENGTH_SHORT = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 1
};
/**
* C typedef for enum UDisplayContext.
* @stable ICU 51
*/
typedef enum UDisplayContext UDisplayContext;
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif
// uldnames.h
/*
*******************************************************************************
* Copyright (C) 2010-2016, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
#ifndef __ULDNAMES_H__
#define __ULDNAMES_H__
/**
* \file
* \brief C API: Provides display names of Locale ids and their components.
*/
/**
* Enum used in LocaleDisplayNames::createInstance.
* In this C API it is the dialectHandling argument of uldn_open() and the
* return type of uldn_getDialectHandling().
* @stable ICU 4.4
*/
typedef enum {
/**
* Use standard names when generating a locale name,
* e.g. en_GB displays as 'English (United Kingdom)'.
* @stable ICU 4.4
*/
ULDN_STANDARD_NAMES = 0,
/**
* Use dialect names when generating a locale name,
* e.g. en_GB displays as 'British English'.
* @stable ICU 4.4
*/
ULDN_DIALECT_NAMES
} UDialectHandling;
/**
* Opaque C service object type for the locale display names API.
* Instances are returned by uldn_open() and uldn_openForContext().
* @stable ICU 4.4
*/
struct ULocaleDisplayNames;
/**
* C typedef for struct ULocaleDisplayNames.
* @stable ICU 4.4
*/
typedef struct ULocaleDisplayNames ULocaleDisplayNames;
#if !UCONFIG_NO_FORMATTING
/**
* Returns an instance of LocaleDisplayNames that returns names
* formatted for the provided locale, using the provided
* dialectHandling. The usual value for dialectHandling is
* ULDN_STANDARD_NAMES.
*
* @param locale the display locale
* @param dialectHandling how to select names for locales
* @param pErrorCode the status code
* @return a ULocaleDisplayNames instance
* @see uldn_close
* @stable ICU 4.4
*/
U_STABLE ULocaleDisplayNames * U_EXPORT2
uldn_open(const char * locale,
UDialectHandling dialectHandling,
UErrorCode *pErrorCode);
/**
* Closes a ULocaleDisplayNames instance obtained from uldn_open().
* @param ldn the ULocaleDisplayNames instance to be closed
* @see uldn_open
* @stable ICU 4.4
*/
U_STABLE void U_EXPORT2
uldn_close(ULocaleDisplayNames *ldn);
/* getters for state */
/**
* Returns the locale used to determine the display names. This is
* not necessarily the same locale passed to uldn_open().
* @param ldn the ULocaleDisplayNames instance
* @return the display locale
* @stable ICU 4.4
*/
U_STABLE const char * U_EXPORT2
uldn_getLocale(const ULocaleDisplayNames *ldn);
/**
* Returns the dialect handling used in the display names.
* @param ldn the ULocaleDisplayNames instance
* @return the dialect handling enum (ULDN_STANDARD_NAMES or ULDN_DIALECT_NAMES)
* @stable ICU 4.4
*/
U_STABLE UDialectHandling U_EXPORT2
uldn_getDialectHandling(const ULocaleDisplayNames *ldn);
/* names for entire locales */
/**
* Returns the display name of the provided locale.
* @param ldn the ULocaleDisplayNames instance
* @param locale the locale whose display name to return
* @param result buffer that receives the display name
* @param maxResultSize the size of the result buffer
* @param pErrorCode the status code
* @return the actual buffer size needed for the display name. If it's
* greater than maxResultSize, the returned name will be truncated.
* @stable ICU 4.4
*/
U_STABLE int32_t U_EXPORT2
uldn_localeDisplayName(const ULocaleDisplayNames *ldn,
const char *locale,
UChar *result,
int32_t maxResultSize,
UErrorCode *pErrorCode);
/* names for components of a locale */
/**
* Returns the display name of the provided language code.
* @param ldn the ULocaleDisplayNames instance
* @param lang the language code whose display name to return
* @param result buffer that receives the display name
* @param maxResultSize the size of the result buffer
* @param pErrorCode the status code
* @return the actual buffer size needed for the display name. If it's
* greater than maxResultSize, the returned name will be truncated.
* @stable ICU 4.4
*/
U_STABLE int32_t U_EXPORT2
uldn_languageDisplayName(const ULocaleDisplayNames *ldn,
const char *lang,
UChar *result,
int32_t maxResultSize,
UErrorCode *pErrorCode);
/**
* Returns the display name of the provided script, given as a string.
* @param ldn the ULocaleDisplayNames instance
* @param script the script whose display name to return
* @param result buffer that receives the display name
* @param maxResultSize the size of the result buffer
* @param pErrorCode the status code
* @return the actual buffer size needed for the display name. If it's
* greater than maxResultSize, the returned name will be truncated.
* @see uldn_scriptCodeDisplayName
* @stable ICU 4.4
*/
U_STABLE int32_t U_EXPORT2
uldn_scriptDisplayName(const ULocaleDisplayNames *ldn,
const char *script,
UChar *result,
int32_t maxResultSize,
UErrorCode *pErrorCode);
/**
* Returns the display name of the provided script code, given as a UScriptCode value.
* @param ldn the ULocaleDisplayNames instance
* @param scriptCode the script code whose display name to return
* @param result buffer that receives the display name
* @param maxResultSize the size of the result buffer
* @param pErrorCode the status code
* @return the actual buffer size needed for the display name. If it's
* greater than maxResultSize, the returned name will be truncated.
* @see uldn_scriptDisplayName
* @stable ICU 4.4
*/
U_STABLE int32_t U_EXPORT2
uldn_scriptCodeDisplayName(const ULocaleDisplayNames *ldn,
UScriptCode scriptCode,
UChar *result,
int32_t maxResultSize,
UErrorCode *pErrorCode);
/**
* Returns the display name of the provided region code.
* @param ldn the ULocaleDisplayNames instance
* @param region the region code whose display name to return
* @param result buffer that receives the display name
* @param maxResultSize the size of the result buffer
* @param pErrorCode the status code
* @return the actual buffer size needed for the display name. If it's
* greater than maxResultSize, the returned name will be truncated.
* @stable ICU 4.4
*/
U_STABLE int32_t U_EXPORT2
uldn_regionDisplayName(const ULocaleDisplayNames *ldn,
const char *region,
UChar *result,
int32_t maxResultSize,
UErrorCode *pErrorCode);
/**
* Returns the display name of the provided variant.
* @param ldn the ULocaleDisplayNames instance
* @param variant the variant whose display name to return
* @param result buffer that receives the display name
* @param maxResultSize the size of the result buffer
* @param pErrorCode the status code
* @return the actual buffer size needed for the display name. If it's
* greater than maxResultSize, the returned name will be truncated.
* @stable ICU 4.4
*/
U_STABLE int32_t U_EXPORT2
uldn_variantDisplayName(const ULocaleDisplayNames *ldn,
const char *variant,
UChar *result,
int32_t maxResultSize,
UErrorCode *pErrorCode);
/**
* Returns the display name of the provided locale key.
* @param ldn the ULocaleDisplayNames instance
* @param key the locale key whose display name to return
* @param result buffer that receives the display name
* @param maxResultSize the size of the result buffer
* @param pErrorCode the status code
* @return the actual buffer size needed for the display name. If it's
* greater than maxResultSize, the returned name will be truncated.
* @see uldn_keyValueDisplayName
* @stable ICU 4.4
*/
U_STABLE int32_t U_EXPORT2
uldn_keyDisplayName(const ULocaleDisplayNames *ldn,
const char *key,
UChar *result,
int32_t maxResultSize,
UErrorCode *pErrorCode);
/**
* Returns the display name of the provided value (used with the provided key).
* @param ldn the ULocaleDisplayNames instance
* @param key the locale key
* @param value the locale key's value
* @param result buffer that receives the display name
* @param maxResultSize the size of the result buffer
* @param pErrorCode the status code
* @return the actual buffer size needed for the display name. If it's
* greater than maxResultSize, the returned name will be truncated.
* @see uldn_keyDisplayName
* @stable ICU 4.4
*/
U_STABLE int32_t U_EXPORT2
uldn_keyValueDisplayName(const ULocaleDisplayNames *ldn,
const char *key,
const char *value,
UChar *result,
int32_t maxResultSize,
UErrorCode *pErrorCode);
/**
* Returns an instance of LocaleDisplayNames that returns names formatted
* for the provided locale, using the provided UDisplayContext settings.
*
* @param locale The display locale
* @param contexts List of one or more context settings (e.g. for dialect
* handling, capitalization, etc.)
* @param length Number of items in the contexts list
* @param pErrorCode Pointer to UErrorCode input/output status. If at entry this indicates
* a failure status, the function will do nothing; otherwise this will be
* updated with any new status from the function.
* @return a ULocaleDisplayNames instance
* @stable ICU 51
*/
U_STABLE ULocaleDisplayNames * U_EXPORT2
uldn_openForContext(const char * locale, UDisplayContext *contexts,
int32_t length, UErrorCode *pErrorCode);
/**
* Returns the UDisplayContext value for the specified UDisplayContextType.
* @param ldn the ULocaleDisplayNames instance
* @param type the UDisplayContextType whose value to return
* @param pErrorCode Pointer to UErrorCode input/output status. If at entry this indicates
* a failure status, the function will do nothing; otherwise this will be
* updated with any new status from the function.
* @return the UDisplayContext value for the specified type.
* @stable ICU 51
*/
U_STABLE UDisplayContext U_EXPORT2
uldn_getContext(const ULocaleDisplayNames *ldn, UDisplayContextType type,
UErrorCode *pErrorCode);
#endif /* !UCONFIG_NO_FORMATTING */
#endif /* __ULDNAMES_H__ */
// ucurr.h
/*
**********************************************************************
* Copyright (c) 2002-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
#ifndef _UCURR_H_
#define _UCURR_H_
/**
* \file
* \brief C API: Encapsulates information about a currency.
*
* The ucurr API encapsulates information about a currency, as defined by
* ISO 4217. A currency is represented by a 3-character string
* containing its ISO 4217 code. This API can return various data
* necessary for the proper display of a currency:
*
* - A display symbol, for a specific locale
* - The number of fraction digits to display
* - A rounding increment
*
* The DecimalFormat class uses these data to display
* currencies.
* @author Alan Liu
* @since ICU 2.2
*/
#if !UCONFIG_NO_FORMATTING
/**
* Currency Usage used for Decimal Format.
* It is also the usage argument of ucurr_getDefaultFractionDigitsForUsage()
* and ucurr_getRoundingIncrementForUsage().
* @stable ICU 54
*/
enum UCurrencyUsage {
/**
* a setting to specify currency usage which determines currency digit
* and rounding for standard usage, for example: "50.00 NT$"
* used as DEFAULT value
* @stable ICU 54
*/
UCURR_USAGE_STANDARD=0,
/**
* a setting to specify currency usage which determines currency digit
* and rounding for cash usage, for example: "50 NT$"
* @stable ICU 54
*/
UCURR_USAGE_CASH=1,
/**
* One higher than the last enum UCurrencyUsage constant.
* @stable ICU 54
*/
UCURR_USAGE_COUNT=2
};
/** C typedef for enum UCurrencyUsage. */
typedef enum UCurrencyUsage UCurrencyUsage;
/**
* Finds a currency code for the given locale.
* @param locale the locale for which to retrieve a currency code.
* Currency can be specified by the "currency" keyword,
* in which case it overrides the default currency code.
* @param buff fill-in buffer. Can be NULL for preflighting.
* @param buffCapacity capacity of the fill-in buffer. Can be 0 for
* preflighting. If it is non-zero, the buff parameter
* must not be NULL.
* @param ec error code
* @return length of the currency string. It should always be 3. If 0,
* currency couldn't be found or the input values are
* invalid.
* @stable ICU 2.8
*/
U_STABLE int32_t U_EXPORT2
ucurr_forLocale(const char* locale,
UChar* buff,
int32_t buffCapacity,
UErrorCode* ec);
/**
* Selector constants for ucurr_getName(); passed as its nameStyle argument.
*
* @see ucurr_getName
* @stable ICU 2.6
*/
typedef enum UCurrNameStyle {
/**
* Selector for ucurr_getName indicating a symbolic name for a
* currency, such as "$" for USD.
* @stable ICU 2.6
*/
UCURR_SYMBOL_NAME,
/**
* Selector for ucurr_getName indicating the long name for a
* currency, such as "US Dollar" for USD.
* @stable ICU 2.6
*/
UCURR_LONG_NAME
} UCurrNameStyle;
#if !UCONFIG_NO_SERVICE
/**
* Registry key type returned by ucurr_register() and accepted by
* ucurr_unregister().
* @stable ICU 2.6
*/
typedef const void* UCurrRegistryKey;
/**
* Register an (existing) ISO 4217 currency code for the given locale.
* Only the country code and the two variants EURO and PRE_EURO are
* recognized.
* @param isoCode the three-letter ISO 4217 currency code
* @param locale the locale for which to register this currency code
* @param status the in/out status code
* @return a registry key that can be used to unregister this currency code, or NULL
* if there was an error.
* @see ucurr_unregister
* @stable ICU 2.6
*/
U_STABLE UCurrRegistryKey U_EXPORT2
ucurr_register(const UChar* isoCode,
const char* locale,
UErrorCode* status);
/**
* Unregister the previously-registered currency definitions using the
* UCurrRegistryKey returned from ucurr_register. Key becomes invalid after
* a successful call and should not be used again. Any currency
* that might have been hidden by the original ucurr_register call is
* restored.
* @param key the registry key returned by a previous call to ucurr_register
* @param status the in/out status code, no special meanings are assigned
* @return TRUE if the currency for this key was successfully unregistered
* @stable ICU 2.6
*/
U_STABLE UBool U_EXPORT2
ucurr_unregister(UCurrRegistryKey key, UErrorCode* status);
#endif /* UCONFIG_NO_SERVICE */
/**
* Returns the display name for the given currency in the
* given locale. For example, the display name for the USD
* currency object in the en_US locale is "$".
* @param currency null-terminated 3-letter ISO 4217 code
* @param locale locale in which to display currency
* @param nameStyle selector for which kind of name to return
* (UCURR_SYMBOL_NAME or UCURR_LONG_NAME)
* @param isChoiceFormat fill-in set to TRUE if the returned value
* is a ChoiceFormat pattern; otherwise it is a static string
* @param len fill-in parameter to receive length of result
* @param ec error code
* @return pointer to display string of 'len' UChars. If the resource
* data contains no entry for 'currency', then 'currency' itself is
* returned. If *isChoiceFormat is TRUE, then the result is a
* ChoiceFormat pattern. Otherwise it is a static string.
* @stable ICU 2.6
*/
U_STABLE const UChar* U_EXPORT2
ucurr_getName(const UChar* currency,
const char* locale,
UCurrNameStyle nameStyle,
UBool* isChoiceFormat,
int32_t* len,
UErrorCode* ec);
/**
* Returns the plural name for the given currency in the
* given locale. For example, the plural name for the USD
* currency object in the en_US locale is "US dollar" or "US dollars".
* @param currency null-terminated 3-letter ISO 4217 code
* @param locale locale in which to display currency
* @param isChoiceFormat fill-in set to TRUE if the returned value
* is a ChoiceFormat pattern; otherwise it is a static string
* @param pluralCount plural count, passed as a C string
* @param len fill-in parameter to receive length of result
* @param ec error code
* @return pointer to display string of 'len' UChars. If the resource
* data contains no entry for 'currency', then 'currency' itself is
* returned.
* @stable ICU 4.2
*/
U_STABLE const UChar* U_EXPORT2
ucurr_getPluralName(const UChar* currency,
const char* locale,
UBool* isChoiceFormat,
const char* pluralCount,
int32_t* len,
UErrorCode* ec);
/**
* Returns the number of fraction digits that should
* be displayed for the given currency.
* This is equivalent to ucurr_getDefaultFractionDigitsForUsage(currency,UCURR_USAGE_STANDARD,ec);
* @param currency null-terminated 3-letter ISO 4217 code
* @param ec input-output error code
* @return a non-negative number of fraction digits to be
* displayed, or 0 if there is an error
* @stable ICU 3.0
*/
U_STABLE int32_t U_EXPORT2
ucurr_getDefaultFractionDigits(const UChar* currency,
UErrorCode* ec);
/**
* Returns the number of fraction digits that should
* be displayed for the given currency with usage.
* @param currency null-terminated 3-letter ISO 4217 code
* @param usage enum usage for the currency
* @param ec input-output error code
* @return a non-negative number of fraction digits to be
* displayed, or 0 if there is an error
* @stable ICU 54
*/
U_STABLE int32_t U_EXPORT2
ucurr_getDefaultFractionDigitsForUsage(const UChar* currency,
const UCurrencyUsage usage,
UErrorCode* ec);
/**
* Returns the rounding increment for the given currency, or 0.0 if no
* rounding is done by the currency.
* This is equivalent to ucurr_getRoundingIncrementForUsage(currency,UCURR_USAGE_STANDARD,ec);
* @param currency null-terminated 3-letter ISO 4217 code
* @param ec input-output error code
* @return the non-negative rounding increment, or 0.0 if none,
* or 0.0 if there is an error
* @see ucurr_getRoundingIncrementForUsage
* @stable ICU 3.0
*/
U_STABLE double U_EXPORT2
ucurr_getRoundingIncrement(const UChar* currency,
UErrorCode* ec);
/**
* Returns the rounding increment for the given currency and usage, or 0.0
* if no rounding is done by the currency for that usage.
* @param currency null-terminated 3-letter ISO 4217 code
* @param usage enum usage for the currency
* @param ec input-output error code
* @return the non-negative rounding increment, or 0.0 if none,
* or 0.0 if there is an error
* @stable ICU 54
*/
U_STABLE double U_EXPORT2
ucurr_getRoundingIncrementForUsage(const UChar* currency,
const UCurrencyUsage usage,
UErrorCode* ec);
/**
* Selector constants for ucurr_openISOCurrencies().
*
* @see ucurr_openISOCurrencies
* @stable ICU 3.2
*/
typedef enum UCurrCurrencyType {
/**
* Select all ISO-4217 currency codes.
* @stable ICU 3.2
*/
UCURR_ALL = INT32_MAX,
/**
* Select only ISO-4217 commonly used currency codes.
* These currencies can be found in common use, and they usually have
* bank notes or coins associated with the currency code.
* This does not include fund codes, precious metals and other
* various ISO-4217 codes limited to special financial products.
* @stable ICU 3.2
*/
UCURR_COMMON = 1,
/**
* Select ISO-4217 uncommon currency codes.
* These codes represent fund codes, precious metals and other
* various ISO-4217 codes limited to special financial products.
* A fund code is a monetary resource associated with a currency.
* @stable ICU 3.2
*/
UCURR_UNCOMMON = 2,
/**
* Select only deprecated ISO-4217 codes.
* These codes are no longer in general public use.
* @stable ICU 3.2
*/
UCURR_DEPRECATED = 4,
/**
* Select only non-deprecated ISO-4217 codes.
* These codes are in general public use.
* @stable ICU 3.2
*/
UCURR_NON_DEPRECATED = 8
} UCurrCurrencyType;
/**
* Provides a UEnumeration object for listing ISO-4217 codes.
* @param currType You can use one of several UCurrCurrencyType values for this
* variable. You can also | (or) them together to get a specific list of
* currencies. Most people will want to use the (UCURR_COMMON|UCURR_NON_DEPRECATED) value to
* get a list of current currencies.
* @param pErrorCode Error code
* @return a UEnumeration object for listing the selected ISO-4217 codes
* @stable ICU 3.2
*/
U_STABLE UEnumeration * U_EXPORT2
ucurr_openISOCurrencies(uint32_t currType, UErrorCode *pErrorCode);
/**
* Queries if the given ISO 4217 3-letter code is available on the specified date range.
*
* Note: For checking availability of a currency on a specific date, specify the date on both 'from' and 'to'.
*
* When 'from' is U_DATE_MIN and 'to' is U_DATE_MAX, this method checks if the specified currency is available at any time.
* If 'from' and 'to' are the same UDate value, this method checks if the specified currency is available on that date.
*
* @param isoCode
* The ISO 4217 3-letter code.
*
* @param from
* The lower bound of the date range, inclusive. When 'from' is U_DATE_MIN, check the availability
* of the currency on any date before 'to'.
*
* @param to
* The upper bound of the date range, inclusive. When 'to' is U_DATE_MAX, check the availability of
* the currency on any date after 'from'.
*
* @param errorCode
* ICU error code
*
* @return TRUE if the given ISO 4217 3-letter code is supported on the specified date range.
*
* @stable ICU 4.8
*/
U_STABLE UBool U_EXPORT2
ucurr_isAvailable(const UChar* isoCode,
UDate from,
UDate to,
UErrorCode* errorCode);
/**
* Finds the number of valid currency codes for the
* given locale and date.
* @param locale the locale for which to retrieve the
* currency count.
* @param date the date for which to retrieve the
* currency count for the given locale.
* @param ec error code
* @return the number of currency codes for the
* given locale and date. If 0, currency
* codes couldn't be found or the input
* values are invalid.
* @stable ICU 4.0
*/
U_STABLE int32_t U_EXPORT2
ucurr_countCurrencies(const char* locale,
UDate date,
UErrorCode* ec);
/**
* Finds a currency code for the given locale and date.
* @param locale the locale for which to retrieve a currency code.
* Currency can be specified by the "currency" keyword,
* in which case it overrides the default currency code.
* @param date the date for which to retrieve a currency code for
* the given locale.
* @param index the index within the available list of currency codes
* for the given locale on the given date.
* @param buff fill-in buffer. Can be NULL for preflighting.
* @param buffCapacity capacity of the fill-in buffer. Can be 0 for
* preflighting. If it is non-zero, the buff parameter
* must not be NULL.
* @param ec error code
* @return length of the currency string. It should always be 3.
* If 0, currency couldn't be found or the input values are
* invalid.
* @see ucurr_countCurrencies
* @stable ICU 4.0
*/
U_STABLE int32_t U_EXPORT2
ucurr_forLocaleAndDate(const char* locale,
UDate date,
int32_t index,
UChar* buff,
int32_t buffCapacity,
UErrorCode* ec);
/**
* Given a key and a locale, returns an array of string values in a preferred
* order that would make a difference. These are all and only those values where
* the open (creation) of the service with the locale formed from the input locale
* plus input keyword and that value has different behavior than creation with the
* input locale alone.
* @param key one of the keys supported by this service. For now, only
* "currency" is supported.
* @param locale the locale
* @param commonlyUsed if TRUE, only commonly used values are returned
* for the given locale, in preferred order. Otherwise,
* all the available values for the locale are returned.
* @param status error status
* @return a string enumeration over keyword values for the given key and the locale.
* @stable ICU 4.2
*/
U_STABLE UEnumeration* U_EXPORT2
ucurr_getKeywordValuesForLocale(const char* key,
const char* locale,
UBool commonlyUsed,
UErrorCode* status);
/**
* Returns the ISO 4217 numeric code for the currency.
* Error Behaviour Functions
* Defines some error behaviour functions called by ucnv_{from,to}Unicode
* These are provided as part of ICU and many are stable, but they
* can also be considered only as an example of what can be done with
* callbacks. You may of course write your own.
*
* If you want to write your own, you may also find the functions from
* ucnv_cb.h useful when writing your own callbacks.
*
* These functions, although public, should NEVER be called directly.
* They should be used as parameters to the ucnv_setFromUCallback
* and ucnv_setToUCallback functions, to set the behaviour of a converter
* when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
*
* usage example: 'STOP' doesn't need any context, but newContext
* could be set to something other than 'NULL' if needed. The available
* contexts in this header can modify the default behavior of the callback.
*
* \code
* UErrorCode err = U_ZERO_ERROR;
* UConverter *myConverter = ucnv_open("ibm-949", &err);
* const void *oldContext;
* UConverterFromUCallback oldAction;
*
*
* if (U_SUCCESS(err))
* {
* ucnv_setFromUCallBack(myConverter,
* UCNV_FROM_U_CALLBACK_STOP,
* NULL,
* &oldAction,
* &oldContext,
* &status);
* }
* \endcode
*
* The code above tells "myConverter" to stop when it encounters an
* ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
* Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
* and ucnv_setToUCallBack would need to be called in order to change
* that behavior too.
*
* Here is an example with a context:
*
* \code
* UErrorCode err = U_ZERO_ERROR;
* UConverter *myConverter = ucnv_open("ibm-949", &err);
* const void *oldContext;
* UConverterFromUCallback oldAction;
*
*
* if (U_SUCCESS(err))
* {
* ucnv_setToUCallBack(myConverter,
* UCNV_TO_U_CALLBACK_SUBSTITUTE,
* UCNV_SUB_STOP_ON_ILLEGAL,
* &oldAction,
* &oldContext,
* &status);
* }
* \endcode
*
* The code above tells "myConverter" to stop when it encounters an
* ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
* Codepage -> Unicode. Any unmapped and legal characters will be
* substituted to be the default substitution character.
*/
#ifndef UCNV_ERR_H
#define UCNV_ERR_H
#if !UCONFIG_NO_CONVERSION
/**
* Forward declaration of the UConverter structure.
* Only the struct tag is introduced here; the converter declarations that
* follow refer to converters through pointers to this type.
* @stable ICU 2.0
*/
struct UConverter;
/**
* Typedef that lets the converter type be written as UConverter, without
* the struct keyword.
* @stable ICU 2.0
*/
typedef struct UConverter UConverter;
/**
* FROM_U, TO_U context options for sub callback
* @stable ICU 2.0
*/
#define UCNV_SUB_STOP_ON_ILLEGAL "i"
/**
* FROM_U, TO_U context options for skip callback
* @stable ICU 2.0
*/
#define UCNV_SKIP_STOP_ON_ILLEGAL "i"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_ICU NULL
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_JAVA "J"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
* TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_C "C"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_XML_DEC "D"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_XML_HEX "X"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_UNICODE "U"
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* From Unicode escape callback, meant to be passed to ucnv_setFromUCallBack
* rather than invoked by client code.
*
* NOTE(review): the original comment here opened with the truncated sentence
* "FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to
* CSS2 conventions (\\HH..H", which reads like documentation for a CSS2
* context option rather than for this function. The text between that
* fragment and the parameter list appears to be missing - confirm against
* the upstream ICU ucnv_err.h.
*
* @param context Callback context pointer; presumably one of the UCNV_ESCAPE_*
* values documented as FROM_U_CALLBACK_ESCAPE context options - TODO confirm
* the complete list.
* @param fromUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
* @param length Number of UChars in codeUnits
* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
* @param reason Defines the reason the callback was invoked
* @param err Return value will be set to success if the callback was handled,
* otherwise this value will be set to a failure status.
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
const void *context,
UConverterFromUnicodeArgs *fromUArgs,
const UChar* codeUnits,
int32_t length,
UChar32 codePoint,
UConverterCallbackReason reason,
UErrorCode * err);
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* This To Unicode callback skips any ILLEGAL_SEQUENCE, or
* skips only UNASSIGNED_SEQUENCE depending on the context parameter
* simply ignoring those characters.
*
* @param context The function currently recognizes the callback options:
* UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
* returning the error code back to the caller immediately.
* NULL: Skips any ILLEGAL_SEQUENCE
* @param toUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
* @param length Size (in bytes) of the concerned codepage sequence
* @param reason Defines the reason the callback was invoked
* @param err Return value will be set to success if the callback was handled,
* otherwise this value will be set to a failure status.
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
const void *context,
UConverterToUnicodeArgs *toUArgs,
const char* codeUnits,
int32_t length,
UConverterCallbackReason reason,
UErrorCode * err);
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* This To Unicode callback will Substitute the ILLEGAL SEQUENCE, or
* UNASSIGNED_SEQUENCE depending on context parameter, with the
* Unicode substitution character, U+FFFD.
*
* @param context The function currently recognizes the callback options:
* UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
* returning the error code back to the caller immediately.
* NULL: Substitutes any ILLEGAL_SEQUENCE
* @param toUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
* @param length Size (in bytes) of the concerned codepage sequence
* @param reason Defines the reason the callback was invoked
* @param err Return value will be set to success if the callback was handled,
* otherwise this value will be set to a failure status.
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
const void *context,
UConverterToUnicodeArgs *toUArgs,
const char* codeUnits,
int32_t length,
UConverterCallbackReason reason,
UErrorCode * err);
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
* hexadecimal representation of the illegal bytes
* (in the format %XNN, e.g. "%XFF%X0A%XC8%X03").
*
* @param context This function currently recognizes the callback options:
* UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
* UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
* @param toUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
* @param length Size (in bytes) of the concerned codepage sequence
* @param reason Defines the reason the callback was invoked
* @param err Return value will be set to success if the callback was handled,
* otherwise this value will be set to a failure status.
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
const void *context,
UConverterToUnicodeArgs *toUArgs,
const char* codeUnits,
int32_t length,
UConverterCallbackReason reason,
UErrorCode * err);
#endif
#endif
/*UCNV_ERR_H*/
// ucnv.h
/*
**********************************************************************
* Copyright (C) 1999-2014, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* ucnv.h:
* External APIs for the ICU's codeset conversion library
* Bertrand A. Damiba
*
* Modification History:
*
* Date Name Description
* 04/04/99 helena Fixed internal header inclusion.
* 05/11/00 helena Added setFallback and usesFallback APIs.
* 06/29/2000 helena Major rewrite of the callback APIs.
* 12/07/2000 srl Update of documentation
*/
/**
* \file
* \brief C API: Character conversion
*
* Character Conversion C API
*
* NOTE(review): the paragraphs below read like fragments of several separate
* documentation comments that were run together; the surrounding text and the
* declarations they describe are not present here. They are kept unchanged -
* confirm against the upstream ICU ucnv.h.
*
* NULL is passed for the converter name, it will create one with the
* getDefaultName return value.
*
* UCNV_OPTION_SEP_CHAR between the name and the first option and
* also between adjacent options. udata_open( packageName, "cnv", converterName, err) or equivalent.
* Typically, packageName will refer to a (.dat) file, or to a package registered with
* udata_setAppData(). Using a full file or directory pathname for packageName is deprecated.cnv = ucnv_openPackage("myapp", "myconverter", &err);
*/
/**
* Clones a converter and returns a pointer to the clone.
*
* @param cnv the converter to be cloned
* @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
* If buffer is not large enough, new memory will be allocated.
* Clients can use the U_CNV_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
* NOTE(review): the "@param stackBuffer" tag was absent from the original
* text and was restored by matching this description to the parameter - confirm.
* @param pBufferSize Deprecated functionality as of ICU 52, use NULL or 1.
* pointer to size of allocated space.
* @param status to indicate whether the operation went on smoothly or there were errors
* An informational status value, U_SAFECLONE_ALLOCATED_WARNING,
* is used if any allocations were necessary.
* However, it is better to check if *pBufferSize grew for checking for
* allocations because warning codes can be overridden by subsequent
* function calls.
* @return pointer to the new clone
* @stable ICU 2.0
*/
U_STABLE UConverter * U_EXPORT2
ucnv_safeClone(const UConverter *cnv,
void *stackBuffer,
int32_t *pBufferSize,
UErrorCode *status);
/**
* Deletes the unicode converter and releases resources associated
* with just this instance.
* Does not free up shared converter tables.
*
* @param converter the converter object to be deleted
* @see ucnv_open
* @see ucnv_openU
* @see ucnv_openCCSID
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucnv_close(UConverter * converter);
/**
* Fills in the output parameter, subChars, with the substitution characters
* as multiple bytes.
* If ucnv_setSubstString() set a Unicode string because the converter is
* stateful, then subChars will be an empty string.
*
* @param converter the Unicode converter
* @param subChars the substitution characters
* @param len on input the capacity of subChars, on output the number
* of bytes copied to it
* @param err the outgoing error status code.
* If the substitution character array is too small, an
* U_INDEX_OUTOFBOUNDS_ERROR will be returned.
* @see ucnv_setSubstString
* @see ucnv_setSubstChars
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucnv_getSubstChars(const UConverter *converter,
char *subChars,
int8_t *len,
UErrorCode *err);
/**
* Sets the substitution chars when converting from unicode to a codepage. The
* substitution is specified as a string of 1-4 bytes, and may contain
* NULL bytes.
* The subChars must represent a single character. The caller needs to know the
* byte sequence of a valid character in the converter's charset.
* For some converters, for example some ISO 2022 variants, only single-byte
* substitution characters may be supported.
* The newer ucnv_setSubstString() function relaxes these limitations.
*
* @param converter the Unicode converter
* @param subChars the substitution character byte sequence we want set
* @param len the number of bytes in subChars
* @param err the error status code. U_INDEX_OUTOFBOUNDS_ERROR if
* len is bigger than the maximum number of bytes allowed in subchars
* @see ucnv_setSubstString
* @see ucnv_getSubstChars
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucnv_setSubstChars(UConverter *converter,
const char *subChars,
int8_t len,
UErrorCode *err);
/**
* Set a substitution string for converting from Unicode to a charset.
* The caller need not know the charset byte sequence for each charset.
*
* Unlike ucnv_setSubstChars() which is designed to set a charset byte sequence
* for a single character, this function takes a Unicode string with
* zero, one or more characters, and immediately verifies that the string can be
* converted to the charset.
* If not, or if the result is too long (more than 32 bytes as of ICU 3.6),
* then the function returns with an error accordingly.
*
* Also unlike ucnv_setSubstChars(), this function works for stateful charsets
* by converting on the fly at the point of substitution rather than setting
* a fixed byte sequence.
*
* @param cnv The UConverter object.
* @param s The Unicode string.
* @param length The number of UChars in s, or -1 for a NUL-terminated string.
* @param err Pointer to a standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
*
* @see ucnv_setSubstChars
* @see ucnv_getSubstChars
* @stable ICU 3.6
*/
U_STABLE void U_EXPORT2
ucnv_setSubstString(UConverter *cnv,
const UChar *s,
int32_t length,
UErrorCode *err);
/**
* Fills in the output parameter, errBytes, with the error characters from the
* last failing conversion.
*
* @param converter the Unicode converter
* @param errBytes the codepage bytes which were in error
* @param len on input the capacity of errBytes, on output the number of
* bytes which were copied to it
* @param err the error status code.
* If the errBytes array is too small, a
* U_INDEX_OUTOFBOUNDS_ERROR will be returned.
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucnv_getInvalidChars(const UConverter *converter,
char *errBytes,
int8_t *len,
UErrorCode *err);
/**
* Fills in the output parameter, errUChars, with the error characters from the
* last failing conversion.
*
* @param converter the Unicode converter
* @param errUChars the UChars which were in error
* @param len on input the capacity of errUChars, on output the number of
* UChars which were copied to it
* @param err the error status code.
* If the errUChars array is too small, a
* U_INDEX_OUTOFBOUNDS_ERROR will be returned.
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucnv_getInvalidUChars(const UConverter *converter,
UChar *errUChars,
int8_t *len,
UErrorCode *err);
/**
* Resets the state of a converter to the default state. This is used
* in the case of an error, to restart a conversion from a known default state.
* It will also empty the internal output buffers.
* @param converter the Unicode converter
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucnv_reset(UConverter *converter);
/**
* Resets the to-Unicode part of a converter state to the default state.
* This is used in the case of an error to restart a conversion to
* Unicode to a known default state. It will also empty the internal
* output buffers used for the conversion to Unicode codepoints.
* @param converter the Unicode converter
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucnv_resetToUnicode(UConverter *converter);
/**
* Resets the from-Unicode part of a converter state to the default state.
* This is used in the case of an error to restart a conversion from
* Unicode to a known default state. It will also empty the internal output
* buffers used for the conversion from Unicode codepoints.
* @param converter the Unicode converter
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucnv_resetFromUnicode(UConverter *converter);
/**
* Returns the maximum number of bytes that are output per UChar in conversion
* from Unicode using this converter.
* The returned number can be used with UCNV_GET_MAX_BYTES_FOR_STRING
* to calculate the size of a target buffer for conversion from Unicode.
*
* Note: Before ICU 2.8, this function did not return reliable numbers for
* some stateful converters (EBCDIC_STATEFUL, ISO-2022) and LMBCS.
*
* This number may not be the same as the maximum number of bytes per
* "conversion unit". In other words, it may not be the intuitively expected
* number of bytes per character that would be published for a charset,
* and may not fulfill any other purpose than the allocation of an output
* buffer of guaranteed sufficient size for a given input length and converter.
*
* Examples for special cases that are taken into account:
* - Supplementary code points may convert to more bytes than BMP code points.
* This function returns bytes per UChar (UTF-16 code unit), not per
* Unicode code point, for efficient buffer allocation.
* - State-shifting output (SI/SO, escapes, etc.) from stateful converters.
* - When m input UChars are converted to n output bytes, then the maximum m/n
* is taken into account.
*
* The number returned here does not take into account
* (see UCNV_GET_MAX_BYTES_FOR_STRING):
* - callbacks which output more than one charset character sequence per call,
* like escape callbacks
* - initial and final non-character bytes that are output by some converters
* (automatic BOMs, initial escape sequence, final SI, etc.)
*
* Examples for returned values:
* - SBCS charsets: 1
* - Shift-JIS: 2
* - UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted)
* - UTF-8: 3 (3 per BMP, 4 per surrogate _pair_)
* - EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS)
* - ISO-2022: 3 (always outputs UTF-8)
* - ISO-2022-JP: 6 (4-byte escape sequences + DBCS)
* - ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 + DBCS)
*
* @param converter The Unicode converter.
* @return The maximum number of bytes per UChar (16 bit code unit)
* that are output by ucnv_fromUnicode(),
* to be used together with UCNV_GET_MAX_BYTES_FOR_STRING
* for buffer allocation.
*
* @see UCNV_GET_MAX_BYTES_FOR_STRING
* @see ucnv_getMinCharSize
* @stable ICU 2.0
*/
U_STABLE int8_t U_EXPORT2
ucnv_getMaxCharSize(const UConverter *converter);
/**
* Computes a target buffer size, in bytes, that is guaranteed to be large
* enough for converting a Unicode string to a charset.
*
* The result allows for the initial and final non-character bytes that some
* converters emit.
* It does not allow for callbacks that emit more than one charset character
* sequence per call (escape callbacks, for example).
* The default (substitution) callback emits only one such sequence.
*
* @param length Number of UChars that will be converted.
* @param maxCharSize Value returned by ucnv_getMaxCharSize() for the converter
* that will perform the conversion.
* @return A buffer size sufficient to hold the bytes produced by converting
* length UChars with the converter that reported maxCharSize.
*
* @see ucnv_getMaxCharSize
* @stable ICU 2.8
*/
#define UCNV_GET_MAX_BYTES_FOR_STRING(length, maxCharSize) \
((int32_t)(maxCharSize)*(10+(int32_t)(length)))
/**
* Returns the minimum byte length (per codepoint) for characters in this codepage.
* This is usually either 1 or 2.
* @param converter the Unicode converter
* @return the minimum number of bytes per codepoint allowed by this particular converter
* @see ucnv_getMaxCharSize
* @stable ICU 2.0
*/
U_STABLE int8_t U_EXPORT2
ucnv_getMinCharSize(const UConverter *converter);
/**
* Returns the display name of the converter passed in based on the Locale
* passed in. If the locale contains no display name, the internal ASCII
* name will be filled in.
*
* @param converter the Unicode converter.
* @param displayLocale the specific locale we want to localize for
* @param displayName user provided buffer to be filled in
* @param displayNameCapacity size of displayName Buffer
* @param err error status code
* @return displayNameLength number of UChar needed in displayName
* @see ucnv_getName
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ucnv_getDisplayName(const UConverter *converter,
const char *displayLocale,
UChar *displayName,
int32_t displayNameCapacity,
UErrorCode *err);
/**
* Gets the internal, canonical name of the converter (zero-terminated).
* The lifetime of the returned string will be that of the converter
* passed to this function.
* @param converter the Unicode converter
* @param err UErrorCode status
* @return the internal name of the converter
* @see ucnv_getDisplayName
* @stable ICU 2.0
*/
U_STABLE const char * U_EXPORT2
ucnv_getName(const UConverter *converter, UErrorCode *err);
/**
* Gets a codepage number associated with the converter. This is not guaranteed
* to be the one used to create the converter. Some converters do not represent
* platform registered codepages and return zero for the codepage number.
* The error code fill-in parameter indicates if the codepage number
* is available.
* Does not check if the converter is NULL or if converter's data
* table is NULL.
*
* Important: The use of CCSIDs is not recommended because it is limited
* to only two platforms in principle and only one (UCNV_IBM) in the current
* ICU converter API.
* Also, CCSIDs are insufficient to identify IBM Unicode conversion tables precisely.
* For more details see ucnv_openCCSID().
*
* @param converter the Unicode converter
* @param err the error status code.
* @return If any error occurs, -1 will be returned; otherwise, the codepage number
* will be returned
* @see ucnv_openCCSID
* @see ucnv_getPlatform
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ucnv_getCCSID(const UConverter *converter,
UErrorCode *err);
/**
* Gets a codepage platform associated with the converter. Currently,
* only UCNV_IBM will be returned.
* Does not test if the converter is NULL or if converter's data
* table is NULL.
* @param converter the Unicode converter
* @param err the error status code.
* @return The codepage platform
* @stable ICU 2.0
*/
U_STABLE UConverterPlatform U_EXPORT2
ucnv_getPlatform(const UConverter *converter,
UErrorCode *err);
/**
* Gets the type of the converter
* e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022,
* EBCDIC_STATEFUL, LATIN_1
* @param converter a valid, opened converter
* @return the type of the converter
* @stable ICU 2.0
*/
U_STABLE UConverterType U_EXPORT2
ucnv_getType(const UConverter * converter);
/**
* Gets the "starter" (lead) bytes for converters of type MBCS.
* Will fill in an U_ILLEGAL_ARGUMENT_ERROR if converter passed in
* is not MBCS. Fills in an array of type UBool, with the value of the byte
* as offset to the array. For example, if (starters[0x20] == TRUE) at return,
* it means that the byte 0x20 is a starter byte in this converter.
* Context pointers are always owned by the caller.
*
* @param converter a valid, opened converter of type MBCS
* @param starters an array of size 256 to be filled in
* @param err error status, U_ILLEGAL_ARGUMENT_ERROR if the
* converter is not a type which can return starters.
* @see ucnv_getType
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucnv_getStarters(const UConverter* converter,
UBool starters[256],
UErrorCode* err);
/**
* Selector values naming which Unicode set ucnv_getUnicodeSet() should return.
* @see ucnv_getUnicodeSet
* @stable ICU 2.6
*/
enum UConverterUnicodeSet {
    /** Roundtrippable Unicode code points only. @stable ICU 2.6 */
    UCNV_ROUNDTRIP_SET = 0,
    /** Unicode code points that have a roundtrip or a fallback mapping. @stable ICU 4.0 */
    UCNV_ROUNDTRIP_AND_FALLBACK_SET,
    /** Count of UConverterUnicodeSet selectors. @stable ICU 2.6 */
    UCNV_SET_COUNT
};
typedef enum UConverterUnicodeSet UConverterUnicodeSet;
/**
* Returns the set of Unicode code points that can be converted by an ICU converter.
*
* Returns one of several kinds of set:
*
* 1. UCNV_ROUNDTRIP_SET
*
* The set of all Unicode code points that can be roundtrip-converted
* (converted without any data loss) with the converter (ucnv_fromUnicode()).
* This set will not include code points that have fallback mappings
* or are only the result of reverse fallback mappings.
* This set will also not include PUA code points with fallbacks, although
* ucnv_fromUnicode() will always use those mappings despite ucnv_setFallback().
* See UTR #22 "Character Mapping Markup Language"
* at http://www.unicode.org/reports/tr22/
*
* This is useful for example for
* - checking that a string or document can be roundtrip-converted with a converter,
* without/before actually performing the conversion
* - testing if a converter can be used for typical text for a certain locale,
* by comparing its roundtrip set with the set of ExemplarCharacters from
* ICU's locale data or other sources
*
* 2. UCNV_ROUNDTRIP_AND_FALLBACK_SET
*
* The set of all Unicode code points that can be converted with the converter (ucnv_fromUnicode())
* when fallbacks are turned on (see ucnv_setFallback()).
* This set includes all code points with roundtrips and fallbacks (but not reverse fallbacks).
*
* In the future, there may be more UConverterUnicodeSet choices to select
* sets with different properties.
*
* @param cnv The converter for which a set is requested.
* @param setFillIn A valid USet *. It will be cleared by this function before
* the converter's specific set is filled into the USet.
* @param whichSet A UConverterUnicodeSet selector;
* currently UCNV_ROUNDTRIP_SET is the only supported value.
* @param pErrorCode ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
*
* @see UConverterUnicodeSet
* @see uset_open
* @see uset_close
* @stable ICU 2.6
*/
U_STABLE void U_EXPORT2
ucnv_getUnicodeSet(const UConverter *cnv,
USet *setFillIn,
UConverterUnicodeSet whichSet,
UErrorCode *pErrorCode);
/**
* Gets the current callback function used by the converter when an illegal
* or invalid codepage sequence is found.
* Context pointers are always owned by the caller.
*
* @param converter the unicode converter
* @param action fillin: returns the callback function pointer
* @param context fillin: returns the callback's private void* context
* @see ucnv_setToUCallBack
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucnv_getToUCallBack (const UConverter * converter,
UConverterToUCallback *action,
const void **context);
/**
* Gets the current callback function used by the converter when illegal
* or invalid Unicode sequence is found.
* Context pointers are always owned by the caller.
*
* @param converter the unicode converter
* @param action fillin: returns the callback function pointer
* @param context fillin: returns the callback's private void* context
* @see ucnv_setFromUCallBack
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucnv_getFromUCallBack (const UConverter * converter,
UConverterFromUCallback *action,
const void **context);
/**
* Changes the callback function used by the converter when
* an illegal or invalid sequence is found.
* Context pointers are always owned by the caller.
* Predefined actions and contexts can be found in the ucnv_err.h header.
*
* @param converter the unicode converter
* @param newAction the new callback function
* @param newContext the new toUnicode callback context pointer. This can be NULL.
* @param oldAction fillin: returns the old callback function pointer. This can be NULL.
* @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
* @param err The error code status
* @see ucnv_getToUCallBack
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucnv_setToUCallBack (UConverter * converter,
UConverterToUCallback newAction,
const void* newContext,
UConverterToUCallback *oldAction,
const void** oldContext,
UErrorCode * err);
/**
* Changes the current callback function used by the converter when
* an illegal or invalid sequence is found.
* Context pointers are always owned by the caller.
* Predefined actions and contexts can be found in the ucnv_err.h header.
*
* @param converter the unicode converter
* @param newAction the new callback function
* @param newContext the new fromUnicode callback context pointer. This can be NULL.
* @param oldAction fillin: returns the old callback function pointer. This can be NULL.
* @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
* @param err The error code status
* @see ucnv_getFromUCallBack
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucnv_setFromUCallBack (UConverter * converter,
UConverterFromUCallback newAction,
const void *newContext,
UConverterFromUCallback *oldAction,
const void **oldContext,
UErrorCode * err);
/**
* Converts an array of unicode characters to an array of codepage
* characters. This function is optimized for converting a continuous
* stream of data in buffer-sized chunks, where the entire source and
* target does not fit in available buffers.
*
* The source pointer is an in/out parameter. It starts out pointing where the
* conversion is to begin, and ends up pointing after the last UChar consumed.
*
* Target similarly starts out pointing at the first available byte in the output
* buffer, and ends up pointing after the last byte written to the output.
*
* The converter always attempts to consume the entire source buffer, unless
* (1.) the target buffer is full, or (2.) a failing error is returned from the
* current callback function. When a successful error status has been
* returned, it means that all of the source buffer has been
* consumed. At that point, the caller should reset the source and
* sourceLimit pointers to point to the next chunk.
*
* At the end of the stream (flush==TRUE), the input is completely consumed
* when *source==sourceLimit and no error code is set.
* The converter object is then automatically reset by this function.
* (This means that a converter need not be reset explicitly between data
* streams if it finishes the previous stream without errors.)
*
* This is a stateful conversion. Additionally, even when all source data has
* been consumed, some data may be in the converters' internal state.
* Call this function repeatedly, updating the target pointers with
* the next empty chunk of target in case of a
* U_BUFFER_OVERFLOW_ERROR, and updating the source pointers
* with the next chunk of source when a successful error status is
* returned, until there are no more chunks of source data.
* @param converter the Unicode converter
* @param target I/O parameter. Input : Points to the beginning of the buffer to copy
* codepage characters to. Output : points to after the last codepage character copied
* to target.
* @param targetLimit the pointer just after last of the target buffer
* @param source I/O parameter, pointer to pointer to the source Unicode character buffer.
* @param sourceLimit the pointer just after the last of the source buffer
* @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
* of allocated cells as target. Will fill in offsets from target to source pointer
* e.g: offsets[3] is equal to 6, it means that the target[3] was a result of transcoding source[6]
* For output data carried across calls, and other data without a specific source character
* (such as from escape sequences or callbacks) -1 will be placed for offsets.
* @param flush set to TRUE if the current source buffer is the last available
* chunk of the source, FALSE otherwise. Note that if a failing status is returned,
* this function may have to be called multiple times with flush set to TRUE until
* the source buffer is consumed.
* @param err the error status. U_ILLEGAL_ARGUMENT_ERROR will be set if the
* converter is NULL.
* U_BUFFER_OVERFLOW_ERROR will be set if the target is full and there is
* still data to be written to the target.
* @see ucnv_fromUChars
* @see ucnv_convert
* @see ucnv_getMinCharSize
* @see ucnv_setFromUCallBack
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucnv_fromUnicode (UConverter * converter,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
UBool flush,
UErrorCode * err);
/**
* Converts a buffer of codepage bytes into an array of unicode UChars
* characters. This function is optimized for converting a continuous
* stream of data in buffer-sized chunks, where the entire source and
* target does not fit in available buffers.
*
* The source pointer is an in/out parameter. It starts out pointing where the
* conversion is to begin, and ends up pointing after the last byte of source consumed.
*
* Target similarly starts out pointing at the first available UChar in the output
* buffer, and ends up pointing after the last UChar written to the output.
* It does NOT necessarily keep UChar sequences together.
*
* The converter always attempts to consume the entire source buffer, unless
* (1.) the target buffer is full, or (2.) a failing error is returned from the
* current callback function. When a successful error status has been
* returned, it means that all of the source buffer has been
* consumed. At that point, the caller should reset the source and
* sourceLimit pointers to point to the next chunk.
*
* At the end of the stream (flush==TRUE), the input is completely consumed
* when *source==sourceLimit and no error code is set.
* The converter object is then automatically reset by this function.
* (This means that a converter need not be reset explicitly between data
* streams if it finishes the previous stream without errors.)
*
* This is a stateful conversion. Additionally, even when all source data has
* been consumed, some data may be in the converters' internal state.
* Call this function repeatedly, updating the target pointers with
* the next empty chunk of target in case of a
* U_BUFFER_OVERFLOW_ERROR, and updating the source pointers
* with the next chunk of source when a successful error status is
* returned, until there are no more chunks of source data.
* @param converter the Unicode converter
* @param target I/O parameter. Input : Points to the beginning of the buffer to copy
* UChars into. Output : points to after the last UChar copied.
* @param targetLimit the pointer just after the end of the target buffer
* @param source I/O parameter, pointer to pointer to the source codepage buffer.
* @param sourceLimit the pointer to the byte after the end of the source buffer
* @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
* of allocated cells as target. Will fill in offsets from target to source pointer
* e.g: offsets[3] is equal to 6, it means that the target[3] was a result of transcoding source[6]
* For output data carried across calls, and other data without a specific source character
* (such as from escape sequences or callbacks) -1 will be placed for offsets.
* @param flush set to TRUE if the current source buffer is the last available
* chunk of the source, FALSE otherwise. Note that if a failing status is returned,
* this function may have to be called multiple times with flush set to TRUE until
* the source buffer is consumed.
* @param err the error status. U_ILLEGAL_ARGUMENT_ERROR will be set if the
* converter is NULL.
* U_BUFFER_OVERFLOW_ERROR will be set if the target is full and there is
* still data to be written to the target.
* @see ucnv_fromUChars
* @see ucnv_convert
* @see ucnv_getMinCharSize
* @see ucnv_setToUCallBack
* @see ucnv_getNextUChar
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucnv_toUnicode(UConverter *converter,
UChar **target,
const UChar *targetLimit,
const char **source,
const char *sourceLimit,
int32_t *offsets,
UBool flush,
UErrorCode *err);
/**
* Convert the Unicode string into a codepage string using an existing UConverter.
* The output string is NUL-terminated if possible.
*
* This function is a more convenient but less powerful version of ucnv_fromUnicode().
* It is only useful for whole strings, not for streaming conversion.
*
* The maximum output buffer capacity required (barring output from callbacks) will be
* UCNV_GET_MAX_BYTES_FOR_STRING(srcLength, ucnv_getMaxCharSize(cnv)).
*
* Note that the destination arguments precede the source arguments.
*
* @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called)
* @param dest destination string buffer, can be NULL if destCapacity==0
* @param destCapacity the number of chars available at dest
* @param src the input Unicode string
* @param srcLength the input string length, or -1 if NUL-terminated
* @param pErrorCode normal ICU error code;
* common error codes that may be set by this function include
* U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
* U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
* @return the length of the output string, not counting the terminating NUL;
* if the length is greater than destCapacity, then the string will not fit
* and a buffer of the indicated length would need to be passed in
* @see ucnv_fromUnicode
* @see ucnv_convert
* @see UCNV_GET_MAX_BYTES_FOR_STRING
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ucnv_fromUChars(UConverter *cnv,
char *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode);
/**
* Convert the codepage string into a Unicode string using an existing UConverter.
* The output string is NUL-terminated if possible.
*
* This function is a more convenient but less powerful version of ucnv_toUnicode().
* It is only useful for whole strings, not for streaming conversion.
*
* The maximum output buffer capacity required (barring output from callbacks) will be
* 2*srcLength (each char may be converted into a surrogate pair).
*
* Note that the destination arguments precede the source arguments.
*
* @param cnv the converter object to be used (ucnv_resetToUnicode() will be called)
* @param dest destination string buffer, can be NULL if destCapacity==0
* @param destCapacity the number of UChars available at dest
* @param src the input codepage string
* @param srcLength the input string length, or -1 if NUL-terminated
* @param pErrorCode normal ICU error code;
* common error codes that may be set by this function include
* U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
* U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
* @return the length of the output string, not counting the terminating NUL;
* if the length is greater than destCapacity, then the string will not fit
* and a buffer of the indicated length would need to be passed in
* @see ucnv_toUnicode
* @see ucnv_convert
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ucnv_toUChars(UConverter *cnv,
UChar *dest, int32_t destCapacity,
const char *src, int32_t srcLength,
UErrorCode *pErrorCode);
/**
* Convert a codepage buffer into Unicode one character at a time.
* The input is completely consumed when the U_INDEX_OUTOFBOUNDS_ERROR is set.
*
* Advantage compared to ucnv_toUnicode() or ucnv_toUChars():
* - Faster for small amounts of data, for most converters, e.g.,
* US-ASCII, ISO-8859-1, UTF-8/16/32, and most "normal" charsets.
* (For complex converters, e.g., SCSU, UTF-7 and ISO 2022 variants,
* it uses ucnv_toUnicode() internally.)
* - Convenient.
*
* Limitations compared to ucnv_toUnicode():
* - Always assumes flush=TRUE.
* This makes ucnv_getNextUChar() unsuitable for "streaming" conversion,
* that is, for where the input is supplied in multiple buffers,
* because ucnv_getNextUChar() will assume the end of the input at the end
* of the first buffer.
* - Does not provide offset output.
*
* It is possible to "mix" ucnv_getNextUChar() and ucnv_toUnicode() because
* ucnv_getNextUChar() uses the current state of the converter
* (unlike ucnv_toUChars() which always resets first).
* However, if ucnv_getNextUChar() is called after ucnv_toUnicode()
* stopped in the middle of a character sequence (with flush=FALSE),
* then ucnv_getNextUChar() will always use the slower ucnv_toUnicode()
* internally until the next character boundary.
* (This is new in ICU 2.6. In earlier releases, ucnv_getNextUChar() had to
* start at a character boundary.)
*
* Instead of using ucnv_getNextUChar(), it is recommended
* to convert using ucnv_toUnicode() or ucnv_toUChars()
* and then iterate over the text using U16_NEXT() or a UCharIterator (uiter.h)
* or a C++ CharacterIterator or similar.
* This allows streaming conversion and offset output, for example.
*
* There are two different kinds of codepages that provide mappings for surrogate characters.
*
* NOTE(review): the description of those two kinds of codepages, and the
* original @param entries, are missing from this generated header. The
* converter, source and sourceLimit entries below were re-created from the
* declaration, using the wording of the identically typed parameters of
* ucnv_toUnicode(); confirm them against the upstream ICU ucnv.h.
*
* @param converter the Unicode converter
* @param source I/O parameter, pointer to pointer to the source codepage buffer.
* @param sourceLimit the pointer to the byte after the end of the source buffer
* @param err the error status.
* U_INDEX_OUTOFBOUNDS_ERROR will be set if the input
* is empty or does not convert to any output (e.g.: pure state-change
* codes SI/SO, escape sequences for ISO 2022,
* or if the callback did not output anything, ...).
* This function will not set a U_BUFFER_OVERFLOW_ERROR because
* the "buffer" is the return code. However, there might be subsequent output
* stored in the converter object
* that will be returned in following calls to this function.
* @return a UChar32 resulting from the partial conversion of source
* @see ucnv_toUnicode
* @see ucnv_toUChars
* @see ucnv_convert
* @stable ICU 2.0
*/
U_STABLE UChar32 U_EXPORT2
ucnv_getNextUChar(UConverter * converter,
const char **source,
const char * sourceLimit,
UErrorCode * err);
/**
* Convert from one external charset to another using two existing UConverters.
* Internally, two conversions - ucnv_toUnicode() and ucnv_fromUnicode() -
* are used, "pivoting" through 16-bit Unicode.
*
* Important: For streaming conversion (multiple function calls for successive
* parts of a text stream), the caller must provide a pivot buffer explicitly,
* and must preserve the pivot buffer and associated pointers from one
* call to another. (The buffer may be moved if its contents and the relative
* pointer positions are preserved.)
*
* There is a similar function, ucnv_convert(),
* which has the following limitations:
* - it takes charset names, not converter objects, so that
* - two converters are opened for each call
* - only single-string conversion is possible, not streaming operation
* - it does not provide enough information to find out,
* in case of failure, whether the toUnicode or
* the fromUnicode conversion failed
*
* By contrast, ucnv_convertEx()
* - takes UConverter parameters instead of charset names
* - fully exposes the pivot buffer for streaming conversion and complete error handling
*
* ucnv_convertEx() also provides further convenience:
* - an option to reset the converters at the beginning
* (if reset==TRUE, see parameters;
* also sets *pivotTarget=*pivotSource=pivotStart)
* - allow NUL-terminated input
* (only a single NUL byte, will not work for charsets with multi-byte NULs)
* (if sourceLimit==NULL, see parameters)
* - terminate with a NUL on output
* (only a single NUL byte, not useful for charsets with multi-byte NULs),
* or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
* the target buffer
* - the pivot buffer can be provided internally;
* possible only for whole-string conversion, not streaming conversion;
* in this case, the caller will not be able to get details about where an
* error occurred
* (if pivotStart==NULL, see below)
*
* The function returns when one of the following is true:
* - the entire source text has been converted successfully to the target buffer
* - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
* - a conversion error occurred
* (other U_FAILURE(), see description of pErrorCode)
*
* Limitation compared to the direct use of
* ucnv_fromUnicode() and ucnv_toUnicode():
* ucnv_convertEx() does not provide offset information.
*
* Limitation compared to ucnv_fromUChars() and ucnv_toUChars():
* ucnv_convertEx() does not support preflighting directly.
*
* Sample code for converting a single string from
* one external charset to UTF-8, ignoring the location of errors:
*
* \code
* int32_t
* myToUTF8(UConverter *cnv,
* const char *s, int32_t length,
* char *u8, int32_t capacity,
* UErrorCode *pErrorCode) {
* UConverter *utf8Cnv;
* char *target;
*
* if(U_FAILURE(*pErrorCode)) {
* return 0;
* }
*
* utf8Cnv=myGetCachedUTF8Converter(pErrorCode);
* if(U_FAILURE(*pErrorCode)) {
* return 0;
* }
*
* if(length<0) {
* length=strlen(s);
* }
* target=u8;
* ucnv_convertEx(utf8Cnv, cnv,
* &target, u8+capacity,
* &s, s+length,
* NULL, NULL, NULL, NULL,
* TRUE, TRUE,
* pErrorCode);
*
* myReleaseCachedUTF8Converter(utf8Cnv);
*
* // return the output string length, but without preflighting
* return (int32_t)(target-u8);
* }
* \endcode
*
* @param targetCnv Output converter, used to convert from the UTF-16 pivot
* to the target using ucnv_fromUnicode().
* @param sourceCnv Input converter, used to convert from the source to
* the UTF-16 pivot using ucnv_toUnicode().
* @param target I/O parameter, same as for ucnv_fromUChars().
* Input: *target points to the beginning of the target buffer.
* Output: *target points to the first unit after the last char written.
* @param targetLimit Pointer to the first unit after the target buffer.
* @param source I/O parameter, same as for ucnv_toUChars().
* Input: *source points to the beginning of the source buffer.
* Output: *source points to the first unit after the last char read.
* @param sourceLimit Pointer to the first unit after the source buffer.
* @param pivotStart Pointer to the UTF-16 pivot buffer. If pivotStart==NULL,
* then an internal buffer is used and the other pivot
* arguments are ignored and can be NULL as well.
* @param pivotSource I/O parameter, same as source in ucnv_fromUChars() for
* conversion from the pivot buffer to the target buffer.
* @param pivotTarget I/O parameter, same as target in ucnv_toUChars() for
* conversion from the source buffer to the pivot buffer.
* It must be pivotStart<=*pivotSource<=*pivotTarget<=pivotLimit
* and pivotStart
*
* NOTE(review): this generated header is truncated at this point. The rest of
* the comment above, the declaration it documents (the text describes
* ucnv_convertEx()), and the beginning of the comment for ucnv_getAliases()
* are missing; restore them from the upstream ICU ucnv.h.
*/
/**
* NOTE(review): the beginning of this comment is missing (see the truncation
* note in the preceding comment). The surviving text follows; presumably it is
* the end of the description of the aliases parameter - confirm against the
* upstream ICU ucnv.h.
*
* ucnv_countAliases() string-pointers
* (const char *) that will be filled in.
* The strings themselves are owned by the library.
* @param pErrorCode result of operation
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode);
/**
* Return a new UEnumeration object for enumerating all the
* alias names for a given converter that are recognized by a standard.
* This method only enumerates the listed entries in the alias file.
* The convrtrs.txt file can be modified to change the results of
* this function.
* The first result in this list is the same result given by
* ucnv_getStandardName, which is the default alias for
* the specified standard name.
*
* @param convName original converter name
* @param standard name of the standard governing the names; MIME and IANA
* are such standards
* @param pErrorCode The error code
* @return A UEnumeration object for getting all aliases that are recognized
* by a standard. If any of the parameters are invalid, NULL
* is returned. The returned object must be closed with
* uenum_close when you are done with the object.
* @see ucnv_getStandardName
* @see uenum_close
* @see uenum_next
* @stable ICU 2.2
*/
U_STABLE UEnumeration * U_EXPORT2
ucnv_openStandardNames(const char *convName,
const char *standard,
UErrorCode *pErrorCode);
/**
* Gives the number of standards associated to converter names.
* @return number of standards
* @see ucnv_getStandard
* @stable ICU 2.0
*/
U_STABLE uint16_t U_EXPORT2
ucnv_countStandards(void);
/**
* Gives the name of the standard at given index of standard list.
* @param n index in standard list
* @param pErrorCode result of operation
* @return returns the name of the standard at given index. Owned by the library.
* @see ucnv_countStandards
* @stable ICU 2.0
*/
U_STABLE const char * U_EXPORT2
ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode);
/**
* Returns a standard name for a given converter name.
*
* Example alias table:
* conv alias1 { STANDARD1 } alias2 { STANDARD1* }
*
* Result of ucnv_getStandardName("conv", "STANDARD1") from example
* alias table:
* "alias2"
*
* @param name original converter name
* @param standard name of the standard governing the names; MIME and IANA
* are such standards
* @param pErrorCode result of operation
* @return returns the standard converter name;
* if a standard converter name cannot be determined,
* then NULL is returned. Owned by the library.
* @see ucnv_openStandardNames
* @see ucnv_getCanonicalName
* @stable ICU 2.0
*/
U_STABLE const char * U_EXPORT2
ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode);
/**
* This function will return the internal canonical converter name of the
* tagged alias. This is the opposite of ucnv_openStandardNames, which
* returns the tagged alias given the canonical name.
*
* Example alias table:
* conv alias1 { STANDARD1 } alias2 { STANDARD1* }
*
* Result of ucnv_getCanonicalName("alias1", "STANDARD1") from example
* alias table:
* "conv"
*
* @param alias alias name
* @param standard name of the standard governing the names; MIME and IANA
* are such standards
* @param pErrorCode The error code
* @return returns the canonical converter name;
* if a standard or alias name cannot be determined,
* then NULL is returned. The returned string is
* owned by the library.
* @see ucnv_getStandardName
* @stable ICU 2.4
*/
U_STABLE const char * U_EXPORT2
ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode);
/**
* Returns the current default converter name. If you want to open
* a default converter, you do not need to use this function.
* It is faster to pass a NULL argument to ucnv_open in order to open the
* default converter.
*
* If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function
* always returns "UTF-8".
*
* @return returns the current default converter name.
* Storage owned by the library
* @see ucnv_setDefaultName
* @stable ICU 2.0
*/
U_STABLE const char * U_EXPORT2
ucnv_getDefaultName(void);
#ifndef U_HIDE_SYSTEM_API
/**
* This function is not thread safe. DO NOT call this function when ANY ICU
* function is being used from more than one thread! This function sets the
* current default converter name. If this function needs to be called, it
* should be called during application initialization. Most of the time, the
* results from ucnv_getDefaultName() or ucnv_open with a NULL string argument
* are sufficient for your application.
*
* If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function
* does nothing.
*
* @param name the converter name to be the default (must be known by ICU).
* @see ucnv_getDefaultName
* @system
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucnv_setDefaultName(const char *name);
#endif /* U_HIDE_SYSTEM_API */
/**
* Fixes the backslash character mismapping. For example, in SJIS, the backslash
* character in the ASCII portion is also used to represent the yen currency sign.
* When mapping from Unicode character 0x005C, it's unclear whether to map the
* character back to yen or backslash in SJIS. This function will take the input
* buffer and replace all the yen sign characters with backslash. This is necessary
* when the user tries to open a file with the input buffer on Windows.
* This function will test the converter to see whether such mapping is
* required. You can sometimes avoid using this function by using the correct version
* of Shift-JIS.
*
* @param cnv The converter representing the target codepage.
* @param source the input buffer to be fixed; the replacement is made in this buffer
* @param sourceLen the length of the input buffer
* @see ucnv_isAmbiguous
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucnv_fixFileSeparator(const UConverter *cnv, UChar *source, int32_t sourceLen);
/**
* Determines if the converter contains ambiguous mappings of the same
* character or not.
* @param cnv the converter to be tested
* @return TRUE if the converter contains ambiguous mapping of the same
* character, FALSE otherwise.
* @see ucnv_fixFileSeparator
* @stable ICU 2.0
*/
U_STABLE UBool U_EXPORT2
ucnv_isAmbiguous(const UConverter *cnv);
/**
* Sets the converter to use fallback mappings or not.
* Regardless of this flag, the converter will always use
* fallbacks from Unicode Private Use code points, as well as
* reverse fallbacks (to Unicode).
* For details see ".ucm File Format"
* in the Conversion Data chapter of the ICU User Guide:
* http://www.icu-project.org/userguide/conversion-data.html#ucmformat
*
* @param cnv The converter to set the fallback mapping usage on.
* @param usesFallback TRUE if the converter should take advantage of the fallback
* mapping, FALSE otherwise.
* @see ucnv_usesFallback
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucnv_setFallback(UConverter *cnv, UBool usesFallback);
/**
* Determines if the converter uses fallback mappings or not.
* This flag has restrictions, see ucnv_setFallback().
*
* @param cnv The converter to be tested
* @return TRUE if the converter uses fallback, FALSE otherwise.
* @see ucnv_setFallback
* @stable ICU 2.0
*/
U_STABLE UBool U_EXPORT2
ucnv_usesFallback(const UConverter *cnv);
/**
* Detects Unicode signature byte sequences at the start of the byte stream
* and returns the charset name of the indicated Unicode charset.
* NULL is returned when no Unicode signature is recognized.
* The number of bytes in the signature is output as well.
*
* The caller can ucnv_open() a converter using the charset name.
* The first code unit (UChar) from the start of the stream will be U+FEFF
* (the Unicode BOM/signature character) and can usually be ignored.
*
* For most Unicode charsets it is also possible to ignore the indicated
* number of initial stream bytes and start converting after them.
* However, there are stateful Unicode charsets (UTF-7 and BOCU-1) for which
* this will not work. Therefore, it is best to ignore the first output UChar
* instead of the input signature bytes.
*
* Usage: * \snippet samples/ucnv/convsamp.cpp ucnv_detectUnicodeSignature * * @param source The source string in which the signature should be detected. * @param sourceLength Length of the input string, or -1 if terminated with a NUL byte. * @param signatureLength A pointer to int32_t to receive the number of bytes that make up the signature * of the detected UTF. 0 if not detected. * Can be a NULL pointer. * @param pErrorCode ICU error code in/out parameter. * Must fulfill U_SUCCESS before the function call. * @return The name of the encoding detected. NULL if encoding is not detected. * @stable ICU 2.4 */ U_STABLE const char* U_EXPORT2 ucnv_detectUnicodeSignature(const char* source, int32_t sourceLength, int32_t *signatureLength, UErrorCode *pErrorCode); /** * Returns the number of UChars held in the converter's internal state * because more input is needed for completing the conversion. This function is * useful for mapping semantics of ICU's converter interface to those of iconv, * and this information is not needed for normal conversion. * @param cnv The converter in which the input is held * @param status ICU error code in/out parameter. * Must fulfill U_SUCCESS before the function call. * @return The number of UChars in the state. -1 if an error is encountered. * @stable ICU 3.4 */ U_STABLE int32_t U_EXPORT2 ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status); /** * Returns the number of chars held in the converter's internal state * because more input is needed for completing the conversion. This function is * useful for mapping semantics of ICU's converter interface to those of iconv, * and this information is not needed for normal conversion. * @param cnv The converter in which the input is held as internal state * @param status ICU error code in/out parameter. * Must fulfill U_SUCCESS before the function call. * @return The number of chars in the state. -1 if an error is encountered. 
* @stable ICU 3.4 */ U_STABLE int32_t U_EXPORT2 ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status); /** * Returns whether or not the charset of the converter has a fixed number of bytes * per charset character. * An example of this are converters that are of the type UCNV_SBCS or UCNV_DBCS. * Another example is UTF-32 which is always 4 bytes per character. * A Unicode code point may be represented by more than one UTF-8 or UTF-16 code unit * but a UTF-32 converter encodes each code point with 4 bytes. * Note: This method is not intended to be used to determine whether the charset has a * fixed ratio of bytes to Unicode codes units for any particular Unicode encoding form. * FALSE is returned with the UErrorCode if error occurs or cnv is NULL. * @param cnv The converter to be tested * @param status ICU error code in/out paramter * @return TRUE if the converter is fixed-width * @stable ICU 4.8 */ U_STABLE UBool U_EXPORT2 ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status); #endif #endif /*_UCNV*/ // ucnv_cb.h /* ********************************************************************** * Copyright (C) 2000-2004, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * ucnv_cb.h: * External APIs for the ICU's codeset conversion library * Helena Shih * * Modification History: * * Date Name Description */ /** * \file * \brief C UConverter functions to aid the writers of callbacks * *
NULL.
* An Error will be returned if some required part of ICU data can not
* be loaded or initialized.
* The function returns immediately if the input error code indicates a
* failure, as usual.
*
* @stable ICU 2.6
*/
U_STABLE void U_EXPORT2
u_init(UErrorCode *status);
#ifndef U_HIDE_SYSTEM_API
/**
* Clean up the system resources, such as allocated memory or open files,
* used in all ICU libraries. This will free/delete all memory owned by the
* ICU libraries, and return them to their original load state. All open ICU
* items (collators, resource bundles, converters, etc.) must be closed before
* calling this function, otherwise ICU may not free its allocated memory
* (e.g. close your converters and resource bundles before calling this
* function). Generally, this function should be called once just before
* an application exits. For applications that dynamically load and unload
* the ICU libraries (relatively uncommon), u_cleanup() should be called
* just before the library unload.
* * u_cleanup() also clears any ICU heap functions, mutex functions or * trace functions that may have been set for the process. * This has the effect of restoring ICU to its initial condition, before * any of these override functions were installed. Refer to * u_setMemoryFunctions(), u_setMutexFunctions and * utrace_setFunctions(). If ICU is to be reinitialized after after * calling u_cleanup(), these runtime override functions will need to * be set up again if they are still required. *
* u_cleanup() is not thread safe. All other threads should stop using ICU * before calling this function. *
* Any open ICU items will be left in an undefined state by u_cleanup(), * and any subsequent attempt to use such an item will give unpredictable * results. *
* After calling u_cleanup(), an application may continue to use ICU by * calling u_init(). An application must invoke u_init() first from one single * thread before allowing other threads call u_init(). All threads existing * at the time of the first thread's call to u_init() must also call * u_init() themselves before continuing with other ICU operations. *
* The use of u_cleanup() just before an application terminates is optional, * but it should be called only once for performance reasons. The primary * benefit is to eliminate reports of memory or resource leaks originating * in ICU code from the results generated by heap analysis tools. *
* Use this function with great care! *
* * @stable ICU 2.0 * @system */ U_STABLE void U_EXPORT2 u_cleanup(void); /** * Pointer type for a user supplied memory allocation function. * @param context user supplied value, obtained from from u_setMemoryFunctions(). * @param size The number of bytes to be allocated * @return Pointer to the newly allocated memory, or NULL if the allocation failed. * @stable ICU 2.8 * @system */ typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size); /** * Pointer type for a user supplied memory re-allocation function. * @param context user supplied value, obtained from from u_setMemoryFunctions(). * @param size The number of bytes to be allocated * @return Pointer to the newly allocated memory, or NULL if the allocation failed. * @stable ICU 2.8 * @system */ typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t size); /** * Pointer type for a user supplied memory free function. Behavior should be * similar the standard C library free(). * @param context user supplied value, obtained from from u_setMemoryFunctions(). * @param mem Pointer to the memory block to be resized * @param size The new size for the block * @return Pointer to the resized memory block, or NULL if the resizing failed. * @stable ICU 2.8 * @system */ typedef void U_CALLCONV UMemFreeFn (const void *context, void *mem); /** * Set the functions that ICU will use for memory allocation. * Use of this function is optional; by default (without this function), ICU will * use the standard C library malloc() and free() functions. * This function can only be used when ICU is in an initial, unused state, before * u_init() has been called. * @param context This pointer value will be saved, and then (later) passed as * a parameter to the memory functions each time they * are called. * @param a Pointer to a user-supplied malloc function. * @param r Pointer to a user-supplied realloc function. * @param f Pointer to a user-supplied free function. * @param status Receives error values. 
* @stable ICU 2.8 * @system */ U_STABLE void U_EXPORT2 u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f, UErrorCode *status); #endif /* U_HIDE_SYSTEM_API */ #endif // uchar.h /* ********************************************************************** * Copyright (C) 1997-2016, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * * File UCHAR.H * * Modification History: * * Date Name Description * 04/02/97 aliu Creation. * 03/29/99 helena Updated for C APIs. * 4/15/99 Madhu Updated for C Implementation and Javadoc * 5/20/99 Madhu Added the function u_getVersion() * 8/19/1999 srl Upgraded scripts to Unicode 3.0 * 8/27/1999 schererm UCharDirection constants: U_... * 11/11/1999 weiv added u_isalnum(), cleaned comments * 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion(). ****************************************************************************** */ #ifndef UCHAR_H #define UCHAR_H U_CDECL_BEGIN /*==========================================================================*/ /* Unicode version number */ /*==========================================================================*/ /** * Unicode version number, default for the current ICU version. * The actual Unicode Character Database (UCD) data is stored in uprops.dat * and may be generated from UCD files from a different Unicode version. * Call u_getUnicodeVersion to get the actual Unicode version of the data. * * @see u_getUnicodeVersion * @stable ICU 2.0 */ #define U_UNICODE_VERSION "8.0" /** * \file * \brief C API: Unicode Properties * * This C API provides low-level access to the Unicode Character Database. * In addition to raw property values, some convenience functions calculate * derived properties, for example for Java-style programming. * * Unicode assigns each code point (not just assigned character) values for * many properties. 
* Most of them are simple boolean flags, or constants from a small enumerated list. * For some properties, values are strings or other relatively more complex types. * * For more information see * "About the Unicode Character Database" (http://www.unicode.org/ucd/) * and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html). * * Many functions are designed to match java.lang.Character functions. * See the individual function documentation, * and see the JDK 1.4 java.lang.Character documentation * at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html * * There are also functions that provide easy migration from C/POSIX functions * like isblank(). Their use is generally discouraged because the C/POSIX * standards do not define their semantics beyond the ASCII range, which means * that different implementations exhibit very different behavior. * Instead, Unicode properties should be used directly. * * There are also only a few, broad C/POSIX character classes, and they tend * to be used for conflicting purposes. For example, the "isalpha()" class * is sometimes used to determine word boundaries, while a more sophisticated * approach would at least distinguish initial letters from continuation * characters (the latter including combining marks). * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) * Another example: There is no "istitle()" class for titlecase characters. * * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. * ICU implements them according to the Standard Recommendations in * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 
* * API access for C/POSIX character classes is as follows: * - alpha: u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC) * - lower: u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE) * - upper: u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE) * - punct: u_ispunct(c) * - digit: u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER * - xdigit: u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT) * - alnum: u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM) * - space: u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE) * - blank: u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK) * - cntrl: u_charType(c)==U_CONTROL_CHAR * - graph: u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH) * - print: u_hasBinaryProperty(c, UCHAR_POSIX_PRINT) * * Note: Some of the u_isxyz() functions in uchar.h predate, and do not match, * the Standard Recommendations in UTS #18. Instead, they match Java * functions according to their API documentation. * * \htmlonly * The C/POSIX character classes are also available in UnicodeSet patterns, * using patterns like [:graph:] or \p{graph}. * \endhtmlonly * * Note: There are several ICU whitespace functions. * Comparison: * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; * most of general categories "Z" (separators) + most whitespace ISO controls * (including no-break spaces, but excluding IS1..IS4 and ZWSP) * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces) * - u_isspace: Z + whitespace ISO controls (including no-break spaces) * - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP */ /** * Constants. */ /** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */ #define UCHAR_MIN_VALUE 0 /** * The highest Unicode code point value (scalar value) according to * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up). 
* For a single character, UChar32 is a simple type that can hold any code point value. * * @see UChar32 * @stable ICU 2.0 */ #define UCHAR_MAX_VALUE 0x10ffff /** * Get a single-bit bit set (a flag) from a bit number 0..31. * @stable ICU 2.1 */ #define U_MASK(x) ((uint32_t)1<<(x)) /** * Selection constants for Unicode properties. * These constants are used in functions like u_hasBinaryProperty to select * one of the Unicode properties. * * The properties APIs are intended to reflect Unicode properties as defined * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). * For details about the properties see http://www.unicode.org/ucd/ . * For names of Unicode properties see the UCD file PropertyAliases.txt. * * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2, * then properties marked with "new in Unicode 3.2" are not or not fully available. * Check u_getUnicodeVersion to be sure. * * @see u_hasBinaryProperty * @see u_getIntPropertyValue * @see u_getUnicodeVersion * @stable ICU 2.1 */ typedef enum UProperty { /* * Note: UProperty constants are parsed by preparseucd.py. * It matches lines like * UCHAR_0<=code<=0x10ffff.
* @param nameChoice Selector for which name to get.
* @param buffer Destination address for copying the name.
* The name will always be zero-terminated.
* If there is no name, then the buffer will be set to the empty string.
* @param bufferLength ==sizeof(buffer)
* @param pErrorCode Pointer to a UErrorCode variable;
* check for U_SUCCESS() after u_charName()
* returns.
* @return The length of the name, or 0 if there is no name for this character.
* If the bufferLength is less than or equal to the length, then the buffer
* contains the truncated name and the returned length indicates the full
* length of the name.
* The length does not include the zero-termination.
*
* @see UCharNameChoice
* @see u_charFromName
* @see u_enumCharNames
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
u_charName(UChar32 code, UCharNameChoice nameChoice,
char *buffer, int32_t bufferLength,
UErrorCode *pErrorCode);
/**
* Find a Unicode character by its name and return its code point value.
* The name is matched exactly and completely.
* If the name does not correspond to a code point, pErrorCode
* is set to U_INVALID_CHAR_FOUND.
* A Unicode 1.0 name is matched only if it differs from the modern name.
* Unicode names are all uppercase. Extended names are lowercase followed
* by an uppercase hexadecimal number, and within angle brackets.
*
* Because the return value is undefined when no character matches, callers
* should test pErrorCode rather than the returned code point to detect
* a failed lookup.
*
* @param nameChoice Selector for which name to match.
* @param name The name to match.
* @param pErrorCode Pointer to a UErrorCode variable; set to
* U_INVALID_CHAR_FOUND if the name does not correspond
* to a code point.
* @return The Unicode value of the code point with the given name,
* or an undefined value if there is no such code point.
*
* @see UCharNameChoice
* @see u_charName
* @see u_enumCharNames
* @stable ICU 1.7
*/
U_STABLE UChar32 U_EXPORT2
u_charFromName(UCharNameChoice nameChoice,
const char *name,
UErrorCode *pErrorCode);
/**
* Type of a callback function for u_enumCharNames() that gets called
* for each Unicode character with the code point value and
* the character name.
* If such a function returns FALSE, then the enumeration is stopped.
*
* Note that this typedef names a function type, not a pointer-to-function
* type; u_enumCharNames() takes its callback as a UEnumCharNamesFn *.
*
* @param context The context pointer that was passed to u_enumCharNames().
* @param code The Unicode code point for the character with this name.
* @param nameChoice Selector for which kind of names is enumerated.
* @param name The character's name, zero-terminated.
* @param length The length of the name.
* @return TRUE if the enumeration should continue, FALSE to stop it.
*
* @see UCharNameChoice
* @see u_enumCharNames
* @stable ICU 1.7
*/
typedef UBool U_CALLCONV UEnumCharNamesFn(void *context,
UChar32 code,
UCharNameChoice nameChoice,
const char *name,
int32_t length);
/**
* Enumerate all assigned Unicode characters between the start and limit
* code points (start inclusive, limit exclusive) and call a function
* for each, passing the code point value and the character name.
* For Unicode 1.0 names, only those are enumerated that differ from the
* modern names.
*
* @param start The first code point in the enumeration range.
* @param limit One more than the last code point in the enumeration range
* (the first one after the range).
* @param fn The function that is to be called for each character name.
* The enumeration stops early if it returns FALSE
* (see UEnumCharNamesFn).
* @param context An arbitrary pointer that is passed to the function
* unchanged as its context argument.
* @param nameChoice Selector for which kind of names to enumerate.
* @param pErrorCode Pointer to a UErrorCode variable
*
* @see UCharNameChoice
* @see UEnumCharNamesFn
* @see u_charName
* @see u_charFromName
* @stable ICU 1.7
*/
U_STABLE void U_EXPORT2
u_enumCharNames(UChar32 start, UChar32 limit,
UEnumCharNamesFn *fn,
void *context,
UCharNameChoice nameChoice,
UErrorCode *pErrorCode);
/**
* Return the Unicode name for a given property, as given in the
* Unicode database file PropertyAliases.txt.
*
* In addition, this function maps the property
* UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
* "General_Category_Mask". These names are not in
* PropertyAliases.txt.
*
* @param property UProperty selector other than UCHAR_INVALID_CODE.
* If out of range, NULL is returned.
*
* @param nameChoice selector for which name to get. If out of range,
* NULL is returned. All properties have a long name. Most
* have a short name, but some do not. Unicode allows for
* additional names; if present these will be returned by
* U_LONG_PROPERTY_NAME + i, where i=1, 2,...
*
* @return a pointer to the name, or NULL if either the
* property or the nameChoice is out of range. If a given
* nameChoice returns NULL, then all larger values of
* nameChoice will return NULL, with one exception: if NULL is
* returned for U_SHORT_PROPERTY_NAME, then
* U_LONG_PROPERTY_NAME (and higher) may still return a
* non-NULL value. The returned pointer is valid until
* u_cleanup() is called. The string is returned as
* const char * and must not be modified by the caller.
*
* @see UProperty
* @see UPropertyNameChoice
* @see u_getPropertyEnum
* @stable ICU 2.4
*/
U_STABLE const char* U_EXPORT2
u_getPropertyName(UProperty property,
UPropertyNameChoice nameChoice);
/**
* Return the UProperty enum for a given property name, as specified
* in the Unicode database file PropertyAliases.txt. Short, long, and
* any other variants are recognized.
*
* In addition, this function maps the synthetic names "gcm" /
* "General_Category_Mask" to the property
* UCHAR_GENERAL_CATEGORY_MASK. These names are not in
* PropertyAliases.txt.
*
* This is the reverse mapping of u_getPropertyName().
*
* @param alias the property name to be matched. The name is compared
* using "loose matching" as described in PropertyAliases.txt.
*
* @return a UProperty enum, or UCHAR_INVALID_CODE if the given name
* does not match any property.
*
* @see UProperty
* @see u_getPropertyName
* @stable ICU 2.4
*/
U_STABLE UProperty U_EXPORT2
u_getPropertyEnum(const char* alias);
/*
* NOTE(review): the text in this comment is an orphaned fragment. It looks
* like the start of the documentation for a property-value-name lookup
* followed by the tail of the documentation for a digit-value function
* (compare "@see u_digit" on u_forDigit() below). The text in between,
* including the declarations these comments documented, appears to have
* been dropped when this header was generated. Restore from the upstream
* ICU uchar.h and confirm which declarations belong here.
*
* --- fragment 1 ---
* Return the Unicode name for a given property value, as given in the
* Unicode database file PropertyValueAliases.txt.
*
* Note: Some of the names in PropertyValueAliases.txt can only be
* retrieved using UCHAR_GENERAL_CATEGORY_MASK, not
* UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" /
* "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
* / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
*
* @param property UProperty selector constant.
* Must be UCHAR_BINARY_START<=which [... text lost ...]
*
* --- fragment 2 ---
* [... text lost ...] c is not a valid digit in the specified
* radix, -1 is returned. A character is a valid digit
* if at least one of the following is true:
* [... text lost ...] 'A' through 'Z'.
* In this case the value is c-'A'+10.
* [... text lost ...] 'a' through 'z'.
* In this case the value is ch-'a'+10.
*/
/**
* Determine the character representation for a specific digit in the
* specified radix. If the value of radix is not a
* valid radix, or the value of digit is not a valid
* digit in the specified radix, the null character
* (U+0000) is returned.
*
* The radix argument is valid if it is greater than or
* equal to 2 and less than or equal to 36.
* The digit argument is valid if
* 0 <= digit < radix.
*
* If the digit is less than 10, then
* '0' + digit is returned. Otherwise, the value
* 'a' + digit - 10 is returned.
*
* Same as java.lang.Character.forDigit().
*
* @param digit the number to convert to a character.
* @param radix the radix.
* @return the char representation of the specified digit
* in the specified radix.
*
* @see u_digit
* @see u_charDigitValue
* @see u_isdigit
* @stable ICU 2.0
*/
U_STABLE UChar32 U_EXPORT2
u_forDigit(int32_t digit, int8_t radix);
/**
* Get the "age" of the code point.
* The "age" is the Unicode version when the code point was first
* designated (as a non-character or for Private Use)
* or assigned a character.
* This can be useful to avoid emitting code points to receiving
* processes that do not accept newer characters.
* The data is from the UCD file DerivedAge.txt.
*
* The function has no return value; the result is delivered through
* versionArray.
*
* @param c The code point.
* @param versionArray The Unicode version number array, to be filled in.
* This is an output parameter supplied by the caller.
*
* @see u_getUnicodeVersion
* @stable ICU 2.1
*/
U_STABLE void U_EXPORT2
u_charAge(UChar32 c, UVersionInfo versionArray);
/**
* Gets the Unicode version information.
* The version array is filled in with the version information
* for the Unicode standard that is currently used by ICU.
* For example, Unicode version 3.1.1 is represented as an array with
* the values { 3, 1, 1, 0 }.
*
* The function has no return value; the result is delivered through
* versionArray.
*
* @param versionArray an output array, supplied by the caller, that will
* be filled in with the Unicode version number
* @see u_charAge
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
u_getUnicodeVersion(UVersionInfo versionArray);
/* Declared only when normalization support is not configured out. */
#if !UCONFIG_NO_NORMALIZATION
/**
* Get the FC_NFKC_Closure property string for a character.
* See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure"
* or for "FNC": http://www.unicode.org/reports/tr15/
*
* @param c The character (code point) for which to get the FC_NFKC_Closure string.
* It must be 0<=c<=0x10ffff.
* @param dest Destination address for copying the string.
* The string will be zero-terminated if possible.
* If there is no FC_NFKC_Closure string,
* then the buffer will be set to the empty string.
* @param destCapacity The capacity of the dest buffer.
* NOTE(review): the original text said "==sizeof(dest)", but dest is a
* UChar buffer, so this is presumably a count of UChars rather
* than bytes - confirm against the implementation.
* @param pErrorCode Pointer to a UErrorCode variable.
* @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character.
* If the destCapacity is less than or equal to the length, then the buffer
* contains the truncated string and the returned length indicates the full
* length of the string.
* The length does not include the zero-termination.
*
* @stable ICU 2.2
*/
U_STABLE int32_t U_EXPORT2
u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);
#endif
U_CDECL_END
#endif /*_UCHAR*/
/*eof*/
// utext.h
/*
*******************************************************************************
*
* Copyright (C) 2004-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: utext.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2004oct06
* created by: Markus W. Scherer
*/
#ifndef __UTEXT_H__
#define __UTEXT_H__
/**
* \file
* \brief C API: Abstract Unicode Text API
*
* The Text Access API provides a means to allow text that is stored in alternative
* formats to work with ICU services. ICU normally operates on text that is
* stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type
* UnicodeString for C++ APIs.
*
* ICU Text Access allows other formats, such as UTF-8 or non-contiguous
* UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services.
*
* There are three general classes of usage for UText:
*
* Application Level Use. This is the simplest usage - applications would
* use one of the utext_open() functions on their input text, and pass
* the resulting UText to the desired ICU service.
*
* Second is usage in ICU Services, such as break iteration, that will need to
* operate on input presented to them as a UText. These implementations
* will need to use the iteration and related UText functions to gain
* access to the actual text.
*
* The third class of UText users are "text providers." These are the
* UText implementations for the various text storage formats. An application
* or system with a unique text storage format can implement a set of
* UText provider functions for that format, which will then allow
* ICU services to operate on that format.
*
*
* Iterating over text
*
* Here is sample code for a forward iteration over the contents of a UText
*
* \code
* UChar32 c;
* UText *ut = whatever();
*
* for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) {
* // do whatever with the codepoint c here.
* }
* \endcode
*
* And here is similar code to iterate in the reverse direction, from the end
* of the text towards the beginning.
*
* \code
* UChar32 c;
* UText *ut = whatever();
* int64_t textLength = utext_nativeLength(ut);
* for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) {
* // do whatever with the codepoint c here.
* }
* \endcode
*
* Characters and Indexing
*
* Indexing into text by UText functions is nearly always in terms of the native
* indexing of the underlying text storage. The storage format could be UTF-8
* or UTF-32, for example. When coding to the UText access API, no assumptions
* can be made regarding the size of characters, or how far an index
* may move when iterating between characters.
*
* All indices supplied to UText functions are pinned to the length of the
* text. An out-of-bounds index is not considered to be an error, but is
* adjusted to be in the range 0 <= index <= length of input text.
*
*
* When an index position is returned from a UText function, it will be
* a native index to the underlying text. In the case of multi-unit characters,
* it will always refer to the first position of the character,
* never to the interior. This is essentially the same thing as saying that
* a returned index will always point to a boundary between characters.
*
* When a native index is supplied to a UText function, all indices that
* refer to any part of a multi-unit character representation are considered
* to be equivalent. In the case of multi-unit characters, an incoming index
* will be logically normalized to refer to the start of the character.
*
* It is possible to test whether a native index is on a code point boundary
* by doing a utext_setNativeIndex() followed by a utext_getNativeIndex().
* If the index is returned unchanged, it was on a code point boundary. If
* an adjusted index is returned, the original index referred to the
* interior of a character.
*
* Conventions for calling UText functions
*
* Most UText access functions have as their first parameter a (UText *) pointer,
* which specifies the UText to be used. Unless otherwise noted, the
* pointer must refer to a valid, open UText. Attempting to
* use a closed UText or passing a NULL pointer is a programming error and
* will produce undefined results or NULL pointer exceptions.
*
* The UText_Open family of functions can either open an existing (closed)
* UText, or heap allocate a new UText. Here is sample code for creating
* a stack-allocated UText.
*
* \code
* char *s = whatever(); // A utf-8 string
* UErrorCode status = U_ZERO_ERROR;
* UText ut = UTEXT_INITIALIZER;
* utext_openUTF8(&ut, s, -1, &status);
* if (U_FAILURE(status)) {
* // error handling
* } else {
* // work with the UText
* }
* \endcode
*
* Any existing UText passed to an open function _must_ have been initialized,
* either by the UTEXT_INITIALIZER, or by having been originally heap-allocated
* by an open function. Passing NULL will cause the open function to
* heap-allocate and fully initialize a new UText.
*
*/
U_CDECL_BEGIN
/*
* Forward declaration of struct UText. Only the struct tag is introduced
* here; the struct members are not defined at this point, so the
* declarations below can refer to a UText through pointers.
*/
struct UText;
typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */
/***************************************************************************************
*
* C Functions for creating UText wrappers around various kinds of text strings.
*
****************************************************************************************/
/**
* Close function for UText instances.
* Cleans up, releases any resources being held by an open UText.
*
* If the UText was originally allocated by one of the utext_open functions, * the storage associated with the utext will also be freed. * If the UText storage originated with the application, as it would with * a local or static instance, the storage will not be deleted. * * An open UText can be reset to refer to new string by using one of the utext_open() * functions without first closing the UText. * * @param ut The UText to be closed. * @return NULL if the UText struct was deleted by the close. If the UText struct * was originally provided by the caller to the open function, it is * returned by this function, and may be safely used again in * a subsequent utext_open. * * @stable ICU 3.4 */ U_STABLE UText * U_EXPORT2 utext_close(UText *ut); /** * Open a read-only UText implementation for UTF-8 strings. * * \htmlonly * Any invalid UTF-8 in the input will be handled in this way: * a sequence of bytes that has the form of a truncated, but otherwise valid, * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD. * Any other illegal bytes will each be replaced by a \uFFFD. * \endhtmlonly * * @param ut Pointer to a UText struct. If NULL, a new UText will be created. * If non-NULL, must refer to an initialized UText struct, which will then * be reset to reference the specified UTF-8 string. * @param s A UTF-8 string. Must not be NULL. * @param length The length of the UTF-8 string in bytes, or -1 if the string is * zero terminated. * @param status Errors are returned here. * @return A pointer to the UText. If a pre-allocated UText was provided, it * will always be used and returned. * @stable ICU 3.4 */ U_STABLE UText * U_EXPORT2 utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status); /** * Open a read-only UText for UChar * string. * * @param ut Pointer to a UText struct. If NULL, a new UText will be created. 
* If non-NULL, must refer to an initialized UText struct, which will then * be reset to reference the specified UChar string. * @param s A UChar (UTF-16) string * @param length The number of UChars in the input string, or -1 if the string is * zero terminated. * @param status Errors are returned here. * @return A pointer to the UText. If a pre-allocated UText was provided, it * will always be used and returned. * @stable ICU 3.4 */ U_STABLE UText * U_EXPORT2 utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status); /** * Clone a UText. This is much like opening a UText where the source text is itself * another UText. * * A deep clone will copy both the UText data structures and the underlying text. * The original and cloned UText will operate completely independently; modifications * made to the text in one will not affect the other. Text providers are not * required to support deep clones. The user of clone() must check the status return * and be prepared to handle failures. * * The standard UText implementations for UTF8, UChar *, UnicodeString and * Replaceable all support deep cloning. * * The UText returned from a deep clone will be writable, assuming that the text * provider is able to support writing, even if the source UText had been made * non-writable by means of UText_freeze(). * * A shallow clone replicates only the UText data structures; it does not make * a copy of the underlying text. Shallow clones can be used as an efficient way to * have multiple iterators active in a single text string that is not being * modified. * * A shallow clone operation will not fail, barring truly exceptional conditions such * as memory allocation failures. * * Shallow UText clones should be avoided if the UText functions that modify the * text are expected to be used, either on the original or the cloned UText. * Any such modifications can cause unpredictable behavior. 
Read Only * shallow clones provide some protection against errors of this type by * disabling text modification via the cloned UText. * * A shallow clone made with the readOnly parameter == FALSE will preserve the * utext_isWritable() state of the source object. Note, however, that * write operations must be avoided while more than one UText exists that refer * to the same underlying text. * * A UText and its clone may be safely concurrently accessed by separate threads. * This is true for read access only with shallow clones, and for both read and * write access with deep clones. * It is the responsibility of the Text Provider to ensure that this thread safety * constraint is met. * * @param dest A UText struct to be filled in with the result of the clone operation, * or NULL if the clone function should heap-allocate a new UText struct. * If non-NULL, must refer to an already existing UText, which will then * be reset to become the clone. * @param src The UText to be cloned. * @param deep TRUE to request a deep clone, FALSE for a shallow clone. * @param readOnly TRUE to request that the cloned UText have read only access to the * underlying text. * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR * will be returned if the text provider is unable to clone the * original text. * @return The newly created clone, or NULL if the clone operation failed. * @stable ICU 3.4 */ U_STABLE UText * U_EXPORT2 utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status); /** * Compare two UText objects for equality. * UTexts are equal if they are iterating over the same text, and * have the same iteration position within the text. * If either or both of the parameters are NULL, the comparison is FALSE. * * @param a The first of the two UTexts to compare. * @param b The other UText to be compared. * @return TRUE if the two UTexts are equal. 
* @stable ICU 3.6 */ U_STABLE UBool U_EXPORT2 utext_equals(const UText *a, const UText *b); /***************************************************************************** * * Functions to work with the text represeted by a UText wrapper * *****************************************************************************/ /** * Get the length of the text. Depending on the characteristics * of the underlying text representation, this may be expensive. * @see utext_isLengthExpensive() * * * @param ut the text to be accessed. * @return the length of the text, expressed in native units. * * @stable ICU 3.4 */ U_STABLE int64_t U_EXPORT2 utext_nativeLength(UText *ut); /** * Return TRUE if calculating the length of the text could be expensive. * Finding the length of NUL terminated strings is considered to be expensive. * * Note that the value of this function may change * as the result of other operations on a UText. * Once the length of a string has been discovered, it will no longer * be expensive to report it. * * @param ut the text to be accessed. * @return TRUE if determining the length of the text could be time consuming. * @stable ICU 3.4 */ U_STABLE UBool U_EXPORT2 utext_isLengthExpensive(const UText *ut); /** * Returns the code point at the requested index, * or U_SENTINEL (-1) if it is out of bounds. * * If the specified index points to the interior of a multi-unit * character - one of the trail bytes of a UTF-8 sequence, for example - * the complete code point will be returned. * * The iteration position will be set to the start of the returned code point. * * This function is roughly equivalent to the the sequence * utext_setNativeIndex(index); * utext_current32(); * (There is a subtle difference if the index is out of bounds by being less than zero - * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current() * will return the char at zero. utext_char32At(negative index), on the other hand, will * return the U_SENTINEL value of -1.) 
* * @param ut the text to be accessed * @param nativeIndex the native index of the character to be accessed. If the index points * to other than the first unit of a multi-unit character, it will be adjusted * to the start of the character. * @return the code point at the specified index. * @stable ICU 3.4 */ U_STABLE UChar32 U_EXPORT2 utext_char32At(UText *ut, int64_t nativeIndex); /** * * Get the code point at the current iteration position, * or U_SENTINEL (-1) if the iteration has reached the end of * the input text. * * @param ut the text to be accessed. * @return the Unicode code point at the current iterator position. * @stable ICU 3.4 */ U_STABLE UChar32 U_EXPORT2 utext_current32(UText *ut); /** * Get the code point at the current iteration position of the UText, and * advance the position to the first index following the character. * * If the position is at the end of the text (the index following * the last character, which is also the length of the text), * return U_SENTINEL (-1) and do not advance the index. * * This is a post-increment operation. * * An inline macro version of this function, UTEXT_NEXT32(), * is available for performance critical use. * * @param ut the text to be accessed. * @return the Unicode code point at the iteration position. * @see UTEXT_NEXT32 * @stable ICU 3.4 */ U_STABLE UChar32 U_EXPORT2 utext_next32(UText *ut); /** * Move the iterator position to the character (code point) whose * index precedes the current position, and return that character. * This is a pre-decrement operation. * * If the initial position is at the start of the text (index of 0) * return U_SENTINEL (-1), and leave the position unchanged. * * An inline macro version of this function, UTEXT_PREVIOUS32(), * is available for performance critical use. * * @param ut the text to be accessed. * @return the previous UChar32 code point, or U_SENTINEL (-1) * if the iteration has reached the start of the text. 
* @see UTEXT_PREVIOUS32 * @stable ICU 3.4 */ U_STABLE UChar32 U_EXPORT2 utext_previous32(UText *ut); /** * Set the iteration index and return the code point at that index. * Leave the iteration index at the start of the following code point. * * This function is the most efficient and convenient way to * begin a forward iteration. The results are identical to the those * from the sequence * \code * utext_setIndex(); * utext_next32(); * \endcode * * @param ut the text to be accessed. * @param nativeIndex Iteration index, in the native units of the text provider. * @return Code point which starts at or before index, * or U_SENTINEL (-1) if it is out of bounds. * @stable ICU 3.4 */ U_STABLE UChar32 U_EXPORT2 utext_next32From(UText *ut, int64_t nativeIndex); /** * Set the iteration index, and return the code point preceding the * one specified by the initial index. Leave the iteration position * at the start of the returned code point. * * This function is the most efficient and convenient way to * begin a backwards iteration. * * @param ut the text to be accessed. * @param nativeIndex Iteration index in the native units of the text provider. * @return Code point preceding the one at the initial index, * or U_SENTINEL (-1) if it is out of bounds. * * @stable ICU 3.4 */ U_STABLE UChar32 U_EXPORT2 utext_previous32From(UText *ut, int64_t nativeIndex); /** * Get the current iterator position, which can range from 0 to * the length of the text. * The position is a native index into the input text, in whatever format it * may have (possibly UTF-8 for example), and may not always be the same as * the corresponding UChar (UTF-16) index. * The returned position will always be aligned to a code point boundary. * * @param ut the text to be accessed. * @return the current index position, in the native units of the text provider. 
* @stable ICU 3.4 */ U_STABLE int64_t U_EXPORT2 utext_getNativeIndex(const UText *ut); /** * Set the current iteration position to the nearest code point * boundary at or preceding the specified index. * The index is in the native units of the original input text. * If the index is out of range, it will be pinned to be within * the range of the input text. *
* It will usually be more efficient to begin an iteration * using the functions utext_next32From() or utext_previous32From() * rather than setIndex(). *
* Moving the index position to an adjacent character is best done * with utext_next32(), utext_previous32() or utext_moveIndex32(). * Attempting to do direct arithmetic on the index position is * complicated by the fact that the size (in native units) of a * character depends on the underlying representation of the character * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not * easily knowable. * * @param ut the text to be accessed. * @param nativeIndex the native unit index of the new iteration position. * @stable ICU 3.4 */ U_STABLE void U_EXPORT2 utext_setNativeIndex(UText *ut, int64_t nativeIndex); /** * Move the iterator postion by delta code points. The number of code points * is a signed number; a negative delta will move the iterator backwards, * towards the start of the text. *
* The index is moved by delta code points
* forward or backward, but no further backward than to 0 and
* no further forward than to utext_nativeLength().
* The resulting index value will be in between 0 and length, inclusive.
*
* @param ut the text to be accessed.
* @param delta the signed number of code points to move the iteration position.
* @return TRUE if the position could be moved the requested number of positions while
* staying within the range [0 - text length].
* @stable ICU 3.4
*/
U_STABLE UBool U_EXPORT2
utext_moveIndex32(UText *ut, int32_t delta);
/**
* Get the native index of the character preceding the current position.
* If the iteration position is already at the start of the text, zero
* is returned.
* The value returned is the same as that obtained from the following sequence,
* but without the side effect of changing the iteration position.
*
* \code
* UText *ut = whatever;
* ...
* utext_previous32(ut);
* utext_getNativeIndex(ut);
* \endcode
*
* This function is most useful during forwards iteration, where it will get the
* native index of the character most recently returned from utext_next32().
*
* @param ut the text to be accessed
* @return the native index of the character preceding the current index position,
* or zero if the current position is at the start of the text.
* @stable ICU 3.6
*/
U_STABLE int64_t U_EXPORT2
utext_getPreviousNativeIndex(UText *ut);
/**
*
* Extract text from a UText into a UChar buffer. The range of text to be extracted
* is specified in the native indices of the UText provider. These may not necessarily
* be UTF-16 indices.
*
* The size (number of 16 bit UChars) of the data to be extracted is returned. The * full number of UChars is returned, even when the extracted text is truncated * because the specified buffer size is too small. *
* The extracted string will (if you are a user) / must (if you are a text provider) * be NUL-terminated if there is sufficient space in the destination buffer. This * terminating NUL is not included in the returned length. *
* The iteration index is left at the position following the last extracted character. * * @param ut the UText from which to extract data. * @param nativeStart the native index of the first character to extract.\ * If the specified index is out of range, * it will be pinned to to be within 0 <= index <= textLength * @param nativeLimit the native string index of the position following the last * character to extract. If the specified index is out of range, * it will be pinned to to be within 0 <= index <= textLength. * nativeLimit must be >= nativeStart. * @param dest the UChar (UTF-16) buffer into which the extracted text is placed * @param destCapacity The size, in UChars, of the destination buffer. May be zero * for precomputing the required size. * @param status receives any error status. * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the * buffer was too small. Returns number of UChars for preflighting. * @return Number of UChars in the data to be extracted. Does not include a trailing NUL. * * @stable ICU 3.4 */ U_STABLE int32_t U_EXPORT2 utext_extract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status); /************************************************************************************ * * #define inline versions of selected performance-critical text access functions * Caution: do not use auto increment++ or decrement-- expressions * as parameters to these macros. * * For most use, where there is no extreme performance constraint, the * normal, non-inline functions are a better choice. The resulting code * will be smaller, and, if the need ever arises, easier to debug. * * These are implemented as #defines rather than real functions * because there is no fully portable way to do inline functions in plain C. * ************************************************************************************/ /** * inline version of utext_next32(), for performance-critical situations. 
* * Get the code point at the current iteration position of the UText, and * advance the position to the first index following the character. * This is a post-increment operation. * Returns U_SENTINEL (-1) if the position is at the end of the * text. * * @stable ICU 3.4 */ #define UTEXT_NEXT32(ut) \ ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut)) /** * inline version of utext_previous32(), for performance-critical situations. * * Move the iterator position to the character (code point) whose * index precedes the current position, and return that character. * This is a pre-decrement operation. * Returns U_SENTINEL (-1) if the position is at the start of the text. * * @stable ICU 3.4 */ #define UTEXT_PREVIOUS32(ut) \ ((ut)->chunkOffset > 0 && \ (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \ (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut)) /** * inline version of utext_getNativeIndex(), for performance-critical situations. * * Get the current iterator position, which can range from 0 to * the length of the text. * The position is a native index into the input text, in whatever format it * may have (possibly UTF-8 for example), and may not always be the same as * the corresponding UChar (UTF-16) index. * The returned position will always be aligned to a code point boundary. * * @stable ICU 3.6 */ #define UTEXT_GETNATIVEINDEX(ut) \ ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \ (ut)->chunkNativeStart+(ut)->chunkOffset : \ (ut)->pFuncs->mapOffsetToNative(ut)) /** * inline version of utext_setNativeIndex(), for performance-critical situations. * * Set the current iteration position to the nearest code point * boundary at or preceding the specified index. * The index is in the native units of the original input text. * If the index is out of range, it will be pinned to be within * the range of the input text. 
* * @stable ICU 3.8 */ #define UTEXT_SETNATIVEINDEX(ut, ix) \ { int64_t __offset = (ix) - (ut)->chunkNativeStart; \ if (__offset>=0 && __offset<=(int64_t)(ut)->nativeIndexingLimit) { \ (ut)->chunkOffset=(int32_t)__offset; \ } else { \ utext_setNativeIndex((ut), (ix)); } } /************************************************************************************ * * Functions related to writing or modifying the text. * These will work only with modifiable UTexts. Attempting to * modify a read-only UText will return an error status. * ************************************************************************************/ /** * Return TRUE if the text can be written (modified) with utext_replace() or * utext_copy(). For the text to be writable, the text provider must * be of a type that supports writing and the UText must not be frozen. * * Attempting to modify text when utext_isWriteable() is FALSE will fail - * the text will not be modified, and an error will be returned from the function * that attempted the modification. * * @param ut the UText to be tested. * @return TRUE if the text is modifiable. * * @see utext_freeze() * @see utext_replace() * @see utext_copy() * @stable ICU 3.4 * */ U_STABLE UBool U_EXPORT2 utext_isWritable(const UText *ut); /** * Test whether there is meta data associated with the text. * @see Replaceable::hasMetaData() * * @param ut The UText to be tested * @return TRUE if the underlying text includes meta data. * @stable ICU 3.4 */ U_STABLE UBool U_EXPORT2 utext_hasMetaData(const UText *ut); /** * Replace a range of the original text with a replacement text. * * Leaves the current iteration position at the position following the * newly inserted replacement text. * * This function is only available on UText types that support writing, * that is, ones where utext_isWritable() returns TRUE. * * When using this function, there should be only a single UText opened onto the * underlying native text string. 
Behavior after a replace operation * on a UText is undefined for any other additional UTexts that refer to the * modified string. * * @param ut the UText representing the text to be operated on. * @param nativeStart the native index of the start of the region to be replaced * @param nativeLimit the native index of the character following the region to be replaced. * @param replacementText pointer to the replacement text * @param replacementLength length of the replacement text, or -1 if the text is NUL terminated. * @param status receives any error status. Possible errors include * U_NO_WRITE_PERMISSION * * @return The signed number of (native) storage units by which * the length of the text expanded or contracted. * * @stable ICU 3.4 */ U_STABLE int32_t U_EXPORT2 utext_replace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacementLength, UErrorCode *status); /** * * Copy or move a substring from one position to another within the text, * while retaining any metadata associated with the text. * This function is used to duplicate or reorder substrings. * The destination index must not overlap the source range. * * The text to be copied or moved is inserted at destIndex; * it does not replace or overwrite any existing text. * * The iteration position is left following the newly inserted text * at the destination position. * * This function is only available on UText types that support writing, * that is, ones where utext_isWritable() returns TRUE. * * When using this function, there should be only a single UText opened onto the * underlying native text string. Behavior after a copy operation * on a UText is undefined in any other additional UTexts that refer to the * modified string. * * @param ut The UText representing the text to be operated on. 
* @param nativeStart The native index of the start of the region to be copied or moved * @param nativeLimit The native index of the character position following the region * to be copied. * @param destIndex The native destination index to which the source substring is * copied or moved. * @param move If TRUE, then the substring is moved, not copied/duplicated. * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION * * @stable ICU 3.4 */ U_STABLE void U_EXPORT2 utext_copy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t destIndex, UBool move, UErrorCode *status); /** *
* Freeze a UText. This prevents any modification to the underlying text itself * by means of functions operating on this UText. *
** Once frozen, a UText cannot be unfrozen. The intent is to ensure * that the text underlying a frozen UText wrapper cannot be modified via that UText. *
** Caution: freezing a UText will disable changes made via the specific * frozen UText wrapper only; it will not have any effect on the ability to * directly modify the text by bypassing the UText. Any such backdoor modifications * are always an error while UText access is occurring because the underlying * text can get out of sync with UText's buffering. *
* * @param ut The UText to be frozen. * @see utext_isWritable() * @stable ICU 3.6 */ U_STABLE void U_EXPORT2 utext_freeze(UText *ut); /** * UText provider properties (bit field indexes). * * @see UText * @stable ICU 3.4 */ enum { /** * It is potentially time consuming for the provider to determine the length of the text. * @stable ICU 3.4 */ UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1, /** * Text chunks remain valid and usable until the text object is modified or * deleted, not just until the next time the access() function is called * (which is the default). * @stable ICU 3.4 */ UTEXT_PROVIDER_STABLE_CHUNKS = 2, /** * The provider supports modifying the text via the replace() and copy() * functions. * @see Replaceable * @stable ICU 3.4 */ UTEXT_PROVIDER_WRITABLE = 3, /** * There is meta data associated with the text. * @see Replaceable::hasMetaData() * @stable ICU 3.4 */ UTEXT_PROVIDER_HAS_META_DATA = 4, /** * Text provider owns the text storage. * Generally occurs as the result of a deep clone of the UText. * When closing the UText, the associated text must * also be closed/deleted/freed/ whatever is appropriate. * @stable ICU 3.6 */ UTEXT_PROVIDER_OWNS_TEXT = 5 }; /** * Function type declaration for UText.clone(). * * clone a UText. Much like opening a UText where the source text is itself * another UText. * * A deep clone will copy both the UText data structures and the underlying text. * The original and cloned UText will operate completely independently; modifications * made to the text in one will not effect the other. Text providers are not * required to support deep clones. The user of clone() must check the status return * and be prepared to handle failures. * * A shallow clone replicates only the UText data structures; it does not make * a copy of the underlying text. Shallow clones can be used as an efficient way to * have multiple iterators active in a single text string that is not being * modified. 
* * A shallow clone operation must not fail except for truly exceptional conditions such * as memory allocation failures. * * A UText and its clone may be safely concurrently accessed by separate threads. * This is true for both shallow and deep clones. * It is the responsibility of the Text Provider to ensure that this thread safety * constraint is met. * * @param dest A UText struct to be filled in with the result of the clone operation, * or NULL if the clone function should heap-allocate a new UText struct. * @param src The UText to be cloned. * @param deep TRUE to request a deep clone, FALSE for a shallow clone. * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR * should be returned if the text provider is unable to clone the * original text. * @return The newly created clone, or NULL if the clone operation failed. * * @stable ICU 3.4 */ typedef UText * U_CALLCONV UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status); /** * Function type declaration for UText.nativeLength(). * * @param ut the UText to get the length of. * @return the length, in the native units of the original text string. * @see UText * @stable ICU 3.4 */ typedef int64_t U_CALLCONV UTextNativeLength(UText *ut); /** * Function type declaration for UText.access(). Get the description of the text chunk * containing the text at a requested native index. The UText's iteration * position will be left at the requested index. If the index is out * of bounds, the iteration position will be left at the start or end * of the string, as appropriate. * * Chunks must begin and end on code point boundaries. A single code point * comprised of multiple storage units must never span a chunk boundary. * * * @param ut the UText being accessed. * @param nativeIndex Requested index of the text to be accessed. 
* @param forward If TRUE, then the returned chunk must contain text * starting from the index, so that start<=index* The extracted string will (if you are a user) / must (if you are a text provider) * be NUL-terminated if there is sufficient space in the destination buffer. * * @param ut the UText from which to extract data. * @param nativeStart the native index of the first characer to extract. * @param nativeLimit the native string index of the position following the last * character to extract. * @param dest the UChar (UTF-16) buffer into which the extracted text is placed * @param destCapacity The size, in UChars, of the destination buffer. May be zero * for precomputing the required size. * @param status receives any error status. * If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for * preflighting. * @return Number of UChars in the data. Does not include a trailing NUL. * * @stable ICU 3.4 */ typedef int32_t U_CALLCONV UTextExtract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status); /** * Function type declaration for UText.replace(). * * Replace a range of the original text with a replacement text. * * Leaves the current iteration position at the position following the * newly inserted replacement text. * * This function need only be implemented on UText types that support writing. * * When using this function, there should be only a single UText opened onto the * underlying native text string. The function is responsible for updating the * text chunk within the UText to reflect the updated iteration position, * taking into account any changes to the underlying string's structure caused * by the replace operation. * * @param ut the UText representing the text to be operated on. * @param nativeStart the index of the start of the region to be replaced * @param nativeLimit the index of the character following the region to be replaced. 
* @param replacementText pointer to the replacement text * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated. * @param status receives any error status. Possible errors include * U_NO_WRITE_PERMISSION * * @return The signed number of (native) storage units by which * the length of the text expanded or contracted. * * @stable ICU 3.4 */ typedef int32_t U_CALLCONV UTextReplace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacmentLength, UErrorCode *status); /** * Function type declaration for UText.copy(). * * Copy or move a substring from one position to another within the text, * while retaining any metadata associated with the text. * This function is used to duplicate or reorder substrings. * The destination index must not overlap the source range. * * The text to be copied or moved is inserted at destIndex; * it does not replace or overwrite any existing text. * * This function need only be implemented for UText types that support writing. * * When using this function, there should be only a single UText opened onto the * underlying native text string. The function is responsible for updating the * text chunk within the UText to reflect the updated iteration position, * taking into account any changes to the underlying string's structure caused * by the replace operation. * * @param ut The UText representing the text to be operated on. * @param nativeStart The index of the start of the region to be copied or moved * @param nativeLimit The index of the character following the region to be replaced. * @param nativeDest The destination index to which the source substring is copied or moved. * @param move If TRUE, then the substring is moved, not copied/duplicated. * @param status receives any error status. 
Possible errors include U_NO_WRITE_PERMISSION * * @stable ICU 3.4 */ typedef void U_CALLCONV UTextCopy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t nativeDest, UBool move, UErrorCode *status); /** * Function type declaration for UText.mapOffsetToNative(). * Map from the current UChar offset within the current text chunk to * the corresponding native index in the original source text. * * This is required only for text providers that do not use native UTF-16 indexes. * * @param ut the UText. * @return Absolute (native) index corresponding to chunkOffset in the current chunk. * The returned native index should always be to a code point boundary. * * @stable ICU 3.4 */ typedef int64_t U_CALLCONV UTextMapOffsetToNative(const UText *ut); /** * Function type declaration for UText.mapIndexToUTF16(). * Map from a native index to a UChar offset within a text chunk. * Behavior is undefined if the native index does not fall within the * current chunk. * * This function is required only for text providers that do not use native UTF-16 indexes. * * @param ut The UText containing the text chunk. * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit. * @return Chunk-relative UTF-16 offset corresponding to the specified native * index. * * @stable ICU 3.4 */ typedef int32_t U_CALLCONV UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex); /** * Function type declaration for UText.utextClose(). * * A Text Provider close function is only required for provider types that make * allocations in their open function (or other functions) that must be * cleaned when the UText is closed. * * The allocation of the UText struct itself and any "extra" storage * associated with the UText is handled by the common UText implementation * and does not require provider specific cleanup in a close function. * * Most UText provider implementations do not need to implement this function. * * @param ut A UText object to be closed. 
* * @stable ICU 3.4 */ typedef void U_CALLCONV UTextClose(UText *ut); /** * (public) Function dispatch table for UText. * Conceptually very much like a C++ Virtual Function Table. * This struct defines the organization of the table. * Each text provider implementation must provide an * actual table that is initialized with the appropriate functions * for the type of text being handled. * @stable ICU 3.6 */ struct UTextFuncs { /** * (public) Function table size, sizeof(UTextFuncs) * Intended for use should the table grow to accomodate added * functions in the future, to allow tests for older format * function tables that do not contain the extensions. * * Fields are placed for optimal alignment on * 32/64/128-bit-pointer machines, by normally grouping together * 4 32-bit fields, * 4 pointers, * 2 64-bit fields * in sequence. * @stable ICU 3.6 */ int32_t tableSize; /** * (private) Alignment padding. * Do not use, reserved for use by the UText framework only. * @internal */ int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserved3; /** * (public) Function pointer for UTextClone * * @see UTextClone * @stable ICU 3.6 */ UTextClone *clone; /** * (public) function pointer for UTextLength * May be expensive to compute! * * @see UTextLength * @stable ICU 3.6 */ UTextNativeLength *nativeLength; /** * (public) Function pointer for UTextAccess. * * @see UTextAccess * @stable ICU 3.6 */ UTextAccess *access; /** * (public) Function pointer for UTextExtract. * * @see UTextExtract * @stable ICU 3.6 */ UTextExtract *extract; /** * (public) Function pointer for UTextReplace. * * @see UTextReplace * @stable ICU 3.6 */ UTextReplace *replace; /** * (public) Function pointer for UTextCopy. * * @see UTextCopy * @stable ICU 3.6 */ UTextCopy *copy; /** * (public) Function pointer for UTextMapOffsetToNative. * * @see UTextMapOffsetToNative * @stable ICU 3.6 */ UTextMapOffsetToNative *mapOffsetToNative; /** * (public) Function pointer for UTextMapNativeIndexToUTF16. 
* * @see UTextMapNativeIndexToUTF16 * @stable ICU 3.6 */ UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16; /** * (public) Function pointer for UTextClose. * * @see UTextClose * @stable ICU 3.6 */ UTextClose *close; /** * (private) Spare function pointer * @internal */ UTextClose *spare1; /** * (private) Spare function pointer * @internal */ UTextClose *spare2; /** * (private) Spare function pointer * @internal */ UTextClose *spare3; }; /** * Function dispatch table for UText * @see UTextFuncs */ typedef struct UTextFuncs UTextFuncs; /** * UText struct. Provides the interface between the generic UText access code * and the UText provider code that works on specific kinds of * text (UTF-8, noncontiguous UTF-16, whatever.) * * Applications that are using predefined types of text providers * to pass text data to ICU services will have no need to view the * internals of the UText structs that they open. * * @stable ICU 3.6 */ struct UText { /** * (private) Magic. Used to help detect when UText functions are handed * invalid or unitialized UText structs. * utext_openXYZ() functions take an initialized, * but not necessarily open, UText struct as an * optional fill-in parameter. This magic field * is used to check for that initialization. * Text provider close functions must NOT clear * the magic field because that would prevent * reuse of the UText struct. * @internal */ uint32_t magic; /** * (private) Flags for managing the allocation and freeing of * memory associated with this UText. * @internal */ int32_t flags; /** * Text provider properties. This set of flags is maintainted by the * text provider implementation. * @stable ICU 3.4 */ int32_t providerProperties; /** * (public) sizeOfStruct=sizeof(UText) * Allows possible backward compatible extension. * * @stable ICU 3.4 */ int32_t sizeOfStruct; /* ------ 16 byte alignment boundary ----------- */ /** * (protected) Native index of the first character position following * the current chunk. 
* @stable ICU 3.6 */ int64_t chunkNativeLimit; /** * (protected) Size in bytes of the extra space (pExtra). * @stable ICU 3.4 */ int32_t extraSize; /** * (protected) The highest chunk offset where native indexing and * chunk (UTF-16) indexing correspond. For UTF-16 sources, value * will be equal to chunkLength. * * @stable ICU 3.6 */ int32_t nativeIndexingLimit; /* ---- 16 byte alignment boundary------ */ /** * (protected) Native index of the first character in the text chunk. * @stable ICU 3.6 */ int64_t chunkNativeStart; /** * (protected) Current iteration position within the text chunk (UTF-16 buffer). * This is the index to the character that will be returned by utext_next32(). * @stable ICU 3.6 */ int32_t chunkOffset; /** * (protected) Length the text chunk (UTF-16 buffer), in UChars. * @stable ICU 3.6 */ int32_t chunkLength; /* ---- 16 byte alignment boundary-- */ /** * (protected) pointer to a chunk of text in UTF-16 format. * May refer either to original storage of the source of the text, or * if conversion was required, to a buffer owned by the UText. * @stable ICU 3.6 */ const UChar *chunkContents; /** * (public) Pointer to Dispatch table for accessing functions for this UText. * @stable ICU 3.6 */ const UTextFuncs *pFuncs; /** * (protected) Pointer to additional space requested by the * text provider during the utext_open operation. * @stable ICU 3.4 */ void *pExtra; /** * (protected) Pointer to string or text-containin object or similar. * This is the source of the text that this UText is wrapping, in a format * that is known to the text provider functions. * @stable ICU 3.4 */ const void *context; /* --- 16 byte alignment boundary--- */ /** * (protected) Pointer fields available for use by the text provider. * Not used by UText common code. * @stable ICU 3.6 */ const void *p; /** * (protected) Pointer fields available for use by the text provider. * Not used by UText common code. 
* @stable ICU 3.6 */ const void *q; /** * (protected) Pointer fields available for use by the text provider. * Not used by UText common code. * @stable ICU 3.6 */ const void *r; /** * Private field reserved for future use by the UText framework * itself. This is not to be touched by the text providers. * @internal ICU 3.4 */ void *privP; /* --- 16 byte alignment boundary--- */ /** * (protected) Integer field reserved for use by the text provider. * Not used by the UText framework, or by the client (user) of the UText. * @stable ICU 3.4 */ int64_t a; /** * (protected) Integer field reserved for use by the text provider. * Not used by the UText framework, or by the client (user) of the UText. * @stable ICU 3.4 */ int32_t b; /** * (protected) Integer field reserved for use by the text provider. * Not used by the UText framework, or by the client (user) of the UText. * @stable ICU 3.4 */ int32_t c; /* ---- 16 byte alignment boundary---- */ /** * Private field reserved for future use by the UText framework * itself. This is not to be touched by the text providers. * @internal ICU 3.4 */ int64_t privA; /** * Private field reserved for future use by the UText framework * itself. This is not to be touched by the text providers. * @internal ICU 3.4 */ int32_t privB; /** * Private field reserved for future use by the UText framework * itself. This is not to be touched by the text providers. * @internal ICU 3.4 */ int32_t privC; }; /** * Common function for use by Text Provider implementations to allocate and/or initialize * a new UText struct. To be called in the implementation of utext_open() functions. * If the supplied UText parameter is null, a new UText struct will be allocated on the heap. * If the supplied UText is already open, the provider's close function will be called * so that the struct can be reused by the open that is in progress. * * @param ut pointer to a UText struct to be re-used, or null if a new UText * should be allocated. 
* @param extraSpace The amount of additional space to be allocated as part * of this UText, for use by types of providers that require * additional storage. * @param status Errors are returned here. * @return pointer to the UText, allocated if necessary, with extra space set up if requested. * @stable ICU 3.4 */ U_STABLE UText * U_EXPORT2 utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status); /** * initializer to be used with local (stack) instances of a UText * struct. UText structs must be initialized before passing * them to one of the utext_open functions. * * @stable ICU 3.6 */ #define UTEXT_INITIALIZER { \ UTEXT_MAGIC, /* magic */ \ 0, /* flags */ \ 0, /* providerProps */ \ sizeof(UText), /* sizeOfStruct */ \ 0, /* chunkNativeLimit */ \ 0, /* extraSize */ \ 0, /* nativeIndexingLimit */ \ 0, /* chunkNativeStart */ \ 0, /* chunkOffset */ \ 0, /* chunkLength */ \ NULL, /* chunkContents */ \ NULL, /* pFuncs */ \ NULL, /* pExtra */ \ NULL, /* context */ \ NULL, NULL, NULL, /* p, q, r */ \ NULL, /* privP */ \ 0, 0, 0, /* a, b, c */ \ 0, 0, 0 /* privA,B,C, */ \ } U_CDECL_END #endif // uset.h /* ******************************************************************************* * * Copyright (C) 2002-2014, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: uset.h * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 * * created on: 2002mar07 * created by: Markus W. Scherer * * C version of UnicodeSet. */ /** * \file * \brief C API: Unicode Set * *
This is a C wrapper around the C++ UnicodeSet class.
*/ #ifndef __USET_H__ #define __USET_H__ #ifndef UCNV_H struct USet; /** * A UnicodeSet. Use the uset_* API to manipulate. Create with * uset_open*, and destroy with uset_close. * @stable ICU 2.4 */ typedef struct USet USet; #endif /** * Bitmask values to be passed to uset_openPatternOptions() or * uset_applyPattern() taking an option parameter. * @stable ICU 2.4 */ enum { /** * Ignore white space within patterns unless quoted or escaped. * @stable ICU 2.4 */ USET_IGNORE_SPACE = 1, /** * Enable case insensitive matching. E.g., "[ab]" with this flag * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will * match all except 'a', 'A', 'b', and 'B'. This performs a full * closure over case mappings, e.g. U+017F for s. * * The resulting set is a superset of the input for the code points but * not for the strings. * It performs a case mapping closure of the code points and adds * full case folding strings for the code points, and reduces strings of * the original set to their full case folding equivalents. * * This is designed for case-insensitive matches, for example * in regular expressions. The full code point case closure allows checking of * an input character directly against the closure set. * Strings are matched by comparing the case-folded form from the closure * set with an incremental case folding of the string in question. * * The closure set will also contain single code points if the original * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.). * This is not necessary (that is, redundant) for the above matching method * but results in the same closure sets regardless of whether the original * set contained the code point or a string. * * @stable ICU 2.4 */ USET_CASE_INSENSITIVE = 2, /** * Enable case insensitive matching. E.g., "[ab]" with this flag * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will * match all except 'a', 'A', 'b', and 'B'. 
This adds the lower-, * title-, and uppercase mappings as well as the case folding * of each existing element in the set. * @stable ICU 3.2 */ USET_ADD_CASE_MAPPINGS = 4 }; /** * Argument values for whether span() and similar functions continue while * the current character is contained vs. not contained in the set. * * The functionality is straightforward for sets with only single code points, * without strings (which is the common case): * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same. * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED. * - span() and spanBack() partition any string the same way when * alternating between span(USET_SPAN_NOT_CONTAINED) and * span(either "contained" condition). * - Using a complemented (inverted) set and the opposite span conditions * yields the same results. * * When a set contains multi-code point strings, then these statements may not * be true, depending on the strings in the set (for example, whether they * overlap with each other) and the string that is processed. * For a set with strings: * - The complement of the set contains the opposite set of code points, * but the same set of strings. * Therefore, complementing both the set and the span conditions * may yield different results. * - When starting spans at different positions in a string * (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different * because a set string may start before the later position. * - span(USET_SPAN_SIMPLE) may be shorter than * span(USET_SPAN_CONTAINED) because it will not recursively try * all possible paths. * For example, with a set which contains the three strings "xy", "xya" and "ax", * span("xyax", USET_SPAN_CONTAINED) will return 4 but * span("xyax", USET_SPAN_SIMPLE) will return 3. * span(USET_SPAN_SIMPLE) will never be longer than * span(USET_SPAN_CONTAINED). * - With either "contained" condition, span() and spanBack() may partition * a string in different ways. 
* For example, with a set which contains the two strings "ab" and "ba", * and when processing the string "aba", * span() will yield contained/not-contained boundaries of { 0, 2, 3 } * while spanBack() will yield boundaries of { 0, 1, 3 }. * * Note: If it is important to get the same boundaries whether iterating forward * or backward through a string, then either only span() should be used and * the boundaries cached for backward operation, or an ICU BreakIterator * could be used. * * Note: Unpaired surrogates are treated like surrogate code points. * Similarly, set strings match only on code point boundaries, * never in the middle of a surrogate pair. * Illegal UTF-8 sequences are treated like U+FFFD. * When processing UTF-8 strings, malformed set strings * (strings with unpaired surrogates which cannot be converted to UTF-8) * are ignored. * * @stable ICU 3.8 */ typedef enum USetSpanCondition { /** * Continues a span() while there is no set element at the current position. * Increments by one code point at a time. * Stops before the first set element (character or string). * (For code points only, this is like while contains(current)==FALSE). * * When span() returns, the substring between where it started and the position * it returned consists only of characters that are not in the set, * and none of its strings overlap with the span. * * @stable ICU 3.8 */ USET_SPAN_NOT_CONTAINED = 0, /** * Spans the longest substring that is a concatenation of set elements (characters or strings). * (For characters only, this is like while contains(current)==TRUE). * * When span() returns, the substring between where it started and the position * it returned consists only of set elements (characters or strings) that are in the set. * * If a set contains strings, then the span will be the longest substring for which there * exists at least one non-overlapping concatenation of set elements (characters or strings). 
* This is equivalent to a POSIX regular expression for (OR of each set element)*.
* (Java/ICU/Perl regex stops at the first match of an OR.)
*
* @stable ICU 3.8
*/
USET_SPAN_CONTAINED = 1,
/**
* Continues a span() while there is a set element at the current position.
* Increments by the longest matching element at each position.
* (For characters only, this is like while contains(current)==TRUE).
*
* When span() returns, the substring between where it started and the position
* it returned consists only of set elements (characters or strings) that are in the set.
*
* If a set only contains single characters, then this is the same
* as USET_SPAN_CONTAINED.
*
* If a set contains strings, then the span will be the longest substring
* with a match at each position with the longest single set element (character or string).
*
* Use this span condition together with other longest-match algorithms,
* such as ICU converters (ucnv_getUnicodeSet()).
*
* @stable ICU 3.8
*/
USET_SPAN_SIMPLE = 2,
/**
* One more than the last span condition.
* @stable ICU 3.8
*/
USET_SPAN_CONDITION_COUNT
} USetSpanCondition;
enum {
    /**
     * Number of elements in USerializedSet::staticArray.
     * Large enough to hold any set made of a single code point, and
     * chosen so that sizeof(USerializedSet) comes out to a convenient size.
     * @stable ICU 2.4
     */
    USET_SERIALIZED_STATIC_ARRAY_CAPACITY = 8
};
/**
 * Serialized representation of a Unicode set. A limited number of
 * operations can be performed directly on the serialized form; see the
 * functions declared further down in this header.
 * @stable ICU 2.4
 */
struct USerializedSet {
    /**
     * Pointer to the serialized Unicode set data.
     * @stable ICU 2.4
     */
    const uint16_t *array;

    /**
     * Length of the part of the array that holds BMP characters.
     * @stable ICU 2.4
     */
    int32_t bmpLength;

    /**
     * Overall length of the array.
     * @stable ICU 2.4
     */
    int32_t length;

    /**
     * Small inline buffer that can back the array, so that fewer memory
     * allocations are needed.
     * @stable ICU 2.4
     */
    uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
};
typedef struct USerializedSet USerializedSet;
/*********************************************************************
* USet API
*********************************************************************/
/**
* Create an empty USet object.
* Equivalent to uset_open(1, 0).
* @return a newly created USet. The caller must call uset_close() on
* it when done.
* @stable ICU 4.2
*/
U_STABLE USet* U_EXPORT2
uset_openEmpty(void);
/**
* Creates a USet object that contains the range of characters
* start..end, inclusive. If start > end
* then an empty set is created (same as using uset_openEmpty()).
* @param start first character of the range, inclusive
* @param end last character of the range, inclusive
* @return a newly created USet. The caller must call uset_close() on
* it when done.
* @stable ICU 2.4
*/
U_STABLE USet* U_EXPORT2
uset_open(UChar32 start, UChar32 end);
/**
* Creates a set from the given pattern. See the UnicodeSet class
* description for the syntax of the pattern language.
* @param pattern a string specifying what characters are in the set
* @param patternLength the length of the pattern, or -1 if null
* terminated
* @param ec the error code
* @return the newly created USet. As with uset_open(), the caller is
* expected to call uset_close() on it when done.
* @stable ICU 2.4
*/
U_STABLE USet* U_EXPORT2
uset_openPattern(const UChar* pattern, int32_t patternLength,
UErrorCode* ec);
/**
* Creates a set from the given pattern. See the UnicodeSet class
* description for the syntax of the pattern language.
* @param pattern a string specifying what characters are in the set
* @param patternLength the length of the pattern, or -1 if null
* terminated
* @param options bitmask for options to apply to the pattern.
* Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
* @param ec the error code
* @return the newly created USet. As with uset_open(), the caller is
* expected to call uset_close() on it when done.
* @stable ICU 2.4
*/
U_STABLE USet* U_EXPORT2
uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
uint32_t options,
UErrorCode* ec);
/**
* Disposes of the storage used by a USet object. This function should
* be called exactly once for each newly created USet handed to the
* caller, for example by uset_openEmpty() or uset_open().
* @param set the object to dispose of
* @stable ICU 2.4
*/
U_STABLE void U_EXPORT2
uset_close(USet* set);
/**
* Returns a copy of this object.
* If this set is frozen, then the clone will be frozen as well.
* Use uset_cloneAsThawed() for a mutable clone of a frozen set.
* @param set the original set
* @return the newly allocated copy of the set
* @see uset_cloneAsThawed
* @stable ICU 3.8
*/
U_STABLE USet * U_EXPORT2
uset_clone(const USet *set);
/**
* Determines whether the set has been frozen (made immutable) or not.
* See the ICU4J Freezable interface for details.
* @param set the set
* @return TRUE/FALSE for whether the set has been frozen
* @see uset_freeze
* @see uset_cloneAsThawed
* @stable ICU 3.8
*/
U_STABLE UBool U_EXPORT2
uset_isFrozen(const USet *set);
/**
* Freeze the set (make it immutable).
* Once frozen, it cannot be unfrozen and is therefore thread-safe
* until it is deleted.
* See the ICU4J Freezable interface for details.
* Freezing the set may also make some operations faster, for example
* uset_contains() and uset_span().
* A frozen set will not be modified. (It remains frozen.)
* This function has no return value; the set passed in is frozen in place.
* @param set the set
* @see uset_isFrozen
* @see uset_cloneAsThawed
* @stable ICU 3.8
*/
U_STABLE void U_EXPORT2
uset_freeze(USet *set);
/**
* Clone the set and make the clone mutable.
* See the ICU4J Freezable interface for details.
* @param set the set
* @return the mutable clone
* @see uset_freeze
* @see uset_isFrozen
* @see uset_clone
* @stable ICU 3.8
*/
U_STABLE USet * U_EXPORT2
uset_cloneAsThawed(const USet *set);
/**
* Causes the USet object to represent the range start - end.
* If start > end then this USet is set to an empty range.
* A frozen set will not be modified.
* @param set the object to set to the given range
* @param start first character in the set, inclusive
* @param end last character in the set, inclusive
* @stable ICU 3.2
*/
U_STABLE void U_EXPORT2
uset_set(USet* set,
UChar32 start, UChar32 end);
/**
* Modifies the set to represent the set specified by the given
* pattern. See the UnicodeSet class description for the syntax of
* the pattern language. See also the User Guide chapter about UnicodeSet.
* Empties the set passed before applying the pattern.
* A frozen set will not be modified.
* @param set The set to which the pattern is to be applied.
* @param pattern A pointer to UChar string specifying what characters are in the set.
* The character at pattern[0] must be a '['.
* @param patternLength The length of the UChar string. -1 if NUL terminated.
* @param options A bitmask for options to apply to the pattern.
* Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
* @param status Returns an error if the pattern cannot be parsed.
* @return Upon successful parse, the value is
* the index of the character after the closing ']'
* of the parsed pattern.
* If the status code indicates failure, then the return value
* is the index of the error in the source.
*
* @stable ICU 2.8
*/
U_STABLE int32_t U_EXPORT2
uset_applyPattern(USet *set,
const UChar *pattern, int32_t patternLength,
uint32_t options,
UErrorCode *status);
/**
* Modifies the set to contain those code points which have the given value
* for the given binary or enumerated property, as returned by
* u_getIntPropertyValue. Prior contents of this set are lost.
* A frozen set will not be modified.
*
* @param set the object to contain the code points defined by the property
*
* @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
* or UCHAR_INT_START..UCHAR_INT_LIMIT-1
* or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
*
* @param value a value in the range u_getIntPropertyMinValue(prop)..
* u_getIntPropertyMaxValue(prop), with one exception. If prop is
* UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
* rather a mask value produced by U_GET_GC_MASK(). This allows grouped
* categories such as [:L:] to be represented.
*
* @param ec error code input/output parameter
*
* @stable ICU 3.2
*/
U_STABLE void U_EXPORT2
uset_applyIntPropertyValue(USet* set,
UProperty prop, int32_t value, UErrorCode* ec);
/**
* Modifies the set to contain those code points which have the
* given value for the given property. Prior contents of this
* set are lost.
* A frozen set will not be modified.
*
* @param set the object to contain the code points defined by the given
* property and value alias
*
* @param prop a string specifying a property alias, either short or long.
* The name is matched loosely. See PropertyAliases.txt for names and a
* description of loose matching. If the value string is empty, then this
* string is interpreted as either a General_Category value alias, a Script
* value alias, a binary property alias, or a special ID. Special IDs are
* matched loosely and correspond to the following sets:
*
* "ANY" = [\\u0000-\\U0010FFFF],
* "ASCII" = [\\u0000-\\u007F],
* "Assigned" = [:^Cn:].
*
* @param propLength the length of the prop, or -1 if NUL-terminated
*
* @param value a string specifying a value alias, either short or long.
* The name is matched loosely. See PropertyValueAliases.txt for names
* and a description of loose matching. In addition to aliases listed,
* numeric values and canonical combining classes may be expressed
* numerically, e.g., ("nv", "0.5") or ("ccc", "220"). The value string
* may also be empty.
*
* @param valueLength the length of the value, or -1 if NUL-terminated
*
* @param ec error code input/output parameter
*
* @stable ICU 3.2
*/
U_STABLE void U_EXPORT2
uset_applyPropertyAlias(USet* set,
const UChar *prop, int32_t propLength,
const UChar *value, int32_t valueLength,
UErrorCode* ec);
/**
* Return true if the given position, in the given pattern, appears
* to be the start of a UnicodeSet pattern.
*
* @param pattern a string specifying the pattern
* @param patternLength the length of the pattern, or -1 if NUL-terminated
* @param pos the given position
* @return TRUE if the text at pos appears to be the start of a
* UnicodeSet pattern
* @stable ICU 3.2
*/
U_STABLE UBool U_EXPORT2
uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
int32_t pos);
/**
* Returns a string representation of this set. If the result of
* calling this function is passed to a uset_openPattern(), it
* will produce another set that is equal to this one.
* @param set the set
* @param result the string to receive the rules, may be NULL
* @param resultCapacity the capacity of result, may be 0 if result is NULL
* @param escapeUnprintable if TRUE then convert unprintable
* character to their hex escape representations, \\uxxxx or
* \\Uxxxxxxxx. Unprintable characters are those other than
* U+000A, U+0020..U+007E.
* @param ec error code.
* @return length of string, possibly larger than resultCapacity
* @stable ICU 2.4
*/
U_STABLE int32_t U_EXPORT2
uset_toPattern(const USet* set,
UChar* result, int32_t resultCapacity,
UBool escapeUnprintable,
UErrorCode* ec);
/**
* Adds the given character to the given USet. After this call,
* uset_contains(set, c) will return TRUE.
* A frozen set will not be modified.
* @param set the object to which to add the character
* @param c the character to add
* @stable ICU 2.4
*/
U_STABLE void U_EXPORT2
uset_add(USet* set, UChar32 c);
/**
* Adds all of the elements in the specified set to this set if
* they're not already present. This operation effectively
* modifies this set so that its value is the union of the two
* sets. The behavior of this operation is unspecified if the specified
* collection is modified while the operation is in progress.
* A frozen set will not be modified.
*
* @param set the object to which to add the set
* @param additionalSet the source set whose elements are to be added to this set.
* @stable ICU 2.6
*/
U_STABLE void U_EXPORT2
uset_addAll(USet* set, const USet *additionalSet);
/**
* Adds the given range of characters to the given USet. After this call,
* uset_containsRange(set, start, end) will return TRUE.
* A frozen set will not be modified.
* @param set the object to which to add the range
* @param start the first character of the range to add, inclusive
* @param end the last character of the range to add, inclusive
* @stable ICU 2.2
*/
U_STABLE void U_EXPORT2
uset_addRange(USet* set, UChar32 start, UChar32 end);
/**
* Adds the given string to the given USet. After this call,
* uset_containsString(set, str, strLen) will return TRUE.
* A frozen set will not be modified.
* @param set the object to which to add the string
* @param str the string to add
* @param strLen the length of the string or -1 if null terminated.
* @stable ICU 2.4
*/
U_STABLE void U_EXPORT2
uset_addString(USet* set, const UChar* str, int32_t strLen);
/**
* Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
* If this set already contains any particular character, it has no effect on that character.
* A frozen set will not be modified.
* @param set the object to which to add the characters
* @param str the source string
* @param strLen the length of the string or -1 if null terminated.
* @stable ICU 3.4
*/
U_STABLE void U_EXPORT2
uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
/**
* Removes the given character from the given USet. After this call,
* uset_contains(set, c) will return FALSE.
* A frozen set will not be modified.
* @param set the object from which to remove the character
* @param c the character to remove
* @stable ICU 2.4
*/
U_STABLE void U_EXPORT2
uset_remove(USet* set, UChar32 c);
/**
* Removes the given range of characters from the given USet. After this call,
* uset_contains(set, c) will return FALSE for every c in start..end.
* A frozen set will not be modified.
* @param set the object from which to remove the range
* @param start the first character of the range to remove, inclusive
* @param end the last character of the range to remove, inclusive
* @stable ICU 2.2
*/
U_STABLE void U_EXPORT2
uset_removeRange(USet* set, UChar32 start, UChar32 end);
/**
* Removes the given string from the given USet. After this call,
* uset_containsString(set, str, strLen) will return FALSE.
* A frozen set will not be modified.
* @param set the object from which to remove the string
* @param str the string to remove
* @param strLen the length of the string or -1 if null terminated.
* @stable ICU 2.4
*/
U_STABLE void U_EXPORT2
uset_removeString(USet* set, const UChar* str, int32_t strLen);
/**
* Removes from this set all of its elements that are contained in the
* specified set. This operation effectively modifies this
* set so that its value is the asymmetric set difference of
* the two sets.
* A frozen set will not be modified.
* @param set the object from which the elements are to be removed
* @param removeSet the object that defines which elements will be
* removed from this set
* @stable ICU 3.2
*/
U_STABLE void U_EXPORT2
uset_removeAll(USet* set, const USet* removeSet);
/**
* Retain only the elements in this set that are contained in the
* specified range. If start > end then an empty range is
* retained, leaving the set empty. This is equivalent to
* a boolean logic AND, or a set INTERSECTION.
* A frozen set will not be modified.
*
* @param set the object for which to retain only the specified range
* @param start first character, inclusive, of range to be retained
* to this set.
* @param end last character, inclusive, of range to be retained
* to this set.
* @stable ICU 3.2
*/
U_STABLE void U_EXPORT2
uset_retain(USet* set, UChar32 start, UChar32 end);
/**
* Retains only the elements in this set that are contained in the
* specified set. In other words, removes from this set all of
* its elements that are not contained in the specified set. This
* operation effectively modifies this set so that its value is
* the intersection of the two sets.
* A frozen set will not be modified.
*
* @param set the object on which to perform the retain
* @param retain set that defines which elements this set will retain
* @stable ICU 3.2
*/
U_STABLE void U_EXPORT2
uset_retainAll(USet* set, const USet* retain);
/**
* Reallocate this object's internal structures to take up the least
* possible space, without changing this object's value.
* A frozen set will not be modified.
*
* @param set the object on which to perform the compact
* @stable ICU 3.2
*/
U_STABLE void U_EXPORT2
uset_compact(USet* set);
/**
* Inverts this set. This operation modifies this set so that
* its value is its complement. This operation does not affect
* the multicharacter strings, if any.
* A frozen set will not be modified.
* @param set the set
* @stable ICU 2.4
*/
U_STABLE void U_EXPORT2
uset_complement(USet* set);
/**
* Complements in this set all elements contained in the specified
* set. Any character in the other set will be removed if it is
* in this set, or will be added if it is not in this set.
* A frozen set will not be modified.
*
* @param set the set with which to complement
* @param complement set that defines which elements will be xor'ed
* from this set.
* @stable ICU 3.2
*/
U_STABLE void U_EXPORT2
uset_complementAll(USet* set, const USet* complement);
/**
* Removes all of the elements from this set. This set will be
* empty after this call returns.
* A frozen set will not be modified.
* @param set the set
* @stable ICU 2.4
*/
U_STABLE void U_EXPORT2
uset_clear(USet* set);
/**
* Close this set over the given attribute. For the attribute
* USET_CASE, the result is to modify this set so that:
*
* 1. For each character or string 'a' in this set, all strings or
* characters 'b' such that foldCase(a) == foldCase(b) are added
* to this set.
*
* 2. For each string 'e' in the resulting set, if e !=
* foldCase(e), 'e' will be removed.
*
* Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
*
* (Here foldCase(x) refers to the operation u_strFoldCase, and a
* == b denotes that the contents are the same, not pointer
* comparison.)
*
* A frozen set will not be modified.
*
* @param set the set
*
* @param attributes bitmask for attributes to close over.
* Currently only the USET_CASE bit is supported. Any undefined bits
* are ignored.
* @stable ICU 4.2
*/
U_STABLE void U_EXPORT2
uset_closeOver(USet* set, int32_t attributes);
/**
* Remove all strings from this set.
*
* @param set the set
* @stable ICU 4.2
*/
U_STABLE void U_EXPORT2
uset_removeAllStrings(USet* set);
/**
* Returns TRUE if the given USet contains no characters and no
* strings.
* @param set the set
* @return true if set is empty
* @stable ICU 2.4
*/
U_STABLE UBool U_EXPORT2
uset_isEmpty(const USet* set);
/**
* Returns TRUE if the given USet contains the given character.
* This function works faster with a frozen set.
* @param set the set
* @param c The codepoint to check for within the set
* @return true if set contains c
* @stable ICU 2.4
*/
U_STABLE UBool U_EXPORT2
uset_contains(const USet* set, UChar32 c);
/**
* Returns TRUE if the given USet contains all characters c
* where start <= c && c <= end.
* @param set the set
* @param start the first character of the range to test, inclusive
* @param end the last character of the range to test, inclusive
* @return TRUE if set contains the range
* @stable ICU 2.2
*/
U_STABLE UBool U_EXPORT2
uset_containsRange(const USet* set, UChar32 start, UChar32 end);
/**
* Returns TRUE if the given USet contains the given string.
* @param set the set
* @param str the string
* @param strLen the length of the string or -1 if null terminated.
* @return true if set contains str
* @stable ICU 2.4
*/
U_STABLE UBool U_EXPORT2
uset_containsString(const USet* set, const UChar* str, int32_t strLen);
/**
* Returns the index of the given character within this set, where
* the set is ordered by ascending code point. If the character
* is not in this set, return -1. The inverse of this function is
* uset_charAt().
* @param set the set
* @param c the character to obtain the index for
* @return an index from 0..size()-1, or -1
* @stable ICU 3.2
*/
U_STABLE int32_t U_EXPORT2
uset_indexOf(const USet* set, UChar32 c);
/**
* Returns the character at the given index within this set, where
* the set is ordered by ascending code point. If the index is
* out of range, return (UChar32)-1. The inverse of this function is
* uset_indexOf().
* @param set the set
* @param charIndex an index from 0..size()-1 to obtain the char for
* @return the character at the given index, or (UChar32)-1.
* @stable ICU 3.2
*/
U_STABLE UChar32 U_EXPORT2
uset_charAt(const USet* set, int32_t charIndex);
/**
* Returns the number of characters and strings contained in the given
* USet.
* @param set the set
* @return a non-negative integer counting the characters and strings
* contained in set
* @stable ICU 2.4
*/
U_STABLE int32_t U_EXPORT2
uset_size(const USet* set);
/**
* Returns the number of items in this set. An item is either a range
* of characters or a single multicharacter string.
* @param set the set
* @return a non-negative integer counting the character ranges
* and/or strings contained in set
* @stable ICU 2.4
*/
U_STABLE int32_t U_EXPORT2
uset_getItemCount(const USet* set);
/**
* Returns an item of this set. An item is either a range of
* characters or a single multicharacter string.
* @param set the set
* @param itemIndex a non-negative integer in the range 0..
* uset_getItemCount(set)-1
* @param start pointer to variable to receive first character
* in range, inclusive
* @param end pointer to variable to receive last character in range,
* inclusive
* @param str buffer to receive the string, may be NULL
* @param strCapacity capacity of str, or 0 if str is NULL
* @param ec error code
* @return the length of the string (>= 2), or 0 if the item is a
* range, in which case it is the range *start..*end, or -1 if
* itemIndex is out of range
* @stable ICU 2.4
*/
U_STABLE int32_t U_EXPORT2
uset_getItem(const USet* set, int32_t itemIndex,
UChar32* start, UChar32* end,
UChar* str, int32_t strCapacity,
UErrorCode* ec);
/**
* Returns true if set1 contains all the characters and strings
* of set2. It answers the question, 'Is set1 a superset of set2?'
* @param set1 set to be checked for containment
* @param set2 set to be checked for containment
* @return true if the test condition is met
* @stable ICU 3.2
*/
U_STABLE UBool U_EXPORT2
uset_containsAll(const USet* set1, const USet* set2);
/**
* Returns true if this set contains all the characters
* of the given string. This does not check containment of grapheme
* clusters, like uset_containsString.
* @param set set of characters to be checked for containment
* @param str string containing codepoints to be checked for containment
* @param strLen the length of the string or -1 if null terminated.
* @return true if the test condition is met
* @stable ICU 3.4
*/
U_STABLE UBool U_EXPORT2
uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
/**
* Returns true if set1 contains none of the characters and strings
* of set2. It answers the question, 'Are set1 and set2 disjoint?'
* @param set1 set to be checked for containment
* @param set2 set to be checked for containment
* @return true if the test condition is met
* @stable ICU 3.2
*/
U_STABLE UBool U_EXPORT2
uset_containsNone(const USet* set1, const USet* set2);
/**
* Returns true if set1 contains some of the characters and strings
* of set2. It answers the question, 'Do set1 and set2 have an intersection?'
* @param set1 set to be checked for containment
* @param set2 set to be checked for containment
* @return true if the test condition is met
* @stable ICU 3.2
*/
U_STABLE UBool U_EXPORT2
uset_containsSome(const USet* set1, const USet* set2);
/**
* Returns the length of the initial substring of the input string which
* consists only of characters and strings that are contained in this set
* (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
* or only of characters and strings that are not contained
* in this set (USET_SPAN_NOT_CONTAINED).
* See USetSpanCondition for details.
* Similar to the strspn() C library function.
* Unpaired surrogates are treated according to contains() of their surrogate code points.
* This function works faster with a frozen set and with a non-negative string length argument.
* @param set the set
* @param s start of the string
* @param length of the string; can be -1 for NUL-terminated
* @param spanCondition specifies the containment condition
* @return the length of the initial substring according to the spanCondition;
* 0 if the start of the string does not fit the spanCondition
* @stable ICU 3.8
* @see USetSpanCondition
*/
U_STABLE int32_t U_EXPORT2
uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
/**
* Returns the start of the trailing substring of the input string which
* consists only of characters and strings that are contained in this set
* (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
* or only of characters and strings that are not contained
* in this set (USET_SPAN_NOT_CONTAINED).
* See USetSpanCondition for details.
* Unpaired surrogates are treated according to contains() of their surrogate code points.
* This function works faster with a frozen set and with a non-negative string length argument.
* @param set the set
* @param s start of the string
* @param length of the string; can be -1 for NUL-terminated
* @param spanCondition specifies the containment condition
* @return the start of the trailing substring according to the spanCondition;
* the string length if the end of the string does not fit the spanCondition
* @stable ICU 3.8
* @see USetSpanCondition
*/
U_STABLE int32_t U_EXPORT2
uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
/**
* Returns the length of the initial substring of the input string which
* consists only of characters and strings that are contained in this set
* (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
* or only of characters and strings that are not contained
* in this set (USET_SPAN_NOT_CONTAINED).
* See USetSpanCondition for details.
* Similar to the strspn() C library function.
* Malformed byte sequences are treated according to contains(0xfffd).
* This function works faster with a frozen set and with a non-negative string length argument.
* @param set the set
* @param s start of the string (UTF-8)
* @param length of the string; can be -1 for NUL-terminated
* @param spanCondition specifies the containment condition
* @return the length of the initial substring according to the spanCondition;
* 0 if the start of the string does not fit the spanCondition
* @stable ICU 3.8
* @see USetSpanCondition
*/
U_STABLE int32_t U_EXPORT2
uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
/**
* Returns the start of the trailing substring of the input string which
* consists only of characters and strings that are contained in this set
* (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
* or only of characters and strings that are not contained
* in this set (USET_SPAN_NOT_CONTAINED).
* See USetSpanCondition for details.
* Malformed byte sequences are treated according to contains(0xfffd).
* This function works faster with a frozen set and with a non-negative string length argument.
* @param set the set
* @param s start of the string (UTF-8)
* @param length of the string; can be -1 for NUL-terminated
* @param spanCondition specifies the containment condition
* @return the start of the trailing substring according to the spanCondition;
* the string length if the end of the string does not fit the spanCondition
* @stable ICU 3.8
* @see USetSpanCondition
*/
U_STABLE int32_t U_EXPORT2
uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
/**
* Returns true if set1 contains all of the characters and strings
* of set2, and vice versa. It answers the question, 'Is set1 equal to set2?'
* @param set1 set to be checked for containment
* @param set2 set to be checked for containment
* @return true if the test condition is met
* @stable ICU 3.2
*/
U_STABLE UBool U_EXPORT2
uset_equals(const USet* set1, const USet* set2);
/*********************************************************************
* Serialized set API
*********************************************************************/
/**
* Serializes this set into an array of 16-bit integers. Serialization
* (currently) only records the characters in the set; multicharacter
* strings are ignored.
*
* The array
* has the following format (each line is one 16-bit integer):
*
* length = (n+2*m) | (m!=0?0x8000:0)
* bmpLength = n; present if m!=0
* bmp[0]
* bmp[1]
* ...
* bmp[n-1]
* supp-high[0]
* supp-low[0]
* supp-high[1]
* supp-low[1]
* ...
* supp-high[m-1]
* supp-low[m-1]
*
* The array starts with a header. After the header are n bmp
* code points, then m supplementary code points. Either n or m
* or both may be zero. n+2*m is always <= 0x7FFF.
*
* If there are no supplementary characters (if m==0) then the
* header is one 16-bit integer, 'length', with value n.
*
* If there are supplementary characters (if m!=0) then the header
* is two 16-bit integers. The first, 'length', has value
* (n+2*m)|0x8000. The second, 'bmpLength', has value n.
*
* After the header the code points are stored in ascending order.
* Supplementary code points are stored as most significant 16
* bits followed by least significant 16 bits.
*
* @param set the set
* @param dest pointer to buffer of destCapacity 16-bit integers.
* May be NULL only if destCapacity is zero.
* @param destCapacity size of dest, or zero. Must not be negative.
* @param pErrorCode pointer to the error code. Will be set to
* U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF. Will be set to
* U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.
* @return the total length of the serialized format, including
* the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
* than U_BUFFER_OVERFLOW_ERROR.
* @stable ICU 2.4
*/
U_STABLE int32_t U_EXPORT2
uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
/**
* Given a serialized array, fill in the given serialized set object.
* @param fillSet pointer to result
* @param src pointer to start of array
* @param srcLength length of array
* @return true if the given array is valid, otherwise false
* @stable ICU 2.4
*/
U_STABLE UBool U_EXPORT2
uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
/**
* Set the USerializedSet to contain the given character (and nothing
* else).
* @param fillSet pointer to result
* @param c The codepoint to set
* @stable ICU 2.4
*/
U_STABLE void U_EXPORT2
uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
/**
* Returns TRUE if the given USerializedSet contains the given
* character.
* @param set the serialized set
* @param c The codepoint to check for within the set
* @return true if set contains c
* @stable ICU 2.4
*/
U_STABLE UBool U_EXPORT2
uset_serializedContains(const USerializedSet* set, UChar32 c);
/**
* Returns the number of disjoint ranges of characters contained in
* the given serialized set. Ignores any strings contained in the
* set.
* @param set the serialized set
* @return a non-negative integer counting the character ranges
* contained in set
* @stable ICU 2.4
*/
U_STABLE int32_t U_EXPORT2
uset_getSerializedRangeCount(const USerializedSet* set);
/**
* Returns a range of characters contained in the given serialized
* set.
* @param set the serialized set
* @param rangeIndex a non-negative integer in the range 0..
* uset_getSerializedRangeCount(set)-1
* @param pStart pointer to variable to receive first character
* in range, inclusive
* @param pEnd pointer to variable to receive last character in range,
* inclusive
* @return true if rangeIndex is valid, otherwise false
* @stable ICU 2.4
*/
U_STABLE UBool U_EXPORT2
uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
UChar32* pStart, UChar32* pEnd);
#endif
// unorm2.h
/*
*******************************************************************************
*
* Copyright (C) 2009-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: unorm2.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2009dec15
* created by: Markus W. Scherer
*/
#ifndef __UNORM2_H__
#define __UNORM2_H__
/**
* \file
* \brief C API: New API for Unicode Normalization.
*
* Unicode normalization functionality for standard Unicode normalization or
* for using custom mapping tables.
* All instances of UNormalizer2 are unmodifiable/immutable.
* Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller.
* For more details see the Normalizer2 C++ class.
*/
/**
* Normalization mode constants.
* The standard Unicode normalization forms, and the algorithms that are
* also applied with custom mapping tables, are described in
* http://www.unicode.org/unicode/reports/tr15/
* @stable ICU 4.4
*/
typedef enum {
    /**
    * Decomposition followed by composition.
    * With an "nfc" instance this is standard NFC;
    * with an "nfkc" instance this is standard NFKC.
    * The standard Unicode normalization forms are described in
    * http://www.unicode.org/unicode/reports/tr15/
    * @stable ICU 4.4
    */
    UNORM2_COMPOSE = 0,
    /**
    * Map, and reorder canonically.
    * With an "nfc" instance this is standard NFD;
    * with an "nfkc" instance this is standard NFKD.
    * The standard Unicode normalization forms are described in
    * http://www.unicode.org/unicode/reports/tr15/
    * @stable ICU 4.4
    */
    UNORM2_DECOMPOSE = 1,
    /**
    * "Fast C or D" form.
    * For a string in this form, further decomposition without reordering
    * would yield the same form as DECOMPOSE.
    * Text in "Fast C or D" form can be processed efficiently with data tables
    * that are "canonically closed", that is, that provide equivalent data for
    * equivalent text, without having to be fully normalized.
    * This is not a standard Unicode normalization form, and it is
    * not a unique form: Different FCD strings can be canonically equivalent.
    * For details see http://www.unicode.org/notes/tn5/#FCD
    * @stable ICU 4.4
    */
    UNORM2_FCD = 2,
    /**
    * Compose only contiguously.
    * Also known as "FCC" or "Fast C Contiguous".
    * The result will often, but not always, be in NFC.
    * The result will conform to FCD, which is useful for processing.
    * This is not a standard Unicode normalization form.
    * For details see http://www.unicode.org/notes/tn5/#FCC
    * @stable ICU 4.4
    */
    UNORM2_COMPOSE_CONTIGUOUS = 3
} UNormalization2Mode;
/**
* Values that the normalization quick check functions can return.
* For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
* @stable ICU 2.0
*/
typedef enum UNormalizationCheckResult {
    /**
    * The input string is not in the normalization form.
    * @stable ICU 2.0
    */
    UNORM_NO = 0,
    /**
    * The input string is in the normalization form.
    * @stable ICU 2.0
    */
    UNORM_YES = 1,
    /**
    * The input string may or may not be in the normalization form.
    * Only composition forms like NFC and FCC return this value; it is
    * returned when a backward-combining character is found for which
    * the surrounding text would have to be analyzed further.
    * @stable ICU 2.0
    */
    UNORM_MAYBE = 2
} UNormalizationCheckResult;
/**
* Opaque C service object type for the new normalization API.
* Only the struct tag is declared here; pointers to it are returned by
* the unorm2_get...Instance() functions, unorm2_getInstance() and
* unorm2_openFiltered().
* @stable ICU 4.4
*/
struct UNormalizer2;
typedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */
#if !UCONFIG_NO_NORMALIZATION
/**
* Returns a UNormalizer2 instance for Unicode NFC normalization.
* Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode).
* Returns an unmodifiable singleton instance. Do not delete it;
* in particular, do not pass it to unorm2_close().
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the requested UNormalizer2, if successful
* @stable ICU 49
*/
U_STABLE const UNormalizer2 * U_EXPORT2
unorm2_getNFCInstance(UErrorCode *pErrorCode);
/**
* Returns a UNormalizer2 instance for Unicode NFD normalization.
* Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode).
* Returns an unmodifiable singleton instance. Do not delete it;
* in particular, do not pass it to unorm2_close().
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the requested UNormalizer2, if successful
* @stable ICU 49
*/
U_STABLE const UNormalizer2 * U_EXPORT2
unorm2_getNFDInstance(UErrorCode *pErrorCode);
/**
* Returns a UNormalizer2 instance for Unicode NFKC normalization.
* Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode).
* Returns an unmodifiable singleton instance. Do not delete it;
* in particular, do not pass it to unorm2_close().
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the requested UNormalizer2, if successful
* @stable ICU 49
*/
U_STABLE const UNormalizer2 * U_EXPORT2
unorm2_getNFKCInstance(UErrorCode *pErrorCode);
/**
* Returns a UNormalizer2 instance for Unicode NFKD normalization.
* Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode).
* Returns an unmodifiable singleton instance. Do not delete it;
* in particular, do not pass it to unorm2_close().
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the requested UNormalizer2, if successful
* @stable ICU 49
*/
U_STABLE const UNormalizer2 * U_EXPORT2
unorm2_getNFKDInstance(UErrorCode *pErrorCode);
/**
* Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization.
* Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode).
* Returns an unmodifiable singleton instance. Do not delete it;
* in particular, do not pass it to unorm2_close().
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the requested UNormalizer2, if successful
* @stable ICU 49
*/
U_STABLE const UNormalizer2 * U_EXPORT2
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
/**
* Returns a UNormalizer2 instance which uses the specified data file
* (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
* and which composes or decomposes text according to the specified mode.
* Returns an unmodifiable singleton instance. Do not delete it;
* in particular, do not pass it to unorm2_close().
*
* Use packageName=NULL for data files that are part of ICU's own data.
* Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
* Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
* Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
*
* @param packageName NULL for ICU built-in data, otherwise application data package name
* @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
* @param mode normalization mode (compose or decompose etc.),
* one of the UNormalization2Mode constants
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the requested UNormalizer2, if successful
* @stable ICU 4.4
*/
U_STABLE const UNormalizer2 * U_EXPORT2
unorm2_getInstance(const char *packageName,
const char *name,
UNormalization2Mode mode,
UErrorCode *pErrorCode);
/**
* Constructs a filtered normalizer wrapping any UNormalizer2 instance
* and a filter set.
* Both are aliased and must not be modified or deleted while this object
* is used.
* The filter set should be frozen; otherwise the performance will suffer greatly.
* The returned instance is to be closed with unorm2_close().
* @param norm2 wrapped UNormalizer2 instance
* @param filterSet USet which determines the characters to be normalized
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the requested UNormalizer2, if successful
* @stable ICU 4.4
*/
U_STABLE UNormalizer2 * U_EXPORT2
unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode);
/**
* Closes a UNormalizer2 instance from unorm2_openFiltered().
* Do not close instances from unorm2_getInstance() or from the
* unorm2_get...Instance() functions; those are singletons that
* must not be deleted by the caller.
* @param norm2 UNormalizer2 instance to be closed
* @stable ICU 4.4
*/
U_STABLE void U_EXPORT2
unorm2_close(UNormalizer2 *norm2);
/**
* Writes the normalized form of the source string to the destination string
* (replacing its contents) and returns the length of the destination string.
* The source and destination strings must be different buffers.
* @param norm2 UNormalizer2 instance
* @param src source string
* @param length length of the source string, or -1 if NUL-terminated
* @param dest destination string; its contents is replaced with normalized src
* @param capacity number of UChars that can be written to dest
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the length of the destination string
* @stable ICU 4.4
*/
U_STABLE int32_t U_EXPORT2
unorm2_normalize(const UNormalizer2 *norm2,
const UChar *src, int32_t length,
UChar *dest, int32_t capacity,
UErrorCode *pErrorCode);
/**
* Appends the normalized form of the second string to the first string
* (merging them at the boundary) and returns the length of the first string.
* The result is normalized if the first string was normalized.
* The first and second strings must be different buffers.
* @param norm2 UNormalizer2 instance
* @param first string, should be normalized
* @param firstLength length of the first string, or -1 if NUL-terminated
* @param firstCapacity number of UChars that can be written to first
* @param second string, will be normalized
* @param secondLength length of the second string, or -1 if NUL-terminated
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the length of the first string after appending
* @stable ICU 4.4
*/
U_STABLE int32_t U_EXPORT2
unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
UChar *first, int32_t firstLength, int32_t firstCapacity,
const UChar *second, int32_t secondLength,
UErrorCode *pErrorCode);
/**
* Appends the second string to the first string
* (merging them at the boundary) and returns the length of the first string.
* The result is normalized if both the strings were normalized.
* The first and second strings must be different buffers.
* @param norm2 UNormalizer2 instance
* @param first string, should be normalized
* @param firstLength length of the first string, or -1 if NUL-terminated
* @param firstCapacity number of UChars that can be written to first
* @param second string, should be normalized
* @param secondLength length of the second string, or -1 if NUL-terminated
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the length of the first string after appending
* @stable ICU 4.4
*/
U_STABLE int32_t U_EXPORT2
unorm2_append(const UNormalizer2 *norm2,
UChar *first, int32_t firstLength, int32_t firstCapacity,
const UChar *second, int32_t secondLength,
UErrorCode *pErrorCode);
/**
* Gets the decomposition mapping of c.
* Roughly equivalent to normalizing the string form of c
* on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function
* returns a negative value and does not write a string
* if c does not have a decomposition mapping in this instance's data.
* This function is independent of the mode of the UNormalizer2.
* @param norm2 UNormalizer2 instance
* @param c code point
* @param decomposition String buffer which will be set to c's
* decomposition mapping, if there is one.
* @param capacity number of UChars that can be written to decomposition
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the non-negative length of c's decomposition, if there is one; otherwise a negative value
* @stable ICU 4.6
*/
U_STABLE int32_t U_EXPORT2
unorm2_getDecomposition(const UNormalizer2 *norm2,
UChar32 c, UChar *decomposition, int32_t capacity,
UErrorCode *pErrorCode);
/**
* Gets the raw decomposition mapping of c.
*
* This is similar to the unorm2_getDecomposition() function but returns the
* raw decomposition mapping as specified in UnicodeData.txt or
* (for custom data) in the mapping files processed by the gennorm2 tool.
* By contrast, unorm2_getDecomposition() returns the processed,
* recursively-decomposed version of this mapping.
*
* When used on a standard NFKC UNormalizer2 instance,
* unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
*
* When used on a standard NFC UNormalizer2 instance,
* it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
* in this case, the result contains either one or two code points (=1..4 UChars).
*
* This function is independent of the mode of the UNormalizer2.
* @param norm2 UNormalizer2 instance
* @param c code point
* @param decomposition String buffer which will be set to c's
* raw decomposition mapping, if there is one.
* @param capacity number of UChars that can be written to decomposition
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value
* @stable ICU 49
*/
U_STABLE int32_t U_EXPORT2
unorm2_getRawDecomposition(const UNormalizer2 *norm2,
UChar32 c, UChar *decomposition, int32_t capacity,
UErrorCode *pErrorCode);
/**
* Performs pairwise composition of a and b and returns the composite if there is one.
*
* Returns a composite code point c only if c has a two-way mapping to a+b.
* In standard Unicode normalization, this means that
* c has a canonical decomposition to a+b
* and c does not have the Full_Composition_Exclusion property.
*
* This function is independent of the mode of the UNormalizer2.
* @param norm2 UNormalizer2 instance
* @param a A (normalization starter) code point.
* @param b Another code point.
* @return The non-negative composite code point if there is one; otherwise a negative value.
* @stable ICU 49
*/
U_STABLE UChar32 U_EXPORT2
unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b);
/**
* Gets the combining class of c.
* The default implementation returns 0,
* but all standard implementations return the Unicode Canonical_Combining_Class value.
* @param norm2 UNormalizer2 instance
* @param c code point
* @return c's combining class
* @stable ICU 49
*/
U_STABLE uint8_t U_EXPORT2
unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c);
/**
* Tests if the string is normalized.
* Internally, in cases where unorm2_quickCheck() would return "maybe"
* (which is only possible for the two COMPOSE modes) this function
* resolves to "yes" or "no" to provide a definitive result,
* at the cost of doing more work in those cases.
* @param norm2 UNormalizer2 instance
* @param s input string
* @param length length of the string, or -1 if NUL-terminated
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return TRUE if s is normalized
* @stable ICU 4.4
*/
U_STABLE UBool U_EXPORT2
unorm2_isNormalized(const UNormalizer2 *norm2,
const UChar *s, int32_t length,
UErrorCode *pErrorCode);
/**
* Tests if the string is normalized.
* For the two COMPOSE modes, the result could be "maybe" in cases that
* would take a little more work to resolve definitively.
* Use unorm2_spanQuickCheckYes() and unorm2_normalizeSecondAndAppend() for a faster
* combination of quick check + normalization, to avoid
* re-checking the "yes" prefix.
* @param norm2 UNormalizer2 instance
* @param s input string
* @param length length of the string, or -1 if NUL-terminated
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return UNormalizationCheckResult (UNORM_NO, UNORM_YES or UNORM_MAYBE)
* @stable ICU 4.4
*/
U_STABLE UNormalizationCheckResult U_EXPORT2
unorm2_quickCheck(const UNormalizer2 *norm2,
const UChar *s, int32_t length,
UErrorCode *pErrorCode);
/**
* Returns the end of the normalized substring of the input string.
* In other words, with
* end=unorm2_spanQuickCheckYes(norm2, s, length, pErrorCode);
* the substring consisting of the first end UChars of s
* will pass the quick check with a "yes" result.
*
* The returned end index is usually one or more characters before the
* "no" or "maybe" character: The end index is at a normalization boundary.
* (See the Normalizer2 class documentation for more about normalization boundaries.)
*
* When the goal is a normalized string and most input strings are expected
* to be normalized already, then call this function,
* and if it returns a prefix shorter than the input string,
* copy that prefix and use unorm2_normalizeSecondAndAppend() for the remainder.
* @param norm2 UNormalizer2 instance
* @param s input string
* @param length length of the string, or -1 if NUL-terminated
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return "yes" span end index
* @stable ICU 4.4
*/
U_STABLE int32_t U_EXPORT2
unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
const UChar *s, int32_t length,
UErrorCode *pErrorCode);
/**
* Tests if the character always has a normalization boundary before it,
* regardless of context.
* For details see the documentation of the Normalizer2 C++ base class.
* @param norm2 UNormalizer2 instance
* @param c character (code point) to test
* @return TRUE if c has a normalization boundary before it
* @stable ICU 4.4
*/
U_STABLE UBool U_EXPORT2
unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
/**
* Tests if the character always has a normalization boundary after it,
* regardless of context.
* For details see the documentation of the Normalizer2 C++ base class.
* @param norm2 UNormalizer2 instance
* @param c character (code point) to test
* @return TRUE if c has a normalization boundary after it
* @stable ICU 4.4
*/
U_STABLE UBool U_EXPORT2
unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
/**
* Tests if the character is normalization-inert.
* For details see the documentation of the Normalizer2 C++ base class.
* @param norm2 UNormalizer2 instance
* @param c character (code point) to test
* @return TRUE if c is normalization-inert
* @stable ICU 4.4
*/
U_STABLE UBool U_EXPORT2
unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
/**
* Option bit for unorm_compare:
* Both input strings are assumed to fulfill FCD conditions.
* @stable ICU 2.2
*/
#define UNORM_INPUT_IS_FCD 0x20000
/**
* Option bit for unorm_compare:
* Perform case-insensitive comparison.
* @stable ICU 2.2
*/
#define U_COMPARE_IGNORE_CASE 0x10000
#ifndef U_COMPARE_CODE_POINT_ORDER
/* see also unistr.h and ustring.h; the guard keeps this header from redefining the macro if it is already defined */
/**
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
* Compare strings in code point order instead of code unit order.
* @stable ICU 2.2
*/
#define U_COMPARE_CODE_POINT_ORDER 0x8000
#endif
/**
* Compares two strings for canonical equivalence.
* Further options include case-insensitive comparison and
* code point order (as opposed to code unit order).
*
* Canonical equivalence between two strings is defined as their normalized
* forms (NFD or NFC) being identical.
* This function compares strings incrementally instead of normalizing
* (and optionally case-folding) both strings entirely,
* improving performance significantly.
*
* Bulk normalization is only necessary if the strings do not fulfill the FCD
* conditions. Only in this case, and only if the strings are relatively long,
* is memory allocated temporarily.
* For FCD strings and short non-FCD strings there is no memory allocation.
*
* Semantically, this is equivalent to
* strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2))))
* where code point order and foldCase are all optional.
*
* UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
* the case folding must be performed first, then the normalization.
*
* @param s1 First source string.
* @param length1 Length of first source string, or -1 if NUL-terminated.
*
* @param s2 Second source string.
* @param length2 Length of second source string, or -1 if NUL-terminated.
*
* @param options A bit set of options:
* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
* Case-sensitive comparison in code unit order, and the input strings
* are quick-checked for FCD.
*
* - UNORM_INPUT_IS_FCD
* Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
* If not set, the function will quick-check for FCD
* and normalize if necessary.
*
* - U_COMPARE_CODE_POINT_ORDER
* Set to choose code point order instead of code unit order
* (see u_strCompare for details).
*
* - U_COMPARE_IGNORE_CASE
* Set to compare strings case-insensitively using case folding,
* instead of case-sensitively.
* If set, then the following case folding options are used.
*
* - Options as used with case-insensitive comparisons, currently:
*
* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
* (see u_strCaseCompare for details)
*
* - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
*
* @param pErrorCode ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
* @return a value <0, 0 or >0, as usual for string comparisons
*
* @see unorm_normalize
* @see UNORM_FCD
* @see u_strCompare
* @see u_strCaseCompare
*
* @stable ICU 2.2
*/
U_STABLE int32_t U_EXPORT2
unorm_compare(const UChar *s1, int32_t length1,
const UChar *s2, int32_t length2,
uint32_t options,
UErrorCode *pErrorCode);
#endif /* !UCONFIG_NO_NORMALIZATION */
#endif /* __UNORM2_H__ */
// ucnvsel.h
/*
*******************************************************************************
*
* Copyright (C) 2008-2011, International Business Machines
* Corporation, Google and others. All Rights Reserved.
*
*******************************************************************************
*/
/*
* Author : eldawy@google.com (Mohamed Eldawy)
* ucnvsel.h
*
* Purpose: To generate a list of encodings capable of handling
* a given Unicode text
*
* Started 09-April-2008
*/
#ifndef __ICU_UCNV_SEL_H__
#define __ICU_UCNV_SEL_H__
#if !UCONFIG_NO_CONVERSION
/**
* \file
*
* A converter selector is built with a set of encoding/charset names
* and given an input string returns the set of names of the
* corresponding converters which can convert the string.
*
* A converter selector can be serialized into a buffer and reopened
* from the serialized form.
*/
/**
* @{
* The selector data structure.
* Only the struct tag is declared here, so the type is opaque to callers;
* pointers to it are returned by ucnvsel_open() and
* ucnvsel_openFromSerialized() and are released with ucnvsel_close().
*/
struct UConverterSelector;
typedef struct UConverterSelector UConverterSelector;
/** @} */
/**
* Open a selector.
* If converterListSize is 0, build for all available converters.
* If excludedCodePoints is NULL, don't exclude any code points.
*
* @param converterList a pointer to the encoding names to be included.
* Can be NULL if converterListSize==0.
* The list and the names will be cloned, and the caller
* retains ownership of the original.
* @param converterListSize number of encodings in above list.
* If 0, builds a selector for all available converters.
* @param excludedCodePoints a set of code points to be excluded from consideration.
* That is, excluded code points in a string do not change
* the selection result. (They might be handled by a callback.)
* Use NULL to exclude nothing.
* @param whichSet which converter set to use. Use this to determine whether
* to consider only roundtrip mappings or also fallbacks.
* @param status an in/out ICU UErrorCode
* @return the new selector; close it with ucnvsel_close()
*
* @stable ICU 4.2
*/
U_STABLE UConverterSelector* U_EXPORT2
ucnvsel_open(const char* const* converterList, int32_t converterListSize,
const USet* excludedCodePoints,
const UConverterUnicodeSet whichSet, UErrorCode* status);
/**
* Closes a selector.
* If any Enumerations were returned by ucnvsel_selectForString() or
* ucnvsel_selectForUTF8(), they become invalid.
* They can be closed before or after calling ucnvsel_close(),
* but should never be used after the selector is closed.
*
* @see ucnvsel_selectForString
* @see ucnvsel_selectForUTF8
*
* @param sel selector to close
*
* @stable ICU 4.2
*/
U_STABLE void U_EXPORT2
ucnvsel_close(UConverterSelector *sel);
/**
* Open a selector from its serialized form.
* The buffer must remain valid and unchanged for the lifetime of the selector.
* This is much faster than creating a selector from scratch.
* Using a serialized form from a different machine (endianness/charset) is supported.
*
* @param buffer pointer to the serialized form of a converter selector;
* must be 32-bit-aligned
* @param length the capacity of this buffer (can be equal to or larger than
* the actual data length)
* @param status an in/out ICU UErrorCode
* @return the new selector; close it with ucnvsel_close()
*
* @stable ICU 4.2
*/
U_STABLE UConverterSelector* U_EXPORT2
ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
/**
* Serialize a selector into a linear buffer.
* The serialized form is portable to different machines.
*
* @param sel selector to consider
* @param buffer pointer to 32-bit-aligned memory to be filled with the
* serialized form of this converter selector
* @param bufferCapacity the capacity of this buffer
* @param status an in/out ICU UErrorCode
* @return the required buffer capacity to hold the serialized data (even if the call fails
* with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
*
* @stable ICU 4.2
*/
U_STABLE int32_t U_EXPORT2
ucnvsel_serialize(const UConverterSelector* sel,
void* buffer, int32_t bufferCapacity, UErrorCode* status);
/**
* Select converters that can map all characters in a UTF-16 string,
* ignoring the excluded code points.
*
* @param sel a selector
* @param s UTF-16 string
* @param length length of the string, or -1 if NUL-terminated
* @param status an in/out ICU UErrorCode
* @return an enumeration containing encoding names.
* The returned encoding names and their order will be the same as
* supplied when building the selector.
* The enumeration becomes invalid once the selector is closed
* with ucnvsel_close().
*
* @stable ICU 4.2
*/
U_STABLE UEnumeration * U_EXPORT2
ucnvsel_selectForString(const UConverterSelector* sel,
const UChar *s, int32_t length, UErrorCode *status);
/**
* Select converters that can map all characters in a UTF-8 string,
* ignoring the excluded code points.
*
* @param sel a selector
* @param s UTF-8 string
* @param length length of the string, or -1 if NUL-terminated
* @param status an in/out ICU UErrorCode
* @return an enumeration containing encoding names.
* The returned encoding names and their order will be the same as
* supplied when building the selector.
* The enumeration becomes invalid once the selector is closed
* with ucnvsel_close().
*
* @stable ICU 4.2
*/
U_STABLE UEnumeration * U_EXPORT2
ucnvsel_selectForUTF8(const UConverterSelector* sel,
const char *s, int32_t length, UErrorCode *status);
#endif /* !UCONFIG_NO_CONVERSION */
#endif /* __ICU_UCNV_SEL_H__ */
// ucat.h
/*
**********************************************************************
* Copyright (c) 2003-2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: March 19 2003
* Since: ICU 2.6
**********************************************************************
*/
#ifndef UCAT_H
#define UCAT_H
/**
* \file
* \brief C API: Message Catalog Wrappers
*
* This C API provides look-alike functions that deliberately resemble
* the POSIX catopen, catclose, and catgets functions. The underlying
* implementation is in terms of ICU resource bundles, rather than
* POSIX message catalogs.
*
* The ICU resource bundles obey standard ICU inheritance policies.
* To facilitate this, sets and messages are flattened into one tier.
* This is done by creating resource bundle keys of the form
* <set_num>%<msg_num> where set_num is the set number and msg_num is
* the message number, formatted as decimal strings.
*
* Example: Consider a message catalog containing two sets:
*
* Set 1: Message 4 = "Good morning."
* Message 5 = "Good afternoon."
* Message 7 = "Good evening."
* Message 8 = "Good night."
* Set 4: Message 14 = "Please "
* Message 19 = "Thank you."
* Message 20 = "Sincerely,"
*
* The ICU resource bundle source file, assuming it is named
* "greet.txt", would look like this:
*
* greet
* {
* 1%4 { "Good morning." }
* 1%5 { "Good afternoon." }
* 1%7 { "Good evening." }
* 1%8 { "Good night." }
*
* 4%14 { "Please " }
* 4%19 { "Thank you." }
* 4%20 { "Sincerely," }
* }
*
* The catgets function is commonly used in combination with functions
* like printf and strftime. ICU components like message format can
* be used instead, although they use a different format syntax.
* There is an ICU package, icuio, that provides some of
* the POSIX-style formatting API.
*/
U_CDECL_BEGIN
/**
* An ICU message catalog descriptor, analogous to nl_catd.
* It is a pointer to a UResourceBundle; descriptors are obtained from
* u_catopen() and released with u_catclose().
*
* @stable ICU 2.6
*/
typedef UResourceBundle* u_nl_catd;
/**
* Open and return an ICU message catalog descriptor. The descriptor
* may be passed to u_catgets() to retrieve localized strings.
*
* @param name string containing the full path pointing to the
* directory where the resources reside, followed by the package name,
* e.g. "/usr/resource/my_app/resources/guimessages" on a Unix system.
* If NULL, ICU default data files will be used.
*
* Unlike POSIX, environment variables are not interpolated within the
* name.
*
* @param locale the locale for which we want to open the resource. If
* NULL, the default ICU locale will be used (see uloc_getDefault). If
* strlen(locale) == 0, the root locale will be used.
*
* @param ec input/output error code. Upon output,
* U_USING_FALLBACK_WARNING indicates that a fallback locale was
* used. For example, 'de_CH' was requested, but nothing was found
* there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that the
* default locale data or root locale data was used; neither the
* requested locale nor any of its fallback locales were found.
*
* @return a message catalog descriptor that may be passed to
* u_catgets(). If the ec parameter indicates success, then the caller
* is responsible for calling u_catclose() to close the message
* catalog. If the ec parameter indicates failure, then NULL will be
* returned.
*
* @stable ICU 2.6
*/
U_STABLE u_nl_catd U_EXPORT2
u_catopen(const char* name, const char* locale, UErrorCode* ec);
/**
* Close an ICU message catalog, given its descriptor.
*
* @param catd a message catalog descriptor to be closed, as returned
* by u_catopen(). May be NULL, in which case no action is taken.
*
* @stable ICU 2.6
*/
U_STABLE void U_EXPORT2
u_catclose(u_nl_catd catd);
/**
* Retrieve a localized string from an ICU message catalog.
*
* @param catd a message catalog descriptor returned by u_catopen.
*
* @param set_num the message catalog set number. Sets need not be
* numbered consecutively.
*
* @param msg_num the message catalog message number within the
* set. Messages need not be numbered consecutively.
*
* @param s the default string. This is returned if the string
* specified by the set_num and msg_num is not found. It must be
* zero-terminated.
*
* @param len fill-in parameter to receive the length of the result.
* May be NULL, in which case it is ignored.
*
* @param ec input/output error code. May be U_USING_FALLBACK_WARNING
* or U_USING_DEFAULT_WARNING. U_MISSING_RESOURCE_ERROR indicates that
* the set_num/msg_num tuple does not specify a valid message string
* in this catalog.
*
* @return a pointer to a zero-terminated UChar array which lives in
* an internal buffer area, typically a memory mapped/DLL file. The
* caller must NOT delete this pointer. If the call is unsuccessful
* for any reason, then s is returned. This includes the situation in
* which ec indicates a failing error code upon entry to this
* function.
*
* @stable ICU 2.6
*/
U_STABLE const UChar* U_EXPORT2
u_catgets(u_nl_catd catd, int32_t set_num, int32_t msg_num,
const UChar* s,
int32_t* len, UErrorCode* ec);
U_CDECL_END
#endif /*UCAT_H*/
/*eof*/
// ubidi.h
/*
******************************************************************************
*
* Copyright (C) 1999-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: ubidi.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999jul27
* created by: Markus W. Scherer, updated by Matitiahu Allouche
*/
#ifndef UBIDI_H
#define UBIDI_H
/**
*\file
* \brief C API: Bidi algorithm
*
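* Bidi algorithm for ICU
*
* This is an implementation of the Unicode Bidirectional Algorithm.
* The algorithm is defined in the Unicode Standard Annex #9
* (http://www.unicode.org/unicode/reports/tr9/).
*
* Note: Libraries that perform a bidirectional algorithm and
* reorder strings accordingly are sometimes called "Storage Layout Engines".
* ICU's Bidi and shaping (u_shapeArabic()) APIs can be used at the core of such
* "Storage Layout Engines".
*
* General remarks about the API:
*
* In functions with an error code parameter,
* the pErrorCode pointer must be valid
* and the value that it points to must not indicate a failure before
* the function call. Otherwise, the function returns immediately.
* After the function call, the value indicates success or failure.
*
* The "limit" of a sequence of characters is the position just after their
* last character, i.e., one more than that position.
*
* Some of the API functions provide access to "runs".
* Such a "run" is defined as a sequence of characters
* that are at the same embedding level
* after performing the Bidi algorithm.
*
* @author Markus W. Scherer
* @version 1.0
*
*
* Sample code for the ICU Bidi API
*
* Rendering a paragraph with the ICU Bidi API
*
* This is (hypothetical) sample code that illustrates
* how the ICU Bidi API could be used to render a paragraph of text.
* Rendering code depends highly on the graphics system,
* therefore this sample code must make a lot of assumptions,
* which may or may not match any existing graphics system's properties.
*
* The basic assumptions are:
* - Rendering is done from left to right on a horizontal line.
* - A run of single-style, unidirectional text can be rendered at once.
* - Such a run of text is passed to the graphics system with
*   characters (code units) in logical order.
* - The line-breaking algorithm is very complicated
*   and Locale-dependent -
*   and therefore its implementation omitted from this sample code.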
*
* \code
*#include "unicode/ubidi.h"
*
*typedef enum {
* styleNormal=0, styleSelected=1,
* styleBold=2, styleItalics=4,
* styleSuper=8, styleSub=16
*} Style;
*
*typedef struct { int32_t limit; Style style; } StyleRun;
*
*int getTextWidth(const UChar *text, int32_t start, int32_t limit,
* const StyleRun *styleRuns, int styleRunCount);
*
* // set *pLimit and *pStyleRunLimit for a line
* // from text[start] and from styleRuns[styleRunStart]
* // using ubidi_getLogicalRun(para, ...)
*void getLineBreak(const UChar *text, int32_t start, int32_t *pLimit,
* UBiDi *para,
* const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit,
* int *pLineWidth);
*
* // render runs on a line sequentially, always from left to right
*
* // prepare rendering a new line
* void startLine(UBiDiDirection textDirection, int lineWidth);
*
* // render a run of text and advance to the right by the run width
* // the text[start..limit-1] is always in logical order
* void renderRun(const UChar *text, int32_t start, int32_t limit,
* UBiDiDirection textDirection, Style style);
*
* // We could compute a cross-product
* // from the style runs with the directional runs
* // and then reorder it.
* // Instead, here we iterate over each run type
* // and render the intersections -
* // with shortcuts in simple (and common) cases.
* // renderParagraph() is the main function.
*
* // render a directional run with
* // (possibly) multiple style runs intersecting with it
* void renderDirectionalRun(const UChar *text,
* int32_t start, int32_t limit,
* UBiDiDirection direction,
* const StyleRun *styleRuns, int styleRunCount) {
* int i;
*
* // iterate over style runs
* if(direction==UBIDI_LTR) {
* int styleLimit;
*
* for(i=0; i<styleRunCount; ++i) {
* styleLimit=styleRun[i].limit;
* if(start<styleLimit) {
* if(styleLimit>limit) { styleLimit=limit; }
* renderRun(text, start, styleLimit,
* direction, styleRun[i].style);
* if(styleLimit==limit) { break; }
* start=styleLimit;
* }
* }
* } else {
* int styleStart;
*
* for(i=styleRunCount-1; i>=0; --i) {
* if(i>0) {
* styleStart=styleRun[i-1].limit;
* } else {
* styleStart=0;
* }
* if(limit>=styleStart) {
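* if(styleStart<start) { styleStart=start; }
* renderRun(text, styleStart, limit,
* direction, styleRun[i].style);
* if(styleStart==start) { break; }
* limit=styleStart;
* }
* }
* }
* }
*
* // the line object represents text[start..limit-1]
* void renderLine(UBiDi *line, const UChar *text,
* int32_t start, int32_t limit,
* const StyleRun *styleRuns, int styleRunCount) {
* UBiDiDirection direction=ubidi_getDirection(line);
* if(direction!=UBIDI_MIXED) {
* // unidirectional
* if(styleRunCount<=1) {
* renderRun(text, start, limit, direction, styleRuns[0].style);
* } else {
* renderDirectionalRun(text, start, limit,
* direction, styleRuns, styleRunCount);
* }
* } else {
* // mixed-directional
* int32_t count, i, length;
* UBiDiLevel level;
*
* count=ubidi_countRuns(para, pErrorCode);
* if(U_SUCCESS(*pErrorCode)) {
* if(styleRunCount<=1) {
* Style style=styleRuns[0].style;
*
* // iterate over directional runs
* for(i=0; i<count; ++i) {
* direction=ubidi_getVisualRun(para, i, &start, &length);
* renderRun(text, start, start+length, direction, style);
* }
* } else {
* int32_t j;
*
* // iterate over both directional and style runs
* for(i=0; i<count; ++i) {
* direction=ubidi_getVisualRun(line, i, &start, &length);
* renderDirectionalRun(text, start, start+length,
* direction, styleRuns, styleRunCount);
* }
* }
* }
* }
* }
*
*void renderParagraph(const UChar *text, int32_t length,
* UBiDiDirection textDirection,
* const StyleRun *styleRuns, int styleRunCount,
* int lineWidth,
* UErrorCode *pErrorCode) {
* UBiDi *para;
*
* if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || text==NULL || length<=0) {
* return;
* }
*
* para=ubidi_openSized(length, 0, pErrorCode);
* if(para==NULL) { return; }
*
* ubidi_setPara(para, text, length,
* textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR,
* NULL, pErrorCode);
* if(U_SUCCESS(*pErrorCode)) {
* UBiDiLevel paraLevel=1&ubidi_getParaLevel(para);
* StyleRun styleRun={ length, styleNormal };
* int width;
*
* if(styleRuns==NULL || styleRunCount<=0) {
* styleRunCount=1;
* styleRuns=&styleRun;
* }
*
* // assume styleRuns[styleRunCount-1].limit>=length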
*
* width=getTextWidth(text, 0, length, styleRuns, styleRunCount);
* if(width<=lineWidth) {
* // everything fits onto one line
*
* // prepare rendering a new line from either left or right
* startLine(paraLevel, width);
*
* renderLine(para, text, 0, length,
* styleRuns, styleRunCount);
* } else {
* UBiDi *line;
*
* // we need to render several lines
* line=ubidi_openSized(length, 0, pErrorCode);
* if(line!=NULL) {
* int32_t start=0, limit;
* int styleRunStart=0, styleRunLimit;
*
* for(;;) {
* limit=length;
* styleRunLimit=styleRunCount;
* getLineBreak(text, start, &limit, para,
* styleRuns, styleRunStart, &styleRunLimit,
* &width);
* ubidi_setLine(para, start, limit, line, pErrorCode);
* if(U_SUCCESS(*pErrorCode)) {
* // prepare rendering a new line
* // from either left or right
* startLine(paraLevel, width);
*
* renderLine(line, text, start, limit,
* styleRuns+styleRunStart,
* styleRunLimit-styleRunStart);
* }
* if(limit==length) { break; }
* start=limit;
* styleRunStart=styleRunLimit-1;
* if(start>=styleRuns[styleRunStart].limit) {
* ++styleRunStart;
* }
* }
*
* ubidi_close(line);
* }
* }
* }
*
* ubidi_close(para);
*}
*\endcode
*
*/
/*DOCXX_TAG*/
/*@{*/
/**
* UBiDiLevel is the type of the level values in this
* Bidi implementation.
* It holds an embedding level and indicates the visual direction
* by its bit 0 (even/odd value).
*
* It can also hold non-level values for the
* paraLevel and embeddingLevels
* arguments of ubidi_setPara(); there:
* - bit 7 of an embeddingLevels[] value indicates whether the using
*   application is specifying the level of a character to override
*   whatever the Bidi implementation would resolve it to
*   (see UBIDI_LEVEL_OVERRIDE).
* - paraLevel can be set to the pseudo-level values UBIDI_DEFAULT_LTR
*   and UBIDI_DEFAULT_RTL.
*
* The related constants are not real, valid level values.
* UBIDI_DEFAULT_XXX can be used to specify
* a default for the paragraph level for
* when the ubidi_setPara() function
* shall determine it but there is no
* strongly typed character in the input.
*
* Note that the value for UBIDI_DEFAULT_LTR is even
* and the one for UBIDI_DEFAULT_RTL is odd,
* just like with normal LTR and RTL level values -
* these special values are designed that way. Also, the implementation
* assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
*
* @see ubidi_setPara
* @see UBIDI_DEFAULT_LTR
* @see UBIDI_DEFAULT_RTL
* @see UBIDI_LEVEL_OVERRIDE
* @see UBIDI_MAX_EXPLICIT_LEVEL
* @stable ICU 2.0
*/
typedef uint8_t UBiDiLevel;
/** Paragraph level setting.
*
* Constant indicating that the base direction depends on the first strong
* directional character in the text according to the Unicode Bidirectional
* Algorithm. If no strong directional character is present,
* then set the paragraph level to 0 (left-to-right).
*
* If this value is used in conjunction with reordering modes
* UBIDI_REORDER_INVERSE_LIKE_DIRECT or
* UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL, the text to reorder
* is assumed to be visual LTR, and the text after reordering is required
* to be the corresponding logical string with appropriate contextual
* direction. The direction of the result string will be RTL if either
* the rightmost or leftmost strong character of the source text is RTL
* or Arabic Letter, the direction will be LTR otherwise.
*
* If reordering option UBIDI_OPTION_INSERT_MARKS is set, an RLM may
* be added at the beginning of the result string to ensure round trip
* (that the result string, when reordered back to visual, will produce
* the original source text).
*
* This is a pseudo-level, not a real level value; it is even, like
* normal LTR levels.
* @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
* @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
* @stable ICU 2.0
*/
#define UBIDI_DEFAULT_LTR 0xfe
/** Paragraph level setting.
*
* Constant indicating that the base direction depends on the first strong
* directional character in the text according to the Unicode Bidirectional
* Algorithm. If no strong directional character is present,
* then set the paragraph level to 1 (right-to-left).
*
* If this value is used in conjunction with reordering modes
* UBIDI_REORDER_INVERSE_LIKE_DIRECT or
* UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL, the text to reorder
* is assumed to be visual LTR, and the text after reordering is required
* to be the corresponding logical string with appropriate contextual
* direction. The direction of the result string will be RTL if either
* the rightmost or leftmost strong character of the source text is RTL
* or Arabic Letter, or if the text contains no strong character;
* the direction will be LTR otherwise.
*
* If reordering option UBIDI_OPTION_INSERT_MARKS is set, an RLM may
* be added at the beginning of the result string to ensure round trip
* (that the result string, when reordered back to visual, will produce
* the original source text).
*
* This is a pseudo-level, not a real level value; it is odd, like
* normal RTL levels.
* @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
* @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
* @stable ICU 2.0
*/
#define UBIDI_DEFAULT_RTL 0xff
/**
* Maximum explicit embedding level.
* (The maximum resolved level can be up to UBIDI_MAX_EXPLICIT_LEVEL+1).
* The implementation assumes that this value is odd.
* @see UBiDiLevel
* @stable ICU 2.0
*/
#define UBIDI_MAX_EXPLICIT_LEVEL 125
/** Bit flag for level input.
* Overrides directional properties.
* This is bit 7 (0x80) of a UBiDiLevel value. When it is set in an
* embeddingLevels[] entry passed to ubidi_setPara(), the using application
* is specifying the level of that character to override whatever the
* Bidi implementation would resolve it to.
* @see UBiDiLevel
* @stable ICU 2.0
*/
#define UBIDI_LEVEL_OVERRIDE 0x80
/**
* Special value which can be returned by the mapping functions when a logical
* index has no corresponding visual index or vice-versa. This may happen
* for the logical-to-visual mapping of a Bidi control when option
* #UBIDI_OPTION_REMOVE_CONTROLS is specified. This can also happen
* for the visual-to-logical mapping of a Bidi mark (LRM or RLM) inserted
* by option #UBIDI_OPTION_INSERT_MARKS.
*
* The value is negative, so it can never collide with a real index.
* @see ubidi_getVisualIndex
* @see ubidi_getVisualMap
* @see ubidi_getLogicalIndex
* @see ubidi_getLogicalMap
* @stable ICU 3.6
*/
#define UBIDI_MAP_NOWHERE (-1)
/**
* UBiDiDirection values indicate the text direction.
*
* The numeric values are part of the binary interface:
* UBIDI_LTR is 0, UBIDI_RTL is 1, UBIDI_MIXED is 2 and UBIDI_NEUTRAL is 3.
* @stable ICU 2.0
*/
enum UBiDiDirection {
/** Left-to-right text. This is a 0 value.
* - As return value for ubidi_getDirection(), it means
*   that the source string contains no right-to-left characters, or
*   that the source string is empty and the paragraph level is even.
* - As return value for ubidi_getBaseDirection(), it
*   means that the first strong character of the source string has
*   a left-to-right direction.
* @stable ICU 2.0
*/
UBIDI_LTR,
/** Right-to-left text. This is a 1 value.
* - As return value for ubidi_getDirection(), it means
*   that the source string contains no left-to-right characters, or
*   that the source string is empty and the paragraph level is odd.
* - As return value for ubidi_getBaseDirection(), it
*   means that the first strong character of the source string has
*   a right-to-left direction.
* @stable ICU 2.0
*/
UBIDI_RTL,
/** Mixed-directional text.
* As return value for ubidi_getDirection(), it means
* that the source string contains both left-to-right and
* right-to-left characters.
* @stable ICU 2.0
*/
UBIDI_MIXED,
/** No strongly directional text.
* As return value for ubidi_getBaseDirection(), it means
* that the source string is missing or empty, or contains neither left-to-right
* nor right-to-left characters.
* @stable ICU 4.6
*/
UBIDI_NEUTRAL
};
/** @stable ICU 2.0 */
typedef enum UBiDiDirection UBiDiDirection;
/**
* Forward declaration of the UBiDi structure for the declaration of
* the API functions. Its fields are implementation-specific.
*
* This structure holds information about a paragraph (or multiple paragraphs)
* of text with Bidi-algorithm-related details, or about one line of
* such a paragraph.
*
* Reordering can be done on a line, or on one or more paragraphs which are
* then interpreted each as one single line.
* @stable ICU 2.0
*/
struct UBiDi;
/** @stable ICU 2.0 */
typedef struct UBiDi UBiDi;
/**
* Allocate a UBiDi structure.
* Such an object is initially empty. It is assigned
* the Bidi properties of a piece of text containing one or more paragraphs
* by ubidi_setPara()
* or the Bidi properties of a line within a paragraph by
* ubidi_setLine().
*
* This object can be reused for as long as it is not deallocated
* by calling ubidi_close().
*
* ubidi_setPara() and ubidi_setLine() will allocate
* additional memory for internal structures as necessary.
*
* @return An empty UBiDi object.
* @see ubidi_openSized
* @see ubidi_close
* @stable ICU 2.0
*/
U_STABLE UBiDi * U_EXPORT2
ubidi_open(void);
/**
* Allocate a UBiDi structure with preallocated memory
* for internal structures.
* This function provides a UBiDi object like ubidi_open()
* with no arguments, but it also preallocates memory for internal structures
* according to the sizings supplied by the caller.
*
* Subsequent functions will not allocate any more memory, and are thus
* guaranteed not to fail because of lack of memory.
*
* The preallocation can be limited to some of the internal memory
* by setting some values to 0 here. That means that if, e.g.,
* maxRunCount cannot be reasonably predetermined and should not
* be set to maxLength (the only failproof value) to avoid
* wasting memory, then maxRunCount could be set to 0 here
* and the internal structures that are associated with it will be allocated
* on demand, just like with ubidi_open().
*
* @param maxLength is the maximum text or line length that internal memory
* will be preallocated for. An attempt to associate this object with a
* longer text will fail, unless this value is 0, which leaves the allocation
* up to the implementation.
*
* @param maxRunCount is the maximum anticipated number of same-level runs
* that internal memory will be preallocated for. An attempt to access
* visual runs on an object that was not preallocated for as many runs
* as the text was actually resolved to will fail,
* unless this value is 0, which leaves the allocation up to the implementation.
* The number of runs depends on the actual text and may be anywhere between
* 1 and maxLength. It is typically small.
*
* @param pErrorCode must be a valid pointer to an error code value.
*
* @return An empty UBiDi object with preallocated memory.
* @see ubidi_open
* @see ubidi_close
* @stable ICU 2.0
*/
U_STABLE UBiDi * U_EXPORT2
ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode);
/**
* ubidi_close() must be called to free the memory
* associated with a UBiDi object.
*
* Important:
* A parent UBiDi object must not be destroyed or reused if
* it still has children.
* If a UBiDi object has become the child
* of another one (its parent) by calling
* ubidi_setLine(), then the child object must
* be destroyed (closed) or reused (by calling
* ubidi_setPara() or ubidi_setLine())
* before the parent object.
*
* @param pBiDi is a UBiDi object.
*
* @see ubidi_open
* @see ubidi_openSized
* @see ubidi_setPara
* @see ubidi_setLine
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ubidi_close(UBiDi *pBiDi);
/**
* Modify the operation of the Bidi algorithm such that it
* approximates an "inverse Bidi" algorithm. This function
* must be called before ubidi_setPara().
*
* The normal operation of the Bidi algorithm as described
* in the Unicode Technical Report is to take text stored in logical
* (keyboard, typing) order and to determine the reordering of it for visual
* rendering.
* Some legacy systems store text in visual order, and for operations
* with standard, Unicode-based algorithms, the text needs to be transformed
* to logical order. This is effectively the inverse algorithm of the
* described Bidi algorithm. Note that there is no standard algorithm for
* this "inverse Bidi" and that the current implementation provides only an
* approximation of "inverse Bidi".
*
* With isInverse set to TRUE,
* this function changes the behavior of some of the subsequent functions
* in a way that they can be used for the inverse Bidi algorithm.
* Specifically, runs of text with numeric characters will be treated in a
* special way and may need to be surrounded with LRM characters when they are
* written in reordered sequence.
*
* Output runs should be retrieved using ubidi_getVisualRun().
* Since the actual input for "inverse Bidi" is visually ordered text and
* ubidi_getVisualRun() gets the reordered runs, these are actually
* the runs of the logically ordered output.
*
* Calling this function with argument isInverse set to
* TRUE is equivalent to calling
* ubidi_setReorderingMode with argument
* reorderingMode
* set to #UBIDI_REORDER_INVERSE_NUMBERS_AS_L.
* Calling this function with argument isInverse set to
* FALSE is equivalent to calling
* ubidi_setReorderingMode with argument
* reorderingMode
* set to #UBIDI_REORDER_DEFAULT.
*
* @param pBiDi is a UBiDi object.
*
* @param isInverse specifies "forward" or "inverse" Bidi operation.
*
* @see ubidi_setPara
* @see ubidi_writeReordered
* @see ubidi_setReorderingMode
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ubidi_setInverse(UBiDi *pBiDi, UBool isInverse);
/**
* Is this Bidi object set to perform the inverse Bidi algorithm?
*
* Note: calling this function after setting the reordering mode with
* ubidi_setReorderingMode will return TRUE if the
* reordering mode was set to #UBIDI_REORDER_INVERSE_NUMBERS_AS_L,
* FALSE for all other values.
*
* @param pBiDi is a UBiDi object.
* @return TRUE if the Bidi object is set to perform the inverse Bidi algorithm
* by handling numbers as L.
*
* @see ubidi_setInverse
* @see ubidi_setReorderingMode
* @stable ICU 2.0
*/
U_STABLE UBool U_EXPORT2
ubidi_isInverse(UBiDi *pBiDi);
/**
* Specify whether block separators must be allocated level zero,
* so that successive paragraphs will progress from left to right.
* This function must be called before ubidi_setPara().
*
* Paragraph separators (B) may appear in the text. Setting them to level zero
* means that all paragraph separators (including one possibly appearing
* in the last text position) are kept in the reordered text after the text
* that they follow in the source text.
*
* When this feature is not enabled, a paragraph separator at the last
* position of the text before reordering will go to the first position
* of the reordered text when the paragraph level is odd.
*
* @param pBiDi is a UBiDi object.
*
* @param orderParagraphsLTR specifies whether paragraph separators (B) must
* receive level 0, so that successive paragraphs progress from left to right.
*
* @see ubidi_setPara
* @see ubidi_isOrderParagraphsLTR
* @stable ICU 3.4
*/
U_STABLE void U_EXPORT2
ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR);
/**
* Is this Bidi object set to allocate level 0 to block separators so that
* successive paragraphs progress from left to right?
*
* This reports the setting made with ubidi_orderParagraphsLTR().
*
* @param pBiDi is a UBiDi object.
* @return TRUE if the Bidi object is set to allocate level 0 to block
* separators.
*
* @see ubidi_orderParagraphsLTR
* @stable ICU 3.4
*/
U_STABLE UBool U_EXPORT2
ubidi_isOrderParagraphsLTR(UBiDi *pBiDi);
/**
* Selects the variant of the Bidi algorithm that a UBiDi object applies.
* Values of this type are passed to ubidi_setReorderingMode() and
* returned by ubidi_getReorderingMode().
*
* Every enumerator carries an explicit value so that the numbering
* cannot shift if an entry is ever lost or moved.
*
* @see ubidi_setReorderingMode
* @stable ICU 3.6
*/
typedef enum UBiDiReorderingMode {
/** The regular Unicode Logical to Visual Bidi algorithm (value 0).
* @stable ICU 3.6 */
UBIDI_REORDER_DEFAULT = 0,
/** Logical to Visual variant whose treatment of numbers mimics
* the behavior of Windows XP.
* @stable ICU 3.6 */
UBIDI_REORDER_NUMBERS_SPECIAL = 1,
/** Logical to Visual variant that groups numbers with adjacent
* R characters (reversible algorithm).
* @stable ICU 3.6 */
UBIDI_REORDER_GROUP_NUMBERS_WITH_R = 2,
/** Only runs are reordered, turning a Logical LTR string into the
* Logical RTL string with the same display, or vice-versa.
* If this mode is set together with option
* #UBIDI_OPTION_INSERT_MARKS, some Bidi controls in the source
* text may be removed and other controls may be added to produce the
* minimum combination which has the required display.
* @stable ICU 3.6 */
UBIDI_REORDER_RUNS_ONLY = 3,
/** Visual to Logical algorithm that treats numbers like L
* (the same algorithm that ubidi_setInverse(TRUE) selects).
* @see ubidi_setInverse
* @stable ICU 3.6 */
UBIDI_REORDER_INVERSE_NUMBERS_AS_L = 4,
/** Visual to Logical counterpart of the regular Logical to Visual
* algorithm.
* @stable ICU 3.6 */
UBIDI_REORDER_INVERSE_LIKE_DIRECT = 5,
/** Inverse (Visual to Logical) counterpart of the
* UBIDI_REORDER_NUMBERS_SPECIAL Bidi algorithm.
* @stable ICU 3.6 */
UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL = 6,
/** Number of reordering mode values; not a mode itself.
* @stable ICU 3.6 */
UBIDI_REORDER_COUNT = 7
} UBiDiReorderingMode;
/**
* Modify the operation of the Bidi algorithm such that it implements some
* variant to the basic Bidi algorithm or approximates an "inverse Bidi"
* algorithm, depending on different values of the "reordering mode".
* This function must be called before ubidi_setPara(), and stays
* in effect until called again with a different argument.
*
* The normal operation of the Bidi algorithm as described
* in the Unicode Standard Annex #9 is to take text stored in logical
* (keyboard, typing) order and to determine how to reorder it for visual
* rendering.
*
* With the reordering mode set to a value other than
* #UBIDI_REORDER_DEFAULT, this function changes the behavior of
* some of the subsequent functions in a way such that they implement an
* inverse Bidi algorithm or some other algorithm variants.
*
* Some legacy systems store text in visual order, and for operations
* with standard, Unicode-based algorithms, the text needs to be transformed
* into logical order. This is effectively the inverse algorithm of the
* described Bidi algorithm. Note that there is no standard algorithm for
* this "inverse Bidi", so a number of variants are implemented here.
*
* In other cases, it may be desirable to emulate some variant of the
* Logical to Visual algorithm (e.g. one used in MS Windows), or perform a
* Logical to Logical transformation.
*
* - When the reordering mode is set to #UBIDI_REORDER_DEFAULT,
* the standard Bidi Logical to Visual algorithm is applied.
*
* - When the reordering mode is set to
* #UBIDI_REORDER_NUMBERS_SPECIAL,
* the algorithm used to perform Bidi transformations when calling
* ubidi_setPara should approximate the algorithm used in
* Microsoft Windows XP rather than strictly conform to the Unicode Bidi
* algorithm.
* The differences between the basic algorithm and the algorithm addressed
* by this option are as follows:
*   - Within text at an even embedding level, the sequence "123AB"
*     (where AB represent R or AL letters) is transformed to "123BA" by the
*     Unicode algorithm and to "BA123" by the Windows algorithm.
*   - Arabic-Indic numbers (AN) are handled by the Windows algorithm just
*     like regular numbers (EN).
*
* - When the reordering mode is set to
* #UBIDI_REORDER_GROUP_NUMBERS_WITH_R,
* numbers located between LTR text and RTL text are associated with the RTL
* text. For instance, an LTR paragraph with content "abc 123 DEF" (where
* upper case letters represent RTL characters) will be transformed to
* "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed
* to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc".
* This makes the algorithm reversible and makes it useful when round trip
* (from visual to logical and back to visual) must be achieved without
* adding LRM characters. However, this is a variation from the standard
* Unicode Bidi algorithm.
* The source text should not contain Bidi control characters other than LRM
* or RLM.
*
* - When the reordering mode is set to
* #UBIDI_REORDER_RUNS_ONLY,
* a "Logical to Logical" transformation must be performed:
*   - If the default text level of the source text (argument paraLevel
*     in ubidi_setPara) is even, the source text will be handled as
*     LTR logical text and will be transformed to the RTL logical text which has
*     the same LTR visual display.
*   - If the default level of the source text is odd, the source text
*     will be handled as RTL logical text and will be transformed to the
*     LTR logical text which has the same LTR visual display.
* This mode may be needed when logical text which is basically Arabic or
* Hebrew, with possible included numbers or phrases in English, has to be
* displayed as if it had an even embedding level (this can happen if the
* displaying application treats all text as if it was basically LTR).
* This mode may also be needed in the reverse case, when logical text which is
* basically English, with possible included phrases in Arabic or Hebrew, has to
* be displayed as if it had an odd embedding level.
* Both cases could be handled by adding LRE or RLE at the head of the text,
* if the display subsystem supports these formatting controls. If it does not,
* the problem may be handled by transforming the source text in this mode
* before displaying it, so that it will be displayed properly.
* Also, this mode allows to deal with embedding levels.
* The source text should not contain Bidi control characters other than LRM
* or RLM.
*
* - When the reordering mode is set to
* #UBIDI_REORDER_INVERSE_NUMBERS_AS_L, an "inverse Bidi" algorithm
* is applied.
* Runs of text with numeric characters will be treated like LTR letters and
* may need to be surrounded with LRM characters when they are written in
* reordered sequence (the option #UBIDI_INSERT_LRM_FOR_NUMERIC can
* be used with function ubidi_writeReordered to this end). This
* mode is equivalent to calling ubidi_setInverse() with
* argument isInverse set to TRUE.
*
* - When the reordering mode is set to
* #UBIDI_REORDER_INVERSE_LIKE_DIRECT, the "direct" Logical to Visual
* Bidi algorithm is used as an approximation of an "inverse Bidi" algorithm.
* This mode is similar to mode #UBIDI_REORDER_INVERSE_NUMBERS_AS_L
* but is closer to the regular Bidi algorithm.
* For example, an LTR paragraph with the content "FED 123 456 CBA" (where
* upper case represents RTL characters) will be transformed to
* "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC"
* with mode UBIDI_REORDER_INVERSE_NUMBERS_AS_L.
* When used in conjunction with option
* #UBIDI_OPTION_INSERT_MARKS, this mode generally
* adds Bidi marks to the output significantly more sparingly than mode
* #UBIDI_REORDER_INVERSE_NUMBERS_AS_L with option
* #UBIDI_INSERT_LRM_FOR_NUMERIC in calls to
* ubidi_writeReordered.
*
* - When the reordering mode is set to
* #UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL, the Logical to Visual
* Bidi algorithm used in Windows XP is used as an approximation of an "inverse Bidi" algorithm.
* For example, an LTR paragraph with the content "abc FED123" (where
* upper case represents RTL characters) will be transformed to "abc 123DEF."
*
* In all the reordering modes specifying an "inverse Bidi" algorithm
* (i.e. those with a name starting with UBIDI_REORDER_INVERSE),
* output runs should be retrieved using
* ubidi_getVisualRun(), and the output text with
* ubidi_writeReordered(). The caller should keep in mind that in
* "inverse Bidi" modes the input is actually visually ordered text and
* reordered output returned by ubidi_getVisualRun() or
* ubidi_writeReordered() are actually runs or character string
* of logically ordered output.
* For all the "inverse Bidi" modes, the source text should not contain
* Bidi control characters other than LRM or RLM.
*
* Note that option #UBIDI_OUTPUT_REVERSE of
* ubidi_writeReordered has no useful meaning and should not be
* used in conjunction with any value of the reordering mode specifying
* "inverse Bidi" or with value UBIDI_REORDER_RUNS_ONLY.
*
* @param pBiDi is a UBiDi object.
* @param reorderingMode specifies the required variant of the Bidi algorithm.
*
* @see UBiDiReorderingMode
* @see ubidi_setInverse
* @see ubidi_setPara
* @see ubidi_writeReordered
* @stable ICU 3.6
*/
U_STABLE void U_EXPORT2
ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode);
/**
* What is the requested reordering mode for a given Bidi object?
*
* This reports the mode selected with ubidi_setReorderingMode().
*
* @param pBiDi is a UBiDi object.
* @return the current reordering mode of the Bidi object
* @see UBiDiReorderingMode
* @see ubidi_setReorderingMode
* @stable ICU 3.6
*/
U_STABLE UBiDiReorderingMode U_EXPORT2
ubidi_getReorderingMode(UBiDi *pBiDi);
/**
* UBiDiReorderingOption values indicate which options are
* specified to affect the Bidi algorithm.
*
* UBIDI_OPTION_DEFAULT is the "no options" value; the other enumerators
* are distinct bits (1, 2, 4) that can be combined with bitwise OR.
*
* @see ubidi_setReorderingOptions
* @stable ICU 3.6
*/
typedef enum UBiDiReorderingOption {
/**
* option value for ubidi_setReorderingOptions:
* disable all the options which can be set with this function
* @see ubidi_setReorderingOptions
* @stable ICU 3.6
*/
UBIDI_OPTION_DEFAULT = 0,
/**
* option bit for ubidi_setReorderingOptions:
* insert Bidi marks (LRM or RLM) when needed to ensure correct result of
* a reordering to a Logical order
*
* This option must be set or reset before calling
* ubidi_setPara.
*
* This option is significant only with reordering modes which generate
* a result with Logical order, specifically:
* - #UBIDI_REORDER_RUNS_ONLY
* - #UBIDI_REORDER_INVERSE_NUMBERS_AS_L
* - #UBIDI_REORDER_INVERSE_LIKE_DIRECT
* - #UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
*
* If this option is set in conjunction with reordering mode
* #UBIDI_REORDER_INVERSE_NUMBERS_AS_L or with calling
* ubidi_setInverse(TRUE), it implies
* option #UBIDI_INSERT_LRM_FOR_NUMERIC
* in calls to function ubidi_writeReordered().
*
* For other reordering modes, a minimum number of LRM or RLM characters
* will be added to the source text after reordering it so as to ensure
* round trip, i.e. when applying the inverse reordering mode on the
* resulting logical text with removal of Bidi marks
* (option #UBIDI_OPTION_REMOVE_CONTROLS set before calling
* ubidi_setPara() or option #UBIDI_REMOVE_BIDI_CONTROLS
* in ubidi_writeReordered), the result will be identical to the
* source text in the first transformation.
*
* This option will be ignored if specified together with option
* #UBIDI_OPTION_REMOVE_CONTROLS. It inhibits option
* UBIDI_REMOVE_BIDI_CONTROLS in calls to function
* ubidi_writeReordered() and it implies option
* #UBIDI_INSERT_LRM_FOR_NUMERIC in calls to function
* ubidi_writeReordered() if the reordering mode is
* #UBIDI_REORDER_INVERSE_NUMBERS_AS_L.
*
* @see ubidi_setReorderingMode
* @see ubidi_setReorderingOptions
* @stable ICU 3.6
*/
UBIDI_OPTION_INSERT_MARKS = 1,
/**
* option bit for ubidi_setReorderingOptions:
* remove Bidi control characters
*
* This option must be set or reset before calling
* ubidi_setPara.
*
* This option nullifies option #UBIDI_OPTION_INSERT_MARKS.
* It inhibits option #UBIDI_INSERT_LRM_FOR_NUMERIC in calls
* to function ubidi_writeReordered() and it implies option
* #UBIDI_REMOVE_BIDI_CONTROLS in calls to that function.
*
* @see ubidi_setReorderingMode
* @see ubidi_setReorderingOptions
* @stable ICU 3.6
*/
UBIDI_OPTION_REMOVE_CONTROLS = 2,
/**
* option bit for ubidi_setReorderingOptions:
* process the output as part of a stream to be continued
*
* This option must be set or reset before calling
* ubidi_setPara.
*
* This option specifies that the caller is interested in processing large
* text object in parts.
* The results of the successive calls are expected to be concatenated by the
* caller. Only the call for the last part will have this option bit off.
*
* When this option bit is on, ubidi_setPara() may process
* less than the full source text in order to truncate the text at a meaningful
* boundary. The caller should call ubidi_getProcessedLength()
* immediately after calling ubidi_setPara() in order to
* determine how much of the source text has been processed.
* Source text beyond that length should be resubmitted in following calls to
* ubidi_setPara. The processed length may be less than
* the length of the source text if a character preceding the last character of
* the source text constitutes a reasonable boundary (like a block separator)
* for text to be continued.
* If the last character of the source text constitutes a reasonable
* boundary, the whole text will be processed at once.
* If nowhere in the source text there exists
* such a reasonable boundary, the processed length will be zero.
* The caller should check for such an occurrence and do one of the following:
* - submit a larger amount of text with a better chance to include
*   a reasonable boundary.
* - resubmit the same text after turning off option
*   UBIDI_OPTION_STREAMING.
* In all cases, this option should be turned off before processing the last
* part of the text.
*
* When the UBIDI_OPTION_STREAMING option is used,
* it is recommended to call ubidi_orderParagraphsLTR() with
* argument orderParagraphsLTR set to TRUE before
* calling ubidi_setPara so that later paragraphs may be
* concatenated to previous paragraphs on the right.
*
* @see ubidi_setReorderingMode
* @see ubidi_setReorderingOptions
* @see ubidi_getProcessedLength
* @see ubidi_orderParagraphsLTR
* @stable ICU 3.6
*/
UBIDI_OPTION_STREAMING = 4
} UBiDiReorderingOption;
/**
* Specify which of the reordering options
* should be applied during Bidi transformations.
*
* @param pBiDi is a UBiDi object.
* @param reorderingOptions is a combination of zero or more of the following
* options:
* #UBIDI_OPTION_DEFAULT, #UBIDI_OPTION_INSERT_MARKS,
* #UBIDI_OPTION_REMOVE_CONTROLS, #UBIDI_OPTION_STREAMING.
*
* @see ubidi_getReorderingOptions
* @stable ICU 3.6
*/
U_STABLE void U_EXPORT2
ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions); /* reorderingOptions: combination of zero or more UBIDI_OPTION_* values */
/**
* What are the reordering options applied to a given Bidi object?
*
* This reports the options selected with ubidi_setReorderingOptions().
*
* @param pBiDi is a UBiDi object.
* @return the current reordering options of the Bidi object
* @see ubidi_setReorderingOptions
* @stable ICU 3.6
*/
U_STABLE uint32_t U_EXPORT2
ubidi_getReorderingOptions(UBiDi *pBiDi);
/**
* Set the context before a call to ubidi_setPara().
*
* ubidi_setPara() computes the left-right directionality for a given piece
* of text which is supplied as one of its arguments. Sometimes this piece
* of text (the "main text") should be considered in context, because text
* appearing before ("prologue") and/or after ("epilogue") the main text
* may affect the result of this computation.
*
* This function specifies the prologue and/or the epilogue for the next
* call to ubidi_setPara(). The characters specified as prologue and
* epilogue should not be modified by the calling program until the call
* to ubidi_setPara() has returned. If successive calls to ubidi_setPara()
* all need specification of a context, ubidi_setContext() must be called
* before each call to ubidi_setPara(). In other words, a context is not
* "remembered" after the following successful call to ubidi_setPara().
*
* If a call to ubidi_setPara() specifies UBIDI_DEFAULT_LTR or
* UBIDI_DEFAULT_RTL as paraLevel and is preceded by a call to
* ubidi_setContext() which specifies a prologue, the paragraph level will
* be computed taking in consideration the text in the prologue.
*
* When ubidi_setPara() is called without a previous call to
* ubidi_setContext, the main text is handled as if preceded and followed
* by strong directional characters at the current paragraph level.
* Calling ubidi_setContext() with specification of a prologue will change
* this behavior by handling the main text as if preceded by the last
* strong character appearing in the prologue, if any.
* Calling ubidi_setContext() with specification of an epilogue will change
* the behavior of ubidi_setPara() by handling the main text as if followed
* by the first strong character or digit appearing in the epilogue, if any.
*
* Note 1: if ubidi_setContext is called repeatedly without
* calling ubidi_setPara, the earlier calls have no effect,
* only the last call will be remembered for the next call to
* ubidi_setPara.
*
* Note 2: calling ubidi_setContext(pBiDi, NULL, 0, NULL, 0, &errorCode)
* cancels any previous setting of non-empty prologue or epilogue.
* The next call to ubidi_setPara() will process no
* prologue or epilogue.
*
* Note 3: users must be aware that even after setting the context
* before a call to ubidi_setPara() to perform e.g. a logical to visual
* transformation, the resulting string may not be identical to what it
* would have been if all the text, including prologue and epilogue, had
* been processed together.
* Example (upper case letters represent RTL characters):
* prologue = "abc DE"
* epilogue = none
* main text = "FGH xyz"
* paraLevel = UBIDI_LTR
* display without prologue = "HGF xyz"
* ("HGF" is adjacent to "xyz")
* display with prologue = "abc HGFED xyz"
* ("HGF" is not adjacent to "xyz")
*
* @param pBiDi is a paragraph UBiDi object.
*
* @param prologue is a pointer to the text which precedes the text that
* will be specified in a coming call to ubidi_setPara().
* If there is no prologue to consider, then proLength
* must be zero and this pointer can be NULL.
*
* @param proLength is the length of the prologue; if proLength==-1
* then the prologue must be zero-terminated.
* Otherwise proLength must be >= 0. If proLength==0, it means
* that there is no prologue to consider.
*
* @param epilogue is a pointer to the text which follows the text that
* will be specified in a coming call to ubidi_setPara().
* If there is no epilogue to consider, then epiLength
* must be zero and this pointer can be NULL.
*
* @param epiLength is the length of the epilogue; if epiLength==-1
* then the epilogue must be zero-terminated.
* Otherwise epiLength must be >= 0. If epiLength==0, it means
* that there is no epilogue to consider.
*
* @param pErrorCode must be a valid pointer to an error code value.
*
* @see ubidi_setPara
* @stable ICU 4.8
*/
U_STABLE void U_EXPORT2
ubidi_setContext(UBiDi *pBiDi,
const UChar *prologue, int32_t proLength,
const UChar *epilogue, int32_t epiLength,
UErrorCode *pErrorCode);
/**
* Perform the Unicode Bidi algorithm. It is defined in the
* Unicode Standard Annex #9,
* version 13,
* also described in The Unicode Standard, Version 4.0.
*
* This function takes a piece of plain text containing one or more paragraphs,
* with or without externally specified embedding levels from styled
* text and computes the left-right-directionality of each character.
*
* If the entire text is all of the same directionality, then
* the function may not perform all the steps described by the algorithm,
* i.e., some levels may not be the same as if all steps were performed.
* This is not relevant for unidirectional text.
* For example, in pure LTR text with numbers the numbers would get
* a resolved level of 2 higher than the surrounding text according to
* the algorithm. This implementation may set all resolved levels to
* the same value in such a case.
*
* The text can be composed of multiple paragraphs. Occurrence of a block
* separator in the text terminates a paragraph, and whatever comes next starts
* a new paragraph. The exception to this rule is when a Carriage Return (CR)
* is followed by a Line Feed (LF). Both CR and LF are block separators, but
* in that case, the pair of characters is considered as terminating the
* preceding paragraph, and a new paragraph will be started by a character
* coming after the LF.
*
* @param pBiDi A UBiDi object allocated with ubidi_open()
* which will be set to contain the reordering information,
* especially the resolved levels for all the characters in text.
*
* @param text is a pointer to the text that the Bidi algorithm will be performed on.
* This pointer is stored in the UBiDi object and can be retrieved
* with ubidi_getText().
* Note: the text must be (at least) length long.
*
* @param length is the length of the text; if length==-1 then
* the text must be zero-terminated.
*
* @param paraLevel specifies the default level for the text;
* it is typically 0 (LTR) or 1 (RTL).
* If the function shall determine the paragraph level from the text,
* then paraLevel can be set to
* either #UBIDI_DEFAULT_LTR
* or #UBIDI_DEFAULT_RTL; if the text contains multiple
* paragraphs, the paragraph level shall be determined separately for
* each paragraph; if a paragraph does not include any strongly typed
* character, then the desired default is used (0 for LTR or 1 for RTL).
* Any other value between 0 and #UBIDI_MAX_EXPLICIT_LEVEL
* is also valid, with odd levels indicating RTL.
*
* @param embeddingLevels (in) may be used to preset the embedding and override levels,
* ignoring characters like LRE and PDF in the text.
* A level overrides the directional property of its corresponding
* (same index) character if the level has the
* #UBIDI_LEVEL_OVERRIDE bit set.
* Except for that bit, it must be
* paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL,
* with one exception: a level of zero may be specified for a paragraph
* separator even if paraLevel>0 when multiple paragraphs
* are submitted in the same call to ubidi_setPara().
* Caution: A copy of this pointer, not of the levels,
* will be stored in the UBiDi object;
* the embeddingLevels array must not be
* deallocated before the UBiDi structure is destroyed or reused,
* and the embeddingLevels
* should not be modified to avoid unexpected results on subsequent Bidi operations.
* However, the ubidi_setPara() and
* ubidi_setLine() functions may modify some or all of the levels.
* After the UBiDi object is reused or destroyed, the caller
* must take care of the deallocation of the embeddingLevels array.
* Note: the embeddingLevels array must be
* at least length long.
* This pointer can be NULL if this
* value is not necessary.
*
* @param pErrorCode must be a valid pointer to an error code value.
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
UErrorCode *pErrorCode);
/**
* ubidi_setLine() sets a UBiDi to
* contain the reordering information, especially the resolved levels,
* for all the characters in a line of text. This line of text is
* specified by referring to a UBiDi object representing
* this information for a piece of text containing one or more paragraphs,
* and by specifying a range of indexes in this text.
* In the new line object, the indexes will range from 0 to limit-start-1.
*
* This is used after calling ubidi_setPara()
* for a piece of text, and after line-breaking on that text.
* It is not necessary if each paragraph is treated as a single line.
*
* After line-breaking, rules (L1) and (L2) for the treatment of
* trailing WS and for reordering are performed on
* a UBiDi object that represents a line.
*
* Important: pLineBiDi shares data with
* pParaBiDi.
* You must destroy or reuse pLineBiDi before pParaBiDi.
* In other words, you must destroy or reuse the UBiDi object for a line
* before the object for its parent paragraph.
*
* The text pointer that was stored in
*
* @param pBiDi is the paragraph or line
*
* @param pBiDi is the paragraph
*
* Note that this function may allocate memory under some
* circumstances, unlike
* This is especially useful for line-breaking on a paragraph.
*
* @param pBiDi is the paragraph or line
*
* Use of
*
* The value returned may be
* When the visual output is altered by using options of
*
* Note that in right-to-left runs, this mapping places
* second surrogates before first ones (which is generally a bad idea)
* and combining characters before base characters.
* Use of
*
* The value returned may be
* This is the inverse function to
* When the visual output is altered by using options of
*
* Some values in the map may be
* When the visual output is altered by using options of
*
* Note that in right-to-left runs, this mapping places
* second surrogates before first ones (which is generally a bad idea)
* and combining characters before base characters.
* Use of
* Some values in the map may be
* When the visual output is altered by using options of
*
* The index map will result in
* The index map will result in This option does not imply corresponding adjustment of the index
* mappings. This option does not imply corresponding adjustment of the index
* mappings. This has the same effect as calling Usually, the function pointer will be propagated to a If a This may be useful for assigning Bidi classes to PUA characters, or
* for special application needs. For instance, an application may want to
* handle all spaces like L or R characters (according to the base direction)
* when creating the visual ordering of logical lines which are part of a report
* organized in columns: there should not be interaction between adjacent
* cells.
*
* @param pBiDi is the paragraph The line, offset, and context fields are optional; parsing
* engines may choose not to use them.
*
* The preContext and postContext strings include some part of the
* context surrounding the error. If the source text is "let for=7"
* and "for" is the error (e.g., because it is a reserved word), then
* some examples of what a parser might produce are the following:
*
* Examples of engines which use UParseError (or may use it in the
* future) are Transliterator, RuleBasedBreakIterator, and
* RegexPattern.
*
* @stable ICU 2.0
*/
typedef struct UParseError {
    /**
     * Line number of the error. A parser that uses this field stores the
     * number of the source text line on which the error appears, which is
     * a value >= 1. A parser without line number support leaves a
     * value <= 0 here.
     * @stable ICU 2.0
     */
    int32_t line;

    /**
     * Character offset of the error. Counted from the start of the line
     * when the line field is >= 1, otherwise from the start of the text.
     * A value < 0 means the parser does not support this field.
     * @stable ICU 2.0
     */
    int32_t offset;

    /**
     * Null-terminated textual context preceding the error.
     * The empty string if the parser does not support it.
     * @stable ICU 2.0
     */
    UChar preContext[U_PARSE_CONTEXT_LEN];

    /**
     * Null-terminated text of the error itself and/or the textual context
     * following the error. The empty string if the parser does not
     * support it.
     * @stable ICU 2.0
     */
    UChar postContext[U_PARSE_CONTEXT_LEN];
} UParseError;
#endif
// usprep.h
/*
*******************************************************************************
*
* Copyright (C) 2003-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: usprep.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003jul2
* created by: Ram Viswanadha
*/
#ifndef __USPREP_H__
#define __USPREP_H__
/**
* \file
* \brief C API: Implements the StringPrep algorithm.
*/
/**
*
* StringPrep API implements the StringPrep framework as described by RFC 3454.
* StringPrep prepares Unicode strings for use in network protocols.
* Profiles of StringPrep are sets of rules and data according to which the
* Unicode Strings are prepared. Each profile contains tables which describe
* how a code point should be treated. The tables are broadly classified into
* This option is ignored by the IDNA2003 implementation.
* (IDNA2003 always performs a BiDi check.)
* @stable ICU 4.6
*/
UIDNA_CHECK_BIDI=4,
/**
* IDNA option to check for whether the input conforms to the CONTEXTJ rules.
* For use in static worker and factory methods.
* This option is ignored by the IDNA2003 implementation.
* (The CONTEXTJ check is new in IDNA2008.)
* @stable ICU 4.6
*/
UIDNA_CHECK_CONTEXTJ=8,
/**
* IDNA option for nontransitional processing in ToASCII().
* For use in static worker and factory methods.
* By default, ToASCII() uses transitional processing.
* This option is ignored by the IDNA2003 implementation.
* (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
* @stable ICU 4.6
*/
UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
/**
* IDNA option for nontransitional processing in ToUnicode().
* For use in static worker and factory methods.
* By default, ToUnicode() uses transitional processing.
* This option is ignored by the IDNA2003 implementation.
* (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
* @stable ICU 4.6
*/
UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
/**
* IDNA option to check for whether the input conforms to the CONTEXTO rules.
* For use in static worker and factory methods.
* This option is ignored by the IDNA2003 implementation.
* (The CONTEXTO check is new in IDNA2008.)
* This is for use by registries for IDNA2008 conformance.
* UTS #46 does not require the CONTEXTO check.
* @stable ICU 49
*/
UIDNA_CHECK_CONTEXTO=0x40
};
/**
 * Forward declaration of the opaque C service object type used by the
 * new IDNA API. No members are declared in this header.
 * @stable ICU 4.6
 */
struct UIDNA;

/** C typedef for struct UIDNA. @stable ICU 4.6 */
typedef struct UIDNA UIDNA;
/**
 * Creates a UIDNA instance that implements UTS #46.
 *
 * The returned instance is unmodifiable and owned by the caller: cache it
 * for multiple operations and uidna_close() it when done. The instance is
 * thread-safe, that is, it can be used concurrently.
 *
 * Details of the UTS #46 implementation are described with the IDNA C++
 * class in idna.h.
 *
 * @param options    Bit set that modifies the processing and error checking.
 *                   See the option bit set values in uidna.h.
 * @param pErrorCode Standard ICU error code. On input it must pass the
 *                   U_SUCCESS() test, or else the function returns
 *                   immediately. On output, check for U_FAILURE() or use it
 *                   with function chaining. (See User Guide for details.)
 * @return the UTS #46 UIDNA instance, if successful
 * @stable ICU 4.6
 */
U_STABLE UIDNA * U_EXPORT2
uidna_openUTS46(
    uint32_t options,
    UErrorCode *pErrorCode);
/**
 * Closes a UIDNA instance.
 *
 * @param idna The UIDNA instance to be closed
 * @stable ICU 4.6
 */
U_STABLE void U_EXPORT2 uidna_close(UIDNA *idna);
/**
 * Output container that receives IDNA processing errors.
 * Initialize it with UIDNA_INFO_INITIALIZER:
 * \code
 * UIDNAInfo info = UIDNA_INFO_INITIALIZER;
 * int32_t length = uidna_nameToASCII(..., &info, &errorCode);
 * if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
 * \endcode
 * @stable ICU 4.6
 */
typedef struct UIDNAInfo {
    /** Holds sizeof(UIDNAInfo). @stable ICU 4.6 */
    int16_t size;

    /**
     * TRUE if transitional and nontransitional processing produce
     * different results.
     * See C++ IDNAInfo::isTransitionalDifferent() for details.
     * @stable ICU 4.6
     */
    UBool isTransitionalDifferent;

    /** Reserved field, do not use. @internal */
    UBool reservedB3;

    /**
     * Bit set indicating IDNA processing errors; 0 if there are none.
     * See the UIDNA_ERROR_... constants.
     * @stable ICU 4.6
     */
    uint32_t errors;

    /** Reserved field, do not use. @internal */
    int32_t reservedI2;

    /** Reserved field, do not use. @internal */
    int32_t reservedI3;
} UIDNAInfo;
/**
 * Static initializer for a UIDNAInfo struct: a brace-enclosed list that
 * stores sizeof(UIDNAInfo) first, followed by FALSE, FALSE, 0, 0, 0 for
 * the remaining members.
 * @stable ICU 4.6
 */
#define UIDNA_INFO_INITIALIZER \
    { (int16_t)sizeof(UIDNAInfo), FALSE, FALSE, 0, 0, 0 }
/**
 * Converts one domain name label into its ASCII form for DNS lookup.
 *
 * When any processing step fails, pInfo->errors is non-zero and the result
 * might not be an ASCII string. The label might be modified according to
 * the types of errors; labels with severe errors are left in (or turned
 * into) their Unicode form.
 *
 * The UErrorCode reports an error only in exceptional cases, such as a
 * U_MEMORY_ALLOCATION_ERROR.
 *
 * @param idna       UIDNA instance
 * @param label      Input domain name label
 * @param length     Label length, or -1 if NUL-terminated
 * @param dest       Destination string buffer
 * @param capacity   Destination buffer capacity
 * @param pInfo      Output container of IDNA processing details.
 * @param pErrorCode Standard ICU error code. On input it must pass the
 *                   U_SUCCESS() test, or else the function returns
 *                   immediately. On output, check for U_FAILURE() or use it
 *                   with function chaining. (See User Guide for details.)
 * @return destination string length
 * @stable ICU 4.6
 */
U_STABLE int32_t U_EXPORT2
uidna_labelToASCII(
    const UIDNA *idna,
    const UChar *label,
    int32_t length,
    UChar *dest,
    int32_t capacity,
    UIDNAInfo *pInfo,
    UErrorCode *pErrorCode);
/**
 * Converts one domain name label into its Unicode form for human-readable
 * display.
 *
 * When any processing step fails, pInfo->errors is non-zero. The label
 * might be modified according to the types of errors.
 *
 * The UErrorCode reports an error only in exceptional cases, such as a
 * U_MEMORY_ALLOCATION_ERROR.
 *
 * @param idna       UIDNA instance
 * @param label      Input domain name label
 * @param length     Label length, or -1 if NUL-terminated
 * @param dest       Destination string buffer
 * @param capacity   Destination buffer capacity
 * @param pInfo      Output container of IDNA processing details.
 * @param pErrorCode Standard ICU error code. On input it must pass the
 *                   U_SUCCESS() test, or else the function returns
 *                   immediately. On output, check for U_FAILURE() or use it
 *                   with function chaining. (See User Guide for details.)
 * @return destination string length
 * @stable ICU 4.6
 */
U_STABLE int32_t U_EXPORT2
uidna_labelToUnicode(
    const UIDNA *idna,
    const UChar *label,
    int32_t length,
    UChar *dest,
    int32_t capacity,
    UIDNAInfo *pInfo,
    UErrorCode *pErrorCode);
/**
 * Converts a complete domain name into its ASCII form for DNS lookup.
 *
 * When any processing step fails, pInfo->errors is non-zero and the result
 * might not be an ASCII string. The domain name might be modified according
 * to the types of errors; labels with severe errors are left in (or turned
 * into) their Unicode form.
 *
 * The UErrorCode reports an error only in exceptional cases, such as a
 * U_MEMORY_ALLOCATION_ERROR.
 *
 * @param idna       UIDNA instance
 * @param name       Input domain name
 * @param length     Domain name length, or -1 if NUL-terminated
 * @param dest       Destination string buffer
 * @param capacity   Destination buffer capacity
 * @param pInfo      Output container of IDNA processing details.
 * @param pErrorCode Standard ICU error code. On input it must pass the
 *                   U_SUCCESS() test, or else the function returns
 *                   immediately. On output, check for U_FAILURE() or use it
 *                   with function chaining. (See User Guide for details.)
 * @return destination string length
 * @stable ICU 4.6
 */
U_STABLE int32_t U_EXPORT2
uidna_nameToASCII(
    const UIDNA *idna,
    const UChar *name,
    int32_t length,
    UChar *dest,
    int32_t capacity,
    UIDNAInfo *pInfo,
    UErrorCode *pErrorCode);
/**
 * Converts a complete domain name into its Unicode form for human-readable
 * display.
 *
 * When any processing step fails, pInfo->errors is non-zero. The domain
 * name might be modified according to the types of errors.
 *
 * The UErrorCode reports an error only in exceptional cases, such as a
 * U_MEMORY_ALLOCATION_ERROR.
 *
 * @param idna       UIDNA instance
 * @param name       Input domain name
 * @param length     Domain name length, or -1 if NUL-terminated
 * @param dest       Destination string buffer
 * @param capacity   Destination buffer capacity
 * @param pInfo      Output container of IDNA processing details.
 * @param pErrorCode Standard ICU error code. On input it must pass the
 *                   U_SUCCESS() test, or else the function returns
 *                   immediately. On output, check for U_FAILURE() or use it
 *                   with function chaining. (See User Guide for details.)
 * @return destination string length
 * @stable ICU 4.6
 */
U_STABLE int32_t U_EXPORT2
uidna_nameToUnicode(
    const UIDNA *idna,
    const UChar *name,
    int32_t length,
    UChar *dest,
    int32_t capacity,
    UIDNAInfo *pInfo,
    UErrorCode *pErrorCode);
/* UTF-8 versions of the processing methods --------------------------------- */
/**
 * UTF-8 version of uidna_labelToASCII() with the same behavior: converts
 * one domain name label into its ASCII form for DNS lookup.
 *
 * @param idna       UIDNA instance
 * @param label      Input domain name label
 * @param length     Label length, or -1 if NUL-terminated
 * @param dest       Destination string buffer
 * @param capacity   Destination buffer capacity
 * @param pInfo      Output container of IDNA processing details.
 * @param pErrorCode Standard ICU error code. On input it must pass the
 *                   U_SUCCESS() test, or else the function returns
 *                   immediately. On output, check for U_FAILURE() or use it
 *                   with function chaining. (See User Guide for details.)
 * @return destination string length
 * @stable ICU 4.6
 */
U_STABLE int32_t U_EXPORT2
uidna_labelToASCII_UTF8(
    const UIDNA *idna,
    const char *label,
    int32_t length,
    char *dest,
    int32_t capacity,
    UIDNAInfo *pInfo,
    UErrorCode *pErrorCode);
/**
 * UTF-8 version of uidna_labelToUnicode() with the same behavior: converts
 * one domain name label into its Unicode form for human-readable display.
 *
 * @param idna       UIDNA instance
 * @param label      Input domain name label
 * @param length     Label length, or -1 if NUL-terminated
 * @param dest       Destination string buffer
 * @param capacity   Destination buffer capacity
 * @param pInfo      Output container of IDNA processing details.
 * @param pErrorCode Standard ICU error code. On input it must pass the
 *                   U_SUCCESS() test, or else the function returns
 *                   immediately. On output, check for U_FAILURE() or use it
 *                   with function chaining. (See User Guide for details.)
 * @return destination string length
 * @stable ICU 4.6
 */
U_STABLE int32_t U_EXPORT2
uidna_labelToUnicodeUTF8(
    const UIDNA *idna,
    const char *label,
    int32_t length,
    char *dest,
    int32_t capacity,
    UIDNAInfo *pInfo,
    UErrorCode *pErrorCode);
/**
 * UTF-8 version of uidna_nameToASCII() with the same behavior: converts a
 * complete domain name into its ASCII form for DNS lookup.
 *
 * @param idna       UIDNA instance
 * @param name       Input domain name
 * @param length     Domain name length, or -1 if NUL-terminated
 * @param dest       Destination string buffer
 * @param capacity   Destination buffer capacity
 * @param pInfo      Output container of IDNA processing details.
 * @param pErrorCode Standard ICU error code. On input it must pass the
 *                   U_SUCCESS() test, or else the function returns
 *                   immediately. On output, check for U_FAILURE() or use it
 *                   with function chaining. (See User Guide for details.)
 * @return destination string length
 * @stable ICU 4.6
 */
U_STABLE int32_t U_EXPORT2
uidna_nameToASCII_UTF8(
    const UIDNA *idna,
    const char *name,
    int32_t length,
    char *dest,
    int32_t capacity,
    UIDNAInfo *pInfo,
    UErrorCode *pErrorCode);
/**
 * UTF-8 version of uidna_nameToUnicode() with the same behavior: converts a
 * complete domain name into its Unicode form for human-readable display.
 *
 * @param idna       UIDNA instance
 * @param name       Input domain name
 * @param length     Domain name length, or -1 if NUL-terminated
 * @param dest       Destination string buffer
 * @param capacity   Destination buffer capacity
 * @param pInfo      Output container of IDNA processing details.
 * @param pErrorCode Standard ICU error code. On input it must pass the
 *                   U_SUCCESS() test, or else the function returns
 *                   immediately. On output, check for U_FAILURE() or use it
 *                   with function chaining. (See User Guide for details.)
 * @return destination string length
 * @stable ICU 4.6
 */
U_STABLE int32_t U_EXPORT2
uidna_nameToUnicodeUTF8(
    const UIDNA *idna,
    const char *name,
    int32_t length,
    char *dest,
    int32_t capacity,
    UIDNAInfo *pInfo,
    UErrorCode *pErrorCode);
/*
 * IDNA error bit set values.
 * One or more of these bits are set when a domain name or label fails a
 * processing step or does not meet the validity criteria.
 * Every constant is a distinct single bit, written here in hexadecimal.
 */
enum {
    /**
     * The whole domain name, or a label other than the final one, is empty.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_EMPTY_LABEL = 0x0001,

    /**
     * A domain name label is longer than 63 bytes.
     * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
     * Checked only in ToASCII operations, and only if the output label is
     * all-ASCII.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_LABEL_TOO_LONG = 0x0002,

    /**
     * A domain name is longer than 255 bytes in its storage form.
     * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
     * Checked only in ToASCII operations, and only if the output domain
     * name is all-ASCII.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_DOMAIN_NAME_TOO_LONG = 0x0004,

    /**
     * A label begins with a hyphen-minus ('-').
     * @stable ICU 4.6
     */
    UIDNA_ERROR_LEADING_HYPHEN = 0x0008,

    /**
     * A label ends with a hyphen-minus ('-').
     * @stable ICU 4.6
     */
    UIDNA_ERROR_TRAILING_HYPHEN = 0x0010,

    /**
     * A label has hyphen-minus ('-') in the third and fourth positions.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_HYPHEN_3_4 = 0x0020,

    /**
     * A label begins with a combining mark.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_LEADING_COMBINING_MARK = 0x0040,

    /**
     * A label or domain name contains disallowed characters.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_DISALLOWED = 0x0080,

    /**
     * A label starts with "xn--" but does not contain valid Punycode,
     * that is, an xn-- label failed Punycode decoding.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_PUNYCODE = 0x0100,

    /**
     * A label contains a dot=full stop.
     * This can occur in an input string for a single-label function.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_LABEL_HAS_DOT = 0x0200,

    /**
     * An ACE label does not contain a valid label string.
     * ACE (Punycode) decoding of the label succeeded, but the resulting
     * string had severe validation errors. For example, it might contain
     * characters that are not allowed in ACE labels, or it might not be
     * normalized.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_INVALID_ACE_LABEL = 0x0400,

    /**
     * A label does not meet the IDNA BiDi requirements (for right-to-left
     * characters).
     * @stable ICU 4.6
     */
    UIDNA_ERROR_BIDI = 0x0800,

    /**
     * A label does not meet the IDNA CONTEXTJ requirements.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_CONTEXTJ = 0x1000,

    /**
     * A label does not meet the IDNA CONTEXTO requirements for punctuation
     * characters. Some punctuation characters "Would otherwise have been
     * DISALLOWED" but are allowed in certain contexts. (RFC 5892)
     * @stable ICU 49
     */
    UIDNA_ERROR_CONTEXTO_PUNCTUATION = 0x2000,

    /**
     * A label does not meet the IDNA CONTEXTO requirements for digits.
     * Arabic-Indic Digits (U+066x) must not be mixed with Extended
     * Arabic-Indic Digits (U+06Fx).
     * @stable ICU 49
     */
    UIDNA_ERROR_CONTEXTO_DIGITS = 0x4000
};
#endif /* #if !UCONFIG_NO_IDNA */
#endif
// ubrk.h
/*
******************************************************************************
* Copyright (C) 1996-2015, International Business Machines Corporation and others.
* All Rights Reserved.
******************************************************************************
*/
#ifndef UBRK_H
#define UBRK_H
/**
* A text-break iterator.
* For usage in C programs.
*/
#if !defined(UBRK_TYPEDEF_UBREAK_ITERATOR)
#define UBRK_TYPEDEF_UBREAK_ITERATOR
/**
 * Opaque type that represents an ICU Break iterator object.
 * The typedef is emitted only while UBRK_TYPEDEF_UBREAK_ITERATOR is still
 * undefined; the macro is defined together with it.
 * @stable ICU 2.0
 */
typedef struct UBreakIterator UBreakIterator;
#endif /* UBRK_TYPEDEF_UBREAK_ITERATOR */
#if !UCONFIG_NO_BREAK_ITERATION
/**
* \file
* \brief C API: BreakIterator
*
*
* Line boundary analysis determines where a text string can be broken
* when line-wrapping. The mechanism correctly handles punctuation and
* hyphenated words.
*
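* Note: The locale keyword "lb" can be used to modify line break
* behavior according to the CSS level 3 line-break options, see
* http://dev.w3.org/csswg/css-text/#line-breaking. For example:
* "en@lb=strict", "zh@lb=loose", "ja@lb=normal".
*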
* Sentence boundary analysis allows selection with correct
* interpretation of periods within numbers and abbreviations, and
* trailing punctuation marks such as quotation marks and parentheses.
*
* Note: The locale keyword "ss" can be used to enable use of
* segmentation suppression data (preventing breaks in English after
* abbreviations such as "Mr." or "Est.", for example), as follows:
* "en@ss=standard".
*
* Word boundary analysis is used by search and replace functions, as
* well as within text editing applications that allow the user to
* select words with a double click. Word selection provides correct
* interpretation of punctuation marks within and following
* words. Characters that are not part of a word, such as symbols or
* punctuation marks, have word-breaks on both sides.
*
* Character boundary analysis identifies the boundaries of
* "Extended Grapheme Clusters", which are groupings of codepoints
* that should be treated as character-like units for many text operations.
* Please see Unicode Standard Annex #29, Unicode Text Segmentation,
* http://www.unicode.org/reports/tr29/ for additional information
* on grapheme clusters and guidelines on their use.
*
* Title boundary analysis locates all positions,
* typically starts of words, that should be set to Title Case
* when title casing the text.
*
* The text boundary positions are found according to the rules
* described in Unicode Standard Annex #29, Text Boundaries, and
* Unicode Standard Annex #14, Line Breaking Properties. These
* are available at http://www.unicode.org/reports/tr14/ and
* http://www.unicode.org/reports/tr29/.
*
* In addition to the plain C API defined in this header file, an
* object oriented C++ API with equivalent functionality is defined in the
* file brkiter.h.
*
* Code snippets illustrating the use of the Break Iterator APIs
* are available in the ICU User Guide,
* http://icu-project.org/userguide/boundaryAnalysis.html
* and in the sample program icu/source/samples/break/break.cpp
*/
/**
 * The kinds of text boundaries a break iterator can locate.
 * @stable ICU 2.0
 */
typedef enum UBreakIteratorType {
    /** Character breaks @stable ICU 2.0 */
    UBRK_CHARACTER = 0,

    /** Word breaks @stable ICU 2.0 */
    UBRK_WORD      = 1,

    /** Line breaks @stable ICU 2.0 */
    UBRK_LINE      = 2,

    /** Sentence breaks @stable ICU 2.0 */
    UBRK_SENTENCE  = 3,

    /*
     * NOTE(review): this enumerator carries no documentation in this header
     * and no enumerator with the value 4 is declared here. Presumably it is
     * a count/limit marker rather than a boundary type - confirm against the
     * upstream ICU ubrk.h.
     */
    UBRK_COUNT     = 5
} UBreakIteratorType;
/**
 * Value indicating that all text boundaries have been returned.
 * Expands to -1 cast to int32_t.
 * @stable ICU 2.0
 */
#define UBRK_DONE ((int32_t)-1)
/**
 * Word break tags returned by getRuleStatus().
 * Each category of word owns a range of values so that a category can be
 * subdivided further in future releases. Applications should test whether a
 * tag falls within a range instead of comparing against single individual
 * values. Each *_LIMIT constant equals the lower bound of the next range.
 * @stable ICU 2.2
 */
typedef enum UWordBreak {
    /**
     * Tag value for "words" that do not fit into any of the other
     * categories. Includes spaces and most punctuation.
     */
    UBRK_WORD_NONE         = 0,
    /** Upper bound for tags for uncategorized words. */
    UBRK_WORD_NONE_LIMIT   = 100,

    /** Lower limit of the tag values for words that appear to be numbers. */
    UBRK_WORD_NUMBER       = 100,
    /** Upper limit of the tag values for words that appear to be numbers. */
    UBRK_WORD_NUMBER_LIMIT = 200,

    /**
     * Lower limit of the tag values for words that contain letters,
     * excluding hiragana, katakana or ideographic characters.
     */
    UBRK_WORD_LETTER       = 200,
    /** Upper limit of the tag values for words containing letters. */
    UBRK_WORD_LETTER_LIMIT = 300,

    /** Lower limit of the tag values for words containing kana characters. */
    UBRK_WORD_KANA         = 300,
    /** Upper limit of the tag values for words containing kana characters. */
    UBRK_WORD_KANA_LIMIT   = 400,

    /** Lower limit of the tag values for words containing ideographic characters. */
    UBRK_WORD_IDEO         = 400,
    /** Upper limit of the tag values for words containing ideographic characters. */
    UBRK_WORD_IDEO_LIMIT   = 500
} UWordBreak;
/**
 * Line break tags returned by getRuleStatus().
 * Each category of line break owns a range of values so that a category can
 * be subdivided further in future releases. Applications should test whether
 * a tag falls within a range instead of comparing against single individual
 * values.
 * @stable ICU 2.8
 */
typedef enum ULineBreakTag {
    /**
     * Tag value for soft line breaks: positions at which a line break is
     * acceptable but not required.
     */
    UBRK_LINE_SOFT       = 0,
    /** Upper bound for soft line breaks. */
    UBRK_LINE_SOFT_LIMIT = 100,

    /** Tag value for a hard, or mandatory, line break. */
    UBRK_LINE_HARD       = 100,
    /** Upper bound for hard line breaks. */
    UBRK_LINE_HARD_LIMIT = 200
} ULineBreakTag;
/**
 * Sentence break tags returned by getRuleStatus().
 * Each category of sentence owns a range of values so that a category can be
 * subdivided further in future releases. Applications should test whether a
 * tag falls within a range instead of comparing against single individual
 * values.
 * @stable ICU 2.8
 */
typedef enum USentenceBreakTag {
    /**
     * Tag value for sentences that end with a sentence terminator
     * ('.', '?', '!', etc.) character, possibly followed by a hard
     * separator (CR, LF, PS, etc.).
     */
    UBRK_SENTENCE_TERM       = 0,
    /** Upper bound for tags for sentences ended by sentence terminators. */
    UBRK_SENTENCE_TERM_LIMIT = 100,

    /**
     * Tag value for sentences that contain no ending sentence terminator
     * ('.', '?', '!', etc.) character and are ended only by a hard
     * separator (CR, LF, PS, etc.) or by the end of input.
     */
    UBRK_SENTENCE_SEP        = 100,
    /** Upper bound for tags for sentences ended by a separator. */
    UBRK_SENTENCE_SEP_LIMIT  = 200
} USentenceBreakTag;
/**
 * Opens a new UBreakIterator that locates text boundaries for a specified
 * locale. A UBreakIterator may be used for detecting character, line, word,
 * and sentence breaks in text.
 *
 * @param type       The type of UBreakIterator to open: one of
 *                   UBRK_CHARACTER, UBRK_WORD, UBRK_LINE, UBRK_SENTENCE
 * @param locale     The locale specifying the text-breaking conventions.
 *                   Locale keys such as "lb" and "ss" may be used to modify
 *                   text break behavior; see the general discussion of the
 *                   BreakIterator C API.
 * @param text       The text to be iterated over.
 * @param textLength The number of characters in text, or -1 if
 *                   null-terminated.
 * @param status     A UErrorCode to receive any errors.
 * @return A UBreakIterator for the specified locale.
 * @see ubrk_openRules
 * @stable ICU 2.0
 */
U_STABLE UBreakIterator* U_EXPORT2
ubrk_open(
    UBreakIteratorType type,
    const char *locale,
    const UChar *text,
    int32_t textLength,
    UErrorCode *status);
/**
 * Opens a new UBreakIterator that locates text boundaries using the
 * specified breaking rules.
 * The rule syntax is ... (TBD)
 *
 * @param rules       A set of rules specifying the text breaking conventions.
 * @param rulesLength The number of characters in rules, or -1 if
 *                    null-terminated.
 * @param text        The text to be iterated over. May be null, in which case
 *                    ubrk_setText() is used to specify the text to be
 *                    iterated.
 * @param textLength  The number of characters in text, or -1 if
 *                    null-terminated.
 * @param parseErr    Receives position and context information for any
 *                    syntax errors detected while parsing the rules.
 * @param status      A UErrorCode to receive any errors.
 * @return A UBreakIterator for the specified rules.
 * @see ubrk_open
 * @stable ICU 2.2
 */
U_STABLE UBreakIterator* U_EXPORT2
ubrk_openRules(
    const UChar *rules,
    int32_t rulesLength,
    const UChar *text,
    int32_t textLength,
    UParseError *parseErr,
    UErrorCode *status);
/**
* Return the status from the break rule that determined the most recently
* returned break position. The values appear in the rule source
* within brackets, {123}, for example. For rules that do not specify a
* status, a default value of 0 is returned.
*
* For word break iterators, the possible values are defined in enum UWordBreak.
* @param bi The break iterator to use
* @return The status value of the rule that determined the most recent
* boundary returned by the break iterator.
* @see ubrk_getRuleStatusVec
* @stable ICU 2.2
*/
U_STABLE int32_t U_EXPORT2
ubrk_getRuleStatus(UBreakIterator *bi);
/**
* Get the statuses from the break rules that determined the most recently
* returned break position. The values appear in the rule source
* within brackets, {123}, for example. The default status value for rules
* that do not explicitly provide one is zero.
*
* For word break iterators, the possible values are defined in enum UWordBreak.
* @param bi The break iterator to use
* @param fillInVec an array to be filled in with the status values.
* @param capacity the length of the supplied vector. A length of zero causes
* the function to return the number of status values, in the
* normal way, without attempting to store any values.
* @param status receives error codes.
* @return The number of rule status values from rules that determined
* the most recent boundary returned by the break iterator.
* @stable ICU 3.0
*/
U_STABLE int32_t U_EXPORT2
ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
/**
* Return the locale of the break iterator. You can choose between the valid and
* the actual locale.
* @param bi break iterator
* @param type locale type (valid or actual)
* @param status error code
* @return locale string
* @stable ICU 2.8
*/
U_STABLE const char* U_EXPORT2
ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);
/**
* Set the subject text string upon which the break iterator is operating
* without changing any other aspect of the state.
* The new and previous text strings must have the same content.
*
* This function is intended for use in environments where ICU is operating on
* strings that may move around in memory. It provides a mechanism for notifying
* ICU that the string has been relocated, and providing a new UText to access the
* string in its new position.
*
* Note that the break iterator never copies the underlying text
* of a string being processed, but always operates directly on the original text
* provided by the user. Refreshing simply drops the references to the old text
* and replaces them with references to the new.
*
* Caution: this function is normally used only by very specialized
* system-level code. One example use case is with garbage collection
* that moves the text in memory.
*
* @param bi The break iterator.
* @param text The new (moved) text string.
* @param status Receives errors detected by this function.
*
* @stable ICU 49
*/
U_STABLE void U_EXPORT2
ubrk_refreshUText(UBreakIterator *bi,
UText *text,
UErrorCode *status);
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
#endif
// messagepattern.h
/*
*******************************************************************************
* Copyright (C) 2011-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: messagepattern.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2011mar14
* created by: Markus W. Scherer
*/
#ifndef __MESSAGEPATTERN_H__
#define __MESSAGEPATTERN_H__
/**
* \file
* \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
*/
#if !UCONFIG_NO_FORMATTING
/**
* Mode for when an apostrophe starts quoted literal text for MessageFormat output.
* The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
* (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
*
* A pair of adjacent apostrophes always results in a single apostrophe in the output,
* even when the pair is between two single, text-quoting apostrophes.
*
* The following table shows examples of desired MessageFormat.format() output
* with the pattern strings that yield that output.
*
*   Desired output    | DOUBLE_OPTIONAL                | DOUBLE_REQUIRED
*   ------------------+--------------------------------+----------------
*   I see {many}      | I see '{many}'                 | (same)
*   I wasn't          | I wasn't                       | I wasn''t
*   I said {'Wow!'}   | I said '{''Wow!''}'            | (same)
*   I don't know      | I don't know OR I don''t know  | I don''t know
*
* @stable ICU 4.8
* @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
*/
enum UMessagePatternApostropheMode {
    /**
    * A literal apostrophe is represented by
    * either a single or a double apostrophe pattern character.
    * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
    * if it immediately precedes a curly brace {},
    * or a pipe symbol | if inside a choice format,
    * or a pound symbol # if inside a plural format.
    *
    * This is the default behavior starting with ICU 4.8.
    * @stable ICU 4.8
    */
    UMSGPAT_APOS_DOUBLE_OPTIONAL,
    /**
    * A literal apostrophe must be represented by
    * a double apostrophe pattern character.
    * A single apostrophe always starts quoted literal text.
    *
    * This is the behavior of ICU 4.6 and earlier, and of the JDK.
    * @stable ICU 4.8
    */
    UMSGPAT_APOS_DOUBLE_REQUIRED
};
/**
* @stable ICU 4.8
*/
typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
/**
* Type constants for the parts of a parsed message pattern
* (MessagePattern::Part).
* @stable ICU 4.8
*/
enum UMessagePatternPartType {
    /**
    * Marks where a message pattern begins, whether it is the main message
    * or a nested one.
    * Length: 0 for the top-level message and for a choice argument
    * sub-message; 1 (the '{') otherwise.
    * Value: the nesting level, where the main message is level 0.
    *
    * A matching MSG_LIMIT part always follows later.
    * @stable ICU 4.8
    */
    UMSGPAT_PART_TYPE_MSG_START = 0,
    /**
    * Marks where a message pattern ends, whether it is the main message
    * or a nested one.
    * Length: 0 for the top-level message and for the last sub-message of
    * a choice argument; 1 otherwise, for the '}' or (in a choice argument
    * style) the '|'.
    * Value: the nesting level, where the main message is level 0.
    * @stable ICU 4.8
    */
    UMSGPAT_PART_TYPE_MSG_LIMIT = 1,
    /**
    * A substring of the pattern string that formatting must skip over,
    * for example an apostrophe that opens or closes quoted text.
    * Value: undefined, currently always 0.
    * @stable ICU 4.8
    */
    UMSGPAT_PART_TYPE_SKIP_SYNTAX = 2,
    /**
    * A syntax character has to be inserted here for auto-quoting.
    * Length: 0.
    * Value: the character code of the character to insert
    * (U+0027=APOSTROPHE).
    * @stable ICU 4.8
    */
    UMSGPAT_PART_TYPE_INSERT_CHAR = 3,
    /**
    * A syntactic (non-escaped) # symbol inside a plural variant.
    * During formatting this part's substring is replaced with the
    * (value-offset) for the plural argument value.
    * Value: undefined, currently always 0.
    * @stable ICU 4.8
    */
    UMSGPAT_PART_TYPE_REPLACE_NUMBER = 4,
    /**
    * Marks where an argument begins.
    * Length: 1, for the '{'.
    * Value: the ordinal value of the ArgType. Use getArgType().
    *
    * What follows is either an ARG_NUMBER or an ARG_NAME part, then any
    * optional argument sub-parts (see UMessagePatternArgType constants),
    * and at the end an ARG_LIMIT part.
    * @stable ICU 4.8
    */
    UMSGPAT_PART_TYPE_ARG_START = 5,
    /**
    * Marks where an argument ends.
    * Length: 1, for the '}'.
    * Value: the ordinal value of the ArgType. Use getArgType().
    * @stable ICU 4.8
    */
    UMSGPAT_PART_TYPE_ARG_LIMIT = 6,
    /**
    * The number of the argument; the part value carries it.
    * @stable ICU 4.8
    */
    UMSGPAT_PART_TYPE_ARG_NUMBER = 7,
    /**
    * The name of the argument.
    * Value: undefined, currently always 0.
    * @stable ICU 4.8
    */
    UMSGPAT_PART_TYPE_ARG_NAME = 8,
    /**
    * The type of the argument.
    * Value: undefined, currently always 0.
    * @stable ICU 4.8
    */
    UMSGPAT_PART_TYPE_ARG_TYPE = 9,
    /**
    * The style text of the argument.
    * Value: undefined, currently always 0.
    * @stable ICU 4.8
    */
    UMSGPAT_PART_TYPE_ARG_STYLE = 10,
    /**
    * A selector substring within a "complex" argument style.
    * Value: undefined, currently always 0.
    * @stable ICU 4.8
    */
    UMSGPAT_PART_TYPE_ARG_SELECTOR = 11,
    /**
    * An integer, such as the offset or an explicit selector value
    * in a PluralFormat style.
    * Value: the integer itself.
    * @stable ICU 4.8
    */
    UMSGPAT_PART_TYPE_ARG_INT = 12,
    /**
    * A numeric value, such as the offset or an explicit selector value
    * in a PluralFormat style.
    * Value: an index into an internal array of numeric values;
    * use getNumericValue().
    * @stable ICU 4.8
    */
    UMSGPAT_PART_TYPE_ARG_DOUBLE = 13
};
/**
* @stable ICU 4.8
*/
typedef enum UMessagePatternPartType UMessagePatternPartType;
/**
* Constants for the type of an argument.
* Part.getArgType() returns one of these for ARG_START and ARG_LIMIT parts.
*
* Each message nested inside an argument is delimited by MSG_START and
* MSG_LIMIT, and its nesting level is one greater than that of the
* surrounding message.
* @stable ICU 4.8
*/
enum UMessagePatternArgType {
    /**
    * No type was specified for the argument.
    * @stable ICU 4.8
    */
    UMSGPAT_ARG_TYPE_NONE = 0,
    /**
    * A "simple" type, which the ARG_TYPE part provides.
    * It may be followed by an ARG_STYLE part.
    * @stable ICU 4.8
    */
    UMSGPAT_ARG_TYPE_SIMPLE = 1,
    /**
    * A ChoiceFormat argument made of one or more
    * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
    * @stable ICU 4.8
    */
    UMSGPAT_ARG_TYPE_CHOICE = 2,
    /**
    * A cardinal-number PluralFormat argument. It has an optional
    * ARG_INT or ARG_DOUBLE offset (e.g., offset:1)
    * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
    * When the selector carries an explicit value (e.g., =2), the
    * ARG_INT or ARG_DOUBLE part preceding the message provides that value.
    * Otherwise the message immediately follows the ARG_SELECTOR.
    * @stable ICU 4.8
    */
    UMSGPAT_ARG_TYPE_PLURAL = 3,
    /**
    * A SelectFormat argument made of one or more (ARG_SELECTOR, message) pairs.
    * @stable ICU 4.8
    */
    UMSGPAT_ARG_TYPE_SELECT = 4,
    /**
    * An ordinal-number PluralFormat argument; its style parts sequence and
    * semantics are the same as for UMSGPAT_ARG_TYPE_PLURAL.
    * @stable ICU 50
    */
    UMSGPAT_ARG_TYPE_SELECTORDINAL = 5
};
/**
* @stable ICU 4.8
*/
typedef enum UMessagePatternArgType UMessagePatternArgType;
/**
* \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
* Returns TRUE when argType is one of the types that carry a plural style
* part sequence and semantics, i.e. UMSGPAT_ARG_TYPE_PLURAL or
* UMSGPAT_ARG_TYPE_SELECTORDINAL.
* Note that argType may be evaluated more than once.
* @stable ICU 50
*/
#define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
    (UMSGPAT_ARG_TYPE_PLURAL==(argType) || UMSGPAT_ARG_TYPE_SELECTORDINAL==(argType))
enum {
    /**
    * Returned by MessagePattern.validateArgumentName() when the string
    * is a valid "pattern identifier" but is not a number.
    * @stable ICU 4.8
    */
    UMSGPAT_ARG_NAME_NOT_NUMBER = -1,
    /**
    * Returned by MessagePattern.validateArgumentName() when the string
    * is invalid.
    * Either it is not a valid "pattern identifier", or it consists only of
    * ASCII digits but has a leading zero or the number is too large.
    * @stable ICU 4.8
    */
    UMSGPAT_ARG_NAME_NOT_VALID = -2
};
/**
* Special value that is returned by getNumericValue(Part) when no
* numeric value is defined for a part.
* @see MessagePattern.getNumericValue()
* @stable ICU 4.8
*/
#define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
#endif // !UCONFIG_NO_FORMATTING
#endif // __MESSAGEPATTERN_H__
// icudataver.h
/*
******************************************************************************
*
* Copyright (C) 2009-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*/
/**
* \file
* \brief C API: access to ICU Data Version number
*/
#ifndef __ICU_DATA_VER_H__
#define __ICU_DATA_VER_H__
/**
* @stable ICU 49
*/
#define U_ICU_VERSION_BUNDLE "icuver"
/**
* @stable ICU 49
*/
#define U_ICU_DATA_KEY "DataVersion"
/**
* Retrieves the data version from icuver and stores it in dataVersionFillin.
*
* @param dataVersionFillin icuver data version information to be filled in if not-null
* @param status stores the error code from the calls to resource bundle
*
* @stable ICU 49
*/
U_STABLE void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status);
#endif
// caniter.h
/*
*******************************************************************************
* Copyright (C) 1996-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
#ifndef CANITER_H
#define CANITER_H
#if !UCONFIG_NO_NORMALIZATION
/**
* \file
* \brief C++ API: Canonical Iterator
*/
/** Should permutation skip characters with combining class zero
* Should be either TRUE or FALSE. This is a compile time option
* @stable ICU 2.4
*/
#ifndef CANITER_SKIP_ZEROES
#define CANITER_SKIP_ZEROES TRUE
#endif
#endif /* #if !UCONFIG_NO_NORMALIZATION */
#endif
#endif // (NTDDI_VERSION >= NTDDI_WIN10_RS2)
pParaBiDi is also copied,
* and start is added to it so that it points to the beginning of the
* line for this object.
*
* @param pParaBiDi is the parent paragraph object. It must have been set
* by a successful call to ubidi_setPara.
*
* @param start is the line's first index into the text.
*
* @param limit is just behind the line's last index into the text
* (its last index +1).
* It must be 0<=startUBiDi object.
*
* @return a value of UBIDI_LTR, UBIDI_RTL
* or UBIDI_MIXED
* that indicates if the entire text
* represented by this object is unidirectional,
* and which direction, or if it is mixed-directional.
* Note - The value UBIDI_NEUTRAL is never returned from this method.
*
* @see UBiDiDirection
* @stable ICU 2.0
*/
U_STABLE UBiDiDirection U_EXPORT2
ubidi_getDirection(const UBiDi *pBiDi);
/**
* Gets the base direction of the text provided according
* to the Unicode Bidirectional Algorithm. The base direction
* is derived from the first character in the string with bidirectional
* character type L, R, or AL. If the first such character has type L,
* UBIDI_LTR is returned. If the first such character has
* type R or AL, UBIDI_RTL is returned. If the string does
* not contain any character of these types, then
* UBIDI_NEUTRAL is returned.
*
* This is a lightweight function for use when only the base direction
* is needed and no further bidi processing of the text is needed.
*
* @param text is a pointer to the text whose base
* direction is needed.
* Note: the text must be (at least) @c length long.
*
* @param length is the length of the text;
* if length==-1 then the text
* must be zero-terminated.
*
* @return UBIDI_LTR, UBIDI_RTL,
* UBIDI_NEUTRAL
*
* @see UBiDiDirection
* @stable ICU 4.6
*/
U_STABLE UBiDiDirection U_EXPORT2
ubidi_getBaseDirection(const UChar *text, int32_t length );
/**
* Get the pointer to the text.
*
* @param pBiDi is the paragraph or line UBiDi object.
*
* @return The pointer to the text that the UBiDi object was created for.
*
* @see ubidi_setPara
* @see ubidi_setLine
* @stable ICU 2.0
*/
U_STABLE const UChar * U_EXPORT2
ubidi_getText(const UBiDi *pBiDi);
/**
* Get the length of the text.
*
* @param pBiDi is the paragraph or line UBiDi object.
*
* @return The length of the text that the UBiDi object was created for.
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ubidi_getLength(const UBiDi *pBiDi);
/**
* Get the paragraph level of the text.
*
* @param pBiDi is the paragraph or line UBiDi object.
*
* @return The paragraph level. If there are multiple paragraphs, their
* level may vary if the required paraLevel is UBIDI_DEFAULT_LTR or
* UBIDI_DEFAULT_RTL. In that case, the level of the first paragraph
* is returned.
*
* @see UBiDiLevel
* @see ubidi_getParagraph
* @see ubidi_getParagraphByIndex
* @stable ICU 2.0
*/
U_STABLE UBiDiLevel U_EXPORT2
ubidi_getParaLevel(const UBiDi *pBiDi);
/**
* Get the number of paragraphs.
*
* @param pBiDi is the paragraph or line UBiDi object.
*
* @return The number of paragraphs.
* @stable ICU 3.4
*/
U_STABLE int32_t U_EXPORT2
ubidi_countParagraphs(UBiDi *pBiDi);
/**
* Get a paragraph, given a position within the text.
* This function returns information about a paragraph.
* Note: if the paragraph index is known, it is more efficient to
* retrieve the paragraph information using ubidi_getParagraphByIndex().
*
* @param pBiDi is the paragraph or line UBiDi object.
*
* @param charIndex is the index of a character within the text, in the
* range [0..ubidi_getProcessedLength(pBiDi)-1].
*
* @param pParaStart will receive the index of the first character of the
* paragraph in the text.
* This pointer can be NULL if this
* value is not necessary.
*
* @param pParaLimit will receive the limit of the paragraph.
* The l-value that you point to here may be the
* same expression (variable) as the one for
* charIndex.
* This pointer can be NULL if this
* value is not necessary.
*
* @param pParaLevel will receive the level of the paragraph.
* This pointer can be NULL if this
* value is not necessary.
*
* @param pErrorCode must be a valid pointer to an error code value.
*
* @return The index of the paragraph containing the specified position.
*
* @see ubidi_getProcessedLength
* @stable ICU 3.4
*/
U_STABLE int32_t U_EXPORT2
ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart,
int32_t *pParaLimit, UBiDiLevel *pParaLevel,
UErrorCode *pErrorCode);
/**
* Get a paragraph, given the index of this paragraph.
*
* This function returns information about a paragraph.
*
* @param pBiDi is the paragraph UBiDi object.
*
* @param paraIndex is the number of the paragraph, in the
* range [0..ubidi_countParagraphs(pBiDi)-1].
*
* @param pParaStart will receive the index of the first character of the
* paragraph in the text.
* This pointer can be NULL if this
* value is not necessary.
*
* @param pParaLimit will receive the limit of the paragraph.
* This pointer can be NULL if this
* value is not necessary.
*
* @param pParaLevel will receive the level of the paragraph.
* This pointer can be NULL if this
* value is not necessary.
*
* @param pErrorCode must be a valid pointer to an error code value.
*
* @stable ICU 3.4
*/
U_STABLE void U_EXPORT2
ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
int32_t *pParaStart, int32_t *pParaLimit,
UBiDiLevel *pParaLevel, UErrorCode *pErrorCode);
/**
* Get the level for one character.
*
* @param pBiDi is the paragraph or line UBiDi object.
*
* @param charIndex the index of a character. It must be in the range
* [0..ubidi_getProcessedLength(pBiDi)].
*
* @return The level for the character at charIndex (0 if charIndex is not
* in the valid range).
*
* @see UBiDiLevel
* @see ubidi_getProcessedLength
* @stable ICU 2.0
*/
U_STABLE UBiDiLevel U_EXPORT2
ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex);
/**
* Get an array of levels for each character.
*
* Note that this function may allocate memory under some
* circumstances, unlike ubidi_getLevelAt().
*
* @param pBiDi is the paragraph or line UBiDi object, whose
* text length must be strictly positive.
*
* @param pErrorCode must be a valid pointer to an error code value.
*
* @return The levels array for the text,
* or NULL if an error occurs.
*
* @see UBiDiLevel
* @see ubidi_getProcessedLength
* @stable ICU 2.0
*/
U_STABLE const UBiDiLevel * U_EXPORT2
ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode);
/**
* Get a logical run.
* This function returns information about a run and is used
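* to retrieve runs in logical order.
* This is especially useful for line-breaking on a paragraph.
*
* @param pBiDi is the paragraph or line UBiDi object.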
*
* @param logicalPosition is a logical position within the source text.
*
* @param pLogicalLimit will receive the limit of the corresponding run.
* The l-value that you point to here may be the
* same expression (variable) as the one for
* logicalPosition.
* This pointer can be NULL if this
* value is not necessary.
*
* @param pLevel will receive the level of the corresponding run.
* This pointer can be NULL if this
* value is not necessary.
*
* @see ubidi_getProcessedLength
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition,
int32_t *pLogicalLimit, UBiDiLevel *pLevel);
/**
* Get the number of runs.
* This function may invoke the actual reordering on the
* UBiDi object, after ubidi_setPara()
* may have resolved only the levels of the text. Therefore,
* ubidi_countRuns() may have to allocate memory,
* and may fail doing so.
*
* @param pBiDi is the paragraph or line UBiDi object.
*
* @param pErrorCode must be a valid pointer to an error code value.
*
* @return The number of runs.
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
/**
* Get one run's logical start, length, and directionality,
* which can be 0 for LTR or 1 for RTL.
* In an RTL run, the character at the logical start is
* visually on the right of the displayed run.
* The length is the number of characters in the run.
* ubidi_countRuns() should be called
* before the runs are retrieved.
*
* @param pBiDi is the paragraph or line UBiDi object.
*
* @param runIndex is the number of the run in visual order, in the
* range [0..ubidi_countRuns(pBiDi)-1].
*
* @param pLogicalStart is the first logical character index in the text.
* The pointer may be NULL if this index is not needed.
*
* @param pLength is the number of characters (at least one) in the run.
* The pointer may be NULL if this is not needed.
*
* @return the directionality of the run,
* UBIDI_LTR==0 or UBIDI_RTL==1,
* never UBIDI_MIXED,
* never UBIDI_NEUTRAL.
*
* @see ubidi_countRuns
*
* Example:
*
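* \code
* int32_t i, count=ubidi_countRuns(pBiDi),
*         logicalStart, visualIndex=0, length;
* for(i=0; i<count; ++i) {
*     if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) {
*         do { // LTR
*             show_char(text[logicalStart++], visualIndex++);
*         } while(--length>0);
*     } else {
*         logicalStart+=length;  // logicalLimit
*         do { // RTL
*             show_char(text[--logicalStart], visualIndex++);
*         } while(--length>0);
*     }
* }
* \endcode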
*
* Note that in right-to-left runs, code like this places
* second surrogates before first ones (which is generally a bad idea)
* and combining characters before base characters.
* ubidi_writeReordered(), optionally with the
* #UBIDI_KEEP_BASE_COMBINING option, can be considered in order
* to avoid these issues.
* @stable ICU 2.0
*/
U_STABLE UBiDiDirection U_EXPORT2
ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex,
int32_t *pLogicalStart, int32_t *pLength);
/**
* Get the visual position from a logical text position.
* If such a mapping is used many times on the same
* UBiDi object, then calling
* ubidi_getLogicalMap() is more efficient.
*
* The value returned may be #UBIDI_MAP_NOWHERE if there is no
* visual position because the corresponding text character is a Bidi control
* removed from output by the option #UBIDI_OPTION_REMOVE_CONTROLS.
*
* When the visual output is altered by using options of
* ubidi_writeReordered() such as UBIDI_INSERT_LRM_FOR_NUMERIC,
* UBIDI_KEEP_BASE_COMBINING, UBIDI_OUTPUT_REVERSE,
* UBIDI_REMOVE_BIDI_CONTROLS, the visual position returned may not
* be correct. It is advised to use, when possible, reordering options
* such as UBIDI_OPTION_INSERT_MARKS and UBIDI_OPTION_REMOVE_CONTROLS.
*
* Note that in right-to-left runs, this mapping places
* second surrogates before first ones (which is generally a bad idea)
* and combining characters before base characters.
* Use of ubidi_writeReordered(), optionally with the
* #UBIDI_KEEP_BASE_COMBINING option can be considered instead
* of using the mapping, in order to avoid these issues.
*
* @param pBiDi is the paragraph or line UBiDi object.
*
* @param logicalIndex is the index of a character in the text.
*
* @param pErrorCode must be a valid pointer to an error code value.
*
* @return The visual position of this character.
*
* @see ubidi_getLogicalMap
* @see ubidi_getLogicalIndex
* @see ubidi_getProcessedLength
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode);
/**
* Get the logical text position from a visual position.
* If such a mapping is used many times on the same
* UBiDi object, then calling
* ubidi_getVisualMap() is more efficient.
*
* The value returned may be #UBIDI_MAP_NOWHERE if there is no
* logical position because the corresponding text character is a Bidi mark
* inserted in the output by option #UBIDI_OPTION_INSERT_MARKS.
*
* This is the inverse function to ubidi_getVisualIndex().
*
* When the visual output is altered by using options of
* ubidi_writeReordered() such as UBIDI_INSERT_LRM_FOR_NUMERIC,
* UBIDI_KEEP_BASE_COMBINING, UBIDI_OUTPUT_REVERSE,
* UBIDI_REMOVE_BIDI_CONTROLS, the logical position returned may not
* be correct. It is advised to use, when possible, reordering options
* such as UBIDI_OPTION_INSERT_MARKS and UBIDI_OPTION_REMOVE_CONTROLS.
*
* @param pBiDi is the paragraph or line UBiDi object.
*
* @param visualIndex is the visual position of a character.
*
* @param pErrorCode must be a valid pointer to an error code value.
*
* @return The index of this character in the text.
*
* @see ubidi_getVisualMap
* @see ubidi_getVisualIndex
* @see ubidi_getResultLength
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode);
/**
* Get a logical-to-visual index map (array) for the characters in the UBiDi
* (paragraph or line) object.
*
* Some values in the map may be #UBIDI_MAP_NOWHERE if the
* corresponding text characters are Bidi controls removed from the visual
* output by the option #UBIDI_OPTION_REMOVE_CONTROLS.
*
* When the visual output is altered by using options of
* ubidi_writeReordered() such as UBIDI_INSERT_LRM_FOR_NUMERIC,
* UBIDI_KEEP_BASE_COMBINING, UBIDI_OUTPUT_REVERSE,
* UBIDI_REMOVE_BIDI_CONTROLS, the visual positions returned may not
* be correct. It is advised to use, when possible, reordering options
* such as UBIDI_OPTION_INSERT_MARKS and UBIDI_OPTION_REMOVE_CONTROLS.
*
* Note that in right-to-left runs, this mapping places
* second surrogates before first ones (which is generally a bad idea)
* and combining characters before base characters.
* Use of ubidi_writeReordered(), optionally with the
* #UBIDI_KEEP_BASE_COMBINING option can be considered instead
* of using the mapping, in order to avoid these issues.
*
* @param pBiDi is the paragraph or line UBiDi object.
*
* @param indexMap is a pointer to an array of ubidi_getProcessedLength()
* indexes which will reflect the reordering of the characters.
* If option #UBIDI_OPTION_INSERT_MARKS is set, the number
* of elements allocated in indexMap must be no less than
* ubidi_getResultLength().
* The array does not need to be initialized.
* The index map will result in indexMap[logicalIndex]==visualIndex.
*
* @param pErrorCode must be a valid pointer to an error code value.
*
* @see ubidi_getVisualMap
* @see ubidi_getVisualIndex
* @see ubidi_getProcessedLength
* @see ubidi_getResultLength
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
/**
* Get a visual-to-logical index map (array) for the characters in the UBiDi
* (paragraph or line) object.
*
* Some values in the map may be #UBIDI_MAP_NOWHERE if the
* corresponding text characters are Bidi marks inserted in the visual output
* by the option #UBIDI_OPTION_INSERT_MARKS.
*
* When the visual output is altered by using options of
* ubidi_writeReordered() such as UBIDI_INSERT_LRM_FOR_NUMERIC,
* UBIDI_KEEP_BASE_COMBINING, UBIDI_OUTPUT_REVERSE,
* UBIDI_REMOVE_BIDI_CONTROLS, the logical positions returned may not
* be correct. It is advised to use, when possible, reordering options
* such as UBIDI_OPTION_INSERT_MARKS and UBIDI_OPTION_REMOVE_CONTROLS.
*
* @param pBiDi is the paragraph or line UBiDi object.
*
* @param indexMap is a pointer to an array of ubidi_getResultLength()
* indexes which will reflect the reordering of the characters.
* If option #UBIDI_OPTION_REMOVE_CONTROLS is set, the number
* of elements allocated in indexMap must be no less than
* ubidi_getProcessedLength().
* The array does not need to be initialized.
* The index map will result in indexMap[visualIndex]==logicalIndex.
*
* @param pErrorCode must be a valid pointer to an error code value.
*
* @see ubidi_getLogicalMap
* @see ubidi_getLogicalIndex
* @see ubidi_getProcessedLength
* @see ubidi_getResultLength
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
/**
* This is a convenience function that does not use a UBiDi object.
* It is intended to be used for when an application has determined the levels
* of objects (character sequences) and just needs to have them reordered (L2).
* This is equivalent to using ubidi_getLogicalMap() on a
* UBiDi object.
*
* @param levels is an array with length levels that have been determined by
* the application.
*
* @param length is the number of levels in the array, or, semantically,
* the number of objects to be reordered.
* It must be length>0.
*
* @param indexMap is a pointer to an array of length
* indexes which will reflect the reordering of the characters.
* The array does not need to be initialized.
* The index map will result in indexMap[logicalIndex]==visualIndex.
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
/**
* This is a convenience function that does not use a UBiDi object.
* It is intended to be used for when an application has determined the levels
* of objects (character sequences) and just needs to have them reordered (L2).
* This is equivalent to using ubidi_getVisualMap() on a
* UBiDi object.
*
* @param levels is an array with length levels that have been determined by
* the application.
*
* @param length is the number of levels in the array, or, semantically,
* the number of objects to be reordered.
* It must be length>0.
*
* @param indexMap is a pointer to an array of length
* indexes which will reflect the reordering of the characters.
* The array does not need to be initialized.
* The index map will result in indexMap[visualIndex]==logicalIndex.
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
/**
* Invert an index map.
* The index mapping of the first map is inverted and written to
* the second one.
*
* @param srcMap is an array with length elements
* which defines the original mapping from a source array containing
* length elements to a destination array.
* Some elements of the source array may have no mapping in the
* destination array. In that case, their value will be
* the special value UBIDI_MAP_NOWHERE.
* All elements must be >=0 or equal to UBIDI_MAP_NOWHERE.
* Some elements may have a value >= length, if the
* destination array has more elements than the source array.
* There must be no duplicate indexes (two or more elements with the
* same value except UBIDI_MAP_NOWHERE).
*
* @param destMap is an array with a number of elements equal to 1 + the highest
* value in srcMap.
* destMap will be filled with the inverse mapping.
* If element with index i in srcMap has a value k different
* from UBIDI_MAP_NOWHERE, this means that element i of
* the source array maps to element k in the destination array.
* The inverse map will have value i in its k-th element.
* For all elements of the destination array which do not map to
* an element in the source array, the corresponding element in the
* inverse map will have a value equal to UBIDI_MAP_NOWHERE.
*
* @param length is the length of each array.
* @see UBIDI_MAP_NOWHERE
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length);
/** option flags for ubidi_writeReordered() */
/**
* option bit for ubidi_writeReordered():
* keep combining characters after their base characters in RTL runs
*
* @see ubidi_writeReordered
* @stable ICU 2.0
*/
#define UBIDI_KEEP_BASE_COMBINING 1
/**
* option bit for ubidi_writeReordered():
* replace characters with the "mirrored" property in RTL runs
* by their mirror-image mappings
*
* @see ubidi_writeReordered
* @stable ICU 2.0
*/
#define UBIDI_DO_MIRRORING 2
/**
* Get the length of the source text processed by the last call to
* ubidi_setPara(). This length may be different from the length
* of the source text if option #UBIDI_OPTION_STREAMING
* has been set.
*
* Note that whenever the length of the text affects the execution or the
* result of a function, it is the processed length which must be considered,
* except for ubidi_setPara (which receives unprocessed source
* text) and ubidi_getLength (which returns the original length
* of the source text).
* In particular, the processed length is the one to consider in the following
* cases:
*
* - maximum value of the limit argument of ubidi_setLine
* - maximum value of the charIndex argument of ubidi_getParagraph
* - maximum value of the charIndex argument of ubidi_getLevelAt
* - number of elements in the array returned by ubidi_getLevels
* - maximum value of the logicalStart argument of ubidi_getLogicalRun
* - maximum value of the logicalIndex argument of ubidi_getVisualIndex
* - number of elements filled in the *indexMap argument of ubidi_getLogicalMap
* - length of text processed by ubidi_writeReordered
*
* @param pBiDi is the paragraph UBiDi object.
*
* @return The length of the part of the source text processed by
* the last call to ubidi_setPara.
* @see ubidi_setPara
* @see UBIDI_OPTION_STREAMING
* @stable ICU 3.6
*/
U_STABLE int32_t U_EXPORT2
ubidi_getProcessedLength(const UBiDi *pBiDi);
/**
* Get the length of the reordered text resulting from the last call to
* ubidi_setPara(). This length may be different from the length
* of the source text if option #UBIDI_OPTION_INSERT_MARKS
* or option #UBIDI_OPTION_REMOVE_CONTROLS has been set.
*
* This resulting length is the one to consider in the following cases:
*
* - maximum value of the visualIndex argument of ubidi_getLogicalIndex
* - number of elements of the *indexMap argument of ubidi_getVisualMap
*
* Note that this length stays identical to the source text length if
* Bidi marks are inserted or removed using option bits of
* ubidi_writeReordered, or if option
* #UBIDI_REORDER_INVERSE_NUMBERS_AS_L has been set.
*
* @param pBiDi is the paragraph UBiDi object.
*
* @return The length of the reordered text resulting from
* the last call to ubidi_setPara.
* @see ubidi_setPara
* @see UBIDI_OPTION_INSERT_MARKS
* @see UBIDI_OPTION_REMOVE_CONTROLS
* @stable ICU 3.6
*/
U_STABLE int32_t U_EXPORT2
ubidi_getResultLength(const UBiDi *pBiDi);
U_CDECL_BEGIN
/**
* value returned by UBiDiClassCallback callbacks when
* there is no need to override the standard Bidi class for a given code point.
* @see UBiDiClassCallback
* @stable ICU 3.6
*/
#define U_BIDI_CLASS_DEFAULT U_CHAR_DIRECTION_COUNT
/**
* Callback type declaration for overriding default Bidi class values with
* custom ones.
*
* Usually, the function pointer will be propagated to a UBiDi
* object by calling the ubidi_setClassCallback() function;
* then the callback will be invoked by the UBA implementation any time the
* class of a character is to be determined.
*
* @param context is a pointer to the callback private data.
*
* @param c is the code point to get a Bidi class for.
*
* @return The directional property / Bidi class for the given code point
* c if the default class has been overridden, or
* #U_BIDI_CLASS_DEFAULT if the standard Bidi class value
* for c is to be used.
* @see ubidi_setClassCallback
* @see ubidi_getClassCallback
* @stable ICU 3.6
*/
typedef UCharDirection U_CALLCONV
UBiDiClassCallback(const void *context, UChar32 c);
U_CDECL_END
/**
* Retrieve the Bidi class for a given code point.
*
* If a #UBiDiClassCallback callback is defined and returns a
* value other than #U_BIDI_CLASS_DEFAULT, that value is used;
* otherwise the default class determination mechanism is invoked.
*
* @param pBiDi is the paragraph UBiDi object.
*
* @param c is the code point whose Bidi class must be retrieved.
*
* @return The Bidi class for character c based
* on the given pBiDi instance.
* @see UBiDiClassCallback
* @stable ICU 3.6
*/
U_STABLE UCharDirection U_EXPORT2
ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c);
/**
* Set the callback function and callback data used by the UBA
* implementation for Bidi class determination.
*
* @param pBiDi is the paragraph UBiDi object.
*
* @param newFn is the new callback function pointer.
*
* @param newContext is the new callback context pointer. This can be NULL.
*
* @param oldFn fillin: Returns the old callback function pointer. This can be
* NULL.
*
* @param oldContext fillin: Returns the old callback's context. This can be
* NULL.
*
* @param pErrorCode must be a valid pointer to an error code value.
*
* @see ubidi_getClassCallback
* @stable ICU 3.6
*/
U_STABLE void U_EXPORT2
ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
const void *newContext, UBiDiClassCallback **oldFn,
const void **oldContext, UErrorCode *pErrorCode);
/**
* Get the current callback function used for Bidi class determination.
*
* @param pBiDi is the paragraph UBiDi object.
*
* @param fn fillin: Returns the callback function pointer.
*
* @param context fillin: Returns the callback's private context.
*
* @see ubidi_setClassCallback
* @stable ICU 3.6
*/
U_STABLE void U_EXPORT2
ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context);
/**
* Take a UBiDi object containing the reordering
* information for a piece of text (one or more paragraphs) set by
* ubidi_setPara() or for a line of text set by
* ubidi_setLine() and write a reordered string to the
* destination buffer.
*
* This function preserves the integrity of characters with multiple
* code units and (optionally) combining characters.
* Characters in RTL runs can be replaced by mirror-image characters
* in the destination buffer. Note that "real" mirroring has
* to be done in a rendering engine by glyph selection
* and that for many "mirrored" characters there are no
* Unicode characters as mirror-image equivalents.
* There are also options to insert or remove Bidi control
* characters; see the description of the destSize
* and options parameters and of the option bit flags.
*
* @param pBiDi A pointer to a UBiDi object that
* is set by ubidi_setPara() or
* ubidi_setLine() and contains the reordering
* information for the text that it was defined for,
* as well as a pointer to that text.
* The text was aliased (only the pointer was stored
* without copying the contents) and must not have been modified
* since the ubidi_setPara() call.
*
* @param dest A pointer to where the reordered text is to be copied.
* The source text and dest[destSize]
* must not overlap.
*
* @param destSize The size of the dest buffer,
* in number of UChars.
* If the UBIDI_INSERT_LRM_FOR_NUMERIC
* option is set, then the destination length could be
* as large as
* ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi).
* If the UBIDI_REMOVE_BIDI_CONTROLS option
* is set, then the destination length may be less than
* ubidi_getLength(pBiDi).
* If none of these options is set, then the destination length
* will be exactly ubidi_getProcessedLength(pBiDi).
*
* @param options A bit set of options for the reordering that control
* how the reordered text is written.
* The options include mirroring the characters on a code
* point basis and inserting LRM characters, which is used
* especially for transforming visually stored text
* to logically stored text (although this is still an
* imperfect implementation of an "inverse Bidi" algorithm
* because it uses the "forward Bidi" algorithm at its core).
* The available options are:
* #UBIDI_DO_MIRRORING,
* #UBIDI_INSERT_LRM_FOR_NUMERIC,
* #UBIDI_KEEP_BASE_COMBINING,
* #UBIDI_OUTPUT_REVERSE,
* #UBIDI_REMOVE_BIDI_CONTROLS
*
* @param pErrorCode must be a valid pointer to an error code value.
*
* @return The length of the output string.
*
* @see ubidi_getProcessedLength
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ubidi_writeReordered(UBiDi *pBiDi,
UChar *dest, int32_t destSize,
uint16_t options,
UErrorCode *pErrorCode);
/**
* Reverse a Right-To-Left run of Unicode text.
*
* This function preserves the integrity of characters with multiple
* code units and (optionally) combining characters.
* Characters can be replaced by mirror-image characters
* in the destination buffer. Note that "real" mirroring has
* to be done in a rendering engine by glyph selection
* and that for many "mirrored" characters there are no
* Unicode characters as mirror-image equivalents.
* There are also options to insert or remove Bidi control
* characters.
*
* This function is the implementation for reversing RTL runs as part
* of ubidi_writeReordered(). For detailed descriptions
* of the parameters, see there.
* Since no Bidi controls are inserted here, the output string length
* will never exceed srcLength.
*
* @see ubidi_writeReordered
*
* @param src A pointer to the RTL run text.
*
* @param srcLength The length of the RTL run.
*
* @param dest A pointer to where the reordered text is to be copied.
* src[srcLength] and dest[destSize]
* must not overlap.
*
* @param destSize The size of the dest buffer,
* in number of UChars.
* If the UBIDI_REMOVE_BIDI_CONTROLS option
* is set, then the destination length may be less than
* srcLength.
* If this option is not set, then the destination length
* will be exactly srcLength.
*
* @param options A bit set of options for the reordering that control
* how the reordered text is written.
* See the options parameter in ubidi_writeReordered().
*
* @param pErrorCode must be a valid pointer to an error code value.
*
* @return The length of the output string.
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ubidi_writeReverse(const UChar *src, int32_t srcLength,
UChar *dest, int32_t destSize,
uint16_t options,
UErrorCode *pErrorCode);
/*#define BIDI_SAMPLE_CODE*/
/*@}*/
#endif
// stringtriebuilder.h
/*
*******************************************************************************
* Copyright (C) 2010-2012,2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: stringtriebuilder.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010dec24
* created by: Markus W. Scherer
*/
#ifndef __STRINGTRIEBUILDER_H__
#define __STRINGTRIEBUILDER_H__
/**
* \file
* \brief C++ API: Builder API for trie builders
*/
// Forward declaration.
struct UHashtable;
typedef struct UHashtable UHashtable;
/**
* Build options for BytesTrieBuilder and CharsTrieBuilder.
* @stable ICU 4.8
*/
enum UStringTrieBuildOption {
/**
* Builds a trie quickly.
* @stable ICU 4.8
*/
USTRINGTRIE_BUILD_FAST,
/**
* Builds a trie more slowly, attempting to generate
* a shorter but equivalent serialization.
* This build option also uses more memory.
*
* This option can be effective when many integer values are the same
* and string/byte sequence suffixes can be shared.
* Runtime speed is not expected to improve.
* @stable ICU 4.8
*/
USTRINGTRIE_BUILD_SMALL
};
#endif // __STRINGTRIEBUILDER_H__
// putil.h
/*
******************************************************************************
*
* Copyright (C) 1997-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* FILE NAME : putil.h
*
* Date Name Description
* 05/14/98 nos Creation (content moved here from utypes.h).
* 06/17/99 erm Added IEEE_754
* 07/22/98 stephen Added IEEEremainder, max, min, trunc
* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
* 08/24/98 stephen Added longBitsFromDouble
* 03/02/99 stephen Removed openFile(). Added AS400 support.
* 04/15/99 stephen Converted to C
* 11/15/99 helena Integrated S/390 changes for IEEE support.
* 01/11/00 helena Added u_getVersion.
******************************************************************************
*/
#ifndef PUTIL_H
#define PUTIL_H
/**
* \file
* \brief C API: Platform Utilities
*/
/*==========================================================================*/
/* Platform utilities */
/*==========================================================================*/
/**
* Platform utilities isolate the platform dependencies of the
* library. For each platform to which this code is ported, these
* functions may have to be re-implemented.
*/
/** @} */
/**
* Convert char characters to UChar characters.
* This utility function is useful only for "invariant characters"
* that are encoded in the platform default encoding.
* They are a small, constant subset of the encoding and include
* just the latin letters, digits, and some punctuation.
* For details, see U_CHARSET_FAMILY.
*
* @param cs Input string, points to length
* character bytes from a subset of the platform encoding.
* @param us Output string, points to memory for length
* Unicode characters.
* @param length The number of characters to convert; this may
* include the terminating NUL.
*
* @see U_CHARSET_FAMILY
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
u_charsToUChars(const char *cs, UChar *us, int32_t length);
/**
* Convert UChar characters to char characters.
* This utility function is useful only for "invariant characters"
* that can be encoded in the platform default encoding.
* They are a small, constant subset of the encoding and include
* just the latin letters, digits, and some punctuation.
* For details, see U_CHARSET_FAMILY.
*
* @param us Input string, points to length
* Unicode characters that can be encoded with the
* codepage-invariant subset of the platform encoding.
* @param cs Output string, points to memory for length
* character bytes.
* @param length The number of characters to convert; this may
* include the terminating NUL.
*
* @see U_CHARSET_FAMILY
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
u_UCharsToChars(const UChar *us, char *cs, int32_t length);
#endif
// ustring.h
/*
**********************************************************************
* Copyright (C) 1998-2014, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* File ustring.h
*
* Modification History:
*
* Date Name Description
* 12/07/98 bertrand Creation.
******************************************************************************
*/
#ifndef USTRING_H
#define USTRING_H
/**
* \def UBRK_TYPEDEF_UBREAK_ITERATOR
* @internal
*/
#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
# define UBRK_TYPEDEF_UBREAK_ITERATOR
/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/
typedef struct UBreakIterator UBreakIterator;
#endif
/**
* \file
* \brief C API: Unicode string handling functions
*
* These C API functions provide general Unicode string handling.
*
* Some functions are equivalent in name, signature, and behavior to the ANSI C
* <string.h> functions.
*/
/**
* Determine the length of a NUL-terminated array of UChar.
*
* @param s The array of UChars, NUL (U+0000) terminated.
* @return The number of UChars in s, minus the terminator.
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
u_strlen(const UChar *s);
/*@}*/
/**
* Count Unicode code points in the 'length' UChar code units of the string.
* A code point may occupy either one or two UChar code units.
* Counting code points involves reading all code units.
*
* This function is basically the inverse of the U16_FWD_N() macro (see utf.h).
*
* @param s The input string.
* @param length The number of UChar code units to be checked, or -1 to count all
* code points before the first NUL (U+0000).
* @return The number of code points in the specified code units.
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
u_countChar32(const UChar *s, int32_t length);
/**
* Check if the string contains more Unicode code points than a certain number.
* This is more efficient than counting all code points in the entire string
* and comparing that number with a threshold.
* This function may not need to scan the string at all if the length is known
* (not -1 for NUL-termination) and falls within a certain range, and
* never needs to count more than 'number+1' code points.
* Logically equivalent to (u_countChar32(s, length)>number).
* A Unicode code point may occupy either one or two UChar code units.
*
* @param s The input string.
* @param length The length of the string, or -1 if it is NUL-terminated.
* @param number The number of code points in the string is compared against
* the 'number' parameter.
* @return Boolean value for whether the string contains more Unicode code points
* than 'number'. Same as (u_countChar32(s, length)>number).
* @stable ICU 2.4
*/
U_STABLE UBool U_EXPORT2
u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
/**
* Concatenate two ustrings. Appends a copy of src,
* including the null terminator, to dst. The initial copied
* character from src overwrites the null terminator in dst.
*
* @param dst The destination string.
* @param src The source string.
* @return A pointer to dst.
* @stable ICU 2.0
*/
U_STABLE UChar* U_EXPORT2
u_strcat(UChar *dst,
const UChar *src);
/**
* Concatenate two ustrings.
* Appends at most n characters from src to dst.
* Adds a terminating NUL.
* If src is too long, then only n-1 characters will be copied
* before the terminating NUL.
* If n<=0 then dst is not modified.
*
* @param dst The destination string.
* @param src The source string (can be NULL/invalid if n<=0).
* @param n The maximum number of characters to append; no-op if <=0.
* @return A pointer to dst.
* @stable ICU 2.0
*/
U_STABLE UChar* U_EXPORT2
u_strncat(UChar *dst,
const UChar *src,
int32_t n);
/**
* Find the first occurrence of a substring in a string.
* The substring is found at code point boundaries.
* That means that if the substring begins with
* a trail surrogate or ends with a lead surrogate,
* then it is found only if these surrogates stand alone in the text.
* Otherwise, the substring edge units would be matched against
* halves of surrogate pairs.
*
* @param s The string to search (NUL-terminated).
* @param substring The substring to find (NUL-terminated).
* @return A pointer to the first occurrence of substring in s,
* or s itself if the substring is empty,
* or NULL if substring is not in s.
* @stable ICU 2.0
*
* @see u_strrstr
* @see u_strFindFirst
* @see u_strFindLast
*/
U_STABLE UChar * U_EXPORT2
u_strstr(const UChar *s, const UChar *substring);
/**
* Find the first occurrence of a substring in a string.
* The substring is found at code point boundaries.
* That means that if the substring begins with
* a trail surrogate or ends with a lead surrogate,
* then it is found only if these surrogates stand alone in the text.
* Otherwise, the substring edge units would be matched against
* halves of surrogate pairs.
*
* @param s The string to search.
* @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
* @param substring The substring to find (NUL-terminated).
* @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
* @return A pointer to the first occurrence of substring in s,
* or s itself if the substring is empty,
* or NULL if substring is not in s.
* @stable ICU 2.4
*
* @see u_strstr
* @see u_strFindLast
*/
U_STABLE UChar * U_EXPORT2
u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
/**
* Find the first occurrence of a BMP code point in a string.
* A surrogate code point is found only if its match in the text is not
* part of a surrogate pair.
* A NUL character is found at the string terminator.
*
* @param s The string to search (NUL-terminated).
* @param c The BMP code point to find.
* @return A pointer to the first occurrence of c in s
* or NULL if c is not in s.
* @stable ICU 2.0
*
* @see u_strchr32
* @see u_memchr
* @see u_strstr
* @see u_strFindFirst
*/
U_STABLE UChar * U_EXPORT2
u_strchr(const UChar *s, UChar c);
/**
* Find the first occurrence of a code point in a string.
* A surrogate code point is found only if its match in the text is not
* part of a surrogate pair.
* A NUL character is found at the string terminator.
*
* @param s The string to search (NUL-terminated).
* @param c The code point to find.
* @return A pointer to the first occurrence of c in s
* or NULL if c is not in s.
* @stable ICU 2.0
*
* @see u_strchr
* @see u_memchr32
* @see u_strstr
* @see u_strFindFirst
*/
U_STABLE UChar * U_EXPORT2
u_strchr32(const UChar *s, UChar32 c);
/**
* Find the last occurrence of a substring in a string.
* The substring is found at code point boundaries.
* That means that if the substring begins with
* a trail surrogate or ends with a lead surrogate,
* then it is found only if these surrogates stand alone in the text.
* Otherwise, the substring edge units would be matched against
* halves of surrogate pairs.
*
* @param s The string to search (NUL-terminated).
* @param substring The substring to find (NUL-terminated).
* @return A pointer to the last occurrence of substring in s,
* or s itself if the substring is empty,
* or NULL if substring is not in s.
* @stable ICU 2.4
*
* @see u_strstr
* @see u_strFindFirst
* @see u_strFindLast
*/
U_STABLE UChar * U_EXPORT2
u_strrstr(const UChar *s, const UChar *substring);
/**
* Find the last occurrence of a substring in a string.
* The substring is found at code point boundaries.
* That means that if the substring begins with
* a trail surrogate or ends with a lead surrogate,
* then it is found only if these surrogates stand alone in the text.
* Otherwise, the substring edge units would be matched against
* halves of surrogate pairs.
*
* @param s The string to search.
* @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
* @param substring The substring to find (NUL-terminated).
* @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
* @return A pointer to the last occurrence of substring in s,
* or s itself if the substring is empty,
* or NULL if substring is not in s.
* @stable ICU 2.4
*
* @see u_strstr
* @see u_strFindLast
*/
U_STABLE UChar * U_EXPORT2
u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
/**
* Find the last occurrence of a BMP code point in a string.
* A surrogate code point is found only if its match in the text is not
* part of a surrogate pair.
* A NUL character is found at the string terminator.
*
* @param s The string to search (NUL-terminated).
* @param c The BMP code point to find.
* @return A pointer to the last occurrence of c in s
* or NULL if c is not in s.
* @stable ICU 2.4
*
* @see u_strrchr32
* @see u_memrchr
* @see u_strrstr
* @see u_strFindLast
*/
U_STABLE UChar * U_EXPORT2
u_strrchr(const UChar *s, UChar c);
/**
* Find the last occurrence of a code point in a string.
* A surrogate code point is found only if its match in the text is not
* part of a surrogate pair.
* A NUL character is found at the string terminator.
*
* @param s The string to search (NUL-terminated).
* @param c The code point to find.
* @return A pointer to the last occurrence of c in s
* or NULL if c is not in s.
* @stable ICU 2.4
*
* @see u_strrchr
* @see u_memrchr32
* @see u_strrstr
* @see u_strFindLast
*/
U_STABLE UChar * U_EXPORT2
u_strrchr32(const UChar *s, UChar32 c);
/**
* Locates the first occurrence in the string string of any of the characters
* in the string matchSet.
* Works just like C's strpbrk but with Unicode.
*
* @param string The string in which to search, NUL-terminated.
* @param matchSet A NUL-terminated string defining a set of code points
* for which to search in the text string.
* @return A pointer to the character in string that matches one of the
* characters in matchSet, or NULL if no such character is found.
* @stable ICU 2.0
*/
U_STABLE UChar * U_EXPORT2
u_strpbrk(const UChar *string, const UChar *matchSet);
/**
* Returns the number of consecutive characters in string,
* beginning with the first, that do not occur somewhere in matchSet.
* Works just like C's strcspn but with Unicode.
*
* @param string The string in which to search, NUL-terminated.
* @param matchSet A NUL-terminated string defining a set of code points
* for which to search in the text string.
* @return The number of initial characters in string that do not
* occur in matchSet.
* @see u_strspn
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
u_strcspn(const UChar *string, const UChar *matchSet);
/**
* Returns the number of consecutive characters in string,
* beginning with the first, that occur somewhere in matchSet.
* Works just like C's strspn but with Unicode.
*
* @param string The string in which to search, NUL-terminated.
* @param matchSet A NUL-terminated string defining a set of code points
* for which to search in the text string.
* @return The number of initial characters in string that do
* occur in matchSet.
* @see u_strcspn
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
u_strspn(const UChar *string, const UChar *matchSet);
/**
* The string tokenizer API allows an application to break a string into
* tokens. Unlike strtok(), the saveState (the current pointer within the
* original string) is maintained in saveState. In the first call, the
* argument src is a pointer to the string. In subsequent calls to
* return successive tokens of that string, src must be specified as
* NULL. The value saveState is set by this function to maintain the
* function's position within the string, and on each subsequent call
* you must give this argument the same variable. This function does
* handle surrogate pairs. This function is similar to strtok_r() from
* the POSIX Threads Extension (1003.1c-1995).
*
* @param src String containing token(s). This string will be modified.
* After the first call to u_strtok_r(), this argument must
* be NULL to get to the next token.
* @param delim Set of delimiter characters (Unicode code points).
* @param saveState The current pointer within the original string,
* which is set by this function. The saveState
* parameter should be the address of a local variable of type
* UChar *. (i.e. define "UChar *myLocalSaveState" and use
* &myLocalSaveState for this parameter).
* @return A pointer to the next token found in src, or NULL
* when there are no more tokens.
* @stable ICU 2.0
*/
U_STABLE UChar * U_EXPORT2
u_strtok_r(UChar *src,
const UChar *delim,
UChar **saveState);
/**
* Compare two Unicode strings for bitwise equality (code unit order).
*
* @param s1 A string to compare.
* @param s2 A string to compare.
* @return 0 if s1 and s2 are bitwise equal; a negative
* value if s1 is bitwise less than s2; a positive
* value if s1 is bitwise greater than s2.
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
u_strcmp(const UChar *s1,
const UChar *s2);
/**
* Compare two Unicode strings in code point order.
* See u_strCompare for details.
*
* @param s1 A string to compare.
* @param s2 A string to compare.
* @return a negative/zero/positive integer corresponding to whether
* the first string is less than/equal to/greater than the second one
* in code point order
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
/**
* Compare two Unicode strings (binary order).
*
* The comparison can be done in code unit order or in code point order.
* They differ only in UTF-16 when
* comparing supplementary code points (U+10000..U+10ffff)
* to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
* In code unit order, high BMP code points sort after supplementary code points
* because they are stored as pairs of surrogates which are at U+d800..U+dfff.
*
* This function works with strings of different explicitly specified lengths
* unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
* NUL-terminated strings are possible with length arguments of -1.
*
* @param s1 First source string.
* @param length1 Length of first source string, or -1 if NUL-terminated.
*
* @param s2 Second source string.
* @param length2 Length of second source string, or -1 if NUL-terminated.
*
* @param codePointOrder Choose between code unit order (FALSE)
* and code point order (TRUE).
*
* @return <0 or 0 or >0 as usual for string comparisons
*
* @stable ICU 2.2
*/
U_STABLE int32_t U_EXPORT2
u_strCompare(const UChar *s1, int32_t length1,
const UChar *s2, int32_t length2,
UBool codePointOrder);
/**
* Compare two Unicode strings (binary order)
* as presented by UCharIterator objects.
* Works otherwise just like u_strCompare().
*
* Both iterators are reset to their start positions.
* When the function returns, it is undefined where the iterators
* have stopped.
*
* @param iter1 First source string iterator.
* @param iter2 Second source string iterator.
* @param codePointOrder Choose between code unit order (FALSE)
* and code point order (TRUE).
*
* @return <0 or 0 or >0 as usual for string comparisons
*
* @see u_strCompare
*
* @stable ICU 2.6
*/
U_STABLE int32_t U_EXPORT2
u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
#ifndef U_COMPARE_CODE_POINT_ORDER
/* see also unistr.h and unorm.h */
/**
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
* Compare strings in code point order instead of code unit order.
* @stable ICU 2.2
*/
#define U_COMPARE_CODE_POINT_ORDER 0x8000
#endif
/**
* Compare two strings case-insensitively using full case folding.
* This is equivalent to
* u_strCompare(u_strFoldCase(s1, options),
* u_strFoldCase(s2, options),
* (options&U_COMPARE_CODE_POINT_ORDER)!=0).
*
* The comparison can be done in UTF-16 code unit order or in code point order.
* They differ only when comparing supplementary code points (U+10000..U+10ffff)
* to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
* In code unit order, high BMP code points sort after supplementary code points
* because they are stored as pairs of surrogates which are at U+d800..U+dfff.
*
* This function works with strings of different explicitly specified lengths
* unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
* NUL-terminated strings are possible with length arguments of -1.
*
* @param s1 First source string.
* @param length1 Length of first source string, or -1 if NUL-terminated.
*
* @param s2 Second source string.
* @param length2 Length of second source string, or -1 if NUL-terminated.
*
* @param options A bit set of options:
* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
* Comparison in code unit order with default case folding.
*
* - U_COMPARE_CODE_POINT_ORDER
* Set to choose code point order instead of code unit order
* (see u_strCompare for details).
*
* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
*
* @param pErrorCode Must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @return <0 or 0 or >0 as usual for string comparisons
*
* @stable ICU 2.2
*/
U_STABLE int32_t U_EXPORT2
u_strCaseCompare(const UChar *s1, int32_t length1,
const UChar *s2, int32_t length2,
uint32_t options,
UErrorCode *pErrorCode);
/**
* Compare two ustrings for bitwise equality.
* Compares at most n characters.
*
* @param ucs1 A string to compare (can be NULL/invalid if n<=0).
* @param ucs2 A string to compare (can be NULL/invalid if n<=0).
* @param n The maximum number of characters to compare; always returns 0 if n<=0.
* @return 0 if ucs1 and ucs2 are bitwise equal; a negative
* value if ucs1 is bitwise less than ucs2; a positive
* value if ucs1 is bitwise greater than ucs2.
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
u_strncmp(const UChar *ucs1,
const UChar *ucs2,
int32_t n);
/**
* Compare two Unicode strings in code point order.
* This is different in UTF-16 from u_strncmp() if supplementary characters are present.
* For details, see u_strCompare().
*
* @param s1 A string to compare.
* @param s2 A string to compare.
* @param n The maximum number of characters to compare.
* @return a negative/zero/positive integer corresponding to whether
* the first string is less than/equal to/greater than the second one
* in code point order
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
/**
* Compare two strings case-insensitively using full case folding.
* This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
*
* @param s1 A string to compare.
* @param s2 A string to compare.
* @param options A bit set of options:
* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
* Comparison in code unit order with default case folding.
*
* - U_COMPARE_CODE_POINT_ORDER
* Set to choose code point order instead of code unit order
* (see u_strCompare for details).
*
* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
*
* @return A negative, zero, or positive integer indicating the comparison result.
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
/**
* Compare two strings case-insensitively using full case folding.
* This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options),
* u_strFoldCase(s2, at most n, options)).
*
* @param s1 A string to compare.
* @param s2 A string to compare.
* @param n The maximum number of characters in each string to case-fold and then compare.
* @param options A bit set of options:
* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
* Comparison in code unit order with default case folding.
*
* - U_COMPARE_CODE_POINT_ORDER
* Set to choose code point order instead of code unit order
* (see u_strCompare for details).
*
* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
*
* @return A negative, zero, or positive integer indicating the comparison result.
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
/**
* Compare two strings case-insensitively using full case folding.
* This is equivalent to u_strcmp(u_strFoldCase(s1, length, options),
* u_strFoldCase(s2, length, options)).
*
* @param s1 A string to compare.
* @param s2 A string to compare.
* @param length The number of characters in each string to case-fold and then compare.
* @param options A bit set of options:
* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
* Comparison in code unit order with default case folding.
*
* - U_COMPARE_CODE_POINT_ORDER
* Set to choose code point order instead of code unit order
* (see u_strCompare for details).
*
* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
*
* @return A negative, zero, or positive integer indicating the comparison result.
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
/**
* Copy a ustring. Adds a null terminator.
*
* @param dst The destination string.
* @param src The source string.
* @return A pointer to dst.
* @stable ICU 2.0
*/
U_STABLE UChar* U_EXPORT2
u_strcpy(UChar *dst,
const UChar *src);
/**
 * Copies at most n characters of the ustring src into dst.
 * The result is null terminated if src is shorter than n.
 *
 * @param dst Destination string.
 * @param src Source string (may be NULL/invalid when n<=0).
 * @param n Upper bound on the number of characters copied; the call is a no-op when n<=0.
 * @return A pointer to dst.
 * @stable ICU 2.0
 */
U_STABLE UChar* U_EXPORT2 u_strncpy(UChar *dst, const UChar *src, int32_t n);
#if !UCONFIG_NO_CONVERSION
/**
 * Copies a byte string encoded in the default codepage into a ustring,
 * performing a host byte to UChar conversion, and adds a null terminator.
 *
 * @param dst Destination string.
 * @param src Source string.
 * @return A pointer to dst.
 * @stable ICU 2.0
 */
U_STABLE UChar* U_EXPORT2
u_uastrcpy(UChar *dst, const char *src);
/**
 * Copies at most n characters of a byte string encoded in the default
 * codepage into a ustring, performing a host byte to UChar conversion.
 * The result is null terminated if src is shorter than n.
 *
 * @param dst Destination string.
 * @param src Source string.
 * @param n Upper bound on the number of characters copied.
 * @return A pointer to dst.
 * @stable ICU 2.0
 */
U_STABLE UChar* U_EXPORT2
u_uastrncpy(UChar *dst, const char *src, int32_t n);
/**
 * Copies a ustring into a byte string encoded in the default codepage,
 * performing a UChar to host byte conversion, and adds a null terminator.
 *
 * @param dst Destination string.
 * @param src Source string.
 * @return A pointer to dst.
 * @stable ICU 2.0
 */
U_STABLE char* U_EXPORT2
u_austrcpy(char *dst, const UChar *src);
/**
 * Copies at most n characters of a ustring into a byte string encoded in
 * the default codepage, performing a UChar to host byte conversion.
 * The result is null terminated if src is shorter than n.
 *
 * @param dst Destination string.
 * @param src Source string.
 * @param n Upper bound on the number of characters copied.
 * @return A pointer to dst.
 * @stable ICU 2.0
 */
U_STABLE char* U_EXPORT2
u_austrncpy(char *dst, const UChar *src, int32_t n);
#endif
/**
 * memcpy() counterpart that operates on UChars only.
 *
 * @param dest Destination string.
 * @param src Source string (may be NULL/invalid when count<=0).
 * @param count Number of characters to copy; the call is a no-op when count<=0.
 * @return A pointer to dest.
 * @stable ICU 2.0
 */
U_STABLE UChar* U_EXPORT2 u_memcpy(
    UChar *dest,
    const UChar *src,
    int32_t count);
/**
 * memmove() counterpart that operates on UChars only.
 *
 * @param dest Destination string.
 * @param src Source string (may be NULL/invalid when count<=0).
 * @param count Number of characters to move; the call is a no-op when count<=0.
 * @return A pointer to dest.
 * @stable ICU 2.0
 */
U_STABLE UChar* U_EXPORT2 u_memmove(
    UChar *dest,
    const UChar *src,
    int32_t count);
/**
 * Sets count characters of dest to the character c.
 *
 * @param dest Destination string.
 * @param c Character used to initialize the string.
 * @param count Maximum number of characters to set.
 * @return A pointer to dest.
 * @stable ICU 2.0
 */
U_STABLE UChar* U_EXPORT2 u_memset(
    UChar *dest,
    UChar c,
    int32_t count);
/**
 * Compares the first count UChars of two buffers.
 *
 * @param buf1 First string to compare.
 * @param buf2 Second string to compare.
 * @param count Maximum number of UChars to compare.
 * @return A negative number when buf1 < buf2,
 *         0 when buf1 == buf2,
 *         a positive number when buf1 > buf2.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2 u_memcmp(
    const UChar *buf1,
    const UChar *buf2,
    int32_t count);
/**
 * Compares two Unicode strings in code point order.
 * In UTF-16 the result differs from u_memcmp() when supplementary
 * characters are present. See u_strCompare() for details.
 *
 * @param s1 First string to compare.
 * @param s2 Second string to compare.
 * @param count Maximum number of characters to compare.
 * @return A negative/zero/positive integer corresponding to whether
 *         the first string is less than/equal to/greater than the second one
 *         in code point order.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2 u_memcmpCodePointOrder(
    const UChar *s1,
    const UChar *s2,
    int32_t count);
/**
 * Locates the first occurrence of a BMP code point in a string.
 * A surrogate code point is found only if its match in the text is not
 * part of a surrogate pair.
 * A NUL character is found at the string terminator.
 *
 * @param s String to search (contains count UChars).
 * @param c BMP code point to look for.
 * @param count Length of the string.
 * @return A pointer to the first occurrence of c in s,
 *         or NULL if c is not in s.
 * @stable ICU 2.0
 *
 * @see u_strchr
 * @see u_memchr32
 * @see u_strFindFirst
 */
U_STABLE UChar* U_EXPORT2 u_memchr(
    const UChar *s,
    UChar c,
    int32_t count);
/**
 * Locates the first occurrence of a code point in a string.
 * A surrogate code point is found only if its match in the text is not
 * part of a surrogate pair.
 * A NUL character is found at the string terminator.
 *
 * @param s String to search (contains count UChars).
 * @param c Code point to look for.
 * @param count Length of the string.
 * @return A pointer to the first occurrence of c in s,
 *         or NULL if c is not in s.
 * @stable ICU 2.0
 *
 * @see u_strchr32
 * @see u_memchr
 * @see u_strFindFirst
 */
U_STABLE UChar* U_EXPORT2 u_memchr32(
    const UChar *s,
    UChar32 c,
    int32_t count);
/**
 * Locates the last occurrence of a BMP code point in a string.
 * A surrogate code point is found only if its match in the text is not
 * part of a surrogate pair.
 * A NUL character is found at the string terminator.
 *
 * @param s String to search (contains count UChars).
 * @param c BMP code point to look for.
 * @param count Length of the string.
 * @return A pointer to the last occurrence of c in s,
 *         or NULL if c is not in s.
 * @stable ICU 2.4
 *
 * @see u_strrchr
 * @see u_memrchr32
 * @see u_strFindLast
 */
U_STABLE UChar* U_EXPORT2 u_memrchr(
    const UChar *s,
    UChar c,
    int32_t count);
/**
 * Locates the last occurrence of a code point in a string.
 * A surrogate code point is found only if its match in the text is not
 * part of a surrogate pair.
 * A NUL character is found at the string terminator.
 *
 * @param s String to search (contains count UChars).
 * @param c Code point to look for.
 * @param count Length of the string.
 * @return A pointer to the last occurrence of c in s,
 *         or NULL if c is not in s.
 * @stable ICU 2.4
 *
 * @see u_strrchr32
 * @see u_memrchr
 * @see u_strFindLast
 */
U_STABLE UChar* U_EXPORT2 u_memrchr32(
    const UChar *s,
    UChar32 c,
    int32_t count);
/**
 * Unicode String literals in C.
 * We need one macro to declare a variable for the string
 * and to statically preinitialize it if possible,
 * and a second macro to dynamically initialize such a string variable if necessary.
 *
 * The macros are defined for maximum performance.
 * They work only for strings that contain "invariant characters", i.e.,
 * only latin letters, digits, and some punctuation.
 * See utypes.h for details.
 *
 * A pair of macros for a single string must be used with the same
 * parameters.
 * The string parameter must be a C string literal.
 * The length of the string, not including the terminating
 * NUL, must be specified as a constant.
 * The U_STRING_DECL macro should be invoked exactly once for one
 * such string variable before it is used.
 *
 * Depending on the configuration, U_STRING_DECL declares var either as a
 * pointer to const UChar, as a const UChar array of (length)+1 elements,
 * or as a non-const UChar array of (length)+1 elements; only in the last
 * case does U_STRING_INIT expand to any code (a u_charsToUChars() call).
 * Do not rely on sizeof(var) or on var being modifiable.
 *
 * Usage:
 *
 * U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
 * U_STRING_DECL(ustringVar2, "jumps 5%", 8);
 * static UBool didInit=FALSE;
 *
 * int32_t function() {
 * if(!didInit) {
 * U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
 * U_STRING_INIT(ustringVar2, "jumps 5%", 8);
 * didInit=TRUE;
 * }
 * return u_strcmp(ustringVar1, ustringVar2);
 * }
 *
 *
 * Note that the macros will NOT consistently work if their argument is another #define.
 * The following will not work on all platforms, don't use it.
 *
 *
 * #define GLUCK "Mr. Gluck"
 * U_STRING_DECL(var, GLUCK, 9)
 * U_STRING_INIT(var, GLUCK, 9)
 *
 *
 * Instead, use the string literal "Mr. Gluck" as the argument to both macro
 * calls.
 *
 *
 * @stable ICU 2.0
 */
#if defined(U_DECLARE_UTF16)
/* A UTF-16 literal facility is available: var is a pointer to a static literal. */
# define U_STRING_DECL(var, cs, length) static const UChar *var=(const UChar *)U_DECLARE_UTF16(cs)
/**@stable ICU 2.0 */
# define U_STRING_INIT(var, cs, length)
#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
/* wchar_t has the size of UChar: a wide string literal initializes the array. */
# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs
/**@stable ICU 2.0 */
# define U_STRING_INIT(var, cs, length)
#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
/* One-byte UChar in an ASCII-family charset: the narrow literal is used directly. */
# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
/**@stable ICU 2.0 */
# define U_STRING_INIT(var, cs, length)
#else
/* No static initialization possible: declare the buffer and fill it at run time. */
# define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
/**@stable ICU 2.0 */
/* length is parenthesized so that an expression argument expands as in U_STRING_DECL. */
# define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, (length)+1)
#endif
/**
 * Unescapes a string of characters, writing the resulting Unicode
 * characters into the destination buffer. These escape sequences
 * are recognized:
 *
 * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
 * \\Uhhhhhhhh 8 hex digits
 * \\xhh 1-2 hex digits
 * \\x{h...} 1-8 hex digits
 * \\ooo 1-3 octal digits; o in [0-7]
 * \\cX control-X; X is masked with 0x1F
 *
 * together with the standard ANSI C escapes:
 *
 * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
 * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
 * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
 *
 * Any other character following a backslash is generically escaped;
 * for example, "[a\\-z]" yields "[a-z]".
 *
 * An ill-formed escape sequence makes this function return an empty
 * string. One example of an ill-formed sequence is "\\u" followed by
 * fewer than 4 hex digits.
 *
 * The characters above are recognized in the compiler's codepage,
 * i.e. they are coded as 'u', '\\', etc. Characters that do not
 * belong to an escape sequence are converted using u_charsToUChars().
 *
 * This function resembles UnicodeString::unescape() without being
 * identical to it: the latter takes a source UnicodeString, so it
 * performs escape recognition but no conversion.
 *
 * @param src A zero-terminated string of invariant characters.
 * @param dest Pointer to the buffer that receives the converted and
 *        unescaped text and, if there is room, a zero terminator. May be
 *        NULL for preflighting, in which case no UChars are written but
 *        the return value is still valid. On error, an empty string is
 *        stored here (if possible).
 * @param destCapacity Number of UChars that may be written at dest.
 *        Ignored if dest == NULL.
 * @return The length of the unescaped string.
 * @see u_unescapeAt
 * @see UnicodeString#unescape()
 * @see UnicodeString#unescapeAt()
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2 u_unescape(
    const char *src,
    UChar *dest,
    int32_t destCapacity);
U_CDECL_BEGIN
/**
 * Callback function for u_unescapeAt() that returns a character of
 * the source text given an offset and a context pointer. The context
 * pointer will be whatever is passed into u_unescapeAt().
 *
 * @param offset the offset (passed by value, not by pointer) of the
 * source-text character to return
 * @param context an opaque pointer passed directly into u_unescapeAt()
 * @return the UChar of the source text at offset
 * @see u_unescapeAt
 * @stable ICU 2.0
 */
typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
U_CDECL_END
/**
 * Unescapes a single sequence. The character at offset-1 is assumed
 * (without checking) to be a backslash. The source text is read through
 * a callback that returns the UChar at a given offset; by varying this
 * callback, ICU functions are able to unescape char* strings,
 * UnicodeString objects, and UFILE pointers.
 *
 * (UChar32)0xFFFFFFFF is returned if offset is out of range or if the
 * escape sequence is ill-formed. The recognized sequences are listed in
 * the documentation of u_unescape().
 *
 * @param charAt Callback function that returns a UChar of the source
 *        text given an offset and a context pointer.
 * @param offset Pointer to the offset that will be passed to charAt.
 *        On return the offset value is updated to point after the last
 *        parsed character of the escape sequence. On error the offset
 *        is unchanged.
 * @param length Number of characters in the source text. The last
 *        character of the source text is considered to be at offset
 *        length-1.
 * @param context An opaque pointer passed directly into charAt.
 * @return The character represented by the escape sequence at
 *         offset, or (UChar32)0xFFFFFFFF on error.
 * @see u_unescape()
 * @see UnicodeString#unescape()
 * @see UnicodeString#unescapeAt()
 * @stable ICU 2.0
 */
U_STABLE UChar32 U_EXPORT2 u_unescapeAt(
    UNESCAPE_CHAR_AT charAt, int32_t *offset,
    int32_t length, void *context);
/**
 * Converts the characters of a string to uppercase.
 * Casing is locale-dependent and context-sensitive.
 * The result may be longer or shorter than the original.
 * The source string and the destination buffer are allowed to overlap.
 *
 * @param dest Buffer for the result string. The result is zero-terminated if
 *        the buffer is large enough.
 * @param destCapacity Size of the buffer (number of UChars). If it is 0, then
 *        dest may be NULL and the function only returns the length of the
 *        result without writing any of the result string.
 * @param src The original string.
 * @param srcLength Length of the original string. If -1, then src must be zero-terminated.
 * @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
 * @param pErrorCode Must be a valid pointer to an error code value,
 *        which must not indicate a failure before the function call.
 * @return The length of the result string. It may be greater than destCapacity;
 *         in that case only some of the result was written to the destination buffer.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2 u_strToUpper(
    UChar *dest,
    int32_t destCapacity,
    const UChar *src,
    int32_t srcLength,
    const char *locale,
    UErrorCode *pErrorCode);
/**
 * Converts the characters of a string to lowercase.
 * Casing is locale-dependent and context-sensitive.
 * The result may be longer or shorter than the original.
 * The source string and the destination buffer are allowed to overlap.
 *
 * @param dest Buffer for the result string. The result is zero-terminated if
 *        the buffer is large enough.
 * @param destCapacity Size of the buffer (number of UChars). If it is 0, then
 *        dest may be NULL and the function only returns the length of the
 *        result without writing any of the result string.
 * @param src The original string.
 * @param srcLength Length of the original string. If -1, then src must be zero-terminated.
 * @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
 * @param pErrorCode Must be a valid pointer to an error code value,
 *        which must not indicate a failure before the function call.
 * @return The length of the result string. It may be greater than destCapacity;
 *         in that case only some of the result was written to the destination buffer.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2 u_strToLower(
    UChar *dest,
    int32_t destCapacity,
    const UChar *src,
    int32_t srcLength,
    const char *locale,
    UErrorCode *pErrorCode);
#if !UCONFIG_NO_BREAK_ITERATION
/**
 * Converts a string to titlecase.
 * Casing is locale-dependent and context-sensitive.
 * A break iterator locates the first characters of words that are to be
 * titlecased; those characters are titlecased and all others are
 * lowercased.
 *
 * A titlecase break iterator can be supplied to customize for arbitrary
 * styles, using rules and dictionaries beyond the standard iterators.
 * Always supplying an iterator may be more efficient, because it avoids
 * opening and closing one for each string.
 * The standard titlecase iterator for the root locale implements the
 * algorithm of Unicode TR 21.
 *
 * Only the setText(), first() and next() methods of the provided break
 * iterator are used by this function.
 *
 * The result may be longer or shorter than the original.
 * The source string and the destination buffer are allowed to overlap.
 *
 * @param dest Buffer for the result string. The result is zero-terminated if
 *        the buffer is large enough.
 * @param destCapacity Size of the buffer (number of UChars). If it is 0, then
 *        dest may be NULL and the function only returns the length of the
 *        result without writing any of the result string.
 * @param src The original string.
 * @param srcLength Length of the original string. If -1, then src must be zero-terminated.
 * @param titleIter A break iterator to find the first characters of words
 *        that are to be titlecased.
 *        If none is provided (NULL), then a standard titlecase
 *        break iterator is opened.
 * @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
 * @param pErrorCode Must be a valid pointer to an error code value,
 *        which must not indicate a failure before the function call.
 * @return The length of the result string. It may be greater than destCapacity;
 *         in that case only some of the result was written to the destination buffer.
 * @stable ICU 2.1
 */
U_STABLE int32_t U_EXPORT2 u_strToTitle(
    UChar *dest,
    int32_t destCapacity,
    const UChar *src,
    int32_t srcLength,
    UBreakIterator *titleIter,
    const char *locale,
    UErrorCode *pErrorCode);
#endif
/**
 * Applies case folding to the characters of a string.
 *
 * Case-folding is locale-independent and not context-sensitive,
 * but an option controls whether to include or exclude the mappings for
 * dotted I and dotless i that are marked with 'T' in CaseFolding.txt.
 *
 * The result may be longer or shorter than the original.
 * The source string and the destination buffer are allowed to overlap.
 *
 * @param dest Buffer for the result string. The result is zero-terminated if
 *        the buffer is large enough.
 * @param destCapacity Size of the buffer (number of UChars). If it is 0, then
 *        dest may be NULL and the function only returns the length of the
 *        result without writing any of the result string.
 * @param src The original string.
 * @param srcLength Length of the original string. If -1, then src must be zero-terminated.
 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I.
 * @param pErrorCode Must be a valid pointer to an error code value,
 *        which must not indicate a failure before the function call.
 * @return The length of the result string. It may be greater than destCapacity;
 *         in that case only some of the result was written to the destination buffer.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2 u_strFoldCase(
    UChar *dest,
    int32_t destCapacity,
    const UChar *src,
    int32_t srcLength,
    uint32_t options,
    UErrorCode *pErrorCode);
#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
/**
 * Converts a UTF-16 string to a wchar_t string.
 * When it is known at compile time that wchar_t strings are in UTF-16 or
 * UTF-32, this function simply calls the fast, dedicated function for that.
 * Otherwise, two conversions UTF-16 -> default charset -> wchar_t* are performed.
 *
 * @param dest Buffer for the result string. The result is zero-terminated if
 *        the buffer is large enough.
 * @param destCapacity Size of the buffer (number of wchar_t's). If it is 0, then
 *        dest may be NULL and the function only returns the length of the
 *        result without writing any of the result string (pre-flighting).
 * @param pDestLength Pointer that receives the number of units written to the
 *        destination. If pDestLength!=NULL then *pDestLength is always set to
 *        the number of output units corresponding to the transformation of
 *        all the input units, even in case of a buffer overflow.
 * @param src The original source string.
 * @param srcLength Length of the original string. If -1, then src must be zero-terminated.
 * @param pErrorCode Must be a valid pointer to an error code value,
 *        which must not indicate a failure before the function call.
 * @return The pointer to the destination buffer.
 * @stable ICU 2.0
 */
U_STABLE wchar_t* U_EXPORT2 u_strToWCS(
    wchar_t *dest, int32_t destCapacity, int32_t *pDestLength,
    const UChar *src, int32_t srcLength,
    UErrorCode *pErrorCode);
/**
 * Converts a wchar_t string to UTF-16.
 * When it is known at compile time that wchar_t strings are in UTF-16 or
 * UTF-32, this function simply calls the fast, dedicated function for that.
 * Otherwise, two conversions wchar_t* -> default charset -> UTF-16 are performed.
 *
 * @param dest Buffer for the result string. The result is zero-terminated if
 *        the buffer is large enough.
 * @param destCapacity Size of the buffer (number of UChars). If it is 0, then
 *        dest may be NULL and the function only returns the length of the
 *        result without writing any of the result string (pre-flighting).
 * @param pDestLength Pointer that receives the number of units written to the
 *        destination. If pDestLength!=NULL then *pDestLength is always set to
 *        the number of output units corresponding to the transformation of
 *        all the input units, even in case of a buffer overflow.
 * @param src The original source string.
 * @param srcLength Length of the original string. If -1, then src must be zero-terminated.
 * @param pErrorCode Must be a valid pointer to an error code value,
 *        which must not indicate a failure before the function call.
 * @return The pointer to the destination buffer.
 * @stable ICU 2.0
 */
U_STABLE UChar* U_EXPORT2 u_strFromWCS(
    UChar *dest, int32_t destCapacity, int32_t *pDestLength,
    const wchar_t *src, int32_t srcLength,
    UErrorCode *pErrorCode);
#endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
/**
 * Converts a UTF-16 string to UTF-8.
 * The U_INVALID_CHAR_FOUND error code is set if the input string is not
 * well-formed.
 *
 * @param dest Buffer for the result string. The result is zero-terminated if
 *        the buffer is large enough.
 * @param destCapacity Size of the buffer (number of chars). If it is 0, then
 *        dest may be NULL and the function only returns the length of the
 *        result without writing any of the result string (pre-flighting).
 * @param pDestLength Pointer that receives the number of units written to the
 *        destination. If pDestLength!=NULL then *pDestLength is always set to
 *        the number of output units corresponding to the transformation of
 *        all the input units, even in case of a buffer overflow.
 * @param src The original source string.
 * @param srcLength Length of the original string. If -1, then src must be zero-terminated.
 * @param pErrorCode Must be a valid pointer to an error code value,
 *        which must not indicate a failure before the function call.
 * @return The pointer to the destination buffer.
 * @stable ICU 2.0
 * @see u_strToUTF8WithSub
 * @see u_strFromUTF8
 */
U_STABLE char* U_EXPORT2 u_strToUTF8(
    char *dest, int32_t destCapacity, int32_t *pDestLength,
    const UChar *src, int32_t srcLength,
    UErrorCode *pErrorCode);
/**
 * Converts a UTF-8 string to UTF-16.
 * The U_INVALID_CHAR_FOUND error code is set if the input string is not
 * well-formed.
 *
 * @param dest Buffer for the result string. The result is zero-terminated if
 *        the buffer is large enough.
 * @param destCapacity Size of the buffer (number of UChars). If it is 0, then
 *        dest may be NULL and the function only returns the length of the
 *        result without writing any of the result string (pre-flighting).
 * @param pDestLength Pointer that receives the number of units written to the
 *        destination. If pDestLength!=NULL then *pDestLength is always set to
 *        the number of output units corresponding to the transformation of
 *        all the input units, even in case of a buffer overflow.
 * @param src The original source string.
 * @param srcLength Length of the original string. If -1, then src must be zero-terminated.
 * @param pErrorCode Must be a valid pointer to an error code value,
 *        which must not indicate a failure before the function call.
 * @return The pointer to the destination buffer.
 * @stable ICU 2.0
 * @see u_strFromUTF8WithSub
 * @see u_strFromUTF8Lenient
 */
U_STABLE UChar* U_EXPORT2 u_strFromUTF8(
    UChar *dest, int32_t destCapacity, int32_t *pDestLength,
    const char *src, int32_t srcLength,
    UErrorCode *pErrorCode);
/**
 * Converts a UTF-16 string to UTF-8, with optional substitution.
 *
 * Identical to u_strToUTF8() except for the additional subchar, which is
 * output for illegal input sequences instead of stopping with the
 * U_INVALID_CHAR_FOUND error code.
 * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8().
 *
 * @param dest Buffer for the result string. The result is zero-terminated if
 *        the buffer is large enough.
 * @param destCapacity Size of the buffer (number of chars). If it is 0, then
 *        dest may be NULL and the function only returns the length of the
 *        result without writing any of the result string (pre-flighting).
 * @param pDestLength Pointer that receives the number of units written to the
 *        destination. If pDestLength!=NULL then *pDestLength is always set to
 *        the number of output units corresponding to the transformation of
 *        all the input units, even in case of a buffer overflow.
 * @param src The original source string.
 * @param srcLength Length of the original string. If -1, then src must be zero-terminated.
 * @param subchar The substitution character to use in place of an illegal input sequence,
 *        or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
 *        A substitution character can be any valid Unicode code point (up to U+10FFFF)
 *        except for surrogate code points (U+D800..U+DFFF).
 *        The recommended value is U+FFFD "REPLACEMENT CHARACTER".
 * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
 *        Set to 0 if no substitutions occur or subchar<0.
 *        pNumSubstitutions can be NULL.
 * @param pErrorCode Pointer to a standard ICU error code. Its input value must
 *        pass the U_SUCCESS() test, or else the function returns
 *        immediately. Check for U_FAILURE() on output or use with
 *        function chaining. (See User Guide for details.)
 * @return The pointer to the destination buffer.
 * @see u_strToUTF8
 * @see u_strFromUTF8WithSub
 * @stable ICU 3.6
 */
U_STABLE char* U_EXPORT2 u_strToUTF8WithSub(
    char *dest, int32_t destCapacity, int32_t *pDestLength,
    const UChar *src, int32_t srcLength,
    UChar32 subchar,
    int32_t *pNumSubstitutions,
    UErrorCode *pErrorCode);
/**
 * Converts a UTF-8 string to UTF-16, with optional substitution.
 *
 * Identical to u_strFromUTF8() except for the additional subchar, which is
 * output for illegal input sequences instead of stopping with the
 * U_INVALID_CHAR_FOUND error code.
 * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8().
 *
 * @param dest Buffer for the result string. The result is zero-terminated if
 *        the buffer is large enough.
 * @param destCapacity Size of the buffer (number of UChars). If it is 0, then
 *        dest may be NULL and the function only returns the length of the
 *        result without writing any of the result string (pre-flighting).
 * @param pDestLength Pointer that receives the number of units written to the
 *        destination. If pDestLength!=NULL then *pDestLength is always set to
 *        the number of output units corresponding to the transformation of
 *        all the input units, even in case of a buffer overflow.
 * @param src The original source string.
 * @param srcLength Length of the original string. If -1, then src must be zero-terminated.
 * @param subchar The substitution character to use in place of an illegal input sequence,
 *        or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
 *        A substitution character can be any valid Unicode code point (up to U+10FFFF)
 *        except for surrogate code points (U+D800..U+DFFF).
 *        The recommended value is U+FFFD "REPLACEMENT CHARACTER".
 * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
 *        Set to 0 if no substitutions occur or subchar<0.
 *        pNumSubstitutions can be NULL.
 * @param pErrorCode Pointer to a standard ICU error code. Its input value must
 *        pass the U_SUCCESS() test, or else the function returns
 *        immediately. Check for U_FAILURE() on output or use with
 *        function chaining. (See User Guide for details.)
 * @return The pointer to the destination buffer.
 * @see u_strFromUTF8
 * @see u_strFromUTF8Lenient
 * @see u_strToUTF8WithSub
 * @stable ICU 3.6
 */
U_STABLE UChar* U_EXPORT2 u_strFromUTF8WithSub(
    UChar *dest, int32_t destCapacity, int32_t *pDestLength,
    const char *src, int32_t srcLength,
    UChar32 subchar,
    int32_t *pNumSubstitutions,
    UErrorCode *pErrorCode);
/**
* Convert a UTF-8 string to UTF-16.
*
* Same as u_strFromUTF8() except that this function is designed to be very fast,
* which it achieves by being lenient about malformed UTF-8 sequences.
* This function is intended for use in environments where UTF-8 text is
* expected to be well-formed.
*
* Its semantics are:
* - Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text.
* - The function will not read beyond the input string, nor write beyond
* the destCapacity.
* - Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not
* be well-formed UTF-16.
* The function will resynchronize to valid code point boundaries
* within a small number of code points after an illegal sequence.
* - Non-shortest forms are not detected and will result in "spoofing" output.
*
* For further performance improvement, if srcLength is given (>=0),
* then it must be destCapacity>=srcLength.
*
* There is no inverse u_strToUTF8Lenient() function because there is practically
* no performance gain from not checking that a UTF-16 string is well-formed.
*
* @param dest A buffer for the result string. The result will be zero-terminated if
* the buffer is large enough.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the
* result without writing any of the result string (pre-flighting).
* Unlike for other ICU functions, if srcLength>=0 then it
* must be destCapacity>=srcLength.
* @param pDestLength A pointer to receive the number of units written to the destination. If
* pDestLength!=NULL then *pDestLength is always set to the
* number of output units corresponding to the transformation of
* all the input units, even in case of a buffer overflow.
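* Unlike for other ICU functions, if srcLength>=0 but
* destCapacity<srcLength, then *pDestLength will be set to srcLength
* (and U_BUFFER_OVERFLOW_ERROR will be set)
* regardless of the actual result length.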
* preContext postContext
* "" "" The parser does not support context
* "let " "=7" Pre- and post-context only
* "let " "for=7" Pre- and post-context and error text
* "" "for" Error text only
*
*
*
*
*
* The procedure for preparing Unicode strings:
*
*
* @author Ram Viswanadha
*/
#if !UCONFIG_NO_IDNA
/**
 * The StringPrep profile.
 * Only this forward declaration is visible here, so the type is used
 * through pointers: usprep_open() and usprep_openByType() return one,
 * and usprep_close() closes it.
 * @stable ICU 2.8
 */
typedef struct UStringPrepProfile UStringPrepProfile;
/**
 * Option to prohibit processing of unassigned code points in the input.
 * Used in the options bit set of usprep_prepare; the value is 0, i.e. no
 * option bit is set.
 *
 * @see usprep_prepare
 * @stable ICU 2.8
 */
#define USPREP_DEFAULT 0x0000
/**
 * Option to allow processing of unassigned code points in the input.
 * Used in the options bit set of usprep_prepare, where it makes unassigned
 * code points be treated as normal Unicode code points.
 *
 * @see usprep_prepare
 * @stable ICU 2.8
 */
#define USPREP_ALLOW_UNASSIGNED 0x0001
/**
 * Standard stringprep profile types that usprep_openByType supports.
 * @see usprep_openByType
 * @stable ICU 4.2
 */
typedef enum UStringPrepProfileType {
    USPREP_RFC3491_NAMEPREP,               /**< RFC3491 Nameprep. @stable ICU 4.2 */
    USPREP_RFC3530_NFS4_CS_PREP,           /**< RFC3530 nfs4_cs_prep. @stable ICU 4.2 */
    USPREP_RFC3530_NFS4_CS_PREP_CI,        /**< RFC3530 nfs4_cs_prep with case insensitive option. @stable ICU 4.2 */
    USPREP_RFC3530_NFS4_CIS_PREP,          /**< RFC3530 nfs4_cis_prep. @stable ICU 4.2 */
    USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX, /**< RFC3530 nfs4_mixed_prep for prefix. @stable ICU 4.2 */
    USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX, /**< RFC3530 nfs4_mixed_prep for suffix. @stable ICU 4.2 */
    USPREP_RFC3722_ISCSI,                  /**< RFC3722 iSCSI. @stable ICU 4.2 */
    USPREP_RFC3920_NODEPREP,               /**< RFC3920 XMPP Nodeprep. @stable ICU 4.2 */
    USPREP_RFC3920_RESOURCEPREP,           /**< RFC3920 XMPP Resourceprep. @stable ICU 4.2 */
    USPREP_RFC4011_MIB,                    /**< RFC4011 Policy MIB Stringprep. @stable ICU 4.2 */
    USPREP_RFC4013_SASLPREP,               /**< RFC4013 SASLprep. @stable ICU 4.2 */
    USPREP_RFC4505_TRACE,                  /**< RFC4505 trace. @stable ICU 4.2 */
    USPREP_RFC4518_LDAP,                   /**< RFC4518 LDAP. @stable ICU 4.2 */
    USPREP_RFC4518_LDAP_CI                 /**< RFC4518 LDAP for case ignore, numeric and stored prefix matching rules. @stable ICU 4.2 */
} UStringPrepProfileType;
/**
 * Creates a StringPrep profile from a data file.
 *
 * @param path String containing the full path to the directory where the
 *        profile resides, followed by the package name,
 *        e.g. "/usr/resource/my_app/profiles/mydata" on a Unix system.
 *        If NULL, the ICU default data files are used.
 * @param fileName Name of the profile file to be opened.
 * @param status ICU error code in/out parameter. Must not be NULL.
 *        Must fulfill U_SUCCESS before the function call.
 * @return Pointer to the UStringPrepProfile that is opened. Should be closed by
 *         calling usprep_close().
 * @see usprep_close()
 * @stable ICU 2.8
 */
U_STABLE UStringPrepProfile* U_EXPORT2 usprep_open(
    const char* path, const char* fileName, UErrorCode* status);
/**
 * Opens the StringPrep profile that corresponds to a profile type.
 *
 * @param type   The profile type.
 * @param status ICU error code in/out parameter. Must not be NULL and
 *               must fulfill U_SUCCESS before the function call.
 * @return Pointer to the opened UStringPrepProfile. It should be closed by
 *         calling usprep_close().
 * @see usprep_close()
 * @stable ICU 4.2
 */
U_STABLE UStringPrepProfile * U_EXPORT2
usprep_openByType(UStringPrepProfileType type, UErrorCode *status);
/**
 * Closes a StringPrep profile.
 *
 * @param profile The profile to close.
 * @stable ICU 2.8
 */
U_STABLE void U_EXPORT2 usprep_close(UStringPrepProfile *profile);
/**
 * Prepares the input buffer for use in applications with the given profile.
 * The operation maps, normalizes (NFKC), and checks for prohibited and BiDi
 * characters in the order defined by RFC 3454, depending on the options
 * specified in the profile.
 *
 * @param prep         The profile to use.
 * @param src          Pointer to the UChar buffer containing the string to
 *                     prepare.
 * @param srcLength    Number of characters in the source string.
 * @param dest         Pointer to the destination buffer that receives the
 *                     output.
 * @param destCapacity The capacity of the destination array.
 * @param options      A bit set of options:
 *
 *   - USPREP_DEFAULT          Prohibit processing of unassigned code points
 *                             in the input.
 *
 *   - USPREP_ALLOW_UNASSIGNED Treat unassigned code points in the input as
 *                             normal Unicode code points.
 *
 * @param parseError   Pointer to a UParseError struct that receives
 *                     information on the position of the error if one is
 *                     encountered. Can be NULL.
 * @param status       ICU in/out error code parameter.
 *                     U_INVALID_CHAR_FOUND if src contains unmatched single
 *                     surrogates.
 *                     U_INDEX_OUTOFBOUNDS_ERROR if src contains too many
 *                     code points.
 *                     U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
 * @return The number of UChars in the destination buffer.
 * @stable ICU 2.8
 */
U_STABLE int32_t U_EXPORT2
usprep_prepare(const UStringPrepProfile *prep,
               const UChar *src,
               int32_t srcLength,
               UChar *dest,
               int32_t destCapacity,
               int32_t options,
               UParseError *parseError,
               UErrorCode *status);
#endif /* #if !UCONFIG_NO_IDNA */
#endif
// uidna.h
/*
*******************************************************************************
*
* Copyright (C) 2003-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: uidna.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003feb1
* created by: Ram Viswanadha
*/
#ifndef __UIDNA_H__
#define __UIDNA_H__
#if !UCONFIG_NO_IDNA
/**
* \file
* \brief C API: Internationalizing Domain Names in Applications (IDNA)
*
* IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h.
*
* The C API functions which do take a UIDNA * service object pointer
* implement UTS #46 and IDNA2008.
*
* IDNA2003 is obsolete.
* The C API functions which do not take a service object pointer
* implement IDNA2003. They are all deprecated.
*/
/*
* IDNA option bit set values.
*/
enum {
/**
* Default options value: None of the other options are set.
* For use in static worker and factory methods.
* @stable ICU 2.6
*/
UIDNA_DEFAULT=0,
/**
* Option to check whether the input conforms to the STD3 ASCII rules,
* for example the restriction of labels to LDH characters
* (ASCII Letters, Digits and Hyphen-Minus).
* For use in static worker and factory methods.
* @stable ICU 2.6
*/
UIDNA_USE_STD3_RULES=2,
/**
* IDNA option to check for whether the input conforms to the BiDi rules.
* For use in static worker and factory methods.
* BreakIterator C API
*
* The BreakIterator C API defines methods for finding the location
* of boundaries in text. A pointer to a UBreakIterator maintains a
* current position and scans over text, returning the index of characters
* where boundaries occur.
*
* user allocated space for the new clone. If NULL new memory will be allocated.
* If buffer is not large enough, new memory will be allocated.
* Clients can use the U_BRK_SAFECLONE_BUFFERSIZE.
* @param pBufferSize Deprecated functionality as of ICU 52, use NULL or 1.
* pointer to size of allocated space.
* If *pBufferSize == 0, a sufficient size for use in cloning will
* be returned ('pre-flighting')
* If *pBufferSize is not enough for a stack-based safe clone,
* new memory will be allocated.
* @param status to indicate whether the operation went on smoothly or there were errors
* An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
* @return pointer to the new clone
* @stable ICU 2.0
*/
/* Safe-clone entry point: the result is a pointer to the new clone. */
U_STABLE UBreakIterator * U_EXPORT2
ubrk_safeClone(const UBreakIterator *bi,
               void *stackBuffer,
               int32_t *pBufferSize,
               UErrorCode *status);
/**
 * Closes a UBreakIterator. After this call the UBreakIterator may no
 * longer be used.
 *
 * @param bi The break iterator to close.
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2 ubrk_close(UBreakIterator *bi);
/**
 * Points an existing iterator at a new piece of text.
 *
 * @param bi         The iterator to use.
 * @param text       The text to be set.
 * @param textLength The length of the text.
 * @param status     The error code.
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2
ubrk_setText(UBreakIterator *bi,
             const UChar *text,
             int32_t textLength,
             UErrorCode *status);
/**
 * Points an existing iterator at a new piece of text supplied as a UText.
 *
 * Every index position returned by the break iterator functions is a
 * native index from the UText. For example, when breaking UTF-8 encoded
 * text, the break positions returned by \ref ubrk_next,
 * \ref ubrk_previous, etc. are UTF-8 string indices, not UTF-16 positions.
 *
 * @param bi     The iterator to use.
 * @param text   The text to be set.
 *               This function makes a shallow clone of the supplied UText.
 *               The caller is therefore free to immediately close or
 *               otherwise reuse the UText that was passed in, but the
 *               underlying text itself must not be altered while the break
 *               iterator references it.
 * @param status The error code.
 * @stable ICU 3.4
 */
U_STABLE void U_EXPORT2
ubrk_setUText(UBreakIterator *bi, UText *text, UErrorCode *status);
/**
 * Reports the most recently returned text boundary.
 *
 * @param bi The break iterator to use.
 * @return The character index most recently returned by \ref ubrk_next,
 *         \ref ubrk_previous, \ref ubrk_first, or \ref ubrk_last.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2 ubrk_current(const UBreakIterator *bi);
/**
 * Moves the iterator forward to the boundary that follows the current
 * boundary.
 *
 * @param bi The break iterator to use.
 * @return The character index of the next text boundary, or UBRK_DONE
 *         if all text boundaries have been returned.
 * @see ubrk_previous
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2 ubrk_next(UBreakIterator *bi);
/**
 * Moves the iterator back to the boundary that precedes the current
 * boundary.
 *
 * @param bi The break iterator to use.
 * @return The character index of the preceding text boundary, or UBRK_DONE
 *         if all text boundaries have been returned.
 * @see ubrk_next
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2 ubrk_previous(UBreakIterator *bi);
/**
 * Resets the iterator position to zero, the start of the text being
 * scanned.
 *
 * @param bi The break iterator to use.
 * @return The new iterator position (zero).
 * @see ubrk_last
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2 ubrk_first(UBreakIterator *bi);
/**
 * Moves the iterator position to the index immediately beyond the last
 * character in the text being scanned. Note that this is not the same as
 * the last character.
 *
 * @param bi The break iterator to use.
 * @return The character offset immediately beyond the last character in
 *         the text being scanned.
 * @see ubrk_first
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2 ubrk_last(UBreakIterator *bi);
/**
 * Moves the iterator position to the first boundary preceding the given
 * offset. The new position is always smaller than offset, or UBRK_DONE.
 *
 * @param bi     The break iterator to use.
 * @param offset The offset to begin scanning.
 * @return The text boundary preceding offset, or UBRK_DONE.
 * @see ubrk_following
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2 ubrk_preceding(UBreakIterator *bi, int32_t offset);
/**
 * Moves the iterator forward to the first boundary following the given
 * offset. The value returned is always greater than offset, or UBRK_DONE.
 *
 * @param bi     The break iterator to use.
 * @param offset The offset to begin scanning.
 * @return The text boundary following offset, or UBRK_DONE.
 * @see ubrk_preceding
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2 ubrk_following(UBreakIterator *bi, int32_t offset);
/**
 * Returns a locale for which text breaking information is available.
 * A UBreakIterator in a locale returned by this function will perform the
 * correct text breaking for the locale.
 *
 * @param index The index of the desired locale.
 * @return A locale for which text breaking information is available, or 0
 *         if none.
 * @see ubrk_countAvailable
 * @stable ICU 2.0
 */
U_STABLE const char * U_EXPORT2 ubrk_getAvailable(int32_t index);
/**
 * Reports how many locales have text breaking information available.
 * The count is most useful as the loop ending condition for calls to
 * \ref ubrk_getAvailable.
 *
 * @return The number of locales for which text breaking information is
 *         available.
 * @see ubrk_getAvailable
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2 ubrk_countAvailable(void);
/**
 * Returns true if the specified position is a boundary position. As a
 * side effect, the iterator is left pointing to the first boundary
 * position at or after "offset".
 *
 * @param bi     The break iterator to use.
 * @param offset The offset to check.
 * @return True if "offset" is a boundary position.
 * @stable ICU 2.0
 */
U_STABLE UBool U_EXPORT2
ubrk_isBoundary(UBreakIterator *bi,
                int32_t offset);
/**
* Return the status from the break rule that determined the most recently
* returned break position. The values appear in the rule source
* within brackets, {123}, for example. For rules that do not specify a
* status, a default value of 0 is returned.
*
*
* @stable ICU 4.8
* @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
*/
enum UMessagePatternApostropheMode {
/**
* A literal apostrophe is represented by
* either a single or a double apostrophe pattern character.
* Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
* if it immediately precedes a curly brace {},
* or a pipe symbol | if inside a choice format,
* or a pound symbol # if inside a plural format.
*
*
* Desired output
* DOUBLE_OPTIONAL
* DOUBLE_REQUIRED
*
*
* I see {many}
* I see '{many}'
* (same)
*
*
* I said {'Wow!'}
* I said '{''Wow!''}'
* (same)
*
*
* I don't know
* I don't know OR
*
I don''t knowI don''t know
*