// icui18n.h is autogenerated and merged from the ICU header files. // Code unused or not supported in the Windows ICU SDK has been removed. #include "icucommon.h" #if (NTDDI_VERSION >= NTDDI_WIN10_RS2) #ifndef SUPPRESS_LEGACY_ICU_HEADER_WARNINGS // For more information on the ICU breaking change to use char16_t by default, please see the page here: // https://go.microsoft.com/fwlink/?linkid=851033 #pragma message("The wchar_t versions of the ICU headers are no longer being updated, please use the char16_t based header icu.h instead; see https://go.microsoft.com/fwlink/?linkid=851033 for more info. To suppress this warning, define the macro SUPPRESS_LEGACY_ICU_HEADER_WARNINGS before including this header.") #endif /* SUPPRESS_LEGACY_ICU_HEADER_WARNINGS */ // alphaindex.h /* ******************************************************************************* * * Copyright (C) 2011-2014 International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* */ #ifndef INDEXCHARS_H #define INDEXCHARS_H #if !UCONFIG_NO_COLLATION /** * \file * \brief C++ API: Index Characters */ U_CDECL_BEGIN /** * Constants for Alphabetic Index Label Types. * The form of these enum constants anticipates having a plain C API * for Alphabetic Indexes that will also use them. * @stable ICU 4.8 */ typedef enum UAlphabeticIndexLabelType { /** * Normal Label, typically the starting letter of the names * in the bucket with this label. * @stable ICU 4.8 */ U_ALPHAINDEX_NORMAL = 0, /** * Undeflow Label. The bucket with this label contains names * in scripts that sort before any of the bucket labels in this index. * @stable ICU 4.8 */ U_ALPHAINDEX_UNDERFLOW = 1, /** * Inflow Label. The bucket with this label contains names * in scripts that sort between two of the bucket labels in this index. 
* Inflow labels are created when an index contains normal labels for * multiple scripts, and skips other scripts that sort between some of the * included scripts. * @stable ICU 4.8 */ U_ALPHAINDEX_INFLOW = 2, /** * Overflow Label. Te bucket with this label contains names in scripts * that sort after all of the bucket labels in this index. * @stable ICU 4.8 */ U_ALPHAINDEX_OVERFLOW = 3 } UAlphabeticIndexLabelType; struct UHashtable; U_CDECL_END #endif // !UCONFIG_NO_COLLATION #endif // basictz.h // No supported content // calendar.h // No supported content // choicfmt.h // No supported content // coleitr.h // No supported content // coll.h // No supported content // compactdecimalformat.h // No supported content // curramt.h // No supported content // currpinf.h // No supported content // currunit.h // No supported content // datefmt.h // No supported content // dcfmtsym.h // No supported content // decimfmt.h // No supported content // dtfmtsym.h // No supported content // dtitvfmt.h // No supported content // dtitvinf.h // No supported content // dtptngen.h // No supported content // dtrule.h // No supported content // fieldpos.h // No supported content // fmtable.h // No supported content // format.h // No supported content // fpositer.h // No supported content // gender.h // No supported content // gregocal.h // No supported content // measunit.h // No supported content // measure.h // No supported content // numfmt.h // No supported content // numsys.h /* ******************************************************************************* * Copyright (C) 2010-2014, International Business Machines Corporation and * others. All Rights Reserved. 
******************************************************************************* * * * File NUMSYS.H * * Modification History:* * Date Name Description * ******************************************************************************** */ #ifndef NUMSYS #define NUMSYS /** * \def NUMSYS_NAME_CAPACITY * Size of a numbering system name. * @internal */ #define NUMSYS_NAME_CAPACITY 8 /** * \file * \brief C++ API: NumberingSystem object */ #if !UCONFIG_NO_FORMATTING #endif /* #if !UCONFIG_NO_FORMATTING */ #endif // _NUMSYS // rbtz.h // No supported content // regex.h // No supported content // region.h // No supported content // scientificnumberformatter.h // No supported content // selfmt.h /******************************************************************** * COPYRIGHT: * Copyright (c) 1997-2011, International Business Machines Corporation and * others. All Rights Reserved. * Copyright (C) 2010 , Yahoo! Inc. ******************************************************************** * * File SELFMT.H * * Modification History: * * Date Name Description * 11/11/09 kirtig Finished first cut of implementation. ********************************************************************/ #ifndef SELFMT #define SELFMT /** * \file * \brief C++ API: SelectFormat object */ #if !UCONFIG_NO_FORMATTING #endif /* #if !UCONFIG_NO_FORMATTING */ #endif // _SELFMT // simpletz.h // No supported content // smpdtfmt.h // No supported content // sortkey.h // No supported content // stsearch.h // No supported content // tblcoll.h // No supported content // timezone.h // No supported content // tmunit.h // No supported content // tmutamt.h // No supported content // tmutfmt.h // No supported content // translit.h // No supported content // tznames.h /* ******************************************************************************* * Copyright (C) 2011-2015, International Business Machines Corporation and * others. All Rights Reserved. 
******************************************************************************* */ #ifndef __TZNAMES_H #define __TZNAMES_H /** * \file * \brief C++ API: TimeZoneNames */ #if !UCONFIG_NO_FORMATTING U_CDECL_BEGIN /** * Constants for time zone display name types. * @stable ICU 50 */ typedef enum UTimeZoneNameType { /** * Unknown display name type. * @stable ICU 50 */ UTZNM_UNKNOWN = 0x00, /** * Long display name, such as "Eastern Time". * @stable ICU 50 */ UTZNM_LONG_GENERIC = 0x01, /** * Long display name for standard time, such as "Eastern Standard Time". * @stable ICU 50 */ UTZNM_LONG_STANDARD = 0x02, /** * Long display name for daylight saving time, such as "Eastern Daylight Time". * @stable ICU 50 */ UTZNM_LONG_DAYLIGHT = 0x04, /** * Short display name, such as "ET". * @stable ICU 50 */ UTZNM_SHORT_GENERIC = 0x08, /** * Short display name for standard time, such as "EST". * @stable ICU 50 */ UTZNM_SHORT_STANDARD = 0x10, /** * Short display name for daylight saving time, such as "EDT". * @stable ICU 50 */ UTZNM_SHORT_DAYLIGHT = 0x20, /** * Exemplar location name, such as "Los Angeles". * @stable ICU 51 */ UTZNM_EXEMPLAR_LOCATION = 0x40 } UTimeZoneNameType; U_CDECL_END #endif #endif // tzfmt.h /* ******************************************************************************* * Copyright (C) 2011-2015, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* */ #ifndef __TZFMT_H #define __TZFMT_H /** * \file * \brief C++ API: TimeZoneFormat */ #if !UCONFIG_NO_FORMATTING U_CDECL_BEGIN /** * Constants for time zone display format style used by format/parse APIs * in TimeZoneFormat. * @stable ICU 50 */ typedef enum UTimeZoneFormatStyle { /** * Generic location format, such as "United States Time (New York)", "Italy Time" * @stable ICU 50 */ UTZFMT_STYLE_GENERIC_LOCATION, /** * Generic long non-location format, such as "Eastern Time". 
* @stable ICU 50 */ UTZFMT_STYLE_GENERIC_LONG, /** * Generic short non-location format, such as "ET". * @stable ICU 50 */ UTZFMT_STYLE_GENERIC_SHORT, /** * Specific long format, such as "Eastern Standard Time". * @stable ICU 50 */ UTZFMT_STYLE_SPECIFIC_LONG, /** * Specific short format, such as "EST", "PDT". * @stable ICU 50 */ UTZFMT_STYLE_SPECIFIC_SHORT, /** * Localized GMT offset format, such as "GMT-05:00", "UTC+0100" * @stable ICU 50 */ UTZFMT_STYLE_LOCALIZED_GMT, /** * Short localized GMT offset format, such as "GMT-5", "UTC+1:30" * This style is equivalent to the LDML date format pattern "O". * @stable ICU 51 */ UTZFMT_STYLE_LOCALIZED_GMT_SHORT, /** * Short ISO 8601 local time difference (basic format) or the UTC indicator. * For example, "-05", "+0530", and "Z"(UTC). * This style is equivalent to the LDML date format pattern "X". * @stable ICU 51 */ UTZFMT_STYLE_ISO_BASIC_SHORT, /** * Short ISO 8601 locale time difference (basic format). * For example, "-05" and "+0530". * This style is equivalent to the LDML date format pattern "x". * @stable ICU 51 */ UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT, /** * Fixed width ISO 8601 local time difference (basic format) or the UTC indicator. * For example, "-0500", "+0530", and "Z"(UTC). * This style is equivalent to the LDML date format pattern "XX". * @stable ICU 51 */ UTZFMT_STYLE_ISO_BASIC_FIXED, /** * Fixed width ISO 8601 local time difference (basic format). * For example, "-0500" and "+0530". * This style is equivalent to the LDML date format pattern "xx". * @stable ICU 51 */ UTZFMT_STYLE_ISO_BASIC_LOCAL_FIXED, /** * ISO 8601 local time difference (basic format) with optional seconds field, or the UTC indicator. * For example, "-0500", "+052538", and "Z"(UTC). * This style is equivalent to the LDML date format pattern "XXXX". * @stable ICU 51 */ UTZFMT_STYLE_ISO_BASIC_FULL, /** * ISO 8601 local time difference (basic format) with optional seconds field. * For example, "-0500" and "+052538". 
* This style is equivalent to the LDML date format pattern "xxxx". * @stable ICU 51 */ UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL, /** * Fixed width ISO 8601 local time difference (extended format) or the UTC indicator. * For example, "-05:00", "+05:30", and "Z"(UTC). * This style is equivalent to the LDML date format pattern "XXX". * @stable ICU 51 */ UTZFMT_STYLE_ISO_EXTENDED_FIXED, /** * Fixed width ISO 8601 local time difference (extended format). * For example, "-05:00" and "+05:30". * This style is equivalent to the LDML date format pattern "xxx" and "ZZZZZ". * @stable ICU 51 */ UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FIXED, /** * ISO 8601 local time difference (extended format) with optional seconds field, or the UTC indicator. * For example, "-05:00", "+05:25:38", and "Z"(UTC). * This style is equivalent to the LDML date format pattern "XXXXX". * @stable ICU 51 */ UTZFMT_STYLE_ISO_EXTENDED_FULL, /** * ISO 8601 local time difference (extended format) with optional seconds field. * For example, "-05:00" and "+05:25:38". * This style is equivalent to the LDML date format pattern "xxxxx". * @stable ICU 51 */ UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL, /** * Time Zone ID, such as "America/Los_Angeles". * @stable ICU 51 */ UTZFMT_STYLE_ZONE_ID, /** * Short Time Zone ID (BCP 47 Unicode location extension, time zone type value), such as "uslax". * @stable ICU 51 */ UTZFMT_STYLE_ZONE_ID_SHORT, /** * Exemplar location, such as "Los Angeles" and "Paris". * @stable ICU 51 */ UTZFMT_STYLE_EXEMPLAR_LOCATION } UTimeZoneFormatStyle; /** * Constants for GMT offset pattern types. 
* @stable ICU 50 */ typedef enum UTimeZoneFormatGMTOffsetPatternType { /** * Positive offset with hours and minutes fields * @stable ICU 50 */ UTZFMT_PAT_POSITIVE_HM, /** * Positive offset with hours, minutes and seconds fields * @stable ICU 50 */ UTZFMT_PAT_POSITIVE_HMS, /** * Negative offset with hours and minutes fields * @stable ICU 50 */ UTZFMT_PAT_NEGATIVE_HM, /** * Negative offset with hours, minutes and seconds fields * @stable ICU 50 */ UTZFMT_PAT_NEGATIVE_HMS, /** * Positive offset with hours field * @stable ICU 51 */ UTZFMT_PAT_POSITIVE_H, /** * Negative offset with hours field * @stable ICU 51 */ UTZFMT_PAT_NEGATIVE_H, /* The following cannot be #ifndef U_HIDE_INTERNAL_API, needed for other .h declarations */ /** * Number of UTimeZoneFormatGMTOffsetPatternType types. * @internal */ UTZFMT_PAT_COUNT = 6 } UTimeZoneFormatGMTOffsetPatternType; /** * Constants for time types used by TimeZoneFormat APIs for * receiving time type (standard time, daylight time or unknown). * @stable ICU 50 */ typedef enum UTimeZoneFormatTimeType { /** * Unknown * @stable ICU 50 */ UTZFMT_TIME_TYPE_UNKNOWN, /** * Standard time * @stable ICU 50 */ UTZFMT_TIME_TYPE_STANDARD, /** * Daylight saving time * @stable ICU 50 */ UTZFMT_TIME_TYPE_DAYLIGHT } UTimeZoneFormatTimeType; /** * Constants for parse option flags, used for specifying optional parse behavior. * @stable ICU 50 */ typedef enum UTimeZoneFormatParseOption { /** * No option. * @stable ICU 50 */ UTZFMT_PARSE_OPTION_NONE = 0x00, /** * When a time zone display name is not found within a set of display names * used for the specified style, look for the name from display names used * by other styles. * @stable ICU 50 */ UTZFMT_PARSE_OPTION_ALL_STYLES = 0x01, /** * When parsing a time zone display name in UTZFMT_STYLE_SPECIFIC_SHORT, * look for the IANA tz database compatible zone abbreviations in addition * to the localized names coming from the {@link TimeZoneNames} currently * used by the {@link TimeZoneFormat}. 
* @stable ICU 54 */ UTZFMT_PARSE_OPTION_TZ_DATABASE_ABBREVIATIONS = 0x02 } UTimeZoneFormatParseOption; U_CDECL_END #endif /* !UCONFIG_NO_FORMATTING */ #endif // tzrule.h // No supported content // tztrans.h // No supported content // ucal.h /* ******************************************************************************* * Copyright (C) 1996-2015, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* */ #ifndef UCAL_H #define UCAL_H #if !UCONFIG_NO_FORMATTING /** * \file * \brief C API: Calendar * *
* The UCalendar C API is used for converting between a UDate object
* and a set of integer fields such as UCAL_YEAR, UCAL_MONTH,
* UCAL_DAY, UCAL_HOUR, and so on.
* (A UDate object represents a specific instant in
* time with millisecond precision. See the UDate documentation
* for more information about the UDate type.)
*
*
* Types of UCalendar interpret a UDate
* according to the rules of a specific calendar system. The C API
* provides the enum UCalendarType with UCAL_TRADITIONAL and
* UCAL_GREGORIAN.
*
* Like other locale-sensitive C API, calendar API provides a
* function, ucal_open(), which returns a pointer to
* UCalendar whose time fields have been initialized
* with the current date and time. We need to specify the type of
* calendar to be opened and the timezoneId.
* \htmlonly<blockquote>\endhtmlonly
* <pre>
* \code
* UCalendar *caldef;
* UChar *tzId;
* UErrorCode status = U_ZERO_ERROR;
* tzId=(UChar*)malloc(sizeof(UChar) * (strlen("PST") +1) );
* u_uastrcpy(tzId, "PST");
* caldef=ucal_open(tzId, u_strlen(tzId), NULL, UCAL_TRADITIONAL, &status);
* \endcode
* </pre>
* \htmlonly</blockquote>\endhtmlonly
*
*
* A UCalendar object can produce all the time field values
* needed to implement the date-time formatting for a particular language
* and calendar style (for example, Japanese-Gregorian, Japanese-Traditional).
*
*
* When computing a UDate from time fields, two special circumstances
* may arise: there may be insufficient information to compute the
* UDate (such as only year and month but no day in the month),
* or there may be inconsistent information (such as "Tuesday, July 15, 1996"
* -- July 15, 1996 is actually a Monday).
*
*
* Insufficient information. The calendar will use default
* information to specify the missing fields. This may vary by calendar; for
* the Gregorian calendar, the default for a field is the same as that of the
* start of the epoch: i.e., UCAL_YEAR = 1970, UCAL_MONTH = UCAL_JANUARY, UCAL_DATE = 1, etc.
*
* Inconsistent information. If fields conflict, the calendar
* will give preference to fields set more recently. For example, when
* determining the day, the calendar will look for one of the following
* combinations of fields. The most recent combination, as determined by the
* most recently set single field, will be used.
*
* \htmlonly<blockquote>\endhtmlonly
* <pre>
* \code
* UCAL_MONTH + UCAL_DAY_OF_MONTH
* UCAL_MONTH + UCAL_WEEK_OF_MONTH + UCAL_DAY_OF_WEEK
* UCAL_MONTH + UCAL_DAY_OF_WEEK_IN_MONTH + UCAL_DAY_OF_WEEK
* UCAL_DAY_OF_YEAR
* UCAL_DAY_OF_WEEK + UCAL_WEEK_OF_YEAR
* \endcode
* </pre>
* \htmlonly</blockquote>\endhtmlonly
*
* For the time of day:
*
* \htmlonly<blockquote>\endhtmlonly
* <pre>
* \code
* UCAL_HOUR_OF_DAY
* UCAL_AM_PM + UCAL_HOUR
* \endcode
* </pre>
* \htmlonly</blockquote>\endhtmlonly
*
* Note: for some non-Gregorian calendars, different
* fields may be necessary for complete disambiguation. For example, a full
* specification of the historical Arabic astronomical calendar requires year,
* month, day-of-month and day-of-week in some cases.
*
* Note: There are certain possible ambiguities in
* interpretation of certain singular times, which are resolved in the
* following ways:
* <ol>
*   <li> 24:00:00 "belongs" to the following day. That is,
*        23:59 on Dec 31, 1969 &lt; 24:00 on Jan 1, 1970 &lt; 24:01:00 on Jan 1, 1970
*   <li> Although historically not precise, midnight also belongs to "am",
*        and noon belongs to "pm", so on the same day,
*        12:00 am (midnight) &lt; 12:01 am, and 12:00 pm (noon) &lt; 12:01 pm
* </ol>
*
* The date or time format strings are not part of the definition of a
* calendar, as those must be modifiable or overridable by the user at
* runtime. Use {@link icu::DateFormat}
* to format dates.
*
* Calendar provides an API for field "rolling", where fields
* can be incremented or decremented, but wrap around. For example, rolling the
* month up in the date December 12, 1996 results in
* January 12, 1996.
*
*
* Calendar also provides a date arithmetic function for
* adding the specified (signed) amount of time to a particular time field.
* For example, subtracting 5 days from the date September 12, 1996
* results in September 7, 1996.
*
* @stable ICU 2.0
*/
/**
 * Reserved time zone ID that stands for an unknown time zone.
 * @stable ICU 4.8
 */
#define UCAL_UNKNOWN_ZONE_ID "Etc/Unknown"
/**
 * Opaque calendar handle, for use from C programs.
 * @stable ICU 2.0
 */
typedef void *UCalendar;
/**
 * The kinds of UCalendar that can be requested.
 * @stable ICU 2.0
 */
typedef enum UCalendarType {
    /**
     * The locale's default calendar. Despite what the name suggests, that
     * default may be the Gregorian calendar or some other calendar.
     * @stable ICU 2.0
     */
    UCAL_TRADITIONAL = 0,
    /**
     * Alias of UCAL_TRADITIONAL with a more accurate name.
     * @stable ICU 4.2
     */
    UCAL_DEFAULT = UCAL_TRADITIONAL,
    /**
     * The Gregorian calendar for the locale, unambiguously.
     * @stable ICU 2.0
     */
    UCAL_GREGORIAN = 1
} UCalendarType;
/**
 * Identifiers of the individual fields held by a UCalendar.
 * @stable ICU 2.0
 */
typedef enum UCalendarDateFields {
    /**
     * Era, e.g. AD or BC in the Gregorian (Julian) calendar.
     * The value is calendar-specific.
     * @stable ICU 2.6
     */
    UCAL_ERA = 0,

    /**
     * Year. The value is calendar-specific.
     * @stable ICU 2.6
     */
    UCAL_YEAR,

    /**
     * Month. The value is calendar-specific. The first month of the year
     * is JANUARY; which month is last depends on how many months the year has.
     * @see #UCAL_JANUARY
     * @see #UCAL_FEBRUARY
     * @see #UCAL_MARCH
     * @see #UCAL_APRIL
     * @see #UCAL_MAY
     * @see #UCAL_JUNE
     * @see #UCAL_JULY
     * @see #UCAL_AUGUST
     * @see #UCAL_SEPTEMBER
     * @see #UCAL_OCTOBER
     * @see #UCAL_NOVEMBER
     * @see #UCAL_DECEMBER
     * @see #UCAL_UNDECIMBER
     * @stable ICU 2.6
     */
    UCAL_MONTH,

    /**
     * Week number within the current year. The first week of the year, as
     * determined by the UCAL_FIRST_DAY_OF_WEEK and
     * UCAL_MINIMAL_DAYS_IN_FIRST_WEEK attributes, has value 1. Subclasses
     * define the value of UCAL_WEEK_OF_YEAR for days that fall before the
     * first week of the year.
     * @see ucal_getAttribute
     * @see ucal_setAttribute
     * @stable ICU 2.6
     */
    UCAL_WEEK_OF_YEAR,

    /**
     * Week number within the current month. The first week of the month, as
     * determined by the UCAL_FIRST_DAY_OF_WEEK and
     * UCAL_MINIMAL_DAYS_IN_FIRST_WEEK attributes, has value 1. Subclasses
     * define the value of WEEK_OF_MONTH for days that fall before the
     * first week of the month.
     * @see ucal_getAttribute
     * @see ucal_setAttribute
     * @see #UCAL_FIRST_DAY_OF_WEEK
     * @see #UCAL_MINIMAL_DAYS_IN_FIRST_WEEK
     * @stable ICU 2.6
     */
    UCAL_WEEK_OF_MONTH,

    /**
     * Day of the month; a synonym for DAY_OF_MONTH.
     * The first day of the month has value 1.
     * @see #UCAL_DAY_OF_MONTH
     * @stable ICU 2.6
     */
    UCAL_DATE,

    /**
     * Day number within the current year.
     * The first day of the year has value 1.
     * @stable ICU 2.6
     */
    UCAL_DAY_OF_YEAR,

    /**
     * Day of the week. Takes the values SUNDAY, MONDAY, TUESDAY,
     * WEDNESDAY, THURSDAY, FRIDAY, and SATURDAY.
     * @see #UCAL_SUNDAY
     * @see #UCAL_MONDAY
     * @see #UCAL_TUESDAY
     * @see #UCAL_WEDNESDAY
     * @see #UCAL_THURSDAY
     * @see #UCAL_FRIDAY
     * @see #UCAL_SATURDAY
     * @stable ICU 2.6
     */
    UCAL_DAY_OF_WEEK,

    /**
     * Ordinal number of the day of the week within the current month.
     * Combined with the DAY_OF_WEEK field it uniquely identifies a day
     * within a month. In contrast to WEEK_OF_MONTH and WEEK_OF_YEAR, the
     * value of this field does not depend on getFirstDayOfWeek() or
     * getMinimalDaysInFirstWeek().
     *
     * DAY_OF_MONTH 1 through 7 always correspond to DAY_OF_WEEK_IN_MONTH 1;
     * 8 through 15 correspond to DAY_OF_WEEK_IN_MONTH 2, and so on.
     * DAY_OF_WEEK_IN_MONTH 0 indicates the week before
     * DAY_OF_WEEK_IN_MONTH 1.
     *
     * Negative values count back from the end of the month, so the last
     * Sunday of a month is DAY_OF_WEEK = SUNDAY, DAY_OF_WEEK_IN_MONTH = -1.
     * Because they count backward, negative values are usually aligned
     * differently within the month than positive values. For example, in a
     * month of 31 days, DAY_OF_WEEK_IN_MONTH -1 overlaps
     * DAY_OF_WEEK_IN_MONTH 5 and the end of 4.
     * @see #UCAL_DAY_OF_WEEK
     * @see #UCAL_WEEK_OF_MONTH
     * @stable ICU 2.6
     */
    UCAL_DAY_OF_WEEK_IN_MONTH,

    /**
     * Whether the HOUR is before or after noon.
     * E.g., at 10:04:15.250 PM the AM_PM is PM.
     * @see #UCAL_AM
     * @see #UCAL_PM
     * @see #UCAL_HOUR
     * @stable ICU 2.6
     */
    UCAL_AM_PM,

    /**
     * Hour of the morning or afternoon, i.e. the 12-hour clock value.
     * E.g., at 10:04:15.250 PM the HOUR is 10.
     * @see #UCAL_AM_PM
     * @see #UCAL_HOUR_OF_DAY
     * @stable ICU 2.6
     */
    UCAL_HOUR,

    /**
     * Hour of the day, i.e. the 24-hour clock value.
     * E.g., at 10:04:15.250 PM the HOUR_OF_DAY is 22.
     * @see #UCAL_HOUR
     * @stable ICU 2.6
     */
    UCAL_HOUR_OF_DAY,

    /**
     * Minute within the hour.
     * E.g., at 10:04:15.250 PM the UCAL_MINUTE is 4.
     * @stable ICU 2.6
     */
    UCAL_MINUTE,

    /**
     * Second within the minute.
     * E.g., at 10:04:15.250 PM the UCAL_SECOND is 15.
     * @stable ICU 2.6
     */
    UCAL_SECOND,

    /**
     * Millisecond within the second.
     * E.g., at 10:04:15.250 PM the UCAL_MILLISECOND is 250.
     * @stable ICU 2.6
     */
    UCAL_MILLISECOND,

    /**
     * Raw offset from GMT, in milliseconds.
     * @stable ICU 2.6
     */
    UCAL_ZONE_OFFSET,

    /**
     * Daylight savings offset, in milliseconds.
     * @stable ICU 2.6
     */
    UCAL_DST_OFFSET,

    /**
     * Extended year corresponding to the UCAL_WEEK_OF_YEAR field. It may
     * be one greater or one less than the value of UCAL_EXTENDED_YEAR.
     * @stable ICU 2.6
     */
    UCAL_YEAR_WOY,

    /**
     * Localized day of week: a value from 1 to 7 inclusive, where 1 is the
     * localized first day of the week.
     * @stable ICU 2.6
     */
    UCAL_DOW_LOCAL,

    /**
     * Year of this calendar system, encompassing all supra-year fields.
     * For example, in Gregorian/Julian calendars positive Extended Year
     * values indicate years AD, 1 BC = 0 extended, 2 BC = -1 extended,
     * and so on.
     * @stable ICU 2.8
     */
    UCAL_EXTENDED_YEAR,

    /**
     * Modified Julian day number. It differs from the conventional Julian
     * day number in two ways: days are demarcated at local zone midnight
     * rather than noon GMT, and the number is local, i.e. it depends on the
     * local time zone. It can be thought of as a single number that
     * encompasses all the date-related fields.
     * @stable ICU 2.8
     */
    UCAL_JULIAN_DAY,

    /**
     * Ranges from 0 to 23:59:59.999 (regardless of DST). Behaves exactly
     * like a composite of all time-related fields, excluding the zone
     * fields, and therefore also reflects the discontinuities of those
     * fields on DST transition days: it jumps forward on a day of DST onset
     * and backward on a day of DST cessation. Consequently it must be
     * combined with the DST_OFFSET field to obtain a unique local time value.
     * @stable ICU 2.8
     */
    UCAL_MILLISECONDS_IN_DAY,

    /**
     * Whether the current month is a leap month (0 or 1).
     * See the Chinese calendar for an example of this.
     */
    UCAL_IS_LEAP_MONTH,

    /**
     * Field count
     * @stable ICU 2.6
     */
    UCAL_FIELD_COUNT,

    /**
     * Day of the month; a synonym for UCAL_DATE.
     * The first day of the month has value 1.
     * @see #UCAL_DATE
     * @stable ICU 2.8
     */
    UCAL_DAY_OF_MONTH = UCAL_DATE
} UCalendarDateFields;
/**
 * Days of the week as used by a UCalendar.
 *
 * Note: calendar day-of-week values are 1-based. Clients who create locale
 * resources for the first-day-of-week field should be aware of this; for
 * instance, in the US locale first-day-of-week is set to 1, i.e. UCAL_SUNDAY.
 * @stable ICU 2.0
 */
typedef enum UCalendarDaysOfWeek {
    UCAL_SUNDAY    = 1, /**< Sunday */
    UCAL_MONDAY    = 2, /**< Monday */
    UCAL_TUESDAY   = 3, /**< Tuesday */
    UCAL_WEDNESDAY = 4, /**< Wednesday */
    UCAL_THURSDAY  = 5, /**< Thursday */
    UCAL_FRIDAY    = 6, /**< Friday */
    UCAL_SATURDAY  = 7  /**< Saturday */
} UCalendarDaysOfWeek;
/**
 * Months as used by a UCalendar. Note: calendar month values are 0-based.
 * @stable ICU 2.0
 */
typedef enum UCalendarMonths {
    UCAL_JANUARY   = 0,  /**< January */
    UCAL_FEBRUARY  = 1,  /**< February */
    UCAL_MARCH     = 2,  /**< March */
    UCAL_APRIL     = 3,  /**< April */
    UCAL_MAY       = 4,  /**< May */
    UCAL_JUNE      = 5,  /**< June */
    UCAL_JULY      = 6,  /**< July */
    UCAL_AUGUST    = 7,  /**< August */
    UCAL_SEPTEMBER = 8,  /**< September */
    UCAL_OCTOBER   = 9,  /**< October */
    UCAL_NOVEMBER  = 10, /**< November */
    UCAL_DECEMBER  = 11, /**< December */
    /**
     * UCAL_MONTH value for the thirteenth month of the year. The Gregorian
     * calendar does not use this value, but lunar calendars do.
     */
    UCAL_UNDECIMBER = 12
} UCalendarMonths;
/**
 * The AM/PM values a UCalendar can report.
 * @stable ICU 2.0
 */
typedef enum UCalendarAMPMs {
    UCAL_AM = 0, /**< AM */
    UCAL_PM = 1  /**< PM */
} UCalendarAMPMs;
/**
 * System time zone type constants, used by
 * ucal_openTimeZoneIDEnumeration to filter zones.
 * @see ucal_openTimeZoneIDEnumeration
 * @stable ICU 4.8
 */
typedef enum USystemTimeZoneType {
    /**
     * Any system zones.
     * @stable ICU 4.8
     */
    UCAL_ZONE_TYPE_ANY = 0,
    /**
     * Canonical system zones.
     * @stable ICU 4.8
     */
    UCAL_ZONE_TYPE_CANONICAL = 1,
    /**
     * Canonical system zones that are associated with actual locations.
     * @stable ICU 4.8
     */
    UCAL_ZONE_TYPE_CANONICAL_LOCATION = 2
} USystemTimeZoneType;
/**
 * Create an enumeration over the system time zone IDs that match the given
 * filter conditions.
 *
 * @param zoneType  The system time zone type.
 * @param region    The ISO 3166 two-letter country code or UN M.49
 *                  three-digit area code. When NULL, zones are not
 *                  filtered by region.
 * @param rawOffset An offset from GMT in milliseconds, ignoring the effect
 *                  of daylight savings time, if any. When NULL, zones are
 *                  not filtered by zone offset.
 * @param ec        A pointer to an UErrorCode to receive any errors.
 * @return an enumeration object that the caller must dispose of using
 *         uenum_close(), or NULL upon failure. In case of failure,
 *         *ec will indicate the error.
 * @stable ICU 4.8
 */
U_STABLE UEnumeration* U_EXPORT2
ucal_openTimeZoneIDEnumeration(USystemTimeZoneType zoneType,
                               const char *region,
                               const int32_t *rawOffset,
                               UErrorCode *ec);
/**
 * Create an enumeration over all time zones.
 *
 * @param ec input/output error code
 * @return an enumeration object that the caller must dispose of using
 *         uenum_close(), or NULL upon failure. In case of failure *ec
 *         will indicate the error.
 * @stable ICU 2.6
 */
U_STABLE UEnumeration* U_EXPORT2
ucal_openTimeZones(UErrorCode *ec);
/**
 * Create an enumeration over all time zones associated with the given
 * country. Zones that are affiliated with no country (e.g., "UTC") can
 * also be retrieved, as a group.
 *
 * @param country the ISO 3166 two-letter country code, or NULL to
 *                retrieve zones not affiliated with any country
 * @param ec      input/output error code
 * @return an enumeration object that the caller must dispose of using
 *         uenum_close(), or NULL upon failure. In case of failure *ec
 *         will indicate the error.
 * @stable ICU 2.6
 */
U_STABLE UEnumeration* U_EXPORT2
ucal_openCountryTimeZones(const char *country,
                          UErrorCode *ec);
/**
 * Return the default time zone. The default is initially determined by
 * querying the host operating system; it may be changed with
 * ucal_setDefaultTimeZone() or with the C++ TimeZone API.
 *
 * @param result         A buffer to receive the result, or NULL
 * @param resultCapacity The capacity of the result buffer
 * @param ec             input/output error code
 * @return The result string length, not including the terminating null
 * @stable ICU 2.6
 */
U_STABLE int32_t U_EXPORT2
ucal_getDefaultTimeZone(UChar *result,
                        int32_t resultCapacity,
                        UErrorCode *ec);
/**
 * Set the default time zone.
 *
 * @param zoneID null-terminated time zone ID
 * @param ec     input/output error code
 * @stable ICU 2.6
 */
U_STABLE void U_EXPORT2
ucal_setDefaultTimeZone(const UChar *zoneID,
                        UErrorCode *ec);
/**
 * Return the amount of time, in milliseconds, by which the clock is
 * advanced during daylight savings time for the given time zone, or zero
 * if the time zone does not observe daylight savings time.
 *
 * @param zoneID null-terminated time zone ID
 * @param ec     input/output error code
 * @return the number of milliseconds the time is advanced with respect to
 *         standard time when the daylight savings rules are in effect.
 *         This is always a non-negative number, most commonly either
 *         3,600,000 (one hour) or zero.
 * @stable ICU 2.6
 */
U_STABLE int32_t U_EXPORT2
ucal_getDSTSavings(const UChar *zoneID,
                   UErrorCode *ec);
/**
 * Get the current date and time, represented as milliseconds from the epoch.
 *
 * @return The current date and time.
 * @stable ICU 2.0
 */
U_STABLE UDate U_EXPORT2 ucal_getNow(void);
/**
 * Open a UCalendar.
 * A UCalendar may be used to convert a millisecond value to a year,
 * month, and day.
 *
 * Note: When an unknown TimeZone ID is specified, or the TimeZone ID
 * specified is "Etc/Unknown", the returned UCalendar is initialized with
 * the GMT zone and the TimeZone ID UCAL_UNKNOWN_ZONE_ID ("Etc/Unknown"),
 * without any errors/warnings. To check whether a TimeZone ID is valid
 * before calling this function, use ucal_getCanonicalTimeZoneID.
 *
 * @param zoneID The desired TimeZone ID. If 0, use the default time zone.
 * @param len    The length of zoneID, or -1 if null-terminated.
 * @param locale The desired locale
 * @param type   The type of UCalendar to open. This can be UCAL_GREGORIAN
 *               to open the Gregorian calendar for the locale, or
 *               UCAL_DEFAULT to open the default calendar for the locale
 *               (the default calendar may also be Gregorian). To open a
 *               specific non-Gregorian calendar for the locale, use
 *               uloc_setKeywordValue to set the value of the calendar
 *               keyword for the locale and then pass the locale to
 *               ucal_open with UCAL_DEFAULT as the type.
 * @param status A pointer to an UErrorCode to receive any errors
 * @return A pointer to a UCalendar, or 0 if an error occurred.
 * @see #UCAL_UNKNOWN_ZONE_ID
 * @stable ICU 2.0
 */
U_STABLE UCalendar* U_EXPORT2
ucal_open(const UChar *zoneID, int32_t len, const char *locale,
          UCalendarType type, UErrorCode *status);
/**
 * Close a UCalendar. A closed UCalendar may no longer be used.
 *
 * @param cal The UCalendar to close.
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2 ucal_close(UCalendar *cal);
/**
 * Open a copy of a UCalendar. The copy is a deep copy.
 *
 * @param cal    The calendar to copy
 * @param status A pointer to an UErrorCode to receive any errors.
 * @return A pointer to a UCalendar identical to cal.
 * @stable ICU 4.0
 */
U_STABLE UCalendar* U_EXPORT2
ucal_clone(const UCalendar *cal, UErrorCode *status);
/**
 * Set the TimeZone used by a UCalendar.
 * A UCalendar uses a timezone to convert from Greenwich time to local time.
 *
 * @param cal    The UCalendar to set.
 * @param zoneID The desired TimeZone ID. If 0, use the default time zone.
 * @param len    The length of zoneID, or -1 if null-terminated.
 * @param status A pointer to an UErrorCode to receive any errors.
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2
ucal_setTimeZone(UCalendar *cal, const UChar *zoneID, int32_t len,
                 UErrorCode *status);
/**
* Get the ID of the UCalendar's time zone.
*
* @param cal The UCalendar to query.
* @param result A buffer that receives the UCalendar's time zone ID.
* @param resultLength The maximum size of result.
* @param status Receives the status.
* @return The total buffer size needed; if greater than resultLength, the output was truncated.
* @see ucal_setTimeZone
* @stable ICU 51
*/
U_STABLE int32_t U_EXPORT2
ucal_getTimeZoneID(const UCalendar *cal,
UChar *result,
int32_t resultLength,
UErrorCode *status);
/**
 * Formats in which the display name of a UCalendar's time zone can be requested.
 * @stable ICU 2.0
 */
typedef enum UCalendarDisplayNameType {
    /** Standard display name */
    UCAL_STANDARD = 0,
    /** Short standard display name */
    UCAL_SHORT_STANDARD = 1,
    /** Daylight savings display name */
    UCAL_DST = 2,
    /** Short daylight savings display name */
    UCAL_SHORT_DST = 3
} UCalendarDisplayNameType;
/**
* Get the display name for a UCalendar's TimeZone.
* A display name is suitable for presentation to a user.
* @param cal The UCalendar to query.
* @param type The desired display name format; one of UCAL_STANDARD, UCAL_SHORT_STANDARD,
* UCAL_DST, UCAL_SHORT_DST
* @param locale The desired locale for the display name.
* @param result A pointer to a buffer to receive the display name.
* @param resultLength The maximum size of result.
* @param status A pointer to a UErrorCode to receive any errors
* @return The total buffer size needed; if greater than resultLength, the output was truncated.
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ucal_getTimeZoneDisplayName(const UCalendar* cal,
UCalendarDisplayNameType type,
const char* locale,
UChar* result,
int32_t resultLength,
UErrorCode* status);
/**
* Determine whether a UCalendar is currently in daylight savings time.
* Daylight savings time is not used in all parts of the world.
* @param cal The UCalendar to query.
* @param status A pointer to a UErrorCode to receive any errors
* @return TRUE if cal is currently in daylight savings time, FALSE otherwise
* @stable ICU 2.0
*/
U_STABLE UBool U_EXPORT2
ucal_inDaylightTime(const UCalendar* cal,
UErrorCode* status );
/**
* Sets the GregorianCalendar change date. This is the point when the switch from
* Julian dates to Gregorian dates occurred. Default is 00:00:00 local time, October
* 15, 1582. Dates before this point are Julian dates.
*
* This function works only for Gregorian calendars. If the UCalendar is not
* an instance of a Gregorian calendar, then a U_UNSUPPORTED_ERROR
* error code is set.
*
* @param cal The calendar object.
* @param date The given Gregorian cutover date.
* @param pErrorCode Pointer to a standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
*
* @see GregorianCalendar::setGregorianChange
* @see ucal_getGregorianChange
* @stable ICU 3.6
*/
U_STABLE void U_EXPORT2
ucal_setGregorianChange(UCalendar *cal, UDate date, UErrorCode *pErrorCode);
/**
* Gets the Gregorian Calendar change date. This is the point when the switch from
* Julian dates to Gregorian dates occurred. Default is 00:00:00 local time, October
* 15, 1582. Dates before this point are Julian dates.
*
* This function works only for Gregorian calendars. If the UCalendar is not
* an instance of a Gregorian calendar, then a U_UNSUPPORTED_ERROR
* error code is set.
*
* @param cal The calendar object.
* @param pErrorCode Pointer to a standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return The Gregorian cutover time for this calendar.
*
* @see GregorianCalendar::getGregorianChange
* @see ucal_setGregorianChange
* @stable ICU 3.6
*/
U_STABLE UDate U_EXPORT2
ucal_getGregorianChange(const UCalendar *cal, UErrorCode *pErrorCode);
/**
 * Kinds of numeric attribute that can be read from or written to a UCalendar.
 * @stable ICU 2.0
 */
typedef enum UCalendarAttribute {
    /**
     * Lenient parsing
     * @stable ICU 2.0
     */
    UCAL_LENIENT = 0,
    /**
     * First day of week
     * @stable ICU 2.0
     */
    UCAL_FIRST_DAY_OF_WEEK = 1,
    /**
     * Minimum number of days in first week
     * @stable ICU 2.0
     */
    UCAL_MINIMAL_DAYS_IN_FIRST_WEEK = 2,
    /**
     * The behavior for handling wall time repeating multiple times
     * at negative time zone offset transitions
     * @stable ICU 49
     */
    UCAL_REPEATED_WALL_TIME = 3,
    /**
     * The behavior for handling skipped wall time at positive time
     * zone offset transitions.
     * @stable ICU 49
     */
    UCAL_SKIPPED_WALL_TIME = 4
} UCalendarAttribute;
/**
 * Choices for resolving an ambiguous wall time at a time zone
 * offset transition.
 * @stable ICU 49
 */
typedef enum UCalendarWallTimeOption {
    /**
     * An ambiguous wall time to be interpreted as the latest.
     * This option is valid for UCAL_REPEATED_WALL_TIME and
     * UCAL_SKIPPED_WALL_TIME.
     * @stable ICU 49
     */
    UCAL_WALLTIME_LAST = 0,
    /**
     * An ambiguous wall time to be interpreted as the earliest.
     * This option is valid for UCAL_REPEATED_WALL_TIME and
     * UCAL_SKIPPED_WALL_TIME.
     * @stable ICU 49
     */
    UCAL_WALLTIME_FIRST = 1,
    /**
     * An ambiguous wall time to be interpreted as the next valid
     * wall time. This option is valid for UCAL_SKIPPED_WALL_TIME.
     * @stable ICU 49
     */
    UCAL_WALLTIME_NEXT_VALID = 2
} UCalendarWallTimeOption;
/**
* Get a numeric attribute associated with a UCalendar.
* Numeric attributes include the first day of the week, or the minimal number
* of days in the first week of the month.
* @param cal The UCalendar to query.
* @param attr The desired attribute; one of UCAL_LENIENT, UCAL_FIRST_DAY_OF_WEEK,
* UCAL_MINIMAL_DAYS_IN_FIRST_WEEK, UCAL_REPEATED_WALL_TIME or UCAL_SKIPPED_WALL_TIME
* @return The value of attr.
* @see ucal_setAttribute
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ucal_getAttribute(const UCalendar* cal,
UCalendarAttribute attr);
/**
* Set a numeric attribute associated with a UCalendar.
* Numeric attributes include the first day of the week, or the minimal number
* of days in the first week of the month.
* @param cal The UCalendar to set.
* @param attr The desired attribute; one of UCAL_LENIENT, UCAL_FIRST_DAY_OF_WEEK,
* UCAL_MINIMAL_DAYS_IN_FIRST_WEEK, UCAL_REPEATED_WALL_TIME or UCAL_SKIPPED_WALL_TIME
* @param newValue The new value of attr.
* @see ucal_getAttribute
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucal_setAttribute(UCalendar* cal,
UCalendarAttribute attr,
int32_t newValue);
/**
* Get a locale for which calendars are available.
* A UCalendar in a locale returned by this function will contain the correct
* day and month names for the locale.
* @param localeIndex The index of the desired locale.
* @return A locale for which calendars are available, or 0 (a null pointer) if none.
* @see ucal_countAvailable
* @stable ICU 2.0
*/
U_STABLE const char* U_EXPORT2
ucal_getAvailable(int32_t localeIndex);
/**
* Determine how many locales have calendars available.
* This function is most useful for determining the loop ending condition for
* calls to \ref ucal_getAvailable.
* @return The number of locales for which calendars are available.
* @see ucal_getAvailable
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ucal_countAvailable(void);
/**
* Get a UCalendar's current time in millis.
* The time is represented as milliseconds from the epoch.
* @param cal The UCalendar to query.
* @param status A pointer to a UErrorCode to receive any errors
* @return The calendar's current time in millis.
* @see ucal_setMillis
* @see ucal_setDate
* @see ucal_setDateTime
* @stable ICU 2.0
*/
U_STABLE UDate U_EXPORT2
ucal_getMillis(const UCalendar* cal,
UErrorCode* status);
/**
* Set a UCalendar's current time in millis.
* The time is represented as milliseconds from the epoch.
* @param cal The UCalendar to set.
* @param dateTime The desired date and time.
* @param status A pointer to a UErrorCode to receive any errors
* @see ucal_getMillis
* @see ucal_setDate
* @see ucal_setDateTime
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucal_setMillis(UCalendar* cal,
UDate dateTime,
UErrorCode* status );
/**
* Set a UCalendar's current date.
* The date is represented as a series of 32-bit integers.
* @param cal The UCalendar to set.
* @param year The desired year.
* @param month The desired month; one of UCAL_JANUARY, UCAL_FEBRUARY, UCAL_MARCH, UCAL_APRIL, UCAL_MAY,
* UCAL_JUNE, UCAL_JULY, UCAL_AUGUST, UCAL_SEPTEMBER, UCAL_OCTOBER, UCAL_NOVEMBER, UCAL_DECEMBER, UCAL_UNDECIMBER
* @param date The desired day of the month.
* @param status A pointer to a UErrorCode to receive any errors
* @see ucal_getMillis
* @see ucal_setMillis
* @see ucal_setDateTime
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucal_setDate(UCalendar* cal,
int32_t year,
int32_t month,
int32_t date,
UErrorCode* status);
/**
* Set a UCalendar's current date and time.
* The date and time are represented as a series of 32-bit integers.
* @param cal The UCalendar to set.
* @param year The desired year.
* @param month The desired month; one of UCAL_JANUARY, UCAL_FEBRUARY, UCAL_MARCH, UCAL_APRIL, UCAL_MAY,
* UCAL_JUNE, UCAL_JULY, UCAL_AUGUST, UCAL_SEPTEMBER, UCAL_OCTOBER, UCAL_NOVEMBER, UCAL_DECEMBER, UCAL_UNDECIMBER
* @param date The desired day of the month.
* @param hour The desired hour of day.
* @param minute The desired minute.
* @param second The desired second.
* @param status A pointer to a UErrorCode to receive any errors
* @see ucal_getMillis
* @see ucal_setMillis
* @see ucal_setDate
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucal_setDateTime(UCalendar* cal,
int32_t year,
int32_t month,
int32_t date,
int32_t hour,
int32_t minute,
int32_t second,
UErrorCode* status);
/**
* Determine whether two UCalendars are equivalent. Equivalent
* UCalendars will behave identically, but they may be set to
* different times.
* @param cal1 The first of the UCalendars to compare.
* @param cal2 The second of the UCalendars to compare.
* @return TRUE if cal1 and cal2 are equivalent, FALSE otherwise.
* @stable ICU 2.0
*/
U_STABLE UBool U_EXPORT2
ucal_equivalentTo(const UCalendar* cal1,
const UCalendar* cal2);
/**
* Add a specified signed amount to a particular field in a UCalendar.
* This can modify more significant fields in the calendar.
* Adding a positive value always means moving forward in time, so for the Gregorian calendar,
* starting with 100 BC and adding +1 to year results in 99 BC (even though this actually reduces
* the numeric value of the field itself).
* @param cal The UCalendar to which to add.
* @param field The field to which to add the signed value; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
* UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
* UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
* UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
* @param amount The signed amount to add to field. If the amount causes the value
* to exceed the maximum or minimum values for that field, other fields are modified
* to preserve the magnitude of the change.
* @param status A pointer to a UErrorCode to receive any errors
* @see ucal_roll
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucal_add(UCalendar* cal,
UCalendarDateFields field,
int32_t amount,
UErrorCode* status);
/**
* Add a specified signed amount to a particular field in a UCalendar.
* This will not modify more significant fields in the calendar.
* Rolling by a positive value always means moving forward in time (unless the limit of the
* field is reached, in which case it may pin or wrap), so for the Gregorian calendar,
* starting with 100 BC and rolling the year by +1 results in 99 BC.
* When eras have a definite beginning and end (as in the Chinese calendar, or as in most eras in the
* Japanese calendar) then rolling the year past either limit of the era will cause the year to wrap around.
* When eras only have a limit at one end, then attempting to roll the year past that limit will result in
* pinning the year at that limit. Note that for most calendars in which era 0 years move forward in time
* (such as Buddhist, Hebrew, or Islamic), it is possible for add or roll to result in negative years for
* era 0 (that is the only way to represent years before the calendar epoch).
* @param cal The UCalendar to which to add.
* @param field The field to which to add the signed value; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
* UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
* UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
* UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
* @param amount The signed amount to add to field. If the amount causes the value
* to exceed the maximum or minimum values for that field, the field is pinned to a permissible
* value.
* @param status A pointer to a UErrorCode to receive any errors
* @see ucal_add
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucal_roll(UCalendar* cal,
UCalendarDateFields field,
int32_t amount,
UErrorCode* status);
/**
* Get the current value of a field from a UCalendar.
* All fields are represented as 32-bit integers.
* @param cal The UCalendar to query.
* @param field The desired field; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
* UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
* UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
* UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
* @param status A pointer to a UErrorCode to receive any errors
* @return The value of the desired field.
* @see ucal_set
* @see ucal_isSet
* @see ucal_clearField
* @see ucal_clear
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ucal_get(const UCalendar* cal,
UCalendarDateFields field,
UErrorCode* status );
/**
* Set the value of a field in a UCalendar.
* All fields are represented as 32-bit integers.
* This function takes no UErrorCode parameter, so it does not report errors to the caller.
* @param cal The UCalendar to set.
* @param field The field to set; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
* UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
* UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
* UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
* @param value The desired value of field.
* @see ucal_get
* @see ucal_isSet
* @see ucal_clearField
* @see ucal_clear
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucal_set(UCalendar* cal,
UCalendarDateFields field,
int32_t value);
/**
* Determine whether a field in a UCalendar is set.
* All fields are represented as 32-bit integers.
* @param cal The UCalendar to query.
* @param field The desired field; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
* UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
* UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
* UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
* @return TRUE if field is set, FALSE otherwise.
* @see ucal_get
* @see ucal_set
* @see ucal_clearField
* @see ucal_clear
* @stable ICU 2.0
*/
U_STABLE UBool U_EXPORT2
ucal_isSet(const UCalendar* cal,
UCalendarDateFields field);
/**
* Clear a single field in a UCalendar.
* All fields are represented as 32-bit integers.
* To clear every field at once, see ucal_clear.
* @param cal The UCalendar containing the field to clear.
* @param field The field to clear; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
* UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
* UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
* UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
* @see ucal_get
* @see ucal_set
* @see ucal_isSet
* @see ucal_clear
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucal_clearField(UCalendar* cal,
UCalendarDateFields field);
/**
* Clear all fields in a UCalendar.
* All fields are represented as 32-bit integers.
* To clear only one field, see ucal_clearField.
* @param calendar The UCalendar to clear.
* @see ucal_get
* @see ucal_set
* @see ucal_isSet
* @see ucal_clearField
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucal_clear(UCalendar* calendar);
/**
 * Kinds of limit that can be queried for a UCalendar field.
 * @stable ICU 2.0
 */
typedef enum UCalendarLimitType {
    /** Minimum value */
    UCAL_MINIMUM = 0,
    /** Maximum value */
    UCAL_MAXIMUM = 1,
    /** Greatest minimum value */
    UCAL_GREATEST_MINIMUM = 2,
    /** Least maximum value */
    UCAL_LEAST_MAXIMUM = 3,
    /** Actual minimum value */
    UCAL_ACTUAL_MINIMUM = 4,
    /** Actual maximum value */
    UCAL_ACTUAL_MAXIMUM = 5
} UCalendarLimitType;
/**
* Determine a limit for a field in a UCalendar.
* A limit is a maximum or minimum value for a field.
* @param cal The UCalendar to query.
* @param field The desired field; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
* UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
* UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
* UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
* @param type The desired critical point; one of UCAL_MINIMUM, UCAL_MAXIMUM, UCAL_GREATEST_MINIMUM,
* UCAL_LEAST_MAXIMUM, UCAL_ACTUAL_MINIMUM, UCAL_ACTUAL_MAXIMUM
* @param status A pointer to a UErrorCode to receive any errors.
* @return The requested value.
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ucal_getLimit(const UCalendar* cal,
UCalendarDateFields field,
UCalendarLimitType type,
UErrorCode* status);
/**
* Get the locale for this calendar object. You can choose between valid and actual locale.
* @param cal The calendar object
* @param type The type of the locale we're looking for (valid or actual)
* @param status The error code for the operation
* @return The locale name
* @stable ICU 2.8
*/
U_STABLE const char * U_EXPORT2
ucal_getLocaleByType(const UCalendar *cal, ULocDataLocaleType type, UErrorCode* status);
/**
* Returns the timezone data version currently used by ICU.
* @param status The error code for the operation
* @return The version string, such as "2007f"
* @stable ICU 3.8
*/
U_STABLE const char * U_EXPORT2
ucal_getTZDataVersion(UErrorCode* status);
/**
* Returns the canonical system timezone ID or the normalized
* custom time zone ID for the given time zone ID.
* @param id The input timezone ID to be canonicalized.
* @param len The length of id, or -1 if null-terminated.
* @param result The buffer that receives the canonical system timezone ID
* or the custom timezone ID in normalized format.
* @param resultCapacity The capacity of the result buffer.
* @param isSystemID Receives whether the given ID is a known system
* timezone ID.
* @param status Receives the status. When the given timezone ID
* is neither a known system time zone ID nor a
* valid custom timezone ID, U_ILLEGAL_ARGUMENT_ERROR
* is set.
* @return The result string length, not including the terminating
* null.
* @stable ICU 4.0
*/
U_STABLE int32_t U_EXPORT2
ucal_getCanonicalTimeZoneID(const UChar* id, int32_t len,
UChar* result, int32_t resultCapacity, UBool *isSystemID, UErrorCode* status);
/**
* Get the resource keyword value string designating the calendar type for the UCalendar.
* @param cal The UCalendar to query.
* @param status The error code for the operation.
* @return The resource keyword value string.
* @see ucal_getKeywordValuesForLocale
* @stable ICU 4.2
*/
U_STABLE const char * U_EXPORT2
ucal_getType(const UCalendar *cal, UErrorCode* status);
/**
* Given a key and a locale, returns an array of string values in a preferred
* order that would make a difference. These are all and only those values where
* the open (creation) of the service with the locale formed from the input locale
* plus input keyword and that value has different behavior than creation with the
* input locale alone.
* @param key One of the keys supported by this service. For now, only
* "calendar" is supported.
* @param locale The locale
* @param commonlyUsed If set to true, only commonly used values for the given
* locale are returned, in preferred order. Otherwise,
* all the available values for the locale are returned.
* @param status The error status
* @return A string enumeration over keyword values for the given key and the locale.
* @stable ICU 4.2
*/
U_STABLE UEnumeration* U_EXPORT2
ucal_getKeywordValuesForLocale(const char* key,
const char* locale,
UBool commonlyUsed,
UErrorCode* status);
/**
 * Classification of a day of the week, as returned by ucal_getDayOfWeekType().
 * @stable ICU 4.4
 */
typedef enum UCalendarWeekdayType {
    /**
     * Designates a full weekday (no part of the day is included in the weekend).
     * @stable ICU 4.4
     */
    UCAL_WEEKDAY = 0,
    /**
     * Designates a full weekend day (the entire day is included in the weekend).
     * @stable ICU 4.4
     */
    UCAL_WEEKEND = 1,
    /**
     * Designates a day that starts as a weekday and transitions to the weekend.
     * Call ucal_getWeekendTransition() to get the time of transition.
     * @stable ICU 4.4
     */
    UCAL_WEEKEND_ONSET = 2,
    /**
     * Designates a day that starts as the weekend and transitions to a weekday.
     * Call ucal_getWeekendTransition() to get the time of transition.
     * @stable ICU 4.4
     */
    UCAL_WEEKEND_CEASE = 3
} UCalendarWeekdayType;
/**
* Returns whether the given day of the week is a weekday, a weekend day,
* or a day that transitions from one to the other, for the locale and
* calendar system associated with this UCalendar (the locale's region is
* often the most determinant factor). If a transition occurs at midnight,
* then the days before and after the transition will have the
* type UCAL_WEEKDAY or UCAL_WEEKEND. If a transition occurs at a time
* other than midnight, then the day of the transition will have
* the type UCAL_WEEKEND_ONSET or UCAL_WEEKEND_CEASE. In this case, the
* function ucal_getWeekendTransition() will return the point of
* transition.
* @param cal The UCalendar to query.
* @param dayOfWeek The day of the week whose type is desired (UCAL_SUNDAY..UCAL_SATURDAY).
* @param status The error code for the operation.
* @return The UCalendarWeekdayType for the day of the week.
* @see ucal_getWeekendTransition
* @stable ICU 4.4
*/
U_STABLE UCalendarWeekdayType U_EXPORT2
ucal_getDayOfWeekType(const UCalendar *cal, UCalendarDaysOfWeek dayOfWeek, UErrorCode* status);
/**
* Returns the time during the day at which the weekend begins or ends in
* this calendar system. If ucal_getDayOfWeekType() returns UCAL_WEEKEND_ONSET
* for the specified dayOfWeek, return the time at which the weekend begins.
* If ucal_getDayOfWeekType() returns UCAL_WEEKEND_CEASE for the specified dayOfWeek,
* return the time at which the weekend ends. If ucal_getDayOfWeekType() returns
* some other UCalendarWeekdayType for the specified dayOfWeek, it is an error condition
* (U_ILLEGAL_ARGUMENT_ERROR).
* @param cal The UCalendar to query.
* @param dayOfWeek The day of the week for which the weekend transition time is
* desired (UCAL_SUNDAY..UCAL_SATURDAY).
* @param status The error code for the operation.
* @return The milliseconds after midnight at which the weekend begins or ends.
* @see ucal_getDayOfWeekType
* @stable ICU 4.4
*/
U_STABLE int32_t U_EXPORT2
ucal_getWeekendTransition(const UCalendar *cal, UCalendarDaysOfWeek dayOfWeek, UErrorCode *status);
/**
* Returns TRUE if the given UDate is in the weekend in
* this calendar system.
* @param cal The UCalendar to query.
* @param date The UDate in question.
* @param status The error code for the operation.
* @return TRUE if the given UDate is in the weekend in
* this calendar system, FALSE otherwise.
* @see ucal_getDayOfWeekType
* @see ucal_getWeekendTransition
* @stable ICU 4.4
*/
U_STABLE UBool U_EXPORT2
ucal_isWeekend(const UCalendar *cal, UDate date, UErrorCode *status);
/**
* Return the difference between the target time and the time this calendar object is currently set to.
* If the target time is after the current calendar setting, then the returned value will be positive.
* The field parameter specifies the units of the return value. For example, if field is UCAL_MONTH
* and ucal_getFieldDifference returns 3, then the target time is 3 to less than 4 months after the
* current calendar setting.
*
* As a side effect of this call, this calendar is advanced toward target by the given amount. That is,
* calling this function has the side effect of calling ucal_add on this calendar with the specified
* field and an amount equal to the return value from this function.
*
* A typical way of using this function is to call it first with the largest field of interest, then
* with progressively smaller fields.
*
* @param cal The UCalendar to compare and update.
* @param target The target date to compare to the current calendar setting.
* @param field The field to compare; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
* UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
* UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
* UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
* @param status A pointer to a UErrorCode to receive any errors
* @return The date difference for the specified field.
* @see ucal_add
* @stable ICU 4.8
*/
U_STABLE int32_t U_EXPORT2
ucal_getFieldDifference(UCalendar* cal,
UDate target,
UCalendarDateFields field,
UErrorCode* status);
/**
 * Selects which time zone transition ucal_getTimeZoneTransitionDate looks up.
 * @stable ICU 50
 */
typedef enum UTimeZoneTransitionType {
    /**
     * Get the next transition after the current date,
     * i.e. excludes the current date
     * @stable ICU 50
     */
    UCAL_TZ_TRANSITION_NEXT = 0,
    /**
     * Get the next transition on or after the current date,
     * i.e. may include the current date
     * @stable ICU 50
     */
    UCAL_TZ_TRANSITION_NEXT_INCLUSIVE = 1,
    /**
     * Get the previous transition before the current date,
     * i.e. excludes the current date
     * @stable ICU 50
     */
    UCAL_TZ_TRANSITION_PREVIOUS = 2,
    /**
     * Get the previous transition on or before the current date,
     * i.e. may include the current date
     * @stable ICU 50
     */
    UCAL_TZ_TRANSITION_PREVIOUS_INCLUSIVE = 3
} UTimeZoneTransitionType;
/**
* Get the UDate for the next/previous time zone transition relative to
* the calendar's current date, in the time zone to which the calendar
* is currently set. If there is no known time zone transition of the
* requested type relative to the calendar's date, the function returns
* FALSE.
* @param cal The UCalendar to query.
* @param type The type of transition desired; one of UCAL_TZ_TRANSITION_NEXT,
* UCAL_TZ_TRANSITION_NEXT_INCLUSIVE, UCAL_TZ_TRANSITION_PREVIOUS,
* UCAL_TZ_TRANSITION_PREVIOUS_INCLUSIVE.
* @param transition A pointer to a UDate to be set to the transition time.
* If the function returns FALSE, the value set is unspecified.
* @param status A pointer to a UErrorCode to receive any errors.
* @return TRUE if a valid transition time is set in *transition, FALSE
* otherwise.
* @stable ICU 50
*/
U_STABLE UBool U_EXPORT2
ucal_getTimeZoneTransitionDate(const UCalendar* cal, UTimeZoneTransitionType type,
UDate* transition, UErrorCode* status);
/**
* Converts a system time zone ID to an equivalent Windows time zone ID. For example,
* Windows time zone ID "Pacific Standard Time" is returned for input "America/Los_Angeles".
*
* There are system time zones that cannot be mapped to Windows zones. When the input
* system time zone ID is unknown or unmappable to a Windows time zone, then this
* function returns 0 as the result length, but the operation itself remains successful
* (no error status set on return).
*
* This implementation utilizes Zone-Tzid mapping data. The mapping data is updated
* from time to time. To get the latest changes, please read the ICU user guide section
* "Updating the Time Zone Data".
*
* @param id A system time zone ID.
* @param len The length of id, or -1 if null-terminated.
* @param winid A buffer to receive a Windows time zone ID.
* @param winidCapacity The capacity of the result buffer winid.
* @param status Receives the status.
* @return The result string length, not including the terminating null.
* @see ucal_getTimeZoneIDForWindowsID
*
* @stable ICU 52
*/
U_STABLE int32_t U_EXPORT2
ucal_getWindowsTimeZoneID(const UChar* id, int32_t len,
UChar* winid, int32_t winidCapacity, UErrorCode* status);
/**
* Converts a Windows time zone ID to an equivalent system time zone ID
* for a region. For example, system time zone ID "America/Los_Angeles" is returned
* for input Windows ID "Pacific Standard Time" and region "US" (or null),
* "America/Vancouver" is returned for the same Windows ID "Pacific Standard Time" and
* region "CA".
*
* Not all Windows time zones can be mapped to system time zones. When the input
* Windows time zone ID is unknown or unmappable to a system time zone, then this
* function returns 0 as the result length, but the operation itself remains successful
* (no error status set on return).
*
* This implementation utilizes Zone-Tzid mapping data. The mapping data is updated
* from time to time. To get the latest changes, please read the ICU user guide section
* "Updating the Time Zone Data".
*
* @param winid A Windows time zone ID.
* @param len The length of winid, or -1 if null-terminated.
* @param region A null-terminated region code, or NULL if no regional preference.
* @param id A buffer to receive a system time zone ID.
* @param idCapacity The capacity of the result buffer id.
* @param status Receives the status.
* @return The result string length, not including the terminating null.
* @see ucal_getWindowsTimeZoneID
*
* @stable ICU 52
*/
U_STABLE int32_t U_EXPORT2
ucal_getTimeZoneIDForWindowsID(const UChar* winid, int32_t len, const char* region,
UChar* id, int32_t idCapacity, UErrorCode* status);
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif
// ucol.h
/*
*******************************************************************************
* Copyright (c) 1996-2015, International Business Machines Corporation and others.
* All Rights Reserved.
*******************************************************************************
*/
#ifndef UCOL_H
#define UCOL_H
#if !UCONFIG_NO_COLLATION
/**
* \file
* \brief C API: Collator
*
* For more information about the collation service see
* the User Guide.
*
* Collation service provides correct sorting orders for most locales supported in ICU.
* If specific data for a locale is not available, the orders eventually fall back
* to the CLDR root sort order.
*
* Sort ordering may be customized by providing your own set of rules. For more on
* this subject see the
* Collation Customization section of the User Guide.
*
* @see UCollationResult
* @see UNormalizationMode
* @see UCollationStrength
* @see UCollationElements
*/
/** A collator.
* For usage in C programs.
*/
struct UCollator;
/** structure representing a collator object instance
* @stable ICU 2.0
*/
typedef struct UCollator UCollator;
/**
* UCOL_LESS is returned if source string is compared to be less than target
* string in the ucol_strcoll() method.
* UCOL_EQUAL is returned if source string is compared to be equal to target
* string in the ucol_strcoll() method.
* UCOL_GREATER is returned if source string is compared to be greater than
* target string in the ucol_strcoll() method.
* @see ucol_strcoll()
*
* Possible values for a comparison result
* @stable ICU 2.0
*/
typedef enum {
/** string a == string b */
UCOL_EQUAL = 0,
/** string a > string b */
UCOL_GREATER = 1,
/** string a < string b */
UCOL_LESS = -1
} UCollationResult ;
/** Enum containing attribute values for controlling collation behavior.
* Here are all the allowable values. Not every attribute can take every value. The only
* universal value is UCOL_DEFAULT, which resets the attribute value to the predefined
* value for that locale
* @stable ICU 2.0
*/
typedef enum {
/** accepted by most attributes */
UCOL_DEFAULT = -1,
/** Primary collation strength */
UCOL_PRIMARY = 0,
/** Secondary collation strength */
UCOL_SECONDARY = 1,
/** Tertiary collation strength */
UCOL_TERTIARY = 2,
/** Default collation strength */
UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY,
/** Implicitly UCOL_TERTIARY + 1, i.e. the same numeric value (3) as UCOL_QUATERNARY below. */
UCOL_CE_STRENGTH_LIMIT,
/** Quaternary collation strength */
UCOL_QUATERNARY=3,
/** Identical collation strength */
UCOL_IDENTICAL=15,
/** Implicitly UCOL_IDENTICAL + 1, i.e. the same numeric value (16) as UCOL_OFF below. */
UCOL_STRENGTH_LIMIT,
/** Turn the feature off - works for UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
& UCOL_DECOMPOSITION_MODE*/
UCOL_OFF = 16,
/** Turn the feature on - works for UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
& UCOL_DECOMPOSITION_MODE*/
UCOL_ON = 17,
/** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */
UCOL_SHIFTED = 20,
/** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */
UCOL_NON_IGNORABLE = 21,
/** Valid for UCOL_CASE_FIRST - lower case sorts before upper case */
UCOL_LOWER_FIRST = 24,
/** upper case sorts before lower case */
UCOL_UPPER_FIRST = 25,
/** Implicitly UCOL_UPPER_FIRST + 1 (26); one past the largest attribute value, not a count of enumerators. */
UCOL_ATTRIBUTE_VALUE_COUNT
} UColAttributeValue;
/**
* Enum containing the codes for reordering segments of the collation table that are not script
* codes. These reordering codes are to be used in conjunction with the script codes.
* @see ucol_getReorderCodes
* @see ucol_setReorderCodes
* @see ucol_getEquivalentReorderCodes
* @see UScriptCode
* @stable ICU 4.8
*/
typedef enum {
/**
* A special reordering code that is used to specify the default
* reordering codes for a locale.
* @stable ICU 4.8
*/
UCOL_REORDER_CODE_DEFAULT = -1,
/**
* A special reordering code that is used to specify no reordering codes.
* @stable ICU 4.8
*/
UCOL_REORDER_CODE_NONE = USCRIPT_UNKNOWN,
/**
* A special reordering code that is used to specify all other codes used for
* reordering except for the codes listed as UColReorderCode values and those
* listed explicitly in a reordering.
* Note: this constant has the same numeric value as UCOL_REORDER_CODE_NONE
* (both are defined as USCRIPT_UNKNOWN).
* @stable ICU 4.8
*/
UCOL_REORDER_CODE_OTHERS = USCRIPT_UNKNOWN,
/**
* Characters with the space property.
* This is equivalent to the rule value "space".
* @stable ICU 4.8
*/
UCOL_REORDER_CODE_SPACE = 0x1000,
/**
* The first entry in the enumeration of reordering groups. This is intended for use in
* range checking and enumeration of the reorder codes.
* @stable ICU 4.8
*/
UCOL_REORDER_CODE_FIRST = UCOL_REORDER_CODE_SPACE,
/**
* Characters with the punctuation property.
* This is equivalent to the rule value "punct".
* @stable ICU 4.8
*/
UCOL_REORDER_CODE_PUNCTUATION = 0x1001,
/**
* Characters with the symbol property.
* This is equivalent to the rule value "symbol".
* @stable ICU 4.8
*/
UCOL_REORDER_CODE_SYMBOL = 0x1002,
/**
* Characters with the currency property.
* This is equivalent to the rule value "currency".
* @stable ICU 4.8
*/
UCOL_REORDER_CODE_CURRENCY = 0x1003,
/**
* Characters with the digit property.
* This is equivalent to the rule value "digit".
* @stable ICU 4.8
*/
UCOL_REORDER_CODE_DIGIT = 0x1004,
/**
* The limit of the reorder codes. This is intended for use in range checking
* and enumeration of the reorder codes.
* @stable ICU 4.8
*/
UCOL_REORDER_CODE_LIMIT = 0x1005
} UColReorderCode;
/**
* Base letter represents a primary difference. Set comparison
* level to UCOL_PRIMARY to ignore secondary and tertiary differences.
* Use this to set the strength of a Collator object.
* Example of primary difference, "abc" < "abd"
*
* Diacritical differences on the same base letter represent a secondary
* difference. Set comparison level to UCOL_SECONDARY to ignore tertiary
* differences. Use this to set the strength of a Collator object.
* Example of secondary difference, "ä" >> "a".
*
* Uppercase and lowercase versions of the same character represent a
* tertiary difference. Set comparison level to UCOL_TERTIARY to include
* all comparison differences. Use this to set the strength of a Collator
* object.
* Example of tertiary difference, "abc" <<< "ABC".
*
* Two characters are considered "identical" when they have the same
* unicode spellings. UCOL_IDENTICAL.
* For example, "ä" == "ä".
*
* UCollationStrength is also used to determine the strength of sort keys
* generated from UCollator objects.
* These values can now be found in the UColAttributeValue enum.
* @stable ICU 2.0
*/
typedef UColAttributeValue UCollationStrength;
/** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT
* value, as well as the values specific to each one.
* @stable ICU 2.0
*/
typedef enum {
/** Attribute for direction of secondary weights - used in Canadian French.
* Acceptable values are UCOL_ON, which results in secondary weights
* being considered backwards and UCOL_OFF which treats secondary
* weights in the order they appear.
* @stable ICU 2.0
*/
UCOL_FRENCH_COLLATION,
/** Attribute for handling variable elements.
* Acceptable values are UCOL_NON_IGNORABLE (default)
* which treats all the codepoints with non-ignorable
* primary weights in the same way,
* and UCOL_SHIFTED which causes codepoints with primary
* weights that are equal or below the variable top value
* to be ignored on primary level and moved to the quaternary
* level.
* @stable ICU 2.0
*/
UCOL_ALTERNATE_HANDLING,
/** Controls the ordering of upper and lower case letters.
* Acceptable values are UCOL_OFF (default), which orders
* upper and lower case letters in accordance to their tertiary
* weights, UCOL_UPPER_FIRST which forces upper case letters to
* sort before lower case letters, and UCOL_LOWER_FIRST which does
* the opposite.
* @stable ICU 2.0
*/
UCOL_CASE_FIRST,
/** Controls whether an extra case level (positioned before the third
* level) is generated or not. Acceptable values are UCOL_OFF (default),
* when case level is not generated, and UCOL_ON which causes the case
* level to be generated. Contents of the case level are affected by
* the value of UCOL_CASE_FIRST attribute. A simple way to ignore
* accent differences in a string is to set the strength to UCOL_PRIMARY
* and enable case level.
* @stable ICU 2.0
*/
UCOL_CASE_LEVEL,
/** Controls whether the normalization check and necessary normalizations
* are performed. When set to UCOL_OFF (default) no normalization check
* is performed. The correctness of the result is guaranteed only if the
* input data is in so-called FCD form (see users manual for more info).
* When set to UCOL_ON, an incremental check is performed to see whether
* the input data is in the FCD form. If the data is not in the FCD form,
* incremental NFD normalization is performed.
* @stable ICU 2.0
*/
UCOL_NORMALIZATION_MODE,
/** An alias for UCOL_NORMALIZATION_MODE attribute.
* @stable ICU 2.0
*/
UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE,
/** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY,
* UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength
* for most locales (except Japanese) is tertiary.
*
* Quaternary strength
* is useful when combined with shifted setting for alternate handling
* attribute and for JIS X 4061 collation, when it is used to distinguish
* between Katakana and Hiragana.
* Otherwise, quaternary level
* is affected only by the number of non-ignorable code points in
* the string.
*
* Identical strength is rarely useful, as it amounts
* to codepoints of the NFD form of the string.
* @stable ICU 2.0
*/
UCOL_STRENGTH,
/**
* When turned on, this attribute makes
* substrings of digits sort according to their numeric values.
*
* This is a way to get '100' to sort AFTER '2'. Note that the longest
* digit substring that can be treated as a single unit is
* 254 digits (not counting leading zeros). If a digit substring is
* longer than that, the digits beyond the limit will be treated as a
* separate digit substring.
*
* A "digit" in this sense is a code point with General_Category=Nd,
* which does not include circled numbers, roman numerals, etc.
* Only a contiguous digit substring is considered, that is,
* non-negative integers without separators.
* There is no support for plus/minus signs, decimals, exponents, etc.
*
* Note: the explicit value skips UCOL_STRENGTH + 1, which is not assigned
* to any constant declared in this header.
*
* @stable ICU 2.8
*/
UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2,
/**
* The number of UColAttribute constants.
* @stable ICU 2.0
*/
UCOL_ATTRIBUTE_COUNT
} UColAttribute;
/** Options for retrieving the rule string
* @stable ICU 2.0
*/
typedef enum {
/**
* Retrieves the tailoring rules only.
* Same as calling the version of getRules() without UColRuleOption.
* @stable ICU 2.0
*/
UCOL_TAILORING_ONLY,
/**
* Retrieves the "UCA rules" concatenated with the tailoring rules.
* The "UCA rules" are an approximation of the root collator's sort order.
* They are almost never used or useful at runtime and can be removed from the data.
* See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
* @stable ICU 2.0
*/
UCOL_FULL_RULES
} UColRuleOption ;
/**
* Open a UCollator for comparing strings.
*
* For some languages, multiple collation types are available;
* for example, "de@collation=phonebook".
* Starting with ICU 54, collation attributes can be specified via locale keywords as well,
* in the old locale extension syntax ("el@colCaseFirst=upper")
* or in language tag syntax ("el-u-kf-upper").
* See User Guide: Collation API.
*
* The UCollator pointer is used in all the calls to the Collation
* service. When finished, the collator must be disposed of by calling
* {@link #ucol_close }.
* @param loc The locale containing the required collation rules.
* Special values for locales can be passed in -
* if NULL is passed for the locale, the default locale
* collation rules will be used. If empty string ("") or
* "root" are passed, the root collator will be returned.
* @param status A pointer to a UErrorCode to receive any errors
* @return A pointer to a UCollator, or 0 if an error occurred.
* @see ucol_openRules
* @see ucol_safeClone
* @see ucol_close
* @stable ICU 2.0
*/
U_STABLE UCollator* U_EXPORT2
ucol_open(const char *loc, UErrorCode *status);
/**
* Produce a UCollator instance according to the rules supplied.
* The rules are used to change the default ordering, defined in the
* UCA in a process called tailoring. The resulting UCollator pointer
* can be used in the same way as the one obtained by {@link #ucol_open }.
* @param rules A string describing the collation rules. For the syntax
* of the rules please see users guide.
* @param rulesLength The length of rules, or -1 if null-terminated.
* @param normalizationMode The normalization mode: One of
* UCOL_OFF (expect the text to not need normalization),
* UCOL_ON (normalize), or
* UCOL_DEFAULT (set the mode according to the rules)
* @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
* UCOL_TERTIARY, UCOL_IDENTICAL, UCOL_DEFAULT_STRENGTH - can be also set in the rules.
* @param parseError A pointer to UParseError to receive information about errors
* that occurred during parsing. This argument can currently be set
* to NULL, but at the user's own risk. Please provide a real structure.
* @param status A pointer to a UErrorCode to receive any errors
* @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case
* of error - please use status argument to check for errors.
* @see ucol_open
* @see ucol_safeClone
* @see ucol_close
* @stable ICU 2.0
*/
U_STABLE UCollator* U_EXPORT2
ucol_openRules( const UChar *rules,
int32_t rulesLength,
UColAttributeValue normalizationMode,
UCollationStrength strength,
UParseError *parseError,
UErrorCode *status);
/**
* Get sets containing the contractions and/or expansions defined by the collator.
* The sets include both the root collator's expansions and the expansions
* defined by the tailoring
* @param coll collator
* @param contractions if not NULL, the set to hold the contractions
* @param expansions if not NULL, the set to hold the expansions
* @param addPrefixes add the prefix contextual elements to contractions
* @param status to hold the error code
*
* @stable ICU 3.4
*/
U_STABLE void U_EXPORT2
ucol_getContractionsAndExpansions( const UCollator *coll,
USet *contractions, USet *expansions,
UBool addPrefixes, UErrorCode *status);
/**
* Close a UCollator.
* Once closed, a UCollator should not be used. Every open collator should
* be closed. Otherwise, a memory leak will result.
* @param coll The UCollator to close.
* @see ucol_open
* @see ucol_openRules
* @see ucol_safeClone
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucol_close(UCollator *coll);
/**
* Compare two strings.
* The strings will be compared using the options already specified.
* @param coll The UCollator containing the comparison rules.
* @param source The source string.
* @param sourceLength The length of source, or -1 if null-terminated.
* @param target The target string.
* @param targetLength The length of target, or -1 if null-terminated.
* @return The result of comparing the strings; one of UCOL_EQUAL,
* UCOL_GREATER, UCOL_LESS
* @see ucol_greater
* @see ucol_greaterOrEqual
* @see ucol_equal
* @stable ICU 2.0
*/
U_STABLE UCollationResult U_EXPORT2
ucol_strcoll( const UCollator *coll,
const UChar *source,
int32_t sourceLength,
const UChar *target,
int32_t targetLength);
/**
* Compare two strings in UTF-8.
* The strings will be compared using the options already specified.
* Note: When an input string contains a malformed UTF-8 byte sequence,
* this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD).
* @param coll The UCollator containing the comparison rules.
* @param source The source UTF-8 string.
* @param sourceLength The length of source, or -1 if null-terminated.
* @param target The target UTF-8 string.
* @param targetLength The length of target, or -1 if null-terminated.
* @param status A pointer to a UErrorCode to receive any errors
* @return The result of comparing the strings; one of UCOL_EQUAL,
* UCOL_GREATER, UCOL_LESS
* @see ucol_greater
* @see ucol_greaterOrEqual
* @see ucol_equal
* @stable ICU 50
*/
U_STABLE UCollationResult U_EXPORT2
ucol_strcollUTF8( const UCollator *coll,
const char *source,
int32_t sourceLength,
const char *target,
int32_t targetLength,
UErrorCode *status);
/**
* Determine if one string is greater than another.
* This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER
* @param coll The UCollator containing the comparison rules.
* @param source The source string.
* @param sourceLength The length of source, or -1 if null-terminated.
* @param target The target string.
* @param targetLength The length of target, or -1 if null-terminated.
* @return TRUE if source is greater than target, FALSE otherwise.
* @see ucol_strcoll
* @see ucol_greaterOrEqual
* @see ucol_equal
* @stable ICU 2.0
*/
U_STABLE UBool U_EXPORT2
ucol_greater(const UCollator *coll,
const UChar *source, int32_t sourceLength,
const UChar *target, int32_t targetLength);
/**
* Determine if one string is greater than or equal to another.
* This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS
* @param coll The UCollator containing the comparison rules.
* @param source The source string.
* @param sourceLength The length of source, or -1 if null-terminated.
* @param target The target string.
* @param targetLength The length of target, or -1 if null-terminated.
* @return TRUE if source is greater than or equal to target, FALSE otherwise.
* @see ucol_strcoll
* @see ucol_greater
* @see ucol_equal
* @stable ICU 2.0
*/
U_STABLE UBool U_EXPORT2
ucol_greaterOrEqual(const UCollator *coll,
const UChar *source, int32_t sourceLength,
const UChar *target, int32_t targetLength);
/**
* Compare two strings for equality.
* This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL
* @param coll The UCollator containing the comparison rules.
* @param source The source string.
* @param sourceLength The length of source, or -1 if null-terminated.
* @param target The target string.
* @param targetLength The length of target, or -1 if null-terminated.
* @return TRUE if source is equal to target, FALSE otherwise
* @see ucol_strcoll
* @see ucol_greater
* @see ucol_greaterOrEqual
* @stable ICU 2.0
*/
U_STABLE UBool U_EXPORT2
ucol_equal(const UCollator *coll,
const UChar *source, int32_t sourceLength,
const UChar *target, int32_t targetLength);
/**
* Compare two UTF-8 encoded strings supplied through character iterators.
* The strings will be compared using the options already specified.
* @param coll The UCollator containing the comparison rules.
* @param sIter The source string iterator.
* @param tIter The target string iterator.
* @param status A pointer to a UErrorCode to receive any errors
* @return The result of comparing the strings; one of UCOL_EQUAL,
* UCOL_GREATER, UCOL_LESS
* @see ucol_strcoll
* @stable ICU 2.6
*/
U_STABLE UCollationResult U_EXPORT2
ucol_strcollIter( const UCollator *coll,
UCharIterator *sIter,
UCharIterator *tIter,
UErrorCode *status);
/**
* Get the collation strength used in a UCollator.
* The strength influences how strings are compared.
* @param coll The UCollator to query.
* @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
* UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL
* @see ucol_setStrength
* @stable ICU 2.0
*/
U_STABLE UCollationStrength U_EXPORT2
ucol_getStrength(const UCollator *coll);
/**
* Set the collation strength used in a UCollator.
* The strength influences how strings are compared.
* @param coll The UCollator to set.
* @param strength The desired collation strength; one of UCOL_PRIMARY,
* UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT
* @see ucol_getStrength
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucol_setStrength(UCollator *coll,
UCollationStrength strength);
/**
* Retrieves the reordering codes for this collator.
* These reordering codes are a combination of UScript codes and UColReorderCode entries.
* @param coll The UCollator to query.
* @param dest The array to fill with the script ordering.
* @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
* will only return the length of the result without writing any codes (pre-flighting).
* @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a
* failure before the function call.
* @return The number of reordering codes written to the dest array.
* @see ucol_setReorderCodes
* @see ucol_getEquivalentReorderCodes
* @see UScriptCode
* @see UColReorderCode
* @stable ICU 4.8
*/
U_STABLE int32_t U_EXPORT2
ucol_getReorderCodes(const UCollator* coll,
int32_t* dest,
int32_t destCapacity,
UErrorCode *pErrorCode);
/**
* Sets the reordering codes for this collator.
* Collation reordering allows scripts and some other groups of characters
* to be moved relative to each other. This reordering is done on top of
* the DUCET/CLDR standard collation order. Reordering can specify groups to be placed
* at the start and/or the end of the collation order. These groups are specified using
* UScript codes and UColReorderCode entries.
*
* By default, reordering codes specified for the start of the order are placed in the
* order given after several special non-script blocks. These special groups of characters
* are space, punctuation, symbol, currency, and digit. These special groups are represented with
* UColReorderCode entries. Script groups can be intermingled with
* these special non-script groups if those special groups are explicitly specified in the reordering.
*
* The special code OTHERS stands for any script that is not explicitly
* mentioned in the list of reordering codes given. Anything that is after OTHERS
* will go at the very end of the reordering in the order given.
*
* The special reorder code DEFAULT will reset the reordering for this collator
* to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
* was specified when this collator was created from resource data or from rules. The
* DEFAULT code must be the sole code supplied when it is used.
* If not, then U_ILLEGAL_ARGUMENT_ERROR will be set.
*
* The special reorder code NONE will remove any reordering for this collator.
* The result of setting no reordering will be to have the DUCET/CLDR ordering used. The
* NONE code must be the sole code supplied when it is used.
*
* @param coll The UCollator to set.
* @param reorderCodes An array of script codes in the new order. This can be NULL if the
* length is also set to 0. An empty array will clear any reordering codes on the collator.
* @param reorderCodesLength The length of reorderCodes.
* @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a
* failure before the function call.
* @see ucol_getReorderCodes
* @see ucol_getEquivalentReorderCodes
* @see UScriptCode
* @see UColReorderCode
* @stable ICU 4.8
*/
U_STABLE void U_EXPORT2
ucol_setReorderCodes(UCollator* coll,
const int32_t* reorderCodes,
int32_t reorderCodesLength,
UErrorCode *pErrorCode);
/**
* Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
* codes will be grouped and must reorder together.
* Beginning with ICU 55, scripts only reorder together if they are primary-equal,
* for example Hiragana and Katakana.
*
* @param reorderCode The reorder code to determine equivalence for.
* @param dest The array to fill with the reorder codes grouped with reorderCode.
* @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
* will only return the length of the result without writing any codes (pre-flighting).
* @param pErrorCode Must be a valid pointer to an error code value, which must not indicate
* a failure before the function call.
* @return The number of reordering codes written to the dest array.
* @see ucol_setReorderCodes
* @see ucol_getReorderCodes
* @see UScriptCode
* @see UColReorderCode
* @stable ICU 4.8
*/
U_STABLE int32_t U_EXPORT2
ucol_getEquivalentReorderCodes(int32_t reorderCode,
int32_t* dest,
int32_t destCapacity,
UErrorCode *pErrorCode);
/**
* Get the display name for a UCollator.
* The display name is suitable for presentation to a user.
* @param objLoc The locale of the collator in question.
* @param dispLoc The locale for display.
* @param result A pointer to a buffer to receive the display name.
* @param resultLength The maximum size of result
* (presumably counted in UChars, since result is a UChar buffer - confirm).
* @param status A pointer to a UErrorCode to receive any errors
* @return The total buffer size needed; if greater than resultLength,
* the output was truncated.
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ucol_getDisplayName( const char *objLoc,
const char *dispLoc,
UChar *result,
int32_t resultLength,
UErrorCode *status);
/**
* Get a locale for which collation rules are available.
* A UCollator in a locale returned by this function will perform the correct
* collation for the locale.
* @param localeIndex The index of the desired locale; ucol_countAvailable()
* provides the loop ending condition when iterating over all indexes.
* @return A locale for which collation rules are available, or 0 if none.
* @see ucol_countAvailable
* @stable ICU 2.0
*/
U_STABLE const char* U_EXPORT2
ucol_getAvailable(int32_t localeIndex);
/**
* Determine how many locales have collation rules available.
* This function is most useful for determining the loop ending condition for
* calls to {@link #ucol_getAvailable }.
* @return The number of locales for which collation rules are available.
* @see ucol_getAvailable
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ucol_countAvailable(void);
#if !UCONFIG_NO_SERVICE
/**
* Create a string enumerator of all locales for which a valid
* collator may be opened.
* Only declared when UCONFIG_NO_SERVICE is not set.
* @param status input-output error code
* @return a string enumeration over locale strings. The caller is
* responsible for closing the result.
* @stable ICU 3.0
*/
U_STABLE UEnumeration* U_EXPORT2
ucol_openAvailableLocales(UErrorCode *status);
#endif /* !UCONFIG_NO_SERVICE */
/**
* Create a string enumerator of all possible keywords that are relevant to
* collation. At this point, the only recognized keyword for this
* service is "collation".
* @param status input-output error code
* @return a string enumeration over collation keyword strings. The caller is
* responsible for closing the result.
* @see ucol_getKeywordValues
* @stable ICU 3.0
*/
U_STABLE UEnumeration* U_EXPORT2
ucol_getKeywords(UErrorCode *status);
/**
* Given a keyword, create a string enumeration of all values
* for that keyword that are currently in use.
* @param keyword a particular keyword as enumerated by
* ucol_getKeywords. If any other keyword is passed in, *status is set
* to U_ILLEGAL_ARGUMENT_ERROR.
* @param status input-output error code
* @return a string enumeration over collation keyword values, or NULL
* upon error. The caller is responsible for closing the result.
* @see ucol_getKeywords
* @stable ICU 3.0
*/
U_STABLE UEnumeration* U_EXPORT2
ucol_getKeywordValues(const char *keyword, UErrorCode *status);
/**
* Given a key and a locale, returns an enumeration of string values in a preferred
* order that would make a difference. These are all and only those values where
* the open (creation) of the service with the locale formed from the input locale
* plus input keyword and that value has different behavior than creation with the
* input locale alone.
* @param key one of the keys supported by this service. For now, only
* "collation" is supported.
* @param locale the locale
* @param commonlyUsed if set to true it will return only commonly used values
* with the given locale in preferred order. Otherwise,
* it will return all the available values for the locale.
* @param status error status
* @return a string enumeration over keyword values for the given key and the locale.
* @stable ICU 4.2
*/
U_STABLE UEnumeration* U_EXPORT2
ucol_getKeywordValuesForLocale(const char* key,
const char* locale,
UBool commonlyUsed,
UErrorCode* status);
/**
* Return the functionally equivalent locale for the specified
* input locale, with respect to given keyword, for the
* collation service.
*
* If two different input locale + keyword
* combinations produce the same result locale, then collators
* instantiated for these two different input locales will behave
* equivalently. The converse is not always true; two collators
* may in fact be equivalent, but return different results, due to
* internal details. The return result has no other meaning than
* that stated above, and implies nothing as to the relationship
* between the two locales.
*
* This is intended for use by
* applications that wish to cache collators, or otherwise reuse
* collators when possible. The functional equivalent may change
* over time. For more information, please see the
* Locales and Services section of the ICU User Guide.
* @param result fillin for the functionally equivalent result locale
* @param resultCapacity capacity of the fillin buffer
* @param keyword a particular keyword as enumerated by
* ucol_getKeywords.
* @param locale the specified input locale
* @param isAvailable if non-NULL, pointer to a fillin parameter that
* on return indicates whether the specified input locale was 'available'
* to the collation service. A locale is defined as 'available' if it
* physically exists within the collation locale data.
* @param status pointer to input-output error code
* @return the actual buffer size needed for the locale. If greater
* than resultCapacity, the returned full name will be truncated and
* an error code will be returned.
* @stable ICU 3.0
*/
U_STABLE int32_t U_EXPORT2
ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
const char* keyword, const char* locale,
UBool* isAvailable, UErrorCode* status);
/**
* Get the collation tailoring rules from a UCollator.
* The rules will follow the rule syntax.
* @param coll The UCollator to query.
* @param length Output parameter; presumably receives the length of the
* returned rule string - not documented here, confirm against the implementation.
* @return The collation tailoring rules.
* @stable ICU 2.0
*/
U_STABLE const UChar* U_EXPORT2
ucol_getRules( const UCollator *coll,
int32_t *length);
/**
* Get a sort key for a string from a UCollator.
* Sort keys may be compared using strcmp.
*
* Note that sort keys are often less efficient than simply doing comparison.
* For more details, see the ICU User Guide.
*
* Like ICU functions that write to an output buffer, the buffer contents
* are undefined if the buffer capacity (resultLength parameter) is too small.
* Unlike ICU functions that write a string to an output buffer,
* the terminating zero byte is counted in the sort key length.
* @param coll The UCollator containing the collation rules.
* @param source The string to transform.
* @param sourceLength The length of source, or -1 if null-terminated.
* @param result A pointer to a buffer to receive the sort key.
* @param resultLength The maximum size of result.
* @return The size needed to fully store the sort key.
* If there was an internal error generating the sort key,
* a zero value is returned.
* @see ucol_keyHashCode
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
ucol_getSortKey(const UCollator *coll,
const UChar *source,
int32_t sourceLength,
uint8_t *result,
int32_t resultLength);
/**
* Gets the next count bytes of a sort key. The caller needs
* to preserve the state array between calls and to provide
* the same type of UCharIterator set with the same string.
* The destination buffer provided must be big enough to store
* the number of requested bytes.
*
* The generated sort key may or may not be compatible with
* sort keys generated using ucol_getSortKey().
* @param coll The UCollator containing the collation rules.
* @param iter UCharIterator containing the string we need
* the sort key to be calculated for.
* @param state Opaque state of the sort key iteration: a two-element
* array that the caller must preserve between calls.
* @param dest Buffer to hold the resulting sort key part; it must
* have room for at least count bytes.
* @param count Number of sort key bytes required.
* @param status Error code indicator.
* @return The actual number of bytes of the sort key part. It can be
* smaller than count if we have reached the end of
* the sort key.
* @stable ICU 2.6
*/
U_STABLE int32_t U_EXPORT2
ucol_nextSortKeyPart(const UCollator *coll,
UCharIterator *iter,
uint32_t state[2],
uint8_t *dest, int32_t count,
UErrorCode *status);
/**
* Bound types taken by the ucol_getBound API.
* See ucol_getBound below for an explanation of each type.
* Do not change the values assigned to the members of this enum:
* the underlying code depends on them having these numbers.
* @stable ICU 2.0
*/
typedef enum {
/** lower bound */
UCOL_BOUND_LOWER = 0,
/** upper bound that will match strings of exact size */
UCOL_BOUND_UPPER = 1,
/** upper bound that will match all the strings that have the same initial substring as the given string */
UCOL_BOUND_UPPER_LONG = 2,
/** one more than the highest bound type above (evaluates to 3); ucol_getBound documents only the three types above as inputs */
UCOL_BOUND_VALUE_COUNT
} UColBoundMode;
/**
* Produce a bound for a given sortkey and a number of levels.
* Return value is always the number of bytes needed, regardless of
* whether the result buffer was big enough or even valid.
* Resulting bounds can be used to produce a range of strings that are
* between upper and lower bounds. For example, if bounds are produced
* for a sortkey of string "smith", strings between upper and lower
* bounds with one level would include "Smith", "SMITH", "sMiTh".
* There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
* is produced, strings matched would be as above. However, if bound
* produced using UCOL_BOUND_UPPER_LONG is used, the above example will
* also match "Smithsonian" and similar.
* For more on usage, see example in cintltst/capitst.c in procedure
* TestBounds.
* Sort keys may be compared using strcmp.
* @param source The source sortkey.
* @param sourceLength The length of source, or -1 if null-terminated.
* (If an unmodified sortkey is passed, it is always null
* terminated).
* @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which
* produces a lower inclusive bound, UCOL_BOUND_UPPER, that
* produces upper bound that matches strings of the same length
* or UCOL_BOUND_UPPER_LONG that matches strings that have the
* same starting substring as the source string.
* @param noOfLevels Number of levels required in the resulting bound (for most
* uses, the recommended value is 1). See the User Guide for an
* explanation of the number of levels a sortkey can have.
* @param result A pointer to a buffer to receive the resulting bound
* (a sortkey).
* @param resultLength The capacity of result, in bytes.
* @param status Used for returning error code if something went wrong. If the
* number of levels requested is higher than the number of levels
* in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is
* issued.
* @return The size needed to fully store the bound.
* @see ucol_keyHashCode
* @stable ICU 2.1
*/
U_STABLE int32_t U_EXPORT2
ucol_getBound(const uint8_t *source,
int32_t sourceLength,
UColBoundMode boundType,
uint32_t noOfLevels,
uint8_t *result,
int32_t resultLength,
UErrorCode *status);
/**
* Gets the version information for a Collator. Version is currently
* an opaque 32-bit number which depends, among other things, on major
* versions of the collator tailoring and UCA.
* @param coll The UCollator to query.
* @param info Output parameter: the version information is filled in here.
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
ucol_getVersion(const UCollator* coll, UVersionInfo info);
/**
* Gets the UCA version information for a Collator. Version is the
* UCA version number (for example 3.1.1 or 4.0).
* @param coll The UCollator to query.
* @param info Output parameter: the UCA version information is filled in here.
* @stable ICU 2.8
*/
U_STABLE void U_EXPORT2
ucol_getUCAVersion(const UCollator* coll, UVersionInfo info);
/**
* Merges two sort keys. The levels are merged with their corresponding counterparts
* (primaries with primaries, secondaries with secondaries etc.). Between the values
* from the same level a separator is inserted.
*
* This is useful, for example, for combining sort keys from first and last names
* to sort such pairs.
* See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys
*
* The recommended way to achieve "merged" sorting is by
* concatenating strings with U+FFFE between them.
* The concatenation has the same sort order as the merged sort keys,
* but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\uFFFE' + str2).
* Using strings with U+FFFE may yield shorter sort keys.
*
* For details about Sort Key Features see
* http://userguide.icu-project.org/collation/api#TOC-Sort-Key-Features
*
* It is possible to merge multiple sort keys by consecutively merging
* another one with the intermediate result.
*
* The length of the merge result is the sum of the lengths of the input sort keys.
*
* Example (uncompressed):
*
191B1D 01 050505 01 910505 00 * 1F2123 01 050505 01 910505 00* will be merged as *
191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00* * If the destination buffer is not big enough, then its contents are undefined. * If any of source lengths are zero or any of the source pointers are NULL/undefined, * the result is of size zero. * * @param src1 the first sort key * @param src1Length the length of the first sort key, including the zero byte at the end; * can be -1 if the function is to find the length * @param src2 the second sort key * @param src2Length the length of the second sort key, including the zero byte at the end; * can be -1 if the function is to find the length * @param dest the buffer where the merged sort key is written, * can be NULL if destCapacity==0 * @param destCapacity the number of bytes in the dest buffer * @return the length of the merged sort key, src1Length+src2Length; * can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments), * in which cases the contents of dest is undefined * @stable ICU 2.0 */ U_STABLE int32_t U_EXPORT2 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, const uint8_t *src2, int32_t src2Length, uint8_t *dest, int32_t destCapacity); /** * Universal attribute setter * @param coll collator which attributes are to be changed * @param attr attribute type * @param value attribute value * @param status to indicate whether the operation went on smoothly or there were errors * @see UColAttribute * @see UColAttributeValue * @see ucol_getAttribute * @stable ICU 2.0 */ U_STABLE void U_EXPORT2 ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status); /** * Universal attribute getter * @param coll collator which attributes are to be changed * @param attr attribute type * @return attribute value * @param status to indicate whether the operation went on smoothly or there were errors * @see UColAttribute * @see UColAttributeValue * @see ucol_setAttribute * @stable ICU 2.0 */ U_STABLE UColAttributeValue U_EXPORT2 
ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status); /** * Sets the variable top to the top of the specified reordering group. * The variable top determines the highest-sorting character * which is affected by UCOL_ALTERNATE_HANDLING. * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect. * @param coll the collator * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, * UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY; * or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group * @param pErrorCode Standard ICU error code. Its input value must * pass the U_SUCCESS() test, or else the function returns * immediately. Check for U_FAILURE() on output or use with * function chaining. (See User Guide for details.) * @see ucol_getMaxVariable * @stable ICU 53 */ U_STABLE void U_EXPORT2 ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode); /** * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING. * @param coll the collator * @return the maximum variable reordering group. * @see ucol_setMaxVariable * @stable ICU 53 */ U_STABLE UColReorderCode U_EXPORT2 ucol_getMaxVariable(const UCollator *coll); /** * Gets the variable top value of a Collator. * @param coll collator which variable top needs to be retrieved * @param status error code (not changed by function). If error code is set, * the return value is undefined. * @return the variable top primary weight * @see ucol_getMaxVariable * @see ucol_setVariableTop * @see ucol_restoreVariableTop * @stable ICU 2.0 */ U_STABLE uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status); /** * Thread safe cloning operation. The result is a clone of a given collator. * @param coll collator to be cloned * @param stackBuffer Deprecated functionality as of ICU 52, use NULL.
* . "ca" -> the first key is key('c') and second key is key('a').
* . "cha" -> the first key is key('ch') and second key is key('a').
*
* And in German phonebook collation,
*
* . "\u00E4b" (a-umlaut followed by b) -> the first key is key('a'),
* . the second key is key('e'), and the third key is key('b').
*
* Example of the iterator usage: (without error checking)
*
* . void CollationElementIterator_Example()
* . {
* .     UChar *s;
* .     int32_t order, primaryOrder;
* .     UCollationElements *c;
* .     UCollator *coll;
* .     UErrorCode success = U_ZERO_ERROR;
* .     s=(UChar*)malloc(sizeof(UChar) * (strlen("This is a test")+1) );
* .     u_uastrcpy(s, "This is a test");
* .     coll = ucol_open(NULL, &success);
* .     c = ucol_openElements(coll, s, u_strlen(s), &success);
* .     order = ucol_next(c, &success);
* .     primaryOrder = ucol_primaryOrder(order);
* .     ucol_reset(c);
* .     order = ucol_previous(c, &success);
* .     ucol_closeElements(c);
* .     ucol_close(coll);
* .     free(s);
* . }
*
* * ucol_next() returns the collation order of the next. * ucol_prev() returns the collation order of the previous character. * The Collation Element Iterator moves only in one direction between calls to * ucol_reset. That is, ucol_next() and ucol_prev can not be inter-used. * Whenever ucol_prev is to be called after ucol_next() or vice versa, * ucol_reset has to be called first to reset the status, shifting pointers to * either the end or the start of the string. Hence at the next call of * ucol_prev or ucol_next, the first or last collation order will be returned. * If a change of direction is done without a ucol_reset, the result is * undefined. * The result of a forward iterate (ucol_next) and reversed result of the * backward iterate (ucol_prev) on the same string are equivalent, if * collation orders with the value 0 are ignored. * Character based on the comparison level of the collator. A collation order * consists of primary order, secondary order and tertiary order. The data * type of the collation order is int32_t. * * @see UCollator */ /** * Open the collation elements for a string. * * @param coll The collator containing the desired collation rules. * @param text The text to iterate over. * @param textLength The number of characters in text, or -1 if null-terminated * @param status A pointer to a UErrorCode to receive any errors. * @return a struct containing collation element information * @stable ICU 2.0 */ U_STABLE UCollationElements* U_EXPORT2 ucol_openElements(const UCollator *coll, const UChar *text, int32_t textLength, UErrorCode *status); /** * get a hash code for a key... Not very useful! * @param key the given key. * @param length the size of the key array. * @return the hash code. * @stable ICU 2.0 */ U_STABLE int32_t U_EXPORT2 ucol_keyHashCode(const uint8_t* key, int32_t length); /** * Close a UCollationElements. * Once closed, a UCollationElements may no longer be used. * @param elems The UCollationElements to close. 
* @stable ICU 2.0 */ U_STABLE void U_EXPORT2 ucol_closeElements(UCollationElements *elems); /** * Reset the collation elements to their initial state. * This will move the 'cursor' to the beginning of the text. * Property settings for collation will be reset to the current status. * @param elems The UCollationElements to reset. * @see ucol_next * @see ucol_previous * @stable ICU 2.0 */ U_STABLE void U_EXPORT2 ucol_reset(UCollationElements *elems); /** * Get the ordering priority of the next collation element in the text. * A single character may contain more than one collation element. * @param elems The UCollationElements containing the text. * @param status A pointer to a UErrorCode to receive any errors. * @return The next collation elements ordering, otherwise returns NULLORDER * if an error has occured or if the end of string has been reached * @stable ICU 2.0 */ U_STABLE int32_t U_EXPORT2 ucol_next(UCollationElements *elems, UErrorCode *status); /** * Get the ordering priority of the previous collation element in the text. * A single character may contain more than one collation element. * Note that internally a stack is used to store buffered collation elements. * @param elems The UCollationElements containing the text. * @param status A pointer to a UErrorCode to receive any errors. Noteably * a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack * buffer has been exhausted. * @return The previous collation elements ordering, otherwise returns * NULLORDER if an error has occured or if the start of string has * been reached. * @stable ICU 2.0 */ U_STABLE int32_t U_EXPORT2 ucol_previous(UCollationElements *elems, UErrorCode *status); /** * Get the maximum length of any expansion sequences that end with the * specified comparison order. * This is useful for .... ? * @param elems The UCollationElements containing the text. * @param order A collation order returned by previous or next. 
* @return maximum size of the expansion sequences ending with the collation * element or 1 if collation element does not occur at the end of any * expansion sequence * @stable ICU 2.0 */ U_STABLE int32_t U_EXPORT2 ucol_getMaxExpansion(const UCollationElements *elems, int32_t order); /** * Set the text containing the collation elements. * Property settings for collation will remain the same. * In order to reset the iterator to the current collation property settings, * the API reset() has to be called. * @param elems The UCollationElements to set. * @param text The source text containing the collation elements. * @param textLength The length of text, or -1 if null-terminated. * @param status A pointer to a UErrorCode to receive any errors. * @see ucol_getText * @stable ICU 2.0 */ U_STABLE void U_EXPORT2 ucol_setText( UCollationElements *elems, const UChar *text, int32_t textLength, UErrorCode *status); /** * Get the offset of the current source character. * This is an offset into the text of the character containing the current * collation elements. * @param elems The UCollationElements to query. * @return The offset of the current source character. * @see ucol_setOffset * @stable ICU 2.0 */ U_STABLE int32_t U_EXPORT2 ucol_getOffset(const UCollationElements *elems); /** * Set the offset of the current source character. * This is an offset into the text of the character to be processed. * Property settings for collation will remain the same. * In order to reset the iterator to the current collation property settings, * the API reset() has to be called. * @param elems The UCollationElements to set. * @param offset The desired character offset. * @param status A pointer to a UErrorCode to receive any errors. * @see ucol_getOffset * @stable ICU 2.0 */ U_STABLE void U_EXPORT2 ucol_setOffset(UCollationElements *elems, int32_t offset, UErrorCode *status); /** * Get the primary order of a collation order. 
* @param order the collation order * @return the primary order of a collation order. * @stable ICU 2.6 */ U_STABLE int32_t U_EXPORT2 ucol_primaryOrder (int32_t order); /** * Get the secondary order of a collation order. * @param order the collation order * @return the secondary order of a collation order. * @stable ICU 2.6 */ U_STABLE int32_t U_EXPORT2 ucol_secondaryOrder (int32_t order); /** * Get the tertiary order of a collation order. * @param order the collation order * @return the tertiary order of a collation order. * @stable ICU 2.6 */ U_STABLE int32_t U_EXPORT2 ucol_tertiaryOrder (int32_t order); #endif /* #if !UCONFIG_NO_COLLATION */ #endif // ucsdet.h /* ********************************************************************** * Copyright (C) 2005-2013, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ucsdet.h * encoding: US-ASCII * indentation:4 * * created on: 2005Aug04 * created by: Andy Heninger * * ICU Character Set Detection, API for C * * Draft version 18 Oct 2005 * */ #ifndef __UCSDET_H #define __UCSDET_H #if !UCONFIG_NO_CONVERSION /** * \file * \brief C API: Charset Detection API * * This API provides a facility for detecting the * charset or encoding of character data in an unknown text format. * The input data can be from an array of bytes. *
* Character set detection is at best an imprecise operation. The detection
* process will attempt to identify the charset that best matches the characteristics
* of the byte data, but the process is partly statistical in nature, and
* the results can not be guaranteed to always be correct.
*
* For best accuracy in charset detection, the input data should be primarily * in a single language, and a minimum of a few hundred bytes worth of plain text * in the language are needed. The detection process will attempt to * ignore html or xml style markup that could otherwise obscure the content. */ struct UCharsetDetector; /** * Structure representing a charset detector * @stable ICU 3.6 */ typedef struct UCharsetDetector UCharsetDetector; struct UCharsetMatch; /** * Opaque structure representing a match that was identified * from a charset detection operation. * @stable ICU 3.6 */ typedef struct UCharsetMatch UCharsetMatch; /** * Open a charset detector. * * @param status Any error conditions occurring during the open * operation are reported back in this variable. * @return the newly opened charset detector. * @stable ICU 3.6 */ U_STABLE UCharsetDetector * U_EXPORT2 ucsdet_open(UErrorCode *status); /** * Close a charset detector. All storage and any other resources * owned by this charset detector will be released. Failure to * close a charset detector when finished with it can result in * memory leaks in the application. * * @param ucsd The charset detector to be closed. * @stable ICU 3.6 */ U_STABLE void U_EXPORT2 ucsdet_close(UCharsetDetector *ucsd); /** * Set the input byte data whose charset is to detected. * * Ownership of the input text byte array remains with the caller. * The input string must not be altered or deleted until the charset * detector is either closed or reset to refer to different input text. * * @param ucsd the charset detector to be used. * @param textIn the input text of unknown encoding. . * @param len the length of the input text, or -1 if the text * is NUL terminated. * @param status any error conditions are reported back in this variable. * * @stable ICU 3.6 */ U_STABLE void U_EXPORT2 ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status); /** Set the declared encoding for charset detection. 
* The declared encoding of an input text is an encoding obtained * by the user from an http header or xml declaration or similar source that * can be provided as an additional hint to the charset detector. * * How and whether the declared encoding will be used during the * detection process is TBD. * * @param ucsd the charset detector to be used. * @param encoding an encoding for the current data obtained from * a header or declaration or other source outside * of the byte data itself. * @param length the length of the encoding name, or -1 if the name string * is NUL terminated. * @param status any error conditions are reported back in this variable. * * @stable ICU 3.6 */ U_STABLE void U_EXPORT2 ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status); /** * Return the charset that best matches the supplied input data. * * Note though, that because the detection * only looks at the start of the input data, * there is a possibility that the returned charset will fail to handle * the full set of input data. *
* The returned UCharsetMatch object is owned by the UCharsetDetector. * It will remain valid until the detector input is reset, or until * the detector is closed. *
* The function will fail if *
* The returned UCharsetMatch objects are owned by the UCharsetDetector. * They will remain valid until the detector is closed or modified * *
* Return an error if *
* The state of the Charset detector that is passed in does not
* affect the result of this function, but requiring a valid, open
* charset detector as a parameter ensures that the charset detection
* service has been safely initialized and that the required detection
* data is available.
*
* Note: Multiple different charset encodings in a same family may use
* a single shared name in this implementation. For example, this method returns
* an array including "ISO-8859-1" (ISO Latin 1), but not including "windows-1252"
* (Windows Latin 1). However, actual detection result could be "windows-1252"
* when the input data matches Latin 1 code points with any points only available
* in "windows-1252".
*
* @param ucsd a Charset detector.
* @param status Any error conditions are reported back in this variable.
* @return an iterator providing access to the detectable charset names.
* @stable ICU 3.6
*/
U_STABLE UEnumeration * U_EXPORT2
ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status);
/**
* Test whether input filtering is enabled for this charset detector.
* Input filtering removes text that appears to be HTML or XML
* markup from the input before applying the code page detection
* heuristics.
*
* @param ucsd The charset detector to check.
* @return TRUE if filtering is enabled.
* @see ucsdet_enableInputFilter
* @stable ICU 3.6
*/
U_STABLE UBool U_EXPORT2
ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd);
/**
* Enable or disable filtering of input text. If filtering is enabled,
* text within angle brackets ("<" and ">") will be removed
* before detection, which will remove most HTML or XML markup.
*
* @param ucsd the charset detector to be modified.
* @param filter true to enable input text filtering.
* @return The previous setting.
* @see ucsdet_isInputFilterEnabled
*
* @stable ICU 3.6
*/
U_STABLE UBool U_EXPORT2
ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter);
#endif
#endif /* __UCSDET_H */
// udateintervalformat.h
/*
*****************************************************************************************
* Copyright (C) 2010-2012,2015 International Business Machines
* Corporation and others. All Rights Reserved.
*****************************************************************************************
*/
#ifndef UDATEINTERVALFORMAT_H
#define UDATEINTERVALFORMAT_H
#if !UCONFIG_NO_FORMATTING
/**
* \file
* \brief C API: Format a date interval.
*
* A UDateIntervalFormat is used to format the range between two UDate values
* in a locale-sensitive way, using a skeleton that specifies the precision and
* completeness of the information to show. If the range is smaller than the resolution
* specified by the skeleton, a single date format will be produced. If the range
* is larger than the format specified by the skeleton, a locale-specific fallback
* will be used to format the items missing from the skeleton.
*
* For example, if the range is 2010-03-04 07:56 - 2010-03-04 19:56 (12 hours)
* - The skeleton jm will produce
* for en_US, "7:56 AM - 7:56 PM"
* for en_GB, "7:56 - 19:56"
* - The skeleton MMMd will produce
* for en_US, "Mar 4"
* for en_GB, "4 Mar"
* If the range is 2010-03-04 07:56 - 2010-03-08 16:11 (4 days, 8 hours, 15 minutes)
* - The skeleton jm will produce
* for en_US, "3/4/2010 7:56 AM - 3/8/2010 4:11 PM"
* for en_GB, "4/3/2010 7:56 - 8/3/2010 16:11"
* - The skeleton MMMd will produce
* for en_US, "Mar 4-8"
* for en_GB, "4-8 Mar"
*
* Note: the "-" characters in the above sample output will actually be
* Unicode 2013, EN_DASH, in all but the last example.
*
* Note, in ICU 4.4 the standard skeletons for which date interval format data
* is usually available are as follows; best results will be obtained by using
* skeletons from this set, or those formed by combining these standard skeletons
* (note that for these skeletons, the length of digit field such as d, y, or
* M vs MM is irrelevant (but for non-digit fields such as MMM vs MMMM it is
* relevant). Note that a skeleton involving h or H generally explicitly requests
* that time style (12- or 24-hour time respectively). For a skeleton that
* requests the locale's default time style (h or H), use 'j' instead of h or H.
* h, H, hm, Hm,
* hv, Hv, hmv, Hmv,
* d,
* M, MMM, MMMM,
* Md, MMMd,
* MEd, MMMEd,
* y,
* yM, yMMM, yMMMM,
* yMd, yMMMd,
* yMEd, yMMMEd
*
* Locales for which ICU 4.4 seems to have a reasonable amount of this data
* include:
* af, am, ar, be, bg, bn, ca, cs, da, de (_AT), el, en (_AU,_CA,_GB,_IE,_IN...),
* eo, es (_AR,_CL,_CO,...,_US), et, fa, fi, fo, fr (_BE,_CH,_CA), fur, gsw, he,
* hr, hu, hy, is, it (_CH), ja, kk, km, ko, lt, lv, mk, ml, mt, nb, nl (_BE),
* nn, pl, pt (_PT), rm, ro, ru (_UA), sk, sl, so, sq, sr, sr_Latn, sv, th, to,
* tr, uk, ur, vi, zh (_SG), zh_Hant (_HK,_MO)
*/
/**
* Opaque UDateIntervalFormat object for use in C programs.
* Only the struct tag is declared in this header; objects are obtained from
* udtitvfmt_open() and released with udtitvfmt_close().
* @stable ICU 4.8
*/
struct UDateIntervalFormat;
typedef struct UDateIntervalFormat UDateIntervalFormat; /**< C typedef for struct UDateIntervalFormat. @stable ICU 4.8 */
/**
* Open a new UDateIntervalFormat object using the predefined rules for a
* given locale plus a specified skeleton.
* @param locale
* The locale whose rules should be used; may be NULL for
* default locale.
* @param skeleton
* A pattern containing only the fields desired for the interval
* format, for example "Hm", "yMMMd", or "yMMMEdHm".
* @param skeletonLength
* The length of skeleton; may be -1 if the skeleton is zero-terminated.
* @param tzID
* A timezone ID specifying the timezone to use. If NULL (0), use the
* default timezone.
* @param tzIDLength
* The length of tzID, or -1 if null-terminated. If 0, use the default
* timezone.
* @param status
* A pointer to a UErrorCode to receive any errors.
* @return
* A pointer to a UDateIntervalFormat object for the specified locale,
* or NULL if an error occurred.
* @see udtitvfmt_close
* @stable ICU 4.8
*/
U_STABLE UDateIntervalFormat* U_EXPORT2
udtitvfmt_open(const char* locale,
const UChar* skeleton,
int32_t skeletonLength,
const UChar* tzID,
int32_t tzIDLength,
UErrorCode* status);
/**
* Close a UDateIntervalFormat object. Once closed, it may no longer be used.
* @param formatter
* The UDateIntervalFormat object to close.
* @see udtitvfmt_open
* @stable ICU 4.8
*/
U_STABLE void U_EXPORT2
udtitvfmt_close(UDateIntervalFormat *formatter);
/**
* Formats a date/time range using the conventions established for the
* UDateIntervalFormat object.
* @param formatter
* The UDateIntervalFormat object specifying the format conventions.
* @param fromDate
* The starting point of the range.
* @param toDate
* The ending point of the range.
* @param result
* A pointer to a buffer to receive the formatted range.
* @param resultCapacity
* The capacity of result, in UChars.
* @param position
* A pointer to a UFieldPosition. On input, position->field is read.
* On output, position->beginIndex and position->endIndex indicate
* the beginning and ending indices of field number position->field,
* if such a field exists. This parameter may be NULL, in which case
* no field position data is returned.
* There may be multiple instances of a given field type in an
* interval format; in this case the position indices refer to the
* first instance.
* @param status
* A pointer to a UErrorCode to receive any errors.
* @return
* The total buffer size needed; if greater than resultCapacity, the
* output was truncated.
* @stable ICU 4.8
*/
U_STABLE int32_t U_EXPORT2
udtitvfmt_format(const UDateIntervalFormat* formatter,
UDate fromDate,
UDate toDate,
UChar* result,
int32_t resultCapacity,
UFieldPosition* position,
UErrorCode* status);
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif
// udatpg.h
/*
*******************************************************************************
*
* Copyright (C) 2007-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: udatpg.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2007jul30
* created by: Markus W. Scherer
*/
#ifndef __UDATPG_H__
#define __UDATPG_H__
/**
* \file
* \brief C API: Wrapper for icu::DateTimePatternGenerator (unicode/dtptngen.h).
*
* UDateTimePatternGenerator provides flexible generation of date format patterns,
* like "yy-MM-dd". The user can build up the generator by adding successive
* patterns. Once that is done, a query can be made using a "skeleton", which is
* a pattern which just includes the desired fields and lengths. The generator
* will return the "best fit" pattern corresponding to that skeleton.
*
* The main method people will use is udatpg_getBestPattern, since normally
* UDateTimePatternGenerator is pre-built with data from a particular locale.
* However, generators can be built directly from other data as well.
*
* Issue: may be useful to also have a function that returns the list of
* fields in a pattern, in order, since we have that internally.
* That would be useful for getting the UI order of field elements.
*/
/**
* Opaque type for a date/time pattern generator object.
* @stable ICU 3.8
*/
typedef void *UDateTimePatternGenerator;
/**
* Field number constants for udatpg_getAppendItemFormat() and similar functions.
* These constants are separate from UDateFormatField despite semantic overlap
* because some fields are merged for the date/time pattern generator.
* @stable ICU 3.8
*/
typedef enum UDateTimePatternField {
    /** @stable ICU 3.8 */
    UDATPG_ERA_FIELD,
    /** @stable ICU 3.8 */
    UDATPG_YEAR_FIELD,
    /** @stable ICU 3.8 */
    UDATPG_QUARTER_FIELD,
    /** @stable ICU 3.8 */
    UDATPG_MONTH_FIELD,
    /** @stable ICU 3.8 */
    UDATPG_WEEK_OF_YEAR_FIELD,
    /** @stable ICU 3.8 */
    UDATPG_WEEK_OF_MONTH_FIELD,
    /** @stable ICU 3.8 */
    UDATPG_WEEKDAY_FIELD,
    /** @stable ICU 3.8 */
    UDATPG_DAY_OF_YEAR_FIELD,
    /** @stable ICU 3.8 */
    UDATPG_DAY_OF_WEEK_IN_MONTH_FIELD,
    /** @stable ICU 3.8 */
    UDATPG_DAY_FIELD,
    /** @stable ICU 3.8 */
    UDATPG_DAYPERIOD_FIELD,
    /** @stable ICU 3.8 */
    UDATPG_HOUR_FIELD,
    /** @stable ICU 3.8 */
    UDATPG_MINUTE_FIELD,
    /** @stable ICU 3.8 */
    UDATPG_SECOND_FIELD,
    /** @stable ICU 3.8 */
    UDATPG_FRACTIONAL_SECOND_FIELD,
    /** @stable ICU 3.8 */
    UDATPG_ZONE_FIELD,
    /** @stable ICU 3.8 */
    UDATPG_FIELD_COUNT
} UDateTimePatternField;
/**
* Masks to control forcing the length of specified fields in the returned
* pattern to match those in the skeleton (when this would not happen
* otherwise). These may be combined to force the length of multiple fields.
* Used with udatpg_getBestPatternWithOptions, udatpg_replaceFieldTypesWithOptions.
* @stable ICU 4.4
*/
typedef enum UDateTimePatternMatchOptions {
    /** @stable ICU 4.4 */
    UDATPG_MATCH_NO_OPTIONS = 0,
    /** @stable ICU 4.4 */
    UDATPG_MATCH_HOUR_FIELD_LENGTH = 1 << UDATPG_HOUR_FIELD,
    /** @stable ICU 4.4 */
    UDATPG_MATCH_ALL_FIELDS_LENGTH = (1 << UDATPG_FIELD_COUNT) - 1
} UDateTimePatternMatchOptions;
/**
* Status return values from udatpg_addPattern().
* @stable ICU 3.8
*/
typedef enum UDateTimePatternConflict {
    /** @stable ICU 3.8 */
    UDATPG_NO_CONFLICT,
    /** @stable ICU 3.8 */
    UDATPG_BASE_CONFLICT,
    /** @stable ICU 3.8 */
    UDATPG_CONFLICT,
    /** @stable ICU 3.8 */
    UDATPG_CONFLICT_COUNT
} UDateTimePatternConflict;
/**
* Open a generator according to a given locale.
* @param locale the locale according to which the generator is opened.
* @param pErrorCode a pointer to the UErrorCode which must not indicate a
* failure before the function call.
* @return a pointer to UDateTimePatternGenerator.
* @stable ICU 3.8
*/
U_STABLE UDateTimePatternGenerator * U_EXPORT2
udatpg_open(const char *locale, UErrorCode *pErrorCode);
/**
* Open an empty generator, to be constructed with udatpg_addPattern(...) etc.
* @param pErrorCode a pointer to the UErrorCode which must not indicate a
* failure before the function call.
* @return a pointer to UDateTimePatternGenerator.
* @stable ICU 3.8
*/
U_STABLE UDateTimePatternGenerator * U_EXPORT2
udatpg_openEmpty(UErrorCode *pErrorCode);
/**
* Close a generator.
* @param dtpg a pointer to UDateTimePatternGenerator.
* @stable ICU 3.8
*/
U_STABLE void U_EXPORT2
udatpg_close(UDateTimePatternGenerator *dtpg);
/**
* Create a copy of a generator.
* @param dtpg a pointer to UDateTimePatternGenerator to be copied.
* @param pErrorCode a pointer to the UErrorCode which must not indicate a
* failure before the function call.
* @return a pointer to a new UDateTimePatternGenerator.
* @stable ICU 3.8
*/
U_STABLE UDateTimePatternGenerator * U_EXPORT2
udatpg_clone(const UDateTimePatternGenerator *dtpg, UErrorCode *pErrorCode);
/**
* Get the best pattern matching the input skeleton. It is guaranteed to
* have all of the fields in the skeleton.
*
* Note that this function uses a non-const UDateTimePatternGenerator:
* It uses a stateful pattern parser which is set up for each generator object,
* rather than creating one for each function call.
* Consecutive calls to this function do not affect each other,
* but this function cannot be used concurrently on a single generator object.
*
* @param dtpg a pointer to UDateTimePatternGenerator.
* @param skeleton
* The skeleton is a pattern containing only the variable fields.
* For example, "MMMdd" and "mmhh" are skeletons.
* @param length the length of skeleton
* @param bestPattern
* The best pattern found from the given skeleton.
* @param capacity the capacity of bestPattern.
* @param pErrorCode a pointer to the UErrorCode which must not indicate a
* failure before the function call.
* @return the length of bestPattern.
* @stable ICU 3.8
*/
U_STABLE int32_t U_EXPORT2
udatpg_getBestPattern(UDateTimePatternGenerator *dtpg,
    const UChar *skeleton, int32_t length,
    UChar *bestPattern, int32_t capacity,
    UErrorCode *pErrorCode);
/**
* Get the best pattern matching the input skeleton. It is guaranteed to
* have all of the fields in the skeleton.
*
* Note that this function uses a non-const UDateTimePatternGenerator:
* It uses a stateful pattern parser which is set up for each generator object,
* rather than creating one for each function call.
* Consecutive calls to this function do not affect each other,
* but this function cannot be used concurrently on a single generator object.
*
* @param dtpg a pointer to UDateTimePatternGenerator.
* @param skeleton
* The skeleton is a pattern containing only the variable fields.
* For example, "MMMdd" and "mmhh" are skeletons.
* @param length the length of skeleton
* @param options
* Options for forcing the length of specified fields in the
* returned pattern to match those in the skeleton (when this
* would not happen otherwise). For default behavior, use
* UDATPG_MATCH_NO_OPTIONS.
* @param bestPattern
* The best pattern found from the given skeleton.
* @param capacity
* the capacity of bestPattern.
* @param pErrorCode
* a pointer to the UErrorCode which must not indicate a
* failure before the function call.
* @return the length of bestPattern.
* @stable ICU 4.4
*/
U_STABLE int32_t U_EXPORT2
udatpg_getBestPatternWithOptions(UDateTimePatternGenerator *dtpg,
    const UChar *skeleton, int32_t length,
    UDateTimePatternMatchOptions options,
    UChar *bestPattern, int32_t capacity,
    UErrorCode *pErrorCode);
/**
* Get a unique skeleton from a given pattern. For example,
* both "MMM-dd" and "dd/MMM" produce the skeleton "MMMdd".
*
* Note that this function uses a non-const UDateTimePatternGenerator:
* It uses a stateful pattern parser which is set up for each generator object,
* rather than creating one for each function call.
* Consecutive calls to this function do not affect each other,
* but this function cannot be used concurrently on a single generator object.
*
* @param unusedDtpg a pointer to UDateTimePatternGenerator.
* This parameter is no longer used. Callers may pass NULL.
* @param pattern input pattern, such as "dd/MMM".
* @param length the length of pattern.
* @param skeleton such as "MMMdd"
* @param capacity the capacity of skeleton.
* @param pErrorCode a pointer to the UErrorCode which must not indicate a
* failure before the function call.
* @return the length of skeleton.
* @stable ICU 3.8
*/
U_STABLE int32_t U_EXPORT2
udatpg_getSkeleton(UDateTimePatternGenerator *unusedDtpg,
    const UChar *pattern, int32_t length,
    UChar *skeleton, int32_t capacity,
    UErrorCode *pErrorCode);
/**
* Get a unique base skeleton from a given pattern. This is the same
* as the skeleton, except that differences in length are minimized so
* as to only preserve the difference between string and numeric form. So
* for example, both "MMM-dd" and "d/MMM" produce the skeleton "MMMd"
* (notice the single d).
*
* Note that this function uses a non-const UDateTimePatternGenerator:
* It uses a stateful pattern parser which is set up for each generator object,
* rather than creating one for each function call.
* Consecutive calls to this function do not affect each other,
* but this function cannot be used concurrently on a single generator object.
*
* @param unusedDtpg a pointer to UDateTimePatternGenerator.
* This parameter is no longer used. Callers may pass NULL.
* @param pattern input pattern, such as "dd/MMM".
* @param length the length of pattern.
* @param baseSkeleton such as "MMMd"
* @param capacity the capacity of base skeleton.
* @param pErrorCode a pointer to the UErrorCode which must not indicate a
* failure before the function call.
* @return the length of baseSkeleton.
* @stable ICU 3.8
*/
U_STABLE int32_t U_EXPORT2
udatpg_getBaseSkeleton(UDateTimePatternGenerator *unusedDtpg,
    const UChar *pattern, int32_t length,
    UChar *baseSkeleton, int32_t capacity,
    UErrorCode *pErrorCode);
/**
* Adds a pattern to the generator. If the pattern has the same skeleton as
* an existing pattern, and the override parameter is set, then the previous
* value is overridden. Otherwise, the previous value is retained. In either
* case, the conflicting status is set and previous value is stored in
* conflicting pattern.
*
* Note that single-field patterns (like "MMM") are automatically added, and
* don't need to be added explicitly!
*
* @param dtpg a pointer to UDateTimePatternGenerator.
* @param pattern input pattern, such as "dd/MMM"
* @param patternLength the length of pattern.
* @param override When existing values are to be overridden use true,
* otherwise use false.
* @param conflictingPattern Previous pattern with the same skeleton.
* @param capacity the capacity of conflictingPattern.
* @param pLength a pointer to the length of conflictingPattern.
* @param pErrorCode a pointer to the UErrorCode which must not indicate a
* failure before the function call.
* @return conflicting status. The value could be UDATPG_NO_CONFLICT,
* UDATPG_BASE_CONFLICT or UDATPG_CONFLICT.
* @stable ICU 3.8
*/
U_STABLE UDateTimePatternConflict U_EXPORT2
udatpg_addPattern(UDateTimePatternGenerator *dtpg,
    const UChar *pattern, int32_t patternLength,
    UBool override,
    UChar *conflictingPattern, int32_t capacity, int32_t *pLength,
    UErrorCode *pErrorCode);
/**
* An AppendItem format is a pattern used to append a field if there is no
* good match. For example, suppose that the input skeleton is "GyyyyMMMd",
* and there is no matching pattern internally, but there is a pattern
* matching "yyyyMMMd", say "d-MM-yyyy". Then that pattern is used, plus the
* G. The way these two are conjoined is by using the AppendItemFormat for G
* (era). So if that value is, say "{0}, {1}" then the final resulting
* pattern is "d-MM-yyyy, G".
*
* There are actually three available variables: {0} is the pattern so far,
* {1} is the element we are adding, and {2} is the name of the element.
*
* This reflects the way that the CLDR data is organized.
*
* @param dtpg a pointer to UDateTimePatternGenerator.
* @param field UDateTimePatternField, such as UDATPG_ERA_FIELD
* @param value pattern, such as "{0}, {1}"
* @param length the length of value.
* @stable ICU 3.8
*/
U_STABLE void U_EXPORT2
udatpg_setAppendItemFormat(UDateTimePatternGenerator *dtpg,
    UDateTimePatternField field,
    const UChar *value, int32_t length);
/**
* Getter corresponding to udatpg_setAppendItemFormat(). Values below 0 or at or
* above UDATPG_FIELD_COUNT are illegal arguments.
*
* @param dtpg A pointer to UDateTimePatternGenerator.
* @param field UDateTimePatternField, such as UDATPG_ERA_FIELD
* @param pLength A pointer that will receive the length of appendItemFormat.
* @return appendItemFormat for field.
* @stable ICU 3.8
*/
U_STABLE const UChar * U_EXPORT2
udatpg_getAppendItemFormat(const UDateTimePatternGenerator *dtpg,
    UDateTimePatternField field,
    int32_t *pLength);
/**
* Set the name of field, eg "era" in English for ERA. These are only
* used if the corresponding AppendItemFormat is used, and if it contains a
* {2} variable.
*
* This reflects the way that the CLDR data is organized.
*
* @param dtpg a pointer to UDateTimePatternGenerator.
* @param field UDateTimePatternField
* @param value name for the field.
* @param length the length of value.
* @stable ICU 3.8
*/
U_STABLE void U_EXPORT2
udatpg_setAppendItemName(UDateTimePatternGenerator *dtpg,
    UDateTimePatternField field,
    const UChar *value, int32_t length);
/**
* Getter corresponding to udatpg_setAppendItemName(). Values below 0 or at or above
* UDATPG_FIELD_COUNT are illegal arguments.
*
* @param dtpg a pointer to UDateTimePatternGenerator.
* @param field UDateTimePatternField, such as UDATPG_ERA_FIELD
* @param pLength A pointer that will receive the length of the name for field.
* @return name for field
* @stable ICU 3.8
*/
U_STABLE const UChar * U_EXPORT2
udatpg_getAppendItemName(const UDateTimePatternGenerator *dtpg,
    UDateTimePatternField field,
    int32_t *pLength);
/**
* The DateTimeFormat is a message format pattern used to compose date and
* time patterns. The default pattern in the root locale is "{1} {0}", where
* {1} will be replaced by the date pattern and {0} will be replaced by the
* time pattern; however, other locales may specify patterns such as
* "{1}, {0}" or "{1} 'at' {0}", etc.
*
* This is used when the input skeleton contains both date and time fields,
* but there is not a close match among the added patterns. For example,
* suppose that this object was created by adding "dd-MMM" and "hh:mm", and
* its DateTimeFormat is the default "{1} {0}". Then if the input skeleton
* is "MMMdhmm", there is not an exact match, so the input skeleton is
* broken up into two components "MMMd" and "hmm". There are close matches
* for those two skeletons, so the result is put together with this pattern,
* resulting in "d-MMM h:mm".
*
* @param dtpg a pointer to UDateTimePatternGenerator.
* @param dtFormat
* message format pattern, where {1} will be replaced by the date
* pattern and {0} will be replaced by the time pattern.
* @param length the length of dtFormat.
* @stable ICU 3.8
*/
U_STABLE void U_EXPORT2
udatpg_setDateTimeFormat(const UDateTimePatternGenerator *dtpg,
const UChar *dtFormat, int32_t length);
/**
* Getter corresponding to udatpg_setDateTimeFormat().
* @param dtpg a pointer to UDateTimePatternGenerator.
* @param pLength A pointer that will receive the length of the format.
* @return the dateTimeFormat pattern.
* @stable ICU 3.8
*/
U_STABLE const UChar * U_EXPORT2
udatpg_getDateTimeFormat(const UDateTimePatternGenerator *dtpg,
int32_t *pLength);
/**
* The decimal value is used in formatting fractions of seconds. If the
* skeleton contains fractional seconds, then this is used with the
* fractional seconds. For example, suppose that the input pattern is
* "hhmmssSSSS", and the best matching pattern internally is "H:mm:ss", and
* the decimal string is ",". Then the resulting pattern is modified to be
* "H:mm:ss,SSSS"
*
* @param dtpg a pointer to UDateTimePatternGenerator.
* @param decimal the decimal string, such as ",".
* @param length the length of decimal.
* @stable ICU 3.8
*/
U_STABLE void U_EXPORT2
udatpg_setDecimal(UDateTimePatternGenerator *dtpg,
const UChar *decimal, int32_t length);
/**
* Getter corresponding to udatpg_setDecimal().
*
* @param dtpg a pointer to UDateTimePatternGenerator.
* @param pLength A pointer that will receive the length of the decimal string.
* @return the decimal string, corresponding to the decimal point.
* @stable ICU 3.8
*/
U_STABLE const UChar * U_EXPORT2
udatpg_getDecimal(const UDateTimePatternGenerator *dtpg,
int32_t *pLength);
/**
* Adjusts the field types (width and subtype) of a pattern to match what is
* in a skeleton. That is, if you supply a pattern like "d-M H:m", and a
* skeleton of "MMMMddhhmm", then the input pattern is adjusted to be
* "dd-MMMM hh:mm". This is used internally to get the best match for the
* input skeleton, but can also be used externally.
*
* Note that this function uses a non-const UDateTimePatternGenerator:
* It uses a stateful pattern parser which is set up for each generator object,
* rather than creating one for each function call.
* Consecutive calls to this function do not affect each other,
* but this function cannot be used concurrently on a single generator object.
*
* @param dtpg a pointer to UDateTimePatternGenerator.
* @param pattern Input pattern, such as "d-M H:m"
* @param patternLength the length of input pattern.
* @param skeleton Input skeleton whose field types the pattern is adjusted
* to match, such as "MMMMddhhmm"
* @param skeletonLength the length of input skeleton.
* @param dest pattern adjusted to match the skeleton fields widths and subtypes.
* @param destCapacity the capacity of dest.
* @param pErrorCode a pointer to the UErrorCode which must not indicate a
* failure before the function call.
* @return the length of dest.
* @stable ICU 3.8
*/
U_STABLE int32_t U_EXPORT2
udatpg_replaceFieldTypes(UDateTimePatternGenerator *dtpg,
const UChar *pattern, int32_t patternLength,
const UChar *skeleton, int32_t skeletonLength,
UChar *dest, int32_t destCapacity,
UErrorCode *pErrorCode);
/**
* Adjusts the field types (width and subtype) of a pattern to match what is
* in a skeleton. That is, if you supply a pattern like "d-M H:m", and a
* skeleton of "MMMMddhhmm", then the input pattern is adjusted to be
* "dd-MMMM hh:mm". This is used internally to get the best match for the
* input skeleton, but can also be used externally.
*
* Note that this function uses a non-const UDateTimePatternGenerator:
* It uses a stateful pattern parser which is set up for each generator object,
* rather than creating one for each function call.
* Consecutive calls to this function do not affect each other,
* but this function cannot be used concurrently on a single generator object.
*
* @param dtpg a pointer to UDateTimePatternGenerator.
* @param pattern Input pattern, such as "d-M H:m"
* @param patternLength the length of input pattern.
* @param skeleton Input skeleton whose field types the pattern is adjusted
* to match, such as "MMMMddhhmm"
* @param skeletonLength the length of input skeleton.
* @param options
* Options controlling whether the length of specified fields in the
* pattern are adjusted to match those in the skeleton (when this
* would not happen otherwise). For default behavior, use
* UDATPG_MATCH_NO_OPTIONS.
* @param dest pattern adjusted to match the skeleton fields widths and subtypes.
* @param destCapacity the capacity of dest.
* @param pErrorCode a pointer to the UErrorCode which must not indicate a
* failure before the function call.
* @return the length of dest.
* @stable ICU 4.4
*/
U_STABLE int32_t U_EXPORT2
udatpg_replaceFieldTypesWithOptions(UDateTimePatternGenerator *dtpg,
const UChar *pattern, int32_t patternLength,
const UChar *skeleton, int32_t skeletonLength,
UDateTimePatternMatchOptions options,
UChar *dest, int32_t destCapacity,
UErrorCode *pErrorCode);
/**
* Return a UEnumeration list of all the skeletons in canonical form.
* Call udatpg_getPatternForSkeleton() to get the corresponding pattern.
*
* @param dtpg a pointer to UDateTimePatternGenerator.
* @param pErrorCode a pointer to the UErrorCode which must not indicate a
* failure before the function call.
* @return a UEnumeration list of all the skeletons.
* The caller must close the object.
* @stable ICU 3.8
*/
U_STABLE UEnumeration * U_EXPORT2
udatpg_openSkeletons(const UDateTimePatternGenerator *dtpg, UErrorCode *pErrorCode);
/**
* Return a UEnumeration list of all the base skeletons in canonical form.
*
* @param dtpg a pointer to UDateTimePatternGenerator.
* @param pErrorCode a pointer to the UErrorCode which must not indicate a
* failure before the function call.
* @return a UEnumeration list of all the base skeletons.
* The caller must close the object.
* @stable ICU 3.8
*/
U_STABLE UEnumeration * U_EXPORT2
udatpg_openBaseSkeletons(const UDateTimePatternGenerator *dtpg, UErrorCode *pErrorCode);
/**
* Get the pattern corresponding to a given skeleton.
*
* @param dtpg a pointer to UDateTimePatternGenerator.
* @param skeleton the skeleton whose pattern is requested.
* @param skeletonLength the length of skeleton (passed by value, not a pointer).
* @param pLength pointer to the length of return pattern.
* @return pattern corresponding to a given skeleton.
* @stable ICU 3.8
*/
U_STABLE const UChar * U_EXPORT2
udatpg_getPatternForSkeleton(const UDateTimePatternGenerator *dtpg,
const UChar *skeleton, int32_t skeletonLength,
int32_t *pLength);
#endif
// ufieldpositer.h
/*
*****************************************************************************************
* Copyright (C) 2015-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*****************************************************************************************
*/
#ifndef UFIELDPOSITER_H
#define UFIELDPOSITER_H
#if !UCONFIG_NO_FORMATTING
/**
* \file
* \brief C API: UFieldPositionIterator for use with format APIs.
*
* Usage:
* ufieldpositer_open creates an empty (unset) UFieldPositionIterator.
* This can be passed to format functions such as {@link #udat_formatForFields},
* which will set it to apply to the fields in a particular formatted string.
* ufieldpositer_next can then be used to iterate over those fields,
* providing for each field its type (using values that are specific to the
* particular format type, such as date or number formats), as well as the
* start and end positions of the field in the formatted string.
* A given UFieldPositionIterator can be re-used for different format calls;
* each such call resets it to apply to that format string.
* ufieldpositer_close should be called to dispose of the UFieldPositionIterator
* when it is no longer needed.
*
* @see FieldPositionIterator
*/
/**
* Opaque UFieldPositionIterator object for use in C.
* Only the forward declaration is visible here; instances are handled
* through pointers (see ufieldpositer_open and ufieldpositer_close).
* @stable ICU 55
*/
struct UFieldPositionIterator;
typedef struct UFieldPositionIterator UFieldPositionIterator; /**< C typedef for struct UFieldPositionIterator. @stable ICU 55 */
/**
* Open a new, unset UFieldPositionIterator object.
* @param status
* A pointer to a UErrorCode to receive any errors.
* @return
* A pointer to an empty (unset) UFieldPositionIterator object,
* or NULL if an error occurred.
* @see ufieldpositer_close
* @stable ICU 55
*/
U_STABLE UFieldPositionIterator* U_EXPORT2
ufieldpositer_open(UErrorCode* status);
/**
* Close a UFieldPositionIterator object. Once closed it may no longer be used.
* @param fpositer
* A pointer to the UFieldPositionIterator object to close.
* @see ufieldpositer_open
* @stable ICU 55
*/
U_STABLE void U_EXPORT2
ufieldpositer_close(UFieldPositionIterator *fpositer);
/**
* Get information for the next field in the formatted string to which this
* UFieldPositionIterator currently applies, or return a negative value if
* there are no more fields.
* @param fpositer
* A pointer to the UFieldPositionIterator object containing iteration
* state for the format fields.
* @param beginIndex
* A pointer to an int32_t to receive information about the start offset
* of the field in the formatted string (undefined if the function
* returns a negative value). May be NULL if this information is not needed.
* @param endIndex
* A pointer to an int32_t to receive information about the end offset
* of the field in the formatted string (undefined if the function
* returns a negative value). May be NULL if this information is not needed.
* @return
* The field type (non-negative value), or a negative value if there are
* no more fields for which to provide information. If negative, then any
* values pointed to by beginIndex and endIndex are undefined.
*
* The values for field type depend on what type of formatter the
* UFieldPositionIterator has been set by; for a date formatter, the
* values from the UDateFormatField enum. For more information, see the
* descriptions of format functions that take a UFieldPositionIterator*
* parameter, such as {@link #udat_formatForFields}.
*
* @stable ICU 55
*/
U_STABLE int32_t U_EXPORT2
ufieldpositer_next(UFieldPositionIterator *fpositer,
int32_t *beginIndex, int32_t *endIndex);
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif
// uformattable.h
/*
********************************************************************************
* Copyright (C) 2013-2014, International Business Machines Corporation and others.
* All Rights Reserved.
********************************************************************************
*
* File UFORMATTABLE.H
*
* Modification History:
*
* Date Name Description
* 2013 Jun 7 srl New
********************************************************************************
*/
/**
* \file
* \brief C API: UFormattable is a thin wrapper for primitive types used for formatting and parsing.
*
* This is a C interface to the icu::Formattable class. Static functions on this class convert
* to and from this interface (via reinterpret_cast). Note that Formattables (and thus UFormattables)
* are mutable, and many operations (even getters) may actually modify the internal state. For this
* reason, UFormattables are not thread safe, and should not be shared between threads.
*
* See {@link unum_parseToUFormattable} for example code.
*/
#ifndef UFORMATTABLE_H
#define UFORMATTABLE_H
#if !UCONFIG_NO_FORMATTING
/**
* Enum designating the type of a UFormattable instance.
* Practically, this indicates which of the getters would return without conversion
* or error.
* @see icu::Formattable::Type
* @stable ICU 52
*/
typedef enum UFormattableType {
UFMT_DATE = 0, /**< ufmt_getDate() will return without conversion. @see ufmt_getDate */
UFMT_DOUBLE, /**< ufmt_getDouble() will return without conversion. @see ufmt_getDouble */
UFMT_LONG, /**< ufmt_getLong() will return without conversion. @see ufmt_getLong */
UFMT_STRING, /**< ufmt_getUChars() will return without conversion. @see ufmt_getUChars */
UFMT_ARRAY, /**< ufmt_getArrayLength() and ufmt_getArrayItemByIndex() will return the value. @see ufmt_getArrayLength @see ufmt_getArrayItemByIndex */
UFMT_INT64, /**< ufmt_getInt64() will return without conversion. @see ufmt_getInt64 */
UFMT_OBJECT, /**< ufmt_getObject() will return without conversion. @see ufmt_getObject */
UFMT_COUNT /**< Count of defined UFormattableType values */
} UFormattableType;
/**
* Opaque type representing various types of data which may be used for formatting
* and parsing operations. The ufmt_* functions take and return pointers to this type.
* @see icu::Formattable
* @stable ICU 52
*/
typedef void *UFormattable;
/**
* Initialize a UFormattable, to type UFMT_LONG, value 0.
* May return an error if memory allocation failed.
* See {@link unum_parseToUFormattable} for example code.
* @param status error code.
* @stable ICU 52
* @return the new UFormattable
* @see ufmt_close
* @see icu::Formattable::Formattable()
*/
U_STABLE UFormattable* U_EXPORT2
ufmt_open(UErrorCode* status);
/**
* Cleanup any additional memory allocated by this UFormattable.
* @param fmt the UFormattable object
* @stable ICU 52
* @see ufmt_open
*/
U_STABLE void U_EXPORT2
ufmt_close(UFormattable* fmt);
/**
* Return the type of this object.
* @param fmt the UFormattable object
* @param status status code - set to U_ILLEGAL_ARGUMENT_ERROR if the UFormattable contains data not supported by
* the API
* @return the value as a UFormattableType
* @see ufmt_isNumeric
* @see icu::Formattable::getType() const
* @stable ICU 52
*/
U_STABLE UFormattableType U_EXPORT2
ufmt_getType(const UFormattable* fmt, UErrorCode *status);
/**
* Return whether the object is numeric.
* Unlike most ufmt_ getters, this function takes no UErrorCode parameter.
* @param fmt the UFormattable object
* @return true if the object is a double, long, or int64 value, else false.
* @see ufmt_getType
* @see icu::Formattable::isNumeric() const
* @stable ICU 52
*/
U_STABLE UBool U_EXPORT2
ufmt_isNumeric(const UFormattable* fmt);
/**
* Gets the UDate value of this object. If the object is not of type UFMT_DATE,
* status is set to U_INVALID_FORMAT_ERROR and the return value is
* undefined.
* @param fmt the UFormattable object
* @param status the error code - any conversion or format errors
* @return the value
* @stable ICU 52
* @see icu::Formattable::getDate(UErrorCode&) const
*/
U_STABLE UDate U_EXPORT2
ufmt_getDate(const UFormattable* fmt, UErrorCode *status);
/**
* Gets the double value of this object. If the type is not a UFMT_DOUBLE, or
* if there are more significant digits than fit in a double type,
* a conversion is performed with possible loss of precision.
* If the type is UFMT_OBJECT and the
* object is a Measure, then the result of
* getNumber().getDouble(status) is returned. If this object is
* neither a numeric type nor a Measure, then 0 is returned and
* the status is set to U_INVALID_FORMAT_ERROR.
* @param fmt the UFormattable object
* @param status the error code - any conversion or format errors
* @return the value
* @stable ICU 52
* @see icu::Formattable::getDouble(UErrorCode&) const
*/
U_STABLE double U_EXPORT2
ufmt_getDouble(UFormattable* fmt, UErrorCode *status);
/**
* Gets the long (int32_t) value of this object. If the magnitude is too
* large to fit in a long, then the maximum or minimum long value,
* as appropriate, is returned and the status is set to
* U_INVALID_FORMAT_ERROR. If this object is of type UFMT_INT64 and
* it fits within a long, then no precision is lost. If it is of
* type kDouble or kDecimalNumber, then a conversion is performed, with
* truncation of any fractional part. If the type is UFMT_OBJECT and
* the object is a Measure, then the result of
* getNumber().getLong(status) is returned. If this object is
* neither a numeric type nor a Measure, then 0 is returned and
* the status is set to U_INVALID_FORMAT_ERROR.
* @param fmt the UFormattable object
* @param status the error code - any conversion or format errors
* @return the value
* @stable ICU 52
* @see icu::Formattable::getLong(UErrorCode&) const
*/
U_STABLE int32_t U_EXPORT2
ufmt_getLong(UFormattable* fmt, UErrorCode *status);
/**
* Gets the int64_t value of this object. If this object is of a numeric
* type and the magnitude is too large to fit in an int64, then
* the maximum or minimum int64 value, as appropriate, is returned
* and the status is set to U_INVALID_FORMAT_ERROR. If the
* magnitude fits in an int64, then a casting conversion is
* performed, with truncation of any fractional part. If the type
* is UFMT_OBJECT and the object is a Measure, then the result of
* getNumber().getDouble(status) is returned. If this object is
* neither a numeric type nor a Measure, then 0 is returned and
* the status is set to U_INVALID_FORMAT_ERROR.
*
* NOTE(review): the Measure case above names getDouble(), whereas the
* sibling ufmt_getLong documentation names getLong(); presumably getInt64()
* is meant here - confirm against the implementation.
* @param fmt the UFormattable object
* @param status the error code - any conversion or format errors
* @return the value
* @stable ICU 52
* @see icu::Formattable::getInt64(UErrorCode&) const
*/
U_STABLE int64_t U_EXPORT2
ufmt_getInt64(UFormattable* fmt, UErrorCode *status);
/**
* Returns a pointer to the UObject contained within this
* formattable (as a const void*), or NULL if this object
* is not of type UFMT_OBJECT.
* @param fmt the UFormattable object
* @param status the error code - any conversion or format errors
* @return the value as a const void*, or NULL if the object is not of type
* UFMT_OBJECT. It is a polymorphic C++ object.
* @stable ICU 52
* @see icu::Formattable::getObject() const
*/
U_STABLE const void *U_EXPORT2
ufmt_getObject(const UFormattable* fmt, UErrorCode *status);
/**
* Gets the string value of this object as a UChar string. If the type is not a
* string, status is set to U_INVALID_FORMAT_ERROR and a NULL pointer is returned.
* This function is not thread safe and may modify the UFormattable if need be to terminate the string.
* The returned pointer is not valid if any other functions are called on this UFormattable, or if the UFormattable is closed.
* @param fmt the UFormattable object
* @param len if non null, contains the string length on return
* @param status the error code - any conversion or format errors
* @return the null terminated string value - must not be referenced after any other functions are called on this UFormattable.
* @stable ICU 52
* @see icu::Formattable::getString(UnicodeString&)const
*/
U_STABLE const UChar* U_EXPORT2
ufmt_getUChars(UFormattable* fmt, int32_t *len, UErrorCode *status);
/**
* Get the number of array objects contained, if the object is of array type UFMT_ARRAY.
* @param fmt the UFormattable object
* @param status the error code - any conversion or format errors. U_ILLEGAL_ARGUMENT_ERROR if not an array type.
* @return the number of array objects or undefined if not an array type
* @stable ICU 52
* @see ufmt_getArrayItemByIndex
*/
U_STABLE int32_t U_EXPORT2
ufmt_getArrayLength(const UFormattable* fmt, UErrorCode *status);
/**
* Get the specified value from the array of UFormattables. Invalid if the object is not an array type UFMT_ARRAY
* @param fmt the UFormattable object
* @param n the index of the array item to return (0 based).
* @param status the error code - any conversion or format errors. Returns an error if n is out of bounds.
* @return the nth array value, only valid while the containing UFormattable is valid. NULL if not an array.
* @stable ICU 52
* @see ufmt_getArrayLength
* @see icu::Formattable::getArray(int32_t&, UErrorCode&) const
*/
U_STABLE UFormattable * U_EXPORT2
ufmt_getArrayItemByIndex(UFormattable* fmt, int32_t n, UErrorCode *status);
/**
* Returns a numeric string representation of the number contained within this
* formattable, or NULL if this object does not contain a numeric type.
* For values obtained by parsing, the returned decimal number retains
* the full precision and range of the original input, unconstrained by
* the limits of a double floating point or a 64 bit int.
*
* This function is not thread safe, and therefore is not declared const,
* even though it is logically const.
* The resulting buffer is owned by the UFormattable and is invalid if any other functions are
* called on the UFormattable.
*
* Possible errors include U_MEMORY_ALLOCATION_ERROR, and
* U_INVALID_STATE if the formattable object has not been set to
* a numeric type.
* @param fmt the UFormattable object
* @param len if non-null, on exit contains the string length (not including the terminating null)
* @param status the error code
* @return the character buffer as a NULL terminated string, which is owned by the object and must not be accessed if any other functions are called on this object.
* @stable ICU 52
* @see icu::Formattable::getDecimalNumber(UErrorCode&)
*/
U_STABLE const char * U_EXPORT2
ufmt_getDecNumChars(UFormattable *fmt, int32_t *len, UErrorCode *status);
#endif
#endif
// ugender.h
/*
*****************************************************************************************
* Copyright (C) 2010-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*****************************************************************************************
*/
#ifndef UGENDER_H
#define UGENDER_H
#if !UCONFIG_NO_FORMATTING
/**
* \file
* \brief C API: The purpose of this API is to compute the gender of a list as a
* whole given the gender of each element.
*
*/
/**
 * Gender values, used both for the individual elements of a list and for
 * the list as a whole.
 * @stable ICU 50
 */
typedef enum UGender {
    UGENDER_MALE   = 0, /**< Male gender. @stable ICU 50 */
    UGENDER_FEMALE = 1, /**< Female gender. @stable ICU 50 */
    UGENDER_OTHER  = 2  /**< Neutral gender. @stable ICU 50 */
} UGender;
/**
* Opaque UGenderInfo object for use in C programs.
* Only the forward declaration is visible here; pointers to it are obtained
* from ugender_getInstance() and passed to ugender_getListGender().
* @stable ICU 50
*/
struct UGenderInfo;
typedef struct UGenderInfo UGenderInfo;
/**
* Opens a new UGenderInfo object given locale.
* @param locale The locale (as a C string) for which the rules are desired.
* @param status A pointer to a UErrorCode to receive any errors.
* @return A UGenderInfo for the specified locale, or NULL if an error occurred.
* @stable ICU 50
* @see ugender_getListGender
*/
U_STABLE const UGenderInfo* U_EXPORT2
ugender_getInstance(const char *locale, UErrorCode *status);
/**
* Given a list, returns the gender of the list as a whole.
* @param genderinfo pointer that ugender_getInstance returns.
* @param genders the gender of each element in the list.
* @param size the size of the list.
* @param status A pointer to a UErrorCode to receive any errors.
* @return The gender of the list.
* @stable ICU 50
* @see ugender_getInstance
*/
U_STABLE UGender U_EXPORT2
ugender_getListGender(const UGenderInfo* genderinfo, const UGender *genders, int32_t size, UErrorCode *status);
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif
// ulocdata.h
/*
******************************************************************************
* *
* Copyright (C) 2003-2015, International Business Machines *
* Corporation and others. All Rights Reserved. *
* *
******************************************************************************
* file name: ulocdata.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003Oct21
* created by: Ram Viswanadha
*/
#ifndef __ULOCDATA_H__
#define __ULOCDATA_H__
/**
* \file
* \brief C API: Provides access to locale data.
*/
/**
* Forward declaration of the ULocaleData structure; its layout is not exposed here.
* @stable ICU 3.6
*/
struct ULocaleData;
/**
* A locale data object, opened with ulocdata_open() and closed with ulocdata_close().
* @stable ICU 3.6
*/
typedef struct ULocaleData ULocaleData;
/**
 * Selects which exemplar character set is requested.
 * @stable ICU 3.4
 */
typedef enum ULocaleDataExemplarSetType {
    ULOCDATA_ES_STANDARD    = 0, /**< Basic set. @stable ICU 3.4 */
    ULOCDATA_ES_AUXILIARY   = 1, /**< Auxiliary set. @stable ICU 3.4 */
    ULOCDATA_ES_INDEX       = 2, /**< Index Character set. @stable ICU 4.8 */
    ULOCDATA_ES_PUNCTUATION = 3, /**< Punctuation set. @stable ICU 51 */
    ULOCDATA_ES_COUNT       = 4  /**< One higher than the last valid type. @stable ICU 3.4 */
} ULocaleDataExemplarSetType;
/**
 * Selects which delimiter string is requested.
 * @stable ICU 3.4
 */
typedef enum ULocaleDataDelimiterType {
    ULOCDATA_QUOTATION_START     = 0, /**< Quotation start. @stable ICU 3.4 */
    ULOCDATA_QUOTATION_END       = 1, /**< Quotation end. @stable ICU 3.4 */
    ULOCDATA_ALT_QUOTATION_START = 2, /**< Alternate quotation start. @stable ICU 3.4 */
    ULOCDATA_ALT_QUOTATION_END   = 3, /**< Alternate quotation end. @stable ICU 3.4 */
    ULOCDATA_DELIMITER_COUNT     = 4  /**< One higher than the last valid type. @stable ICU 3.4 */
} ULocaleDataDelimiterType;
/**
* Opens a locale data object for the given locale.
*
* @param localeID Specifies the locale associated with this locale
* data object.
* @param status Pointer to error status code.
* @return Pointer to the opened locale data object.
* @see ulocdata_close
* @stable ICU 3.4
*/
U_STABLE ULocaleData* U_EXPORT2
ulocdata_open(const char *localeID, UErrorCode *status);
/**
* Closes a locale data object.
*
* @param uld The locale data object to close.
* @see ulocdata_open
* @stable ICU 3.4
*/
U_STABLE void U_EXPORT2
ulocdata_close(ULocaleData *uld);
/**
* Sets the "no substitute" attribute of the locale data
* object. If true, then any methods associated with the
* locale data object will return null when there is no
* data available for that method, given the locale ID
* supplied to ulocdata_open().
*
* @param uld The locale data object to set.
* @param setting Value of the "no substitute" attribute.
* @see ulocdata_getNoSubstitute
* @stable ICU 3.4
*/
U_STABLE void U_EXPORT2
ulocdata_setNoSubstitute(ULocaleData *uld, UBool setting);
/**
* Retrieves the current "no substitute" value of the locale data
* object. If true, then any methods associated with the
* locale data object will return null when there is no
* data available for that method, given the locale ID
* supplied to ulocdata_open().
*
* @param uld Pointer to the locale data object to query.
* @return UBool Value of the "no substitute" attribute.
* @see ulocdata_setNoSubstitute
* @stable ICU 3.4
*/
U_STABLE UBool U_EXPORT2
ulocdata_getNoSubstitute(ULocaleData *uld);
/**
* Returns the set of exemplar characters for a locale.
*
* @param uld Pointer to the locale data object from which the
* exemplar character set is to be retrieved.
* @param fillIn Pointer to a USet object to receive the
* exemplar character set for the given locale. Previous
* contents of fillIn are lost. If fillIn is NULL,
* then a new USet is created and returned. The caller
* owns the result and must dispose of it by calling
* uset_close.
* @param options Bitmask for options to apply to the exemplar pattern.
* Specify zero to retrieve the exemplar set as it is
* defined in the locale data. Specify
* USET_CASE_INSENSITIVE to retrieve a case-folded
* exemplar set. See uset_applyPattern for a complete
* list of valid options. The USET_IGNORE_SPACE bit is
* always set, regardless of the value of 'options'.
* @param extype Specifies the type of exemplar set to be retrieved
* (one of the ULocaleDataExemplarSetType values).
* @param status Pointer to an input-output error code value;
* must not be NULL. Will be set to U_MISSING_RESOURCE_ERROR
* if the requested data is not available.
* @return USet* Either fillIn, or if fillIn is NULL, a pointer to
* a newly-allocated USet that the user must close.
* In case of error, NULL is returned.
* @stable ICU 3.4
*/
U_STABLE USet* U_EXPORT2
ulocdata_getExemplarSet(ULocaleData *uld, USet *fillIn,
uint32_t options, ULocaleDataExemplarSetType extype, UErrorCode *status);
/**
* Returns one of the delimiter strings associated with a locale.
*
* @param uld Pointer to the locale data object from which the
* delimiter string is to be retrieved.
* @param type The type of delimiter to be retrieved
* (one of the ULocaleDataDelimiterType values).
* @param result A pointer to a buffer to receive the result.
* @param resultLength The maximum size of result.
* @param status Pointer to an error code value.
* @return int32_t The total buffer size needed; if greater than resultLength,
* the output was truncated.
* @stable ICU 3.4
*/
U_STABLE int32_t U_EXPORT2
ulocdata_getDelimiter(ULocaleData *uld, ULocaleDataDelimiterType type, UChar *result, int32_t resultLength, UErrorCode *status);
/**
 * Enumeration for representing the measurement systems.
 * @stable ICU 2.8
 */
typedef enum UMeasurementSystem {
    /** Measurement system specified by SI otherwise known as Metric system. @stable ICU 2.8 */
    UMS_SI = 0,
    /** Measurement system followed in the United States of America. @stable ICU 2.8 */
    UMS_US = 1,
    /** Mix of metric and imperial units used in Great Britain. @stable ICU 55 */
    UMS_UK = 2,
    /** Last enumerator; presumably a limit marker rather than a measurement system. */
    UMS_LIMIT = 3
} UMeasurementSystem;
/**
* Returns the measurement system used in the locale specified by the localeID.
* Please note that this API will change in ICU 3.6 and will use an ulocdata object.
*
* @param localeID The id of the locale for which the measurement system is to be retrieved.
* @param status Must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
* @return UMeasurementSystem the measurement system used in the locale.
* @stable ICU 2.8
*/
U_STABLE UMeasurementSystem U_EXPORT2
ulocdata_getMeasurementSystem(const char *localeID, UErrorCode *status);
/**
* Returns the normal business letter paper size for the locale.
* The units for the numbers are always in millimeters.
* For US, since 8.5 and 11 do not yield an integral value when converted to millimeters,
* the values are rounded off.
* So for A4 size paper the height and width are 297 mm and 210 mm respectively,
* and for US letter size the height and width are 279 mm and 216 mm respectively.
* Please note that this API will change in ICU 3.6 and will use an ulocdata object.
*
* @param localeID The id of the locale for which the paper size information is to be retrieved.
* @param height A pointer to int to receive the height information.
* @param width A pointer to int to receive the width information.
* @param status Must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
* @stable ICU 2.8
*/
U_STABLE void U_EXPORT2
ulocdata_getPaperSize(const char *localeID, int32_t *height, int32_t *width, UErrorCode *status);
/**
* Return the current CLDR version used by the library.
* @param versionArray fill-in that will receive the version number
* @param status error code - could be U_MISSING_RESOURCE_ERROR if the version was not found.
* @stable ICU 4.2
*/
U_STABLE void U_EXPORT2
ulocdata_getCLDRVersion(UVersionInfo versionArray, UErrorCode *status);
/**
* Returns locale display pattern associated with a locale.
*
* @param uld Pointer to the locale data object from which the
* locale display pattern is to be retrieved.
* @param pattern Buffer that receives the locale display pattern for the locale.
* @param patternCapacity the size of the buffer to store the locale display
* pattern with.
* @param status Must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
* @return the actual buffer size needed for localeDisplayPattern. If it's greater
* than patternCapacity, the returned pattern will be truncated.
*
* @stable ICU 4.2
*/
U_STABLE int32_t U_EXPORT2
ulocdata_getLocaleDisplayPattern(ULocaleData *uld,
UChar *pattern,
int32_t patternCapacity,
UErrorCode *status);
/**
* Returns locale separator associated with a locale.
*
* @param uld Pointer to the locale data object from which the
* locale separator is to be retrieved.
* @param separator Buffer that receives the locale separator for the locale.
* @param separatorCapacity the size of the buffer to store the locale
* separator with.
* @param status Must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
* @return the actual buffer size needed for localeSeparator. If it's greater
* than separatorCapacity, the returned separator will be truncated.
*
* @stable ICU 4.2
*/
U_STABLE int32_t U_EXPORT2
ulocdata_getLocaleSeparator(ULocaleData *uld,
UChar *separator,
int32_t separatorCapacity,
UErrorCode *status);
#endif
// umsg.h
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
* Copyright (C) 2010 , Yahoo! Inc.
********************************************************************
*
* file name: umsg.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* Change history:
*
* 08/5/2001 Ram Added C wrappers for C++ API.
********************************************************************/
#ifndef UMSG_H
#define UMSG_H
#if !UCONFIG_NO_FORMATTING
#include MessageFormat prepares strings for display to users,
* with optional arguments (variables/placeholders).
* The arguments can occur in any order, which is necessary for translation
* into languages with different grammars.
*
* The opaque UMessageFormat type is a thin C wrapper around
* a C++ MessageFormat. It is constructed from a pattern string
* with arguments in {curly braces} which will be replaced by formatted values.
*
* Currently, the C API supports only numbered arguments.
*
* For details about the pattern syntax and behavior,
* especially about the ASCII apostrophe vs. the
* real apostrophe (single quote) character \htmlonly’\endhtmlonly (U+2019),
* see the C++ MessageFormat class documentation.
*
* Here are some examples of C API usage:
* Example 1:
*
* Example 2:
* Note it is not guaranteed that the returned pattern
* is indeed a valid pattern. The only effect is to convert
* between patterns having different quoting semantics.
*
* @param pattern the 'apostrophe-friendly' patttern to convert
* @param patternLength the length of pattern, or -1 if unknown and pattern is null-terminated
* @param dest the buffer for the result, or NULL if preflight only
* @param destCapacity the length of the buffer, or 0 if preflighting
* @param ec the error code
* @return the length of the resulting text, not including trailing null
* if buffer has room for the trailing null, it is provided, otherwise
* not
* @stable ICU 3.4
*/
U_STABLE int32_t U_EXPORT2
umsg_autoQuoteApostrophe(const UChar* pattern,
int32_t patternLength,
UChar* dest,
int32_t destCapacity,
UErrorCode* ec);
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif
// unirepl.h
// No supported content
// unum.h
/*
*******************************************************************************
* Copyright (C) 1997-2015, International Business Machines Corporation and others.
* All Rights Reserved.
* Modification History:
*
* Date Name Description
* 06/24/99 helena Integrated Alan's NF enhancements and Java2 bug fixes
*******************************************************************************
*/
#ifndef _UNUM
#define _UNUM
#if !UCONFIG_NO_FORMATTING
/**
* \file
* \brief C API: NumberFormat
*
*
* UNumberFormat helps you to format and parse numbers for any locale.
* Your code can be completely independent of the locale conventions
* for decimal points, thousands-separators, or even the particular
* decimal digits used, or whether the number format is even decimal.
* There are different number format styles like decimal, currency,
* percent and spellout.
*
* To format a number for the current Locale, use one of the static
* factory methods:
*
* Use a pattern to create either a DecimalFormat or a RuleBasedNumberFormat
* formatter. The pattern must conform to the syntax defined for those
* formatters.
*
* You can also control the display of numbers with functions such as
* unum_getAttributes() and unum_setAttributes(), which let you set the
* minimum fraction digits, grouping, etc.
* @see UNumberFormatAttributes for more details
*
* You can also use forms of the parse and format methods with
* ParsePosition and UFieldPosition to allow you to:
*
* It is also possible to change or set the symbols used for a particular
* locale, such as the currency symbol, the grouping separator, the monetary
* separator, etc., by making use of the functions unum_setSymbols() and unum_getSymbols().
*/
/** A number formatter.
* For usage in C programs.
* Declared as a plain void*, so the formatter's contents are not visible to callers;
* instances are created with unum_open() and released with unum_close().
* @stable ICU 2.0
*/
typedef void* UNumberFormat;
/**
 * The possible number format styles.
 * @stable ICU 2.0
 */
typedef enum UNumberFormatStyle {
    /** Decimal format defined by a pattern string. @stable ICU 3.0 */
    UNUM_PATTERN_DECIMAL = 0,

    /** Decimal format ("normal" style). @stable ICU 2.0 */
    UNUM_DECIMAL = 1,

    /**
     * Currency format (generic).
     * Defaults to UNUM_CURRENCY_STANDARD style
     * (using currency symbol, e.g., "$1.00", with non-accounting
     * style for negative values e.g. using minus sign).
     * The specific style may be specified using the -cf- locale key.
     * @stable ICU 2.0
     */
    UNUM_CURRENCY = 2,

    /** Percent format. @stable ICU 2.0 */
    UNUM_PERCENT = 3,

    /** Scientific format. @stable ICU 2.1 */
    UNUM_SCIENTIFIC = 4,

    /**
     * Spellout rule-based format. The default ruleset can be specified/changed using
     * unum_setTextAttribute with UNUM_DEFAULT_RULESET; the available public rulesets
     * can be listed using unum_getTextAttribute with UNUM_PUBLIC_RULESETS.
     * @stable ICU 2.0
     */
    UNUM_SPELLOUT = 5,

    /**
     * Ordinal rule-based format. The default ruleset can be specified/changed using
     * unum_setTextAttribute with UNUM_DEFAULT_RULESET; the available public rulesets
     * can be listed using unum_getTextAttribute with UNUM_PUBLIC_RULESETS.
     * @stable ICU 3.0
     */
    UNUM_ORDINAL = 6,

    /** Duration rule-based format. @stable ICU 3.0 */
    UNUM_DURATION = 7,

    /** Numbering system rule-based format. @stable ICU 4.2 */
    UNUM_NUMBERING_SYSTEM = 8,

    /** Rule-based format defined by a pattern string. @stable ICU 3.0 */
    UNUM_PATTERN_RULEBASED = 9,

    /** Currency format with an ISO currency code, e.g., "USD1.00". @stable ICU 4.8 */
    UNUM_CURRENCY_ISO = 10,

    /**
     * Currency format with a pluralized currency name,
     * e.g., "1.00 US dollar" and "3.00 US dollars".
     * @stable ICU 4.8
     */
    UNUM_CURRENCY_PLURAL = 11,

    /**
     * Currency format for accounting, e.g., "($3.00)" for
     * negative currency amount instead of "-$3.00" (#UNUM_CURRENCY).
     * Overrides any style specified using -cf- key in locale.
     * @stable ICU 53
     */
    UNUM_CURRENCY_ACCOUNTING = 12,

    /**
     * Currency format with a currency symbol given CASH usage, e.g.,
     * "NT$3" instead of "NT$3.23".
     * @stable ICU 54
     */
    UNUM_CASH_CURRENCY = 13,

    /**
     * One more than the highest number format style constant.
     * NOTE(review): values 14-16 have no enumerator in this header; presumably
     * they belong to style constants that are not exposed here.
     * @stable ICU 4.8
     */
    UNUM_FORMAT_STYLE_COUNT = 17,

    /** Default format. @stable ICU 2.0 */
    UNUM_DEFAULT = UNUM_DECIMAL,

    /** Alias for UNUM_PATTERN_DECIMAL. @stable ICU 3.0 */
    UNUM_IGNORE = UNUM_PATTERN_DECIMAL
} UNumberFormatStyle;
/**
 * The possible number format rounding modes.
 * @stable ICU 2.0
 */
typedef enum UNumberFormatRoundingMode {
    UNUM_ROUND_CEILING     = 0,
    UNUM_ROUND_FLOOR       = 1,
    UNUM_ROUND_DOWN        = 2,
    UNUM_ROUND_UP          = 3,
    /** Half-even rounding. @stable ICU 3.8 */
    UNUM_ROUND_HALFEVEN    = 4,
    UNUM_ROUND_HALFDOWN    = 5,
    UNUM_ROUND_HALFUP      = 6,
    /**
     * ROUND_UNNECESSARY reports an error if formatted result is not exact.
     * @stable ICU 4.8
     */
    UNUM_ROUND_UNNECESSARY = 7
} UNumberFormatRoundingMode;
/**
 * The possible number format pad positions.
 * @stable ICU 2.0
 */
typedef enum UNumberFormatPadPosition {
    UNUM_PAD_BEFORE_PREFIX = 0,
    UNUM_PAD_AFTER_PREFIX  = 1,
    UNUM_PAD_BEFORE_SUFFIX = 2,
    UNUM_PAD_AFTER_SUFFIX  = 3
} UNumberFormatPadPosition;
/**
 * Constants for specifying short or long format.
 * @stable ICU 51
 */
typedef enum UNumberCompactStyle {
    UNUM_SHORT = 0, /**< @stable ICU 51 */
    UNUM_LONG  = 1  /**< @stable ICU 51 */
} UNumberCompactStyle;
/**
 * Constants for specifying currency spacing.
 * @stable ICU 4.8
 */
typedef enum UCurrencySpacing {
    UNUM_CURRENCY_MATCH             = 0, /**< @stable ICU 4.8 */
    UNUM_CURRENCY_SURROUNDING_MATCH = 1, /**< @stable ICU 4.8 */
    UNUM_CURRENCY_INSERT            = 2, /**< @stable ICU 4.8 */
    UNUM_CURRENCY_SPACING_COUNT     = 3  /**< Count of the constants above. @stable ICU 4.8 */
} UCurrencySpacing;
/**
 * FieldPosition and UFieldPosition selectors for format fields
 * defined by NumberFormat and UNumberFormat.
 * @stable ICU 49
 */
typedef enum UNumberFormatFields {
    UNUM_INTEGER_FIELD            = 0,  /**< @stable ICU 49 */
    UNUM_FRACTION_FIELD           = 1,  /**< @stable ICU 49 */
    UNUM_DECIMAL_SEPARATOR_FIELD  = 2,  /**< @stable ICU 49 */
    UNUM_EXPONENT_SYMBOL_FIELD    = 3,  /**< @stable ICU 49 */
    UNUM_EXPONENT_SIGN_FIELD      = 4,  /**< @stable ICU 49 */
    UNUM_EXPONENT_FIELD           = 5,  /**< @stable ICU 49 */
    UNUM_GROUPING_SEPARATOR_FIELD = 6,  /**< @stable ICU 49 */
    UNUM_CURRENCY_FIELD           = 7,  /**< @stable ICU 49 */
    UNUM_PERCENT_FIELD            = 8,  /**< @stable ICU 49 */
    UNUM_PERMILL_FIELD            = 9,  /**< @stable ICU 49 */
    UNUM_SIGN_FIELD               = 10, /**< @stable ICU 49 */
    UNUM_FIELD_COUNT              = 11  /**< Count of the field constants above. @stable ICU 49 */
} UNumberFormatFields;
/**
* Create and return a new UNumberFormat for formatting and parsing
* numbers. A UNumberFormat may be used to format numbers by calling
* unum_format(), and to parse numbers by calling unum_parse().
* The caller must call unum_close() when done to release resources
* used by this object.
* @param style The type of number format to open: one of
* UNUM_DECIMAL, UNUM_CURRENCY, UNUM_PERCENT, UNUM_SCIENTIFIC,
* UNUM_CURRENCY_ISO, UNUM_CURRENCY_PLURAL, UNUM_SPELLOUT,
* UNUM_ORDINAL, UNUM_DURATION, UNUM_NUMBERING_SYSTEM,
* UNUM_PATTERN_DECIMAL, UNUM_PATTERN_RULEBASED, or UNUM_DEFAULT.
* If UNUM_PATTERN_DECIMAL or UNUM_PATTERN_RULEBASED is passed then the
* number format is opened using the given pattern, which must conform
* to the syntax described in DecimalFormat or RuleBasedNumberFormat,
* respectively.
* @param pattern A pattern specifying the format to use.
* This parameter is ignored unless the style is
* UNUM_PATTERN_DECIMAL or UNUM_PATTERN_RULEBASED.
* @param patternLength The number of characters in the pattern, or -1
* if null-terminated. This parameter is ignored unless the style is
* UNUM_PATTERN_DECIMAL or UNUM_PATTERN_RULEBASED.
* @param locale A locale identifier to use to determine formatting
* and parsing conventions, or NULL to use the default locale.
* @param parseErr A pointer to a UParseError struct to receive the
* details of any parsing errors, or NULL if no parsing error details
* are desired.
* @param status A pointer to an input-output UErrorCode.
* @return A pointer to a newly created UNumberFormat, or NULL if an
* error occurred.
* @see unum_close
* @see DecimalFormat
* @stable ICU 2.0
*/
U_STABLE UNumberFormat* U_EXPORT2
unum_open( UNumberFormatStyle style,
const UChar* pattern,
int32_t patternLength,
const char* locale,
UParseError* parseErr,
UErrorCode* status);
/**
* Close a UNumberFormat.
* Once closed, a UNumberFormat may no longer be used.
* @param fmt The formatter to close.
* @see unum_open
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
unum_close(UNumberFormat* fmt);
/**
* Open a copy of a UNumberFormat.
* This function performs a deep copy.
* @param fmt The format to copy.
* @param status A pointer to a UErrorCode to receive any errors.
* @return A pointer to a UNumberFormat identical to fmt.
* @stable ICU 2.0
*/
U_STABLE UNumberFormat* U_EXPORT2
unum_clone(const UNumberFormat *fmt,
UErrorCode *status);
/**
* Format an integer using a UNumberFormat.
* The integer will be formatted according to the UNumberFormat's locale.
* @param fmt The formatter to use.
* @param number The number to format.
* @param result A pointer to a buffer to receive the NULL-terminated formatted number. If
* the formatted number fits into result but cannot be NULL-terminated (length == resultLength)
* then the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the formatted number
* doesn't fit into result then the error code is set to U_BUFFER_OVERFLOW_ERROR.
* @param resultLength The maximum size of result.
* @param pos A pointer to a UFieldPosition. On input, pos->field
* is read. On output, pos->beginIndex and pos->endIndex indicate
* the beginning and ending indices of field number pos->field, if such
* a field exists. This parameter may be NULL, in which case it is ignored.
* @param status A pointer to a UErrorCode to receive any errors.
* @return The total buffer size needed; if greater than resultLength, the output was truncated.
* @see unum_formatInt64
* @see unum_formatDouble
* @see unum_parse
* @see unum_parseInt64
* @see unum_parseDouble
* @see UFieldPosition
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
unum_format( const UNumberFormat* fmt,
int32_t number,
UChar* result,
int32_t resultLength,
UFieldPosition *pos,
UErrorCode* status);
/**
* Format an int64 using a UNumberFormat.
* The int64 will be formatted according to the UNumberFormat's locale.
* @param fmt The formatter to use.
* @param number The number to format.
* @param result A pointer to a buffer to receive the NULL-terminated formatted number. If
* the formatted number fits into result but cannot be NULL-terminated (length == resultLength)
* then the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the formatted number
* doesn't fit into result then the error code is set to U_BUFFER_OVERFLOW_ERROR.
* @param resultLength The maximum size of result.
* @param pos A pointer to a UFieldPosition. On input, pos->field
* is read. On output, pos->beginIndex and pos->endIndex indicate
* the beginning and ending indices of field number pos->field, if such
* a field exists. This parameter may be NULL, in which case it is ignored.
* @param status A pointer to a UErrorCode to receive any errors.
* @return The total buffer size needed; if greater than resultLength, the output was truncated.
* @see unum_format
* @see unum_formatDouble
* @see unum_parse
* @see unum_parseInt64
* @see unum_parseDouble
* @see UFieldPosition
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
unum_formatInt64(const UNumberFormat *fmt,
int64_t number,
UChar* result,
int32_t resultLength,
UFieldPosition *pos,
UErrorCode* status);
/**
* Format a double using a UNumberFormat.
* The double will be formatted according to the UNumberFormat's locale.
* @param fmt The formatter to use.
* @param number The number to format.
* @param result A pointer to a buffer to receive the NULL-terminated formatted number. If
* the formatted number fits into result but cannot be NULL-terminated (length == resultLength)
* then the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the formatted number
* doesn't fit into result then the error code is set to U_BUFFER_OVERFLOW_ERROR.
* @param resultLength The maximum size of result.
* @param pos A pointer to a UFieldPosition. On input, pos->field
* is read. On output, pos->beginIndex and pos->endIndex indicate
* the beginning and ending indices of field number pos->field, if such
* a field exists. This parameter may be NULL, in which case it is ignored.
* @param status A pointer to a UErrorCode to receive any errors.
* @return The total buffer size needed; if greater than resultLength, the output was truncated.
* @see unum_format
* @see unum_formatInt64
* @see unum_parse
* @see unum_parseInt64
* @see unum_parseDouble
* @see UFieldPosition
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
unum_formatDouble( const UNumberFormat* fmt,
double number,
UChar* result,
int32_t resultLength,
UFieldPosition *pos, /* 0 if ignore */
UErrorCode* status);
/**
* Format a decimal number using a UNumberFormat.
* The number will be formatted according to the UNumberFormat's locale.
* The syntax of the input number is a "numeric string"
* as defined in the Decimal Arithmetic Specification, available at
* http://speleotrove.com/decimal
* @param fmt The formatter to use.
* @param number The number to format.
* @param length The length of the input number, or -1 if the input is nul-terminated.
* @param result A pointer to a buffer to receive the NULL-terminated formatted number. If
* the formatted number fits into result but cannot be NULL-terminated (length == resultLength)
* then the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the formatted number
* doesn't fit into result then the error code is set to U_BUFFER_OVERFLOW_ERROR.
* @param resultLength The maximum size of result.
* @param pos A pointer to a UFieldPosition. On input, pos->field
* is read. On output, pos->beginIndex and pos->endIndex indicate
* the beginning and ending indices of field number pos->field, if such
* a field exists. This parameter may be NULL, in which case it is ignored.
* @param status A pointer to a UErrorCode to receive any errors.
* @return The total buffer size needed; if greater than resultLength, the output was truncated.
* @see unum_format
* @see unum_formatInt64
* @see unum_parse
* @see unum_parseInt64
* @see unum_parseDouble
* @see UFieldPosition
* @stable ICU 4.4
*/
U_STABLE int32_t U_EXPORT2
unum_formatDecimal( const UNumberFormat* fmt,
const char * number,
int32_t length,
UChar* result,
int32_t resultLength,
UFieldPosition *pos, /* 0 if ignore */
UErrorCode* status);
/**
* Format a double currency amount using a UNumberFormat.
* The double will be formatted according to the UNumberFormat's locale.
* @param fmt the formatter to use
* @param number the number to format
* @param currency the 3-letter null-terminated ISO 4217 currency code
* @param result A pointer to a buffer to receive the NULL-terminated formatted number. If
* the formatted number fits into result but cannot be NULL-terminated (length == resultLength)
* then the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the formatted number
* doesn't fit into result then the error code is set to U_BUFFER_OVERFLOW_ERROR.
* @param resultLength the maximum number of UChars to write to result
* @param pos a pointer to a UFieldPosition. On input,
* pos->field is read. On output, pos->beginIndex and
* pos->endIndex indicate the beginning and ending indices of
* field number pos->field, if such a field exists. This
* parameter may be NULL, in which case it is ignored.
* @param status a pointer to an input-output UErrorCode
* @return the total buffer size needed; if greater than resultLength,
* the output was truncated.
* @see unum_formatDouble
* @see unum_parseDoubleCurrency
* @see UFieldPosition
* @stable ICU 3.0
*/
U_STABLE int32_t U_EXPORT2
unum_formatDoubleCurrency(const UNumberFormat* fmt,
double number,
UChar* currency,
UChar* result,
int32_t resultLength,
UFieldPosition* pos,
UErrorCode* status);
/**
* Format a UFormattable into a string.
* @param fmt the formatter to use
* @param number the number to format, as a UFormattable
* @param result A pointer to a buffer to receive the NULL-terminated formatted number. If
* the formatted number fits into result but cannot be NULL-terminated (length == resultLength)
* then the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the formatted number
* doesn't fit into result then the error code is set to U_BUFFER_OVERFLOW_ERROR.
* @param resultLength the maximum number of UChars to write to result
* @param pos a pointer to a UFieldPosition. On input,
* pos->field is read. On output, pos->beginIndex and
* pos->endIndex indicate the beginning and ending indices of
* field number pos->field, if such a field exists. This
* parameter may be NULL, in which case it is ignored.
* @param status a pointer to an input-output UErrorCode
* @return the total buffer size needed; if greater than resultLength,
* the output was truncated. Will return 0 on error.
* @see unum_parseToUFormattable
* @stable ICU 52
*/
U_STABLE int32_t U_EXPORT2
unum_formatUFormattable(const UNumberFormat* fmt,
const UFormattable *number,
UChar *result,
int32_t resultLength,
UFieldPosition *pos,
UErrorCode *status);
/**
* Parse a string into an integer using a UNumberFormat.
* The string will be parsed according to the UNumberFormat's locale.
* Note: parsing is not supported for styles UNUM_DECIMAL_COMPACT_SHORT
* and UNUM_DECIMAL_COMPACT_LONG.
* (NOTE(review): these two style constants are not declared in this header's
* UNumberFormatStyle enum.)
* @param fmt The formatter to use.
* @param text The text to parse.
* @param textLength The length of text, or -1 if null-terminated.
* @param parsePos If not NULL, on input a pointer to an integer specifying the offset at which
* to begin parsing. If not NULL, on output the offset at which parsing ended.
* @param status A pointer to a UErrorCode to receive any errors.
* @return The value of the parsed integer.
* @see unum_parseInt64
* @see unum_parseDouble
* @see unum_format
* @see unum_formatInt64
* @see unum_formatDouble
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
unum_parse( const UNumberFormat* fmt,
const UChar* text,
int32_t textLength,
int32_t *parsePos /* 0 = start */,
UErrorCode *status);
/**
* Parse a string into an int64 using a UNumberFormat.
* The string will be parsed according to the UNumberFormat's locale.
* Note: parsing is not supported for styles UNUM_DECIMAL_COMPACT_SHORT
* and UNUM_DECIMAL_COMPACT_LONG.
* (NOTE(review): these two style constants are not declared in this header's
* UNumberFormatStyle enum.)
* @param fmt The formatter to use.
* @param text The text to parse.
* @param textLength The length of text, or -1 if null-terminated.
* @param parsePos If not NULL, on input a pointer to an integer specifying the offset at which
* to begin parsing. If not NULL, on output the offset at which parsing ended.
* @param status A pointer to a UErrorCode to receive any errors.
* @return The value of the parsed integer, as an int64_t.
* @see unum_parse
* @see unum_parseDouble
* @see unum_format
* @see unum_formatInt64
* @see unum_formatDouble
* @stable ICU 2.8
*/
U_STABLE int64_t U_EXPORT2
unum_parseInt64(const UNumberFormat* fmt,
const UChar* text,
int32_t textLength,
int32_t *parsePos /* 0 = start */,
UErrorCode *status);
/**
 * Parse a string into a double using a UNumberFormat.
 * The string will be parsed according to the UNumberFormat's locale.
 * Note: parsing is not supported for styles UNUM_DECIMAL_COMPACT_SHORT
 * and UNUM_DECIMAL_COMPACT_LONG.
 * @param fmt The formatter to use.
 * @param text The text to parse.
 * @param textLength The length of text, or -1 if null-terminated.
 * @param parsePos If not NULL, on input a pointer to an integer specifying the
 * offset at which to begin parsing, and on output the offset at which
 * parsing ended.
 * @param status A pointer to an UErrorCode to receive any errors.
 * @return The value of the parsed double.
 * @see unum_parse
 * @see unum_parseInt64
 * @see unum_format
 * @see unum_formatInt64
 * @see unum_formatDouble
 * @stable ICU 2.0
 */
U_STABLE double U_EXPORT2
unum_parseDouble( const UNumberFormat* fmt,
const UChar* text,
int32_t textLength,
int32_t *parsePos /* 0 = start */,
UErrorCode *status);
/**
 * Parse a number from a string into an unformatted numeric string using a
 * UNumberFormat.
 * The input string will be parsed according to the UNumberFormat's locale.
 * The syntax of the output is a "numeric string" as defined in the Decimal
 * Arithmetic Specification, available at http://speleotrove.com/decimal
 * Note: parsing is not supported for styles UNUM_DECIMAL_COMPACT_SHORT
 * and UNUM_DECIMAL_COMPACT_LONG.
 * @param fmt The formatter to use.
 * @param text The text to parse.
 * @param textLength The length of text, or -1 if null-terminated.
 * @param parsePos If not NULL, on input a pointer to an integer specifying the
 * offset at which to begin parsing, and on output the offset at which
 * parsing ended.
 * @param outBuf A (char *) buffer to receive the parsed number as a string.
 * The output string will be nul-terminated if there is sufficient
 * space.
 * @param outBufLength The size of the output buffer. May be zero, in which
 * case the outBuf pointer may be NULL, and the function will return
 * the size of the output string.
 * @param status A pointer to an UErrorCode to receive any errors.
 * @return The length of the output string, not including any terminating nul.
 * @see unum_parse
 * @see unum_parseInt64
 * @see unum_format
 * @see unum_formatInt64
 * @see unum_formatDouble
 * @stable ICU 4.4
 */
U_STABLE int32_t U_EXPORT2
unum_parseDecimal(const UNumberFormat* fmt,
const UChar* text,
int32_t textLength,
int32_t *parsePos /* 0 = start */,
char *outBuf,
int32_t outBufLength,
UErrorCode *status);
/**
 * Parse a string into a double and a currency using a UNumberFormat.
 * The string will be parsed according to the UNumberFormat's locale.
 * @param fmt The formatter to use.
 * @param text The text to parse.
 * @param textLength The length of text, or -1 if null-terminated.
 * @param parsePos A pointer to an offset index into text at which to
 * begin parsing. On output, *parsePos will point after the last
 * parsed character. This parameter may be NULL, in which case parsing
 * begins at offset 0.
 * @param currency A pointer to the buffer to receive the parsed
 * null-terminated currency. This buffer must have a capacity of at
 * least 4 UChars.
 * @param status A pointer to an input-output UErrorCode.
 * @return The parsed double.
 * @see unum_parseDouble
 * @see unum_formatDoubleCurrency
 * @stable ICU 3.0
 */
U_STABLE double U_EXPORT2
unum_parseDoubleCurrency(const UNumberFormat* fmt,
const UChar* text,
int32_t textLength,
int32_t* parsePos, /* 0 = start */
UChar* currency,
UErrorCode* status);
/**
 * Parse a UChar string into a UFormattable.
 * Example code:
 * \snippet test/cintltst/cnumtst.c unum_parseToUFormattable
 * Note: parsing is not supported for styles UNUM_DECIMAL_COMPACT_SHORT
 * and UNUM_DECIMAL_COMPACT_LONG.
 * @param fmt The formatter to use.
 * @param result The UFormattable to hold the result. If NULL, a new
 * UFormattable will be allocated (which the caller must close with
 * ufmt_close).
 * @param text The text to parse.
 * @param textLength The length of text, or -1 if null-terminated.
 * @param parsePos A pointer to an offset index into text at which to
 * begin parsing. On output, *parsePos will point after the last
 * parsed character. This parameter may be NULL, in which case parsing
 * begins at offset 0.
 * @param status A pointer to an input-output UErrorCode.
 * @return The UFormattable. Will be ==result unless NULL was passed in for
 * result, in which case it will be the newly opened UFormattable.
 * @see ufmt_getType
 * @see ufmt_close
 * @stable ICU 52
 */
U_STABLE UFormattable* U_EXPORT2
unum_parseToUFormattable(const UNumberFormat* fmt,
UFormattable *result,
const UChar* text,
int32_t textLength,
int32_t* parsePos, /* 0 = start */
UErrorCode* status);
/**
 * Set the pattern used by a UNumberFormat. This can only be used
 * on a DecimalFormat; other formats return U_UNSUPPORTED_ERROR
 * in the status.
 * @param format The formatter to set.
 * @param localized TRUE if the pattern is localized, FALSE otherwise.
 * @param pattern The new pattern.
 * @param patternLength The length of pattern, or -1 if null-terminated.
 * @param parseError A pointer to UParseError to receive information
 * about errors that occurred during parsing, or NULL if no parse
 * error information is desired.
 * @param status A pointer to an input-output UErrorCode.
 * @see unum_toPattern
 * @see DecimalFormat
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2
unum_applyPattern( UNumberFormat *format,
UBool localized,
const UChar *pattern,
int32_t patternLength,
UParseError *parseError,
UErrorCode *status
);
/**
 * Get a locale for which decimal formatting patterns are available.
 * A UNumberFormat in a locale returned by this function will perform the
 * correct formatting and parsing for the locale. The results of this call
 * are not valid for rule-based number formats.
 * @param localeIndex The index of the desired locale.
 * @return A locale for which number formatting patterns are available, or
 * 0 (a null pointer) if none.
 * @see unum_countAvailable
 * @stable ICU 2.0
 */
U_STABLE const char* U_EXPORT2
unum_getAvailable(int32_t localeIndex);
/**
 * Determine how many locales have decimal formatting patterns available.
 * The results of this call are not valid for rule-based number formats.
 * This function is useful for determining the loop ending condition for
 * calls to {@link #unum_getAvailable }.
 * @return The number of locales for which decimal formatting patterns are
 * available.
 * @see unum_getAvailable
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
unum_countAvailable(void);
#if UCONFIG_HAVE_PARSEALLINPUT
/*
 * UNumberFormatAttributeValue cannot be wrapped in #ifndef U_HIDE_INTERNAL_API
 * because the type is needed for variable declarations in header files.
 */
/**
 * Internal attribute-value type; only declared when
 * UCONFIG_HAVE_PARSEALLINPUT is enabled.
 * @internal
 */
typedef enum UNumberFormatAttributeValue {
    /** The single, hidden enumerator of this type. @internal */
    UNUM_FORMAT_ATTRIBUTE_VALUE_HIDDEN = 0
} UNumberFormatAttributeValue;
#endif /* UCONFIG_HAVE_PARSEALLINPUT */
/**
 * The possible UNumberFormat numeric attributes.
 *
 * Values 0..0x0FFF are ordinary (non-boolean) attributes; values from 0x1000
 * up to (but not including) UNUM_LIMIT_BOOLEAN_ATTRIBUTE are boolean
 * attributes. The numeric values are part of the binary interface and must
 * not change.
 * @stable ICU 2.0
 */
typedef enum UNumberFormatAttribute {
    /** Parse integers only */
    UNUM_PARSE_INT_ONLY,
    /** Use grouping separator */
    UNUM_GROUPING_USED,
    /** Always show decimal point */
    UNUM_DECIMAL_ALWAYS_SHOWN,
    /** Maximum integer digits */
    UNUM_MAX_INTEGER_DIGITS,
    /** Minimum integer digits */
    UNUM_MIN_INTEGER_DIGITS,
    /** Integer digits */
    UNUM_INTEGER_DIGITS,
    /** Maximum fraction digits */
    UNUM_MAX_FRACTION_DIGITS,
    /** Minimum fraction digits */
    UNUM_MIN_FRACTION_DIGITS,
    /** Fraction digits */
    UNUM_FRACTION_DIGITS,
    /** Multiplier */
    UNUM_MULTIPLIER,
    /** Grouping size */
    UNUM_GROUPING_SIZE,
    /** Rounding Mode */
    UNUM_ROUNDING_MODE,
    /** Rounding increment */
    UNUM_ROUNDING_INCREMENT,
    /** The width to which the output of format() is padded. */
    UNUM_FORMAT_WIDTH = 13,
    /** The position at which padding will take place. */
    UNUM_PADDING_POSITION = 14,
    /** Secondary grouping size */
    UNUM_SECONDARY_GROUPING_SIZE = 15,
    /** Use significant digits
     * @stable ICU 3.0 */
    UNUM_SIGNIFICANT_DIGITS_USED = 16,
    /** Minimum significant digits
     * @stable ICU 3.0 */
    UNUM_MIN_SIGNIFICANT_DIGITS = 17,
    /** Maximum significant digits
     * @stable ICU 3.0 */
    UNUM_MAX_SIGNIFICANT_DIGITS = 18,
    /** Lenient parse mode used by rule-based formats.
     * @stable ICU 3.0 */
    UNUM_LENIENT_PARSE = 19,
#if UCONFIG_HAVE_PARSEALLINPUT
    /** Consume all input. This is an internal ICU API. Do not use.
     * @internal */
    UNUM_PARSE_ALL_INPUT = 20,
#endif
    /** Scale, which adjusts the position of the decimal point when formatting.
     * Amounts are multiplied by 10 ^ (scale) before they are formatted.
     * The default value for the scale is 0 (no adjustment).
     *
     * Example: setting the scale to 3, 123 formats as "123,000"
     * Example: setting the scale to -4, 123 formats as "0.0123"
     *
     * @stable ICU 51 */
    UNUM_SCALE = 21,
    /**
     * If this attribute is set to 0, it is set to UNUM_CURRENCY_STANDARD purpose,
     * otherwise it is UNUM_CURRENCY_CASH purpose.
     * Default: 0 (UNUM_CURRENCY_STANDARD purpose)
     * @stable ICU 54
     */
    UNUM_CURRENCY_USAGE = 23,
    /* The following cannot be #ifndef U_HIDE_INTERNAL_API, needed in .h file variable declarations */
    /** One below the first bitfield-boolean item.
     * All items after this one are stored in boolean form.
     * @internal */
    UNUM_MAX_NONBOOLEAN_ATTRIBUTE = 0x0FFF,
    /** If 1, specifies that if setting the "max integer digits" attribute would
     * truncate a value, set an error status rather than silently truncating.
     * For example, formatting the value 1234 with 4 max int digits would
     * succeed, but formatting 12345 would fail. There is no effect on parsing.
     * Default: 0 (not set)
     * @stable ICU 50
     */
    UNUM_FORMAT_FAIL_IF_MORE_THAN_MAX_DIGITS = 0x1000,
    /**
     * If this attribute is set to 1, specifies that, if the pattern doesn't
     * contain an exponent, the exponent will not be parsed. If the pattern
     * does contain an exponent, this attribute has no effect.
     * Has no effect on formatting.
     * Default: 0 (unset)
     * @stable ICU 50
     */
    UNUM_PARSE_NO_EXPONENT,
    /**
     * If this attribute is set to 1, specifies that, if the pattern contains a
     * decimal mark the input is required to have one. If this attribute is
     * set to 0, specifies that input does not have to contain a decimal mark.
     * Has no effect on formatting.
     * Default: 0 (unset)
     * @stable ICU 54
     */
    UNUM_PARSE_DECIMAL_MARK_REQUIRED = 0x1002,
    /* The following cannot be #ifndef U_HIDE_INTERNAL_API, needed in .h file variable declarations */
    /** Limit of boolean attributes.
     * @internal */
    UNUM_LIMIT_BOOLEAN_ATTRIBUTE = 0x1003
} UNumberFormatAttribute;
/**
 * Get a numeric attribute associated with a UNumberFormat.
 * An example of a numeric attribute is the number of integer digits a
 * formatter will produce.
 * @param fmt The formatter to query.
 * @param attr The attribute to query; one of UNUM_PARSE_INT_ONLY,
 * UNUM_GROUPING_USED, UNUM_DECIMAL_ALWAYS_SHOWN, UNUM_MAX_INTEGER_DIGITS,
 * UNUM_MIN_INTEGER_DIGITS, UNUM_INTEGER_DIGITS, UNUM_MAX_FRACTION_DIGITS,
 * UNUM_MIN_FRACTION_DIGITS, UNUM_FRACTION_DIGITS, UNUM_MULTIPLIER,
 * UNUM_GROUPING_SIZE, UNUM_ROUNDING_MODE, UNUM_FORMAT_WIDTH,
 * UNUM_PADDING_POSITION, UNUM_SECONDARY_GROUPING_SIZE, UNUM_SCALE,
 * UNUM_MINIMUM_GROUPING_DIGITS.
 * @return The value of attr.
 * @see unum_setAttribute
 * @see unum_getDoubleAttribute
 * @see unum_setDoubleAttribute
 * @see unum_getTextAttribute
 * @see unum_setTextAttribute
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
unum_getAttribute(const UNumberFormat* fmt,
UNumberFormatAttribute attr);
/**
 * Set a numeric attribute associated with a UNumberFormat.
 * An example of a numeric attribute is the number of integer digits a
 * formatter will produce. If the formatter does not understand the
 * attribute, the call is ignored. Rule-based formatters only understand
 * the lenient-parse attribute.
 * @param fmt The formatter to set.
 * @param attr The attribute to set; one of UNUM_PARSE_INT_ONLY,
 * UNUM_GROUPING_USED, UNUM_DECIMAL_ALWAYS_SHOWN, UNUM_MAX_INTEGER_DIGITS,
 * UNUM_MIN_INTEGER_DIGITS, UNUM_INTEGER_DIGITS, UNUM_MAX_FRACTION_DIGITS,
 * UNUM_MIN_FRACTION_DIGITS, UNUM_FRACTION_DIGITS, UNUM_MULTIPLIER,
 * UNUM_GROUPING_SIZE, UNUM_ROUNDING_MODE, UNUM_FORMAT_WIDTH,
 * UNUM_PADDING_POSITION, UNUM_SECONDARY_GROUPING_SIZE, UNUM_LENIENT_PARSE,
 * UNUM_SCALE, UNUM_MINIMUM_GROUPING_DIGITS.
 * @param newValue The new value of attr.
 * @see unum_getAttribute
 * @see unum_getDoubleAttribute
 * @see unum_setDoubleAttribute
 * @see unum_getTextAttribute
 * @see unum_setTextAttribute
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2
unum_setAttribute( UNumberFormat* fmt,
UNumberFormatAttribute attr,
int32_t newValue);
/**
 * Get a numeric attribute associated with a UNumberFormat, as a double.
 * An example of a numeric attribute is the number of integer digits a
 * formatter will produce.
 * If the formatter does not understand the attribute, -1 is returned.
 * @param fmt The formatter to query.
 * @param attr The attribute to query; e.g. UNUM_ROUNDING_INCREMENT.
 * @return The value of attr.
 * @see unum_getAttribute
 * @see unum_setAttribute
 * @see unum_setDoubleAttribute
 * @see unum_getTextAttribute
 * @see unum_setTextAttribute
 * @stable ICU 2.0
 */
U_STABLE double U_EXPORT2
unum_getDoubleAttribute(const UNumberFormat* fmt,
UNumberFormatAttribute attr);
/**
 * Set a numeric attribute associated with a UNumberFormat, from a double.
 * An example of a numeric attribute is the number of integer digits a
 * formatter will produce.
 * If the formatter does not understand the attribute, this call is ignored.
 * @param fmt The formatter to set.
 * @param attr The attribute to set; e.g. UNUM_ROUNDING_INCREMENT.
 * @param newValue The new value of attr.
 * @see unum_getAttribute
 * @see unum_setAttribute
 * @see unum_getDoubleAttribute
 * @see unum_getTextAttribute
 * @see unum_setTextAttribute
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2
unum_setDoubleAttribute( UNumberFormat* fmt,
UNumberFormatAttribute attr,
double newValue);
/**
 * Selectors for the text (string-valued) attributes of a UNumberFormat.
 * @stable ICU 2.0
 */
typedef enum UNumberFormatTextAttribute {
    /** Prefix used for positive numbers */
    UNUM_POSITIVE_PREFIX = 0,
    /** Suffix used for positive numbers */
    UNUM_POSITIVE_SUFFIX = 1,
    /** Prefix used for negative numbers */
    UNUM_NEGATIVE_PREFIX = 2,
    /** Suffix used for negative numbers */
    UNUM_NEGATIVE_SUFFIX = 3,
    /** Character used to pad to the format width */
    UNUM_PADDING_CHARACTER = 4,
    /** ISO currency code */
    UNUM_CURRENCY_CODE = 5,
    /**
     * The default rule set, such as "%spellout-numbering-year:",
     * "%spellout-cardinal:", "%spellout-ordinal-masculine-plural:",
     * "%spellout-ordinal-feminine:", or "%spellout-ordinal-neuter:".
     * The available public rulesets can be listed using
     * unum_getTextAttribute with UNUM_PUBLIC_RULESETS.
     * Only available with rule-based formatters.
     * @stable ICU 3.0
     */
    UNUM_DEFAULT_RULESET = 6,
    /**
     * The public rule sets; a read-only attribute that is only available
     * with rule-based formatters. The public rulesets are returned as a
     * single string, with each ruleset name delimited by ';' (semicolon).
     * See the CLDR LDML spec for more information about RBNF rulesets:
     * http://www.unicode.org/reports/tr35/tr35-numbers.html#Rule-Based_Number_Formatting
     * @stable ICU 3.0
     */
    UNUM_PUBLIC_RULESETS = 7
} UNumberFormatTextAttribute;
/**
 * Get a text attribute associated with a UNumberFormat.
 * An example of a text attribute is the suffix for positive numbers. If the
 * formatter does not understand the attribute, U_UNSUPPORTED_ERROR is
 * returned as the status. Rule-based formatters only understand
 * UNUM_DEFAULT_RULESET and UNUM_PUBLIC_RULESETS.
 * @param fmt The formatter to query.
 * @param tag The attribute to query; one of UNUM_POSITIVE_PREFIX,
 * UNUM_POSITIVE_SUFFIX, UNUM_NEGATIVE_PREFIX, UNUM_NEGATIVE_SUFFIX,
 * UNUM_PADDING_CHARACTER, UNUM_CURRENCY_CODE, UNUM_DEFAULT_RULESET,
 * or UNUM_PUBLIC_RULESETS.
 * @param result A pointer to a buffer to receive the attribute.
 * @param resultLength The maximum size of result.
 * @param status A pointer to an UErrorCode to receive any errors.
 * @return The total buffer size needed; if greater than resultLength, the
 * output was truncated.
 * @see unum_setTextAttribute
 * @see unum_getAttribute
 * @see unum_setAttribute
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
unum_getTextAttribute( const UNumberFormat* fmt,
UNumberFormatTextAttribute tag,
UChar* result,
int32_t resultLength,
UErrorCode* status);
/**
 * Set a text attribute associated with a UNumberFormat.
 * An example of a text attribute is the suffix for positive numbers.
 * Rule-based formatters only understand UNUM_DEFAULT_RULESET.
 * @param fmt The formatter to set.
 * @param tag The attribute to set; one of UNUM_POSITIVE_PREFIX,
 * UNUM_POSITIVE_SUFFIX, UNUM_NEGATIVE_PREFIX, UNUM_NEGATIVE_SUFFIX,
 * UNUM_PADDING_CHARACTER, UNUM_CURRENCY_CODE, or UNUM_DEFAULT_RULESET.
 * @param newValue The new value of the attribute selected by tag.
 * @param newValueLength The length of newValue, or -1 if null-terminated.
 * @param status A pointer to an UErrorCode to receive any errors.
 * @see unum_getTextAttribute
 * @see unum_getAttribute
 * @see unum_setAttribute
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2
unum_setTextAttribute( UNumberFormat* fmt,
UNumberFormatTextAttribute tag,
const UChar* newValue,
int32_t newValueLength,
UErrorCode *status);
/**
 * Extract the pattern from a UNumberFormat. The pattern will follow
 * the DecimalFormat pattern syntax.
 * @param fmt The formatter to query.
 * @param isPatternLocalized TRUE if the pattern should be localized,
 * FALSE otherwise. This is ignored if the formatter is a rule-based
 * formatter.
 * @param result A pointer to a buffer to receive the pattern.
 * @param resultLength The maximum size of result.
 * @param status A pointer to an input-output UErrorCode.
 * @return The total buffer size needed; if greater than resultLength,
 * the output was truncated.
 * @see unum_applyPattern
 * @see DecimalFormat
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
unum_toPattern( const UNumberFormat* fmt,
UBool isPatternLocalized,
UChar* result,
int32_t resultLength,
UErrorCode* status);
/**
 * Selectors identifying the individual symbols of a number format.
 * @stable ICU 2.0
 */
typedef enum UNumberFormatSymbol {
    /** Decimal separator */
    UNUM_DECIMAL_SEPARATOR_SYMBOL = 0,
    /** Grouping separator */
    UNUM_GROUPING_SEPARATOR_SYMBOL = 1,
    /** Pattern separator */
    UNUM_PATTERN_SEPARATOR_SYMBOL = 2,
    /** Percent sign */
    UNUM_PERCENT_SYMBOL = 3,
    /** Digit zero */
    UNUM_ZERO_DIGIT_SYMBOL = 4,
    /** Character representing a digit in the pattern */
    UNUM_DIGIT_SYMBOL = 5,
    /** Minus sign */
    UNUM_MINUS_SIGN_SYMBOL = 6,
    /** Plus sign */
    UNUM_PLUS_SIGN_SYMBOL = 7,
    /** Currency symbol */
    UNUM_CURRENCY_SYMBOL = 8,
    /** International currency symbol */
    UNUM_INTL_CURRENCY_SYMBOL = 9,
    /** Monetary separator */
    UNUM_MONETARY_SEPARATOR_SYMBOL = 10,
    /** Exponential symbol */
    UNUM_EXPONENTIAL_SYMBOL = 11,
    /** Per mill symbol */
    UNUM_PERMILL_SYMBOL = 12,
    /** Escape padding character */
    UNUM_PAD_ESCAPE_SYMBOL = 13,
    /** Infinity symbol */
    UNUM_INFINITY_SYMBOL = 14,
    /** NaN symbol */
    UNUM_NAN_SYMBOL = 15,
    /** Significant digit symbol. @stable ICU 3.0 */
    UNUM_SIGNIFICANT_DIGIT_SYMBOL = 16,
    /** Monetary grouping separator. @stable ICU 3.6 */
    UNUM_MONETARY_GROUPING_SEPARATOR_SYMBOL = 17,
    /** Digit one. @stable ICU 4.6 */
    UNUM_ONE_DIGIT_SYMBOL = 18,
    /** Digit two. @stable ICU 4.6 */
    UNUM_TWO_DIGIT_SYMBOL = 19,
    /** Digit three. @stable ICU 4.6 */
    UNUM_THREE_DIGIT_SYMBOL = 20,
    /** Digit four. @stable ICU 4.6 */
    UNUM_FOUR_DIGIT_SYMBOL = 21,
    /** Digit five. @stable ICU 4.6 */
    UNUM_FIVE_DIGIT_SYMBOL = 22,
    /** Digit six. @stable ICU 4.6 */
    UNUM_SIX_DIGIT_SYMBOL = 23,
    /** Digit seven. @stable ICU 4.6 */
    UNUM_SEVEN_DIGIT_SYMBOL = 24,
    /** Digit eight. @stable ICU 4.6 */
    UNUM_EIGHT_DIGIT_SYMBOL = 25,
    /** Digit nine. @stable ICU 4.6 */
    UNUM_NINE_DIGIT_SYMBOL = 26,
    /** Multiplication sign. @stable ICU 54 */
    UNUM_EXPONENT_MULTIPLICATION_SYMBOL = 27,
    /** Count of symbol constants (one past the last symbol above) */
    UNUM_FORMAT_SYMBOL_COUNT = UNUM_EXPONENT_MULTIPLICATION_SYMBOL + 1
} UNumberFormatSymbol;
/*
 * NOTE(review): the text below is the beginning of the documentation for
 * getting a UNumberFormat symbol. It stops in the middle of its @return
 * sentence and no matching prototype follows it here -- the header appears
 * to be truncated at this point; confirm against the upstream ICU unum.h.
 *
 * Get a symbol associated with a UNumberFormat.
 * A UNumberFormat uses symbols to represent the special locale-dependent
 * characters in a number, for example the percent sign. This API is not
 * supported for rule-based formatters.
 * @param fmt The formatter to query.
 * @param symbol The UNumberFormatSymbol constant for the symbol to get
 * @param buffer The string buffer that will receive the symbol string;
 * if it is NULL, then only the length of the symbol is returned
 * @param size The size of the string buffer
 * @param status A pointer to an UErrorCode to receive any errors
 * @return The length of the symbol; the buffer is not modified if
 * [remainder of the sentence is missing]
 */
/*
 * NOTE(review): fragments of the date-format overview follow. The example
 * code and the ends of two sentences appear to be missing; confirm against
 * the upstream ICU udat.h.
 *
 * Date Format helps you to format and parse dates for any locale. Your code
 * can be completely independent of the locale conventions for months, days
 * of the week, or even the calendar format: lunar vs. solar.
 *
 * To format a date for the current Locale with default time and date style,
 * use one of the static factory methods: [example missing]
 *
 * You can also use forms of the parse and format methods with Parse Position
 * and UFieldPosition to allow you to [remainder of the sentence is missing]
 *
 * Date and Time Patterns: Date and time formats are specified by date and
 * time pattern strings. Within date and time pattern strings, all unquoted
 * ASCII letters [A-Za-z] are reserved as pattern letters representing
 * calendar fields.
 */
/**
 * Parse a string into a date/time using a UDateFormat.
 *
 * Note that the normal date formats associated with some calendars - such
 * as the Chinese lunar calendar - do not specify enough fields to enable
 * dates to be parsed unambiguously. In the case of the Chinese lunar
 * calendar, while the year within the current 60-year cycle is specified,
 * the number of such cycles since the start date of the calendar (in the
 * UCAL_ERA field of the UCalendar object) is not normally part of the format,
 * and parsing may assume the wrong era. For cases such as this it is
 * recommended that clients parse using udat_parseCalendar with the UCalendar
 * passed in set to the current date, or to a date within the era/cycle that
 * should be assumed if absent in the format.
 *
 * @param format The formatter to use.
 * @param text The text to parse.
 * @param textLength The length of text, or -1 if null-terminated.
 * @param parsePos If not 0, on input a pointer to an integer specifying the
 * offset at which to begin parsing, and on output the offset at which
 * parsing ended.
 * @param status A pointer to an UErrorCode to receive any errors.
 * @return The value of the parsed date/time.
 * @see udat_format
 * @see udat_parseCalendar
 * @stable ICU 2.0
 */
U_STABLE UDate U_EXPORT2
udat_parse(const UDateFormat* format,
const UChar* text,
int32_t textLength,
int32_t *parsePos,
UErrorCode *status);
/**
 * Parse a string into a date/time using a UDateFormat, storing the result
 * in a caller-supplied UCalendar.
 * The date will be parsed using the conventions specified in
 * {@link #udat_open }.
 * @param format The formatter to use.
 * @param calendar A calendar set on input to the date and time to be used for
 * missing values in the date/time string being parsed, and set
 * on output to the parsed date/time. When the calendar type is
 * different from the internal calendar held by the UDateFormat
 * instance, the internal calendar will be cloned to a work
 * calendar set to the same milliseconds and time zone as this
 * calendar parameter, field values will be parsed based on the
 * work calendar, then the result (milliseconds and time zone)
 * will be set in this calendar.
 * @param text The text to parse.
 * @param textLength The length of text, or -1 if null-terminated.
 * @param parsePos If not 0, on input a pointer to an integer specifying the
 * offset at which to begin parsing, and on output the offset at which
 * parsing ended.
 * @param status A pointer to an UErrorCode to receive any errors.
 * @see udat_format
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2
udat_parseCalendar(const UDateFormat* format,
UCalendar* calendar,
const UChar* text,
int32_t textLength,
int32_t *parsePos,
UErrorCode *status);
/**
 * Determine if a UDateFormat will perform lenient parsing.
 * With lenient parsing, the parser may use heuristics to interpret inputs
 * that do not precisely match the pattern. With strict parsing, inputs must
 * match the pattern.
 * @param fmt The formatter to query.
 * @return TRUE if fmt is set to perform lenient parsing, FALSE otherwise.
 * @see udat_setLenient
 * @stable ICU 2.0
 */
U_STABLE UBool U_EXPORT2
udat_isLenient(const UDateFormat* fmt);
/**
 * Specify whether a UDateFormat will perform lenient parsing.
 * With lenient parsing, the parser may use heuristics to interpret inputs
 * that do not precisely match the pattern. With strict parsing, inputs must
 * match the pattern.
 * @param fmt The formatter to set.
 * @param isLenient TRUE if fmt should perform lenient parsing, FALSE otherwise.
 * @see udat_isLenient
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2
udat_setLenient( UDateFormat* fmt,
UBool isLenient);
/**
 * Get the UCalendar associated with a UDateFormat.
 * A UDateFormat uses a UCalendar to convert a raw value to, for example,
 * the day of the week.
 * @param fmt The formatter to query.
 * @return A pointer to the UCalendar used by fmt.
 * @see udat_setCalendar
 * @stable ICU 2.0
 */
U_STABLE const UCalendar* U_EXPORT2
udat_getCalendar(const UDateFormat* fmt);
/**
 * Set the UCalendar associated with a UDateFormat.
 * A UDateFormat uses a UCalendar to convert a raw value to, for example,
 * the day of the week.
 * @param fmt The formatter to set.
 * @param calendarToSet A pointer to an UCalendar to be used by fmt.
 * @see udat_getCalendar
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2
udat_setCalendar( UDateFormat* fmt,
const UCalendar* calendarToSet);
/**
 * Get the UNumberFormat associated with a UDateFormat.
 * A UDateFormat uses a UNumberFormat to format numbers within a date,
 * for example the day number.
 * @param fmt The formatter to query.
 * @return A pointer to the UNumberFormat used by fmt to format numbers.
 * @see udat_setNumberFormat
 * @stable ICU 2.0
 */
U_STABLE const UNumberFormat* U_EXPORT2
udat_getNumberFormat(const UDateFormat* fmt);
/**
 * Get the UNumberFormat for a specific field associated with a UDateFormat.
 * For example: 'y' for year and 'M' for month.
 * @param fmt The formatter to query.
 * @param field The field to query.
 * @return A pointer to the UNumberFormat used by fmt to format field numbers.
 * @see udat_adoptNumberFormatForFields
 * @stable ICU 54
 */
U_STABLE const UNumberFormat* U_EXPORT2
udat_getNumberFormatForField(const UDateFormat* fmt, UChar field);
/**
 * Set the UNumberFormat for specific field(s) associated with a UDateFormat.
 * It can be a single field like: "y"(year) or "M"(month)
 * It can be several fields combined together: "yM"(year and month)
 * Note:
 * 1 symbol field is enough for multiple symbol fields (so "y" will override
 * "yy", "yyy").
 * If the field is not numeric, then the override has no effect (like "MMM"
 * will use abbreviation, not numerical field).
 *
 * @param fmt The formatter to set.
 * @param fields The fields to set.
 * @param numberFormatToSet A pointer to the UNumberFormat to be used by fmt
 * to format numbers.
 * @param status Error code passed around (memory allocation or invalid
 * fields).
 * @see udat_getNumberFormatForField
 * @stable ICU 54
 */
U_STABLE void U_EXPORT2
udat_adoptNumberFormatForFields( UDateFormat* fmt,
const UChar* fields,
UNumberFormat* numberFormatToSet,
UErrorCode* status);
/**
 * Set the UNumberFormat associated with a UDateFormat.
 * A UDateFormat uses a UNumberFormat to format numbers within a date,
 * for example the day number.
 * This method also clears per-field NumberFormat instances previously
 * set by udat_adoptNumberFormatForFields.
 * @param fmt The formatter to set.
 * @param numberFormatToSet A pointer to the UNumberFormat to be used by fmt
 * to format numbers.
 * @see udat_getNumberFormat
 * @see udat_adoptNumberFormatForFields
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2
udat_setNumberFormat( UDateFormat* fmt,
const UNumberFormat* numberFormatToSet);
/**
 * Adopt the UNumberFormat associated with a UDateFormat.
 * A UDateFormat uses a UNumberFormat to format numbers within a date,
 * for example the day number.
 * @param fmt The formatter to set.
 * @param numberFormatToAdopt A pointer to the UNumberFormat to be used by fmt
 * to format numbers.
 * @see udat_getNumberFormat
 * @see udat_setNumberFormat
 * @stable ICU 54
 */
U_STABLE void U_EXPORT2
udat_adoptNumberFormat( UDateFormat* fmt,
UNumberFormat* numberFormatToAdopt);
/**
 * Get a locale for which date/time formatting patterns are available.
 * A UDateFormat in a locale returned by this function will perform the
 * correct formatting and parsing for the locale.
 * @param localeIndex The index of the desired locale.
 * @return A locale for which date/time formatting patterns are available,
 * or 0 (a null pointer) if none.
 * @see udat_countAvailable
 * @stable ICU 2.0
 */
U_STABLE const char* U_EXPORT2
udat_getAvailable(int32_t localeIndex);
/**
 * Determine how many locales have date/time formatting patterns available.
 * This function is most useful for determining the loop ending condition
 * for calls to {@link #udat_getAvailable }.
 * @return The number of locales for which date/time formatting patterns are
 * available.
 * @see udat_getAvailable
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
udat_countAvailable(void);
/**
 * Get the year relative to which all 2-digit years are interpreted.
 * For example, if the 2-digit start year is 2100, the year 99 will be
 * interpreted as 2199.
 * @param fmt The formatter to query.
 * @param status A pointer to an UErrorCode to receive any errors.
 * @return The year relative to which all 2-digit years are interpreted.
 * @see udat_set2DigitYearStart
 * @stable ICU 2.0
 */
U_STABLE UDate U_EXPORT2
udat_get2DigitYearStart( const UDateFormat *fmt,
UErrorCode *status);
/**
 * Set the year relative to which all 2-digit years will be interpreted.
 * For example, if the 2-digit start year is 2100, the year 99 will be
 * interpreted as 2199.
 * @param fmt The formatter to set.
 * @param d The year relative to which all 2-digit years will be interpreted.
 * @param status A pointer to an UErrorCode to receive any errors.
 * @see udat_get2DigitYearStart
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2
udat_set2DigitYearStart( UDateFormat *fmt,
UDate d,
UErrorCode *status);
/**
 * Extract the pattern from a UDateFormat.
 * The pattern will follow the pattern syntax rules.
 * @param fmt The formatter to query.
 * @param localized TRUE if the pattern should be localized, FALSE otherwise.
 * @param result A pointer to a buffer to receive the pattern.
 * @param resultLength The maximum size of result.
 * @param status A pointer to an UErrorCode to receive any errors.
 * @return The total buffer size needed; if greater than resultLength, the
 * output was truncated.
 * @see udat_applyPattern
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
udat_toPattern( const UDateFormat *fmt,
UBool localized,
UChar *result,
int32_t resultLength,
UErrorCode *status);
/**
 * Set the pattern used by a UDateFormat.
 * The pattern should follow the pattern syntax rules.
 * Note that this function takes no UErrorCode parameter and returns void,
 * so it has no channel for reporting an error to the caller.
 * @param format The formatter to set.
 * @param localized TRUE if the pattern is localized, FALSE otherwise.
 * @param pattern The new pattern.
 * @param patternLength The length of pattern, or -1 if null-terminated.
 * @see udat_toPattern
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2
udat_applyPattern( UDateFormat *format,
UBool localized,
const UChar *pattern,
int32_t patternLength);
/**
 * Selectors for the kinds of date format symbols.
 * @stable ICU 2.6
 */
typedef enum UDateFormatSymbolType {
    /** Era names, for example AD */
    UDAT_ERAS = 0,
    /** Month names, for example February */
    UDAT_MONTHS = 1,
    /** Short month names, for example Feb. */
    UDAT_SHORT_MONTHS = 2,
    /** CLDR-style format "wide" weekday names, for example Monday */
    UDAT_WEEKDAYS = 3,
    /**
     * CLDR-style format "abbreviated" (not "short") weekday names, for
     * example "Mon."
     * For the CLDR-style format "short" weekday names, use
     * UDAT_SHORTER_WEEKDAYS.
     */
    UDAT_SHORT_WEEKDAYS = 4,
    /** AM/PM names, for example AM */
    UDAT_AM_PMS = 5,
    /** Localized characters */
    UDAT_LOCALIZED_CHARS = 6,
    /** Long era names, for example Anno Domini */
    UDAT_ERA_NAMES = 7,
    /** Narrow month names, for example F */
    UDAT_NARROW_MONTHS = 8,
    /** CLDR-style format "narrow" weekday names, for example "M" */
    UDAT_NARROW_WEEKDAYS = 9,
    /** Standalone context versions of months */
    UDAT_STANDALONE_MONTHS = 10,
    UDAT_STANDALONE_SHORT_MONTHS = 11,
    UDAT_STANDALONE_NARROW_MONTHS = 12,
    /** CLDR-style stand-alone "wide" weekday names */
    UDAT_STANDALONE_WEEKDAYS = 13,
    /**
     * CLDR-style stand-alone "abbreviated" (not "short") weekday names.
     * For the CLDR-style stand-alone "short" weekday names, use
     * UDAT_STANDALONE_SHORTER_WEEKDAYS.
     */
    UDAT_STANDALONE_SHORT_WEEKDAYS = 14,
    /** CLDR-style stand-alone "narrow" weekday names */
    UDAT_STANDALONE_NARROW_WEEKDAYS = 15,
    /** Quarters, for example 1st Quarter */
    UDAT_QUARTERS = 16,
    /** Short quarter names, for example Q1 */
    UDAT_SHORT_QUARTERS = 17,
    /** Standalone context versions of quarters */
    UDAT_STANDALONE_QUARTERS = 18,
    UDAT_STANDALONE_SHORT_QUARTERS = 19,
    /**
     * CLDR-style short weekday names, e.g. "Su", "Mo", etc.
     * These are named "SHORTER" to contrast with the constants using _SHORT_
     * above, which actually get the CLDR-style *abbreviated* versions of the
     * corresponding names.
     * @stable ICU 51
     */
    UDAT_SHORTER_WEEKDAYS = 20,
    /**
     * Standalone version of UDAT_SHORTER_WEEKDAYS.
     * @stable ICU 51
     */
    UDAT_STANDALONE_SHORTER_WEEKDAYS = 21,
    /**
     * Cyclic year names (only supported for some calendars, and only for
     * FORMAT usage; udat_setSymbols not supported for UDAT_CYCLIC_YEARS_WIDE)
     * @stable ICU 54
     */
    UDAT_CYCLIC_YEARS_WIDE = 22,
    /**
     * Cyclic year names (only supported for some calendars, and only for
     * FORMAT usage)
     * @stable ICU 54
     */
    UDAT_CYCLIC_YEARS_ABBREVIATED = 23,
    /**
     * Cyclic year names (only supported for some calendars, and only for
     * FORMAT usage; udat_setSymbols not supported for
     * UDAT_CYCLIC_YEARS_NARROW)
     * @stable ICU 54
     */
    UDAT_CYCLIC_YEARS_NARROW = 24,
    /**
     * Calendar zodiac names (only supported for some calendars, and only for
     * FORMAT usage; udat_setSymbols not supported for UDAT_ZODIAC_NAMES_WIDE)
     * @stable ICU 54
     */
    UDAT_ZODIAC_NAMES_WIDE = 25,
    /**
     * Calendar zodiac names (only supported for some calendars, and only for
     * FORMAT usage)
     * @stable ICU 54
     */
    UDAT_ZODIAC_NAMES_ABBREVIATED = 26,
    /**
     * Calendar zodiac names (only supported for some calendars, and only for
     * FORMAT usage; udat_setSymbols not supported for
     * UDAT_ZODIAC_NAMES_NARROW)
     * @stable ICU 54
     */
    UDAT_ZODIAC_NAMES_NARROW = 27
} UDateFormatSymbolType;
struct UDateFormatSymbols;
/** Date format symbols.
* For usage in C programs.
* @stable ICU 2.6
*/
typedef struct UDateFormatSymbols UDateFormatSymbols;
/**
* Get the symbols associated with an UDateFormat.
* The symbols are what a UDateFormat uses to represent locale-specific data,
* for example month or day names.
* @param fmt The formatter to query.
* @param type The type of symbols to get. A UDateFormatSymbolType constant, for example
* UDAT_ERAS, UDAT_MONTHS, UDAT_SHORT_MONTHS, UDAT_WEEKDAYS, UDAT_SHORT_WEEKDAYS,
* UDAT_AM_PMS, or UDAT_LOCALIZED_CHARS
* @param symbolIndex The index of the desired symbol among the symbols of type type.
* @param result A pointer to a buffer to receive the symbol.
* @param resultLength The maximum size of result.
* @param status A pointer to an UErrorCode to receive any errors
* @return The total buffer size needed; if greater than resultLength, the output was truncated.
* @see udat_countSymbols
* @see udat_setSymbols
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
udat_getSymbols(const UDateFormat *fmt,
UDateFormatSymbolType type,
int32_t symbolIndex,
UChar *result,
int32_t resultLength,
UErrorCode *status);
/**
* Count the number of particular symbols for an UDateFormat.
* This function is most useful for determining the loop termination condition
* for calls to {@link #udat_getSymbols }.
* @param fmt The formatter to query.
* @param type The type of symbols to count. A UDateFormatSymbolType constant, for example
* UDAT_ERAS, UDAT_MONTHS, UDAT_SHORT_MONTHS, UDAT_WEEKDAYS, UDAT_SHORT_WEEKDAYS,
* UDAT_AM_PMS, or UDAT_LOCALIZED_CHARS
* @return The number of symbols of type type.
* @see udat_getSymbols
* @see udat_setSymbols
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
udat_countSymbols( const UDateFormat *fmt,
UDateFormatSymbolType type);
/**
* Set the symbols associated with an UDateFormat.
* The symbols are what a UDateFormat uses to represent locale-specific data,
* for example month or day names.
*
* As noted on the UDateFormatSymbolType constants, this function is not supported for
* UDAT_CYCLIC_YEARS_WIDE, UDAT_CYCLIC_YEARS_NARROW, UDAT_ZODIAC_NAMES_WIDE and
* UDAT_ZODIAC_NAMES_NARROW.
* @param format The formatter to set
* @param type The type of symbols to set. A UDateFormatSymbolType constant, for example
* UDAT_ERAS, UDAT_MONTHS, UDAT_SHORT_MONTHS, UDAT_WEEKDAYS, UDAT_SHORT_WEEKDAYS,
* UDAT_AM_PMS, or UDAT_LOCALIZED_CHARS
* @param symbolIndex The index of the symbol to set of type type.
* @param value The new value
* @param valueLength The length of value, or -1 if null-terminated
* @param status A pointer to an UErrorCode to receive any errors
* @see udat_getSymbols
* @see udat_countSymbols
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
udat_setSymbols( UDateFormat *format,
UDateFormatSymbolType type,
int32_t symbolIndex,
UChar *value,
int32_t valueLength,
UErrorCode *status);
/**
* Get the locale for this date format object.
* You can choose between valid and actual locale.
* @param fmt The formatter to get the locale from
* @param type type of the locale we're looking for (valid or actual)
* @param status error code for the operation
* @return the locale name
* @stable ICU 2.8
*/
U_STABLE const char* U_EXPORT2
udat_getLocaleByType(const UDateFormat *fmt,
ULocDataLocaleType type,
UErrorCode* status);
/**
* Set a particular UDisplayContext value in the formatter, such as
* UDISPCTX_CAPITALIZATION_FOR_STANDALONE.
* @param fmt The formatter for which to set a UDisplayContext value.
* @param value The UDisplayContext value to set.
* @param status A pointer to an UErrorCode to receive any errors
* @stable ICU 51
*/
/* NOTE(review): this declaration uses U_DRAFT while its documentation is tagged
 * @stable ICU 51, and the companion udat_getContext is declared U_STABLE.
 * Confirm which stability marker is intended. */
U_DRAFT void U_EXPORT2
udat_setContext(UDateFormat* fmt, UDisplayContext value, UErrorCode* status);
/**
* Get the formatter's UDisplayContext value for the specified UDisplayContextType,
* such as UDISPCTX_TYPE_CAPITALIZATION.
* @param fmt The formatter to query.
* @param type The UDisplayContextType whose value to return
* @param status A pointer to an UErrorCode to receive any errors
* @return The UDisplayContext value for the specified type.
* @see udat_setContext
* @stable ICU 53
*/
U_STABLE UDisplayContext U_EXPORT2
udat_getContext(const UDateFormat* fmt, UDisplayContextType type, UErrorCode* status);
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif
// measfmt.h
/*
**********************************************************************
* Copyright (c) 2004-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: April 20, 2004
* Since: ICU 3.0
**********************************************************************
*/
#ifndef MEASUREFORMAT_H
#define MEASUREFORMAT_H
#if !UCONFIG_NO_FORMATTING
/**
* \file
* \brief C++ API: Formatter for measure objects.
*/
/**
 * Width options for formatting measures.
 * Four widths exist: wide, short, narrow and numeric.
 * In English, "3 hours" is rendered as "3 hours" (wide), "3 hrs" (short)
 * or "3h" (narrow); "3 hours 17 minutes" in numeric width gives "3:17".
 * @stable ICU 53
 */
typedef enum UMeasureFormatWidth {
    /* Wide, short, and narrow must be first and in this order. */

    /**
     * Spell out measure units.
     * @stable ICU 53
     */
    UMEASFMT_WIDTH_WIDE = 0,

    /**
     * Abbreviate measure units.
     * @stable ICU 53
     */
    UMEASFMT_WIDTH_SHORT = 1,

    /**
     * Use symbols for measure units when possible.
     * @stable ICU 53
     */
    UMEASFMT_WIDTH_NARROW = 2,

    /**
     * Completely omit measure units when possible. For example, format
     * '5 hours, 37 minutes' as '5:37'
     * @stable ICU 53
     */
    UMEASFMT_WIDTH_NUMERIC = 3,

    /**
     * Count of values in this enum.
     * @stable ICU 53
     */
    UMEASFMT_WIDTH_COUNT = 4
} UMeasureFormatWidth; /**< C typedef for enum UMeasureFormatWidth. @stable ICU 53 */
#endif // #if !UCONFIG_NO_FORMATTING
#endif // #ifndef MEASUREFORMAT_H
// unumsys.h
/*
*****************************************************************************************
* Copyright (C) 2013-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*****************************************************************************************
*/
#ifndef UNUMSYS_H
#define UNUMSYS_H
#if !UCONFIG_NO_FORMATTING
/**
* \file
* \brief C API: UNumberingSystem, information about numbering systems
*
* Defines numbering systems. A numbering system describes the scheme by which
* numbers are to be presented to the end user. In its simplest form, a numbering
* system describes the set of digit characters that are to be used to display
* numbers, such as Western digits, Thai digits, Arabic-Indic digits, etc., in a
* positional numbering system with a specified radix (typically 10).
* More complicated numbering systems are algorithmic in nature, and require use
* of an RBNF formatter (rule based number formatter), in order to calculate
* the characters to be displayed for a given number. Examples of algorithmic
* numbering systems include Roman numerals, Chinese numerals, and Hebrew numerals.
* Formatting rules for many commonly used numbering systems are included in
* the ICU package, based on the numbering system rules defined in CLDR.
* Alternate numbering systems can be specified to a locale by using the
* numbers locale keyword.
*/
/**
* Opaque UNumberingSystem object for use in C programs.
* @stable ICU 52
*/
struct UNumberingSystem;
typedef struct UNumberingSystem UNumberingSystem; /**< C typedef for struct UNumberingSystem. @stable ICU 52 */
/**
* Opens a UNumberingSystem object using the default numbering system for the specified
* locale.
* @param locale The locale for which the default numbering system should be opened.
* @param status A pointer to a UErrorCode to receive any errors. For example, this
* may be U_UNSUPPORTED_ERROR for a locale such as "en@numbers=xyz" that
* specifies a numbering system unknown to ICU.
* @return A UNumberingSystem for the specified locale, or NULL if an error
* occurred.
* @stable ICU 52
*/
U_STABLE UNumberingSystem * U_EXPORT2
unumsys_open(const char *locale, UErrorCode *status);
/**
* Opens a UNumberingSystem object using the name of one of the predefined numbering
* systems specified by CLDR and known to ICU, such as "latn", "arabext", or "hanidec";
* the full list is returned by unumsys_openAvailableNames. Note that some of the names
* listed at http://unicode.org/repos/cldr/tags/latest/common/bcp47/number.xml - e.g.
* default, native, traditional, finance - do not identify specific numbering systems,
* but rather key values that may only be used as part of a locale, which in turn
* defines how they are mapped to a specific numbering system such as "latn" or "hant".
*
* @param name The name of the numbering system for which a UNumberingSystem object
* should be opened.
* @param status A pointer to a UErrorCode to receive any errors. For example, this
* may be U_UNSUPPORTED_ERROR for a numbering system such as "xyz" that
* is unknown to ICU.
* @return A UNumberingSystem for the specified name, or NULL if an error
* occurred.
* @stable ICU 52
*/
U_STABLE UNumberingSystem * U_EXPORT2
unumsys_openByName(const char *name, UErrorCode *status);
/**
* Close a UNumberingSystem object. Once closed it may no longer be used.
* @param unumsys The UNumberingSystem object to close.
* @stable ICU 52
*/
U_STABLE void U_EXPORT2
unumsys_close(UNumberingSystem *unumsys);
/**
* Returns an enumeration over the names of all of the predefined numbering systems known
* to ICU.
* @param status A pointer to a UErrorCode to receive any errors.
* @return A pointer to a UEnumeration that must be closed with uenum_close(),
* or NULL if an error occurred.
* @stable ICU 52
*/
U_STABLE UEnumeration * U_EXPORT2
unumsys_openAvailableNames(UErrorCode *status);
/**
* Returns the name of the specified UNumberingSystem object (if it is one of the
* predefined names known to ICU).
* @param unumsys The UNumberingSystem whose name is desired.
* @return A pointer to the name of the specified UNumberingSystem object, or
* NULL if the name is not one of the ICU predefined names. The pointer
* is only valid for the lifetime of the UNumberingSystem object.
* @stable ICU 52
*/
U_STABLE const char * U_EXPORT2
unumsys_getName(const UNumberingSystem *unumsys);
/**
* Returns whether the given UNumberingSystem object is for an algorithmic (not purely
* positional) system.
* @param unumsys The UNumberingSystem whose algorithmic status is desired.
* @return TRUE if the specified UNumberingSystem object is for an algorithmic
* system.
* @stable ICU 52
*/
U_STABLE UBool U_EXPORT2
unumsys_isAlgorithmic(const UNumberingSystem *unumsys);
/**
* Returns the radix of the specified UNumberingSystem object. Simple positional
* numbering systems typically have radix 10, but might have a radix of e.g. 16 for
* hexadecimal. The radix is less well-defined for non-positional algorithmic systems.
* @param unumsys The UNumberingSystem whose radix is desired.
* @return The radix of the specified UNumberingSystem object.
* @stable ICU 52
*/
U_STABLE int32_t U_EXPORT2
unumsys_getRadix(const UNumberingSystem *unumsys);
/**
* Get the description string of the specified UNumberingSystem object. For simple
* positional systems this is the ordered string of digits (with length matching
* the radix), e.g. "\u3007\u4E00\u4E8C\u4E09\u56DB\u4E94\u516D\u4E03\u516B\u4E5D"
* for "hanidec"; it would be "0123456789ABCDEF" for hexadecimal. For
* algorithmic systems this is the name of the RBNF ruleset used for formatting,
* e.g. "zh/SpelloutRules/%spellout-cardinal" for "hans" or "%greek-upper" for
* "grek".
* @param unumsys The UNumberingSystem whose description string is desired.
* @param result A pointer to a buffer to receive the description string.
* @param resultLength The maximum size of result.
* @param status A pointer to a UErrorCode to receive any errors.
* @return The total buffer size needed; if greater than resultLength, the
* output was truncated.
* @stable ICU 52
*/
U_STABLE int32_t U_EXPORT2
unumsys_getDescription(const UNumberingSystem *unumsys, UChar *result,
int32_t resultLength, UErrorCode *status);
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif
// upluralrules.h
/*
*****************************************************************************************
* Copyright (C) 2010-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*****************************************************************************************
*/
#ifndef UPLURALRULES_H
#define UPLURALRULES_H
#if !UCONFIG_NO_FORMATTING
/**
* \file
* \brief C API: Plural rules, select plural keywords for numeric values.
*
* A UPluralRules object defines rules for mapping non-negative numeric
* values onto a small set of keywords. Rules are constructed from a text
* description, consisting of a series of keywords and conditions.
* The uplrules_select function examines each condition in order and
* returns the keyword for the first condition that matches the number.
* If none match, the default rule(other) is returned.
*
* For more information, see the LDML spec, C.11 Language Plural Rules:
* http://www.unicode.org/reports/tr35/#Language_Plural_Rules
*
* Keywords: ICU locale data has 6 predefined values -
* 'zero', 'one', 'two', 'few', 'many' and 'other'. Callers need to check
* the value of keyword returned by the uplrules_select function.
*
* These are based on CLDR Language Plural Rules. For these
* predefined rules, see the CLDR page at
* http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
*/
/**
 * Kinds of plurals, and of the PluralRules that select them.
 * @stable ICU 50
 */
typedef enum UPluralType {
    /**
     * Plural rules for cardinal numbers: 1 file vs. 2 files.
     * @stable ICU 50
     */
    UPLURAL_TYPE_CARDINAL = 0,

    /**
     * Plural rules for ordinal numbers: 1st file, 2nd file, 3rd file, 4th file, etc.
     * @stable ICU 50
     */
    UPLURAL_TYPE_ORDINAL = 1,

    /**
     * Number of Plural rules types.
     * @stable ICU 50
     */
    UPLURAL_TYPE_COUNT = 2
} UPluralType; /**< C typedef for enum UPluralType. @stable ICU 50 */
/**
* Opaque UPluralRules object for use in C programs.
* @stable ICU 4.8
*/
struct UPluralRules;
typedef struct UPluralRules UPluralRules; /**< C typedef for struct UPluralRules. @stable ICU 4.8 */
/**
* Opens a new UPluralRules object using the predefined cardinal-number plural rules for a
* given locale.
* Same as uplrules_openForType(locale, UPLURAL_TYPE_CARDINAL, status).
* @param locale The locale for which the rules are desired.
* @param status A pointer to a UErrorCode to receive any errors.
* @return A UPluralRules for the specified locale, or NULL if an error occurred.
* @stable ICU 4.8
*/
U_STABLE UPluralRules* U_EXPORT2
uplrules_open(const char *locale, UErrorCode *status);
/**
* Opens a new UPluralRules object using the predefined plural rules for a
* given locale and the plural type.
* @param locale The locale for which the rules are desired.
* @param type The plural type (e.g., cardinal or ordinal).
* @param status A pointer to a UErrorCode to receive any errors.
* @return A UPluralRules for the specified locale, or NULL if an error occurred.
* @see uplrules_close
* @stable ICU 50
*/
/* NOTE(review): this declaration uses U_DRAFT while its documentation is tagged
 * @stable ICU 50, and the neighbouring uplrules_* functions are declared U_STABLE.
 * Confirm which stability marker is intended. */
U_DRAFT UPluralRules* U_EXPORT2
uplrules_openForType(const char *locale, UPluralType type, UErrorCode *status);
/**
* Closes a UPluralRules object. Once closed it may no longer be used.
* @param uplrules The UPluralRules object to close.
* @stable ICU 4.8
*/
U_STABLE void U_EXPORT2
uplrules_close(UPluralRules *uplrules);
/**
* Given a number, returns the keyword of the first rule that
* applies to the number, according to the supplied UPluralRules object.
* @param uplrules The UPluralRules object specifying the rules.
* @param number The number for which the rule has to be determined.
* @param keyword The destination buffer that receives the keyword of the rule
* that applies to number.
* @param capacity The capacity of the keyword buffer.
* @param status A pointer to a UErrorCode to receive any errors.
* @return The length of keyword.
* @stable ICU 4.8
*/
U_STABLE int32_t U_EXPORT2
uplrules_select(const UPluralRules *uplrules,
double number,
UChar *keyword, int32_t capacity,
UErrorCode *status);
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif
// rbnf.h
/*
*******************************************************************************
* Copyright (C) 1997-2015, International Business Machines Corporation and others.
* All Rights Reserved.
*******************************************************************************
*/
#ifndef RBNF_H
#define RBNF_H
/**
 * \file
 * \brief C++ API: Rule Based Number Format
 */

/**
 * \def U_HAVE_RBNF
 * Set to 1 when RBNF support is included in ICU and to 0 when it is not,
 * which here follows the UCONFIG_NO_FORMATTING switch.
 *
 * @stable ICU 2.4
 */
#if !UCONFIG_NO_FORMATTING
#define U_HAVE_RBNF 1
#else
#define U_HAVE_RBNF 0
#endif /* U_HAVE_RBNF */

#endif /* RBNF_H */
// plurrule.h
/*
*******************************************************************************
* Copyright (C) 2008-2015, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
*
* File PLURRULE.H
*
* Modification History:*
* Date Name Description
*
********************************************************************************
*/
#ifndef PLURRULE
#define PLURRULE
/**
* \file
* \brief C++ API: PluralRules object
*/
#if !UCONFIG_NO_FORMATTING
/**
* Value returned by PluralRules::getUniqueKeywordValue() when there is no
* unique value to return.
* @stable ICU 4.8
*/
#define UPLRULES_NO_UNIQUE_VALUE ((double)-0.00123456777)
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif // PLURRULE
// plurfmt.h
/*
*******************************************************************************
* Copyright (C) 2007-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* File PLURFMT.H
********************************************************************************
*/
#ifndef PLURFMT
#define PLURFMT
/**
* \file
* \brief C++ API: PluralFormat object
*/
/* No declarations from plurfmt.h are present in this section; only the include guard is kept. */
#if !UCONFIG_NO_FORMATTING
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif // PLURFMT
// msgfmt.h
/*
* Copyright (C) 2007-2013, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************************
*
* File MSGFMT.H
*
* Modification History:
*
* Date Name Description
* 02/19/97 aliu Converted from java.
* 03/20/97 helena Finished first cut of implementation.
* 07/22/98 stephen Removed operator!= (defined in Format)
* 08/19/2002 srl Removing Javaisms
*******************************************************************************/
#ifndef MSGFMT_H
#define MSGFMT_H
/**
* \file
* \brief C++ API: Formats messages in a language-neutral way.
*/
#if !UCONFIG_NO_FORMATTING
U_CDECL_BEGIN
// Forward declaration only; no definition of struct UHashtable is given in this section.
struct UHashtable;
typedef struct UHashtable UHashtable; /**< @internal */
U_CDECL_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif // MSGFMT_H
// uregex.h
/*
**********************************************************************
* Copyright (C) 2004-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: uregex.h
* encoding: US-ASCII
* indentation:4
*
* created on: 2004mar09
* created by: Andy Heninger
*
* ICU Regular Expressions, API for C
*/
/**
* \file
* \brief C API: Regular Expressions
*
* This is a C wrapper around the C++ RegexPattern and RegexMatcher classes.
*/
/**
* Make a copy of a compiled regular expression.
*
* Note that the current input string and the position of any matched text
* within it are not cloned; only the pattern itself and the
* match mode flags are copied.
*
* Cloning can be particularly useful to threaded applications that perform
* multiple match operations in parallel. Each concurrent RE
* operation requires its own instance of a URegularExpression.
*
* @param regexp The compiled regular expression to be cloned.
* @param status Receives indication of any errors encountered
* @return the cloned copy of the compiled regular expression.
* @stable ICU 3.0
*/
U_STABLE URegularExpression * U_EXPORT2
uregex_clone(const URegularExpression *regexp, UErrorCode *status);
/**
* Returns a pointer to the source form of the pattern for this regular expression.
* This function will work even if the pattern was originally specified as a UText.
*
* @param regexp The compiled regular expression.
* @param patLength This output parameter will be set to the length of the
* pattern string. A NULL pointer may be used here if the
* pattern length is not needed, as would be the case if
* the pattern is known in advance to be a NUL terminated
* string.
* @param status Receives errors detected by this function.
* @return a pointer to the pattern string. The storage for the string is
* owned by the regular expression object, and must not be
* altered or deleted by the application. The returned string
* will remain valid until the regular expression is closed.
* @stable ICU 3.0
*/
U_STABLE const UChar * U_EXPORT2
uregex_pattern(const URegularExpression *regexp,
int32_t *patLength,
UErrorCode *status);
/**
* Returns the source text of the pattern for this regular expression.
* This function will work even if the pattern was originally specified as a UChar string.
*
* @param regexp The compiled regular expression.
* @param status Receives errors detected by this function.
* @return the pattern text. The storage for the text is owned by the regular expression
* object, and must not be altered or deleted.
*
* @stable ICU 4.6
*/
U_STABLE UText * U_EXPORT2
uregex_patternUText(const URegularExpression *regexp,
UErrorCode *status);
/**
* Get the match mode flags that were specified when compiling this regular expression.
* @param regexp The compiled regular expression.
* @param status Receives errors detected by this function.
* @return The match mode flags
* @see URegexpFlag
* @stable ICU 3.0
*/
U_STABLE int32_t U_EXPORT2
uregex_flags(const URegularExpression *regexp,
UErrorCode *status);
/**
* Set the subject text string upon which the regular expression will look for matches.
* This function may be called any number of times, allowing the regular
* expression pattern to be applied to different strings.
*
* Regular expression matching operations work directly on the application's
* string data. No copy is made. The subject string data must not be
* altered after calling this function until after all regular expression
* operations involving this string data are completed.
*
* Zero length strings are permitted. In this case, no subsequent match
* operation will dereference the text string pointer.
*
* @param regexp The compiled regular expression.
* @param text The subject text string.
* @param textLength The length of the subject text, or -1 if the string
* is NUL terminated.
* @param status Receives errors detected by this function.
* @stable ICU 3.0
*/
U_STABLE void U_EXPORT2
uregex_setText(URegularExpression *regexp,
const UChar *text,
int32_t textLength,
UErrorCode *status);
/**
* Set the subject text string upon which the regular expression will look for matches.
* This function may be called any number of times, allowing the regular
* expression pattern to be applied to different strings.
*
* Regular expression matching operations work directly on the application's
* string data; only a shallow clone is made. The subject string data must not be
* altered after calling this function until after all regular expression
* operations involving this string data are completed.
*
* @param regexp The compiled regular expression.
* @param text The subject text string.
* @param status Receives errors detected by this function.
*
* @stable ICU 4.6
*/
U_STABLE void U_EXPORT2
uregex_setUText(URegularExpression *regexp,
UText *text,
UErrorCode *status);
/**
* Get the subject text that is currently associated with this
* regular expression object. If the input was supplied using uregex_setText(),
* that pointer will be returned. Otherwise, the characters in the input will
* be extracted to a buffer and returned. In either case, ownership remains
* with the regular expression object.
*
* This function will work even if the input was originally specified as a UText.
*
* @param regexp The compiled regular expression.
* @param textLength The length of the string is returned in this output parameter.
* A NULL pointer may be used here if the
* text length is not needed, as would be the case if
* the text is known in advance to be a NUL terminated
* string.
* @param status Receives errors detected by this function.
* @return Pointer to the subject text string currently associated with
* this regular expression.
* @stable ICU 3.0
*/
U_STABLE const UChar * U_EXPORT2
uregex_getText(URegularExpression *regexp,
int32_t *textLength,
UErrorCode *status);
/**
* Get the subject text that is currently associated with this
* regular expression object.
*
* This function will work even if the input was originally specified as a UChar string.
*
* @param regexp The compiled regular expression.
* @param dest A mutable UText in which to store the current input.
* If NULL, a new UText will be created as an immutable shallow clone
* of the actual input string.
* @param status Receives errors detected by this function.
* @return The subject text currently associated with this regular expression.
* If a pre-allocated UText was provided, it will always be used and returned.
*
* @stable ICU 4.6
*/
U_STABLE UText * U_EXPORT2
uregex_getUText(URegularExpression *regexp,
UText *dest,
UErrorCode *status);
/**
* Set the subject text string upon which the regular expression is looking for matches
* without changing any other aspect of the matching state.
* The new and previous text strings must have the same content.
*
* This function is intended for use in environments where ICU is operating on
* strings that may move around in memory. It provides a mechanism for notifying
* ICU that the string has been relocated, and providing a new UText to access the
* string in its new position.
*
* Note that the regular expression implementation never copies the underlying text
* of a string being matched, but always operates directly on the original text
* provided by the user. Refreshing simply drops the references to the old text
* and replaces them with references to the new.
*
* Caution: this function is normally used only by very specialized
* system-level code. One example use case is with garbage collection
* that moves the text in memory.
*
* @param regexp The compiled regular expression.
* @param text The new (moved) text string.
* @param status Receives errors detected by this function.
*
* @stable ICU 4.8
*/
U_STABLE void U_EXPORT2
uregex_refreshUText(URegularExpression *regexp,
UText *text,
UErrorCode *status);
/**
* Attempts to match the input string against the pattern.
* To succeed, the match must extend to the end of the string,
* or cover the complete match region.
*
* If startIndex >= zero the match operation starts at the specified
* index and must extend to the end of the input string. Any region
* that has been specified is reset.
*
* If startIndex == -1 the match must cover the input region, or the entire
* input string if no region has been set. This directly corresponds to
* Matcher.matches() in Java
*
* @param regexp The compiled regular expression.
* @param startIndex The input string (native) index at which to begin matching, or -1
* to match the input Region.
* @param status Receives errors detected by this function.
* @return TRUE if there is a match
* @stable ICU 3.0
*/
U_STABLE UBool U_EXPORT2
uregex_matches(URegularExpression *regexp,
int32_t startIndex,
UErrorCode *status);
/**
* 64bit version of uregex_matches.
* Attempts to match the input string against the pattern.
* To succeed, the match must extend to the end of the string,
* or cover the complete match region.
*
* If startIndex >= zero the match operation starts at the specified
* index and must extend to the end of the input string. Any region
* that has been specified is reset.
*
* If startIndex == -1 the match must cover the input region, or the entire
* input string if no region has been set. This directly corresponds to
* Matcher.matches() in Java
*
* @param regexp The compiled regular expression.
* @param startIndex The input string (native) index at which to begin matching, or -1
* to match the input Region.
* @param status Receives errors detected by this function.
* @return TRUE if there is a match
* @stable ICU 4.6
*/
U_STABLE UBool U_EXPORT2
uregex_matches64(URegularExpression *regexp,
int64_t startIndex,
UErrorCode *status);
/**
* Attempts to match the input string, starting from the specified index, against the pattern.
* The match may be of any length, and is not required to extend to the end
* of the input string. Contrast with uregex_matches().
*
* If startIndex is >= 0 any input region that was set for this
* URegularExpression is reset before the operation begins.
*
* If the specified starting index == -1 the match begins at the start of the input
* region, or at the start of the full string if no region has been specified.
* This corresponds directly with Matcher.lookingAt() in Java.
*
* If the match succeeds then more information can be obtained via the
* If startIndex is >= 0 any input region that was set for this
* URegularExpression is reset before the operation begins.
*
* If the specified starting index == -1 the match begins at the start of the input
* region, or at the start of the full string if no region has been specified.
* This corresponds directly with Matcher.lookingAt() in Java.
*
* If the match succeeds then more information can be obtained via the
* The input string, starting from the end of the previous match and ending at
* the start of the current match, is appended to the destination string. Then the
* replacement string is appended to the output string,
* including handling any substitutions of captured text. A note on preflight computation of buffersize and error handling:
* Calls to uregex_appendReplacement() and uregex_appendTail() are
* designed to be chained, one after another, with the destination
* buffer pointer and buffer capacity updated after each in preparation
* to for the next. If the destination buffer is exhausted partway through such a
* sequence, a U_BUFFER_OVERFLOW_ERROR status will be returned. Normal
* ICU conventions are for a function to perform no action if it is
* called with an error status, but for this one case, uregex_appendRepacement()
* will operate normally so that buffer size computations will complete
* correctly.
*
* For simple, prepackaged, non-incremental find-and-replace
* operations, see replaceFirst() or replaceAll(). The input string, starting from the end of the previous match and ending at
* the start of the current match, is appended to the destination string. Then the
* replacement string is appended to the output string,
* including handling any substitutions of captured text. For simple, prepackaged, non-incremental find-and-replace
* operations, see replaceFirst() or replaceAll().
* The behavior of this function is not very closely aligned with uregex_split();
* instead, it is based on (and implemented directly on top of) the C++ split method.
*
* @param regexp The compiled regular expression.
* @param destFields An array of mutable UText structs to receive the results of the split.
* If a field is NULL, a new UText is allocated to contain the results for
* that field. This new UText is not guaranteed to be mutable.
* @param destFieldsCapacity The number of elements in the destination array.
* If the number of fields found is less than destFieldsCapacity, the
* extra strings in the destination array are not altered.
* If the number of destination strings is less than the number
* of fields, the trailing part of the input string, including any
* field delimiters, is placed in the last destination string.
* This behavior mimics that of Perl. It is not an error condition, and no
* error status is returned when all destField positions are used.
* @param status A reference to a UErrorCode to receive any errors.
* @return The number of fields into which the input string was split.
*
* @stable ICU 4.6
*/
U_STABLE int32_t U_EXPORT2
uregex_splitUText(URegularExpression *regexp,
UText *destFields[],
int32_t destFieldsCapacity,
UErrorCode *status);
/**
* Set a processing time limit for match operations with this URegularExpression.
*
* Some patterns, when matching certain strings, can run in exponential time.
* For practical purposes, the match operation may appear to be in an
* infinite loop.
* When a limit is set, a match operation will fail with an error if the
* limit is exceeded.
*
* The units of the limit are steps of the match engine.
* Correspondence with actual processor time will depend on the speed
* of the processor and the details of the specific pattern, but will
* typically be on the order of milliseconds.
*
* By default, the matching time is not limited.
*
* @param regexp The compiled regular expression.
* @param limit The limit value, or 0 for no limit.
* @param status A reference to a UErrorCode to receive any errors.
* @see uregex_getTimeLimit
* @stable ICU 4.0
*/
U_STABLE void U_EXPORT2
uregex_setTimeLimit(URegularExpression *regexp,
int32_t limit,
UErrorCode *status);
/**
* Get the time limit for matches with this URegularExpression.
* A return value of zero indicates that there is no limit.
*
* @param regexp The compiled regular expression.
* @param status A reference to a UErrorCode to receive any errors.
* @return the maximum allowed time for a match, in units of processing steps.
* @see uregex_setTimeLimit
* @stable ICU 4.0
*/
U_STABLE int32_t U_EXPORT2
uregex_getTimeLimit(const URegularExpression *regexp,
UErrorCode *status);
/**
* Set the amount of heap storage available for use by the match backtracking stack.
*
* ICU uses a backtracking regular expression engine, with the backtrack stack
* maintained on the heap. This function sets the limit to the amount of memory
* that can be used for this purpose. A backtracking stack overflow will
* result in an error from the match operation that caused it.
*
* A limit is desirable because a malicious or poorly designed pattern can use
* excessive memory, potentially crashing the process. A limit is enabled
* by default.
*
* @param regexp The compiled regular expression.
* @param limit The maximum size, in bytes, of the matching backtrack stack.
* A value of zero means no limit.
* The limit must be greater than or equal to zero.
* @param status A reference to a UErrorCode to receive any errors.
*
* @see uregex_getStackLimit
* @stable ICU 4.0
*/
U_STABLE void U_EXPORT2
uregex_setStackLimit(URegularExpression *regexp,
int32_t limit,
UErrorCode *status);
/**
* Get the size of the heap storage available for use by the backtracking stack.
*
* @param regexp The compiled regular expression.
* @param status A reference to a UErrorCode to receive any errors.
* @return the maximum backtracking stack size, in bytes, or zero if the
* stack size is unlimited.
* @see uregex_setStackLimit
* @stable ICU 4.0
*/
U_STABLE int32_t U_EXPORT2
uregex_getStackLimit(const URegularExpression *regexp,
UErrorCode *status);
/**
* Function type for a regular expression matching callback function.
* The callback is registered and retrieved through a pointer to this type
* (URegexMatchCallback *).
* When set, a callback function will be called periodically during matching
* operations. If the callback function returns FALSE, the matching
* operation will be terminated early.
*
* Note: the callback function must not call other functions on this
* URegularExpression.
*
* @param context context pointer. The callback function will be invoked
* with the context specified at the time that
* uregex_setMatchCallback() is called.
* @param steps the accumulated processing time, in match steps,
* for this matching operation.
* @return TRUE to continue the matching operation.
* FALSE to terminate the matching operation.
* @see uregex_setMatchCallback
* @stable ICU 4.0
*/
U_CDECL_BEGIN
typedef UBool U_CALLCONV URegexMatchCallback (
const void *context,
int32_t steps);
U_CDECL_END
/**
* Set a callback function for this URegularExpression.
* During matching operations the function will be called periodically,
* giving the application the opportunity to terminate a long-running
* match.
*
* @param regexp The compiled regular expression.
* @param callback A pointer to the user-supplied callback function.
* @param context User context pointer. The value supplied at the
* time the callback function is set will be saved
* and passed to the callback each time that it is called.
* @param status A reference to a UErrorCode to receive any errors.
* @see URegexMatchCallback
* @see uregex_getMatchCallback
* @stable ICU 4.0
*/
U_STABLE void U_EXPORT2
uregex_setMatchCallback(URegularExpression *regexp,
URegexMatchCallback *callback,
const void *context,
UErrorCode *status);
/**
* Get the callback function for this URegularExpression.
*
* @param regexp The compiled regular expression.
* @param callback Out parameter, receives a pointer to the user-supplied
* callback function.
* @param context Out parameter, receives the user context pointer that
* was set when uregex_setMatchCallback() was called.
* @param status A reference to a UErrorCode to receive any errors.
* @see URegexMatchCallback
* @see uregex_setMatchCallback
* @stable ICU 4.0
*/
U_STABLE void U_EXPORT2
uregex_getMatchCallback(const URegularExpression *regexp,
URegexMatchCallback **callback,
const void **context,
UErrorCode *status);
/**
* Function type for a regular expression find callback function.
* The callback is registered and retrieved through a pointer to this type
* (URegexFindProgressCallback *).
*
* When set, a callback function will be called during a find operation
* and for operations that depend on find, such as findNext, split and some replace
* operations like replaceFirst.
* The callback will usually be called after each attempt at a match, but this is not a
* guarantee that the callback will be invoked at each character. For finds where the
* match engine is invoked at each character, this may be close to true, but less likely
* for more optimized loops where the pattern is known to only start, and the match
* engine invoked, at certain characters.
* When invoked, this callback will specify the index at which a match operation is about
* to be attempted, giving the application the opportunity to terminate a long-running
* find operation.
*
* If the callback function returns FALSE, the find operation will be terminated early.
*
* Note: the callback function must not call other functions on this
* URegularExpression.
*
* @param context context pointer. The callback function will be invoked
* with the context specified at the time that
* uregex_setFindProgressCallback() is called.
* @param matchIndex the next index at which a match attempt will be made for this
* find operation. If this callback interrupts the search, this is the
* index at which a find/findNext operation may be re-initiated.
* @return TRUE to continue the matching operation.
* FALSE to terminate the matching operation.
* @see uregex_setFindProgressCallback
* @stable ICU 4.6
*/
U_CDECL_BEGIN
typedef UBool U_CALLCONV URegexFindProgressCallback (
const void *context,
int64_t matchIndex);
U_CDECL_END
/**
* Set the find progress callback function for this URegularExpression.
*
* @param regexp The compiled regular expression.
* @param callback A pointer to the user-supplied callback function.
* @param context User context pointer. The value supplied at the
* time the callback function is set will be saved
* and passed to the callback each time that it is called.
* @param status A reference to a UErrorCode to receive any errors.
* @see URegexFindProgressCallback
* @see uregex_getFindProgressCallback
* @stable ICU 4.6
*/
U_STABLE void U_EXPORT2
uregex_setFindProgressCallback(URegularExpression *regexp,
URegexFindProgressCallback *callback,
const void *context,
UErrorCode *status);
/**
* Get the find progress callback function for this URegularExpression.
*
* @param regexp The compiled regular expression.
* @param callback Out parameter, receives a pointer to the user-supplied
* callback function.
* @param context Out parameter, receives the user context pointer that
* was set when uregex_setFindProgressCallback() was called.
* @param status A reference to a UErrorCode to receive any errors.
* @see URegexFindProgressCallback
* @see uregex_setFindProgressCallback
* @stable ICU 4.6
*/
U_STABLE void U_EXPORT2
uregex_getFindProgressCallback(const URegularExpression *regexp,
URegexFindProgressCallback **callback,
const void **context,
UErrorCode *status);
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
#endif /* UREGEX_H */
// uregion.h
/*
*****************************************************************************************
* Copyright (C) 2014, International Business Machines
* Corporation and others. All Rights Reserved.
*****************************************************************************************
*/
#ifndef UREGION_H
#define UREGION_H
/**
* \file
* \brief C API: URegion (territory containment and mapping)
*
* URegion objects represent data associated with a particular Unicode Region Code, also known as a
* Unicode Region Subtag, which is defined based upon the BCP 47 standard. These include:
* * Two-letter codes defined by ISO 3166-1, with special LDML treatment of certain private-use or
* reserved codes;
* * A subset of 3-digit numeric codes defined by UN M.49.
* URegion objects can also provide mappings to and from additional codes. There are different types
* of regions that are important to distinguish:
*
* Macroregion - A code for a "macro geographical (continental) region, geographical sub-region, or
* selected economic and other grouping" as defined in UN M.49. These are typically 3-digit codes,
* but contain some 2-letter codes for LDML extensions, such as "QO" for Outlying Oceania.
* Macroregions are represented in ICU by one of three region types: WORLD (code 001),
* CONTINENTS (regions contained directly by WORLD), and SUBCONTINENTS (regions contained directly
* by a continent).
*
* TERRITORY - A Region that is not a Macroregion. These are typically codes for countries, but also
* include areas that are not separate countries, such as the code "AQ" for Antarctica or the code
* "HK" for Hong Kong (SAR China). Overseas dependencies of countries may or may not have separate
* codes. The codes are typically 2-letter codes aligned with ISO 3166, but BCP47 allows for the use
* of 3-digit codes in the future.
*
* UNKNOWN - The code ZZ is defined by Unicode LDML for use in indicating that the region is unknown,
* or that the value supplied as a region was invalid.
*
* DEPRECATED - Region codes that have been defined in the past but are no longer in modern usage,
* usually due to a country splitting into multiple territories or changing its name.
*
* GROUPING - A widely understood grouping of territories that has a well defined membership such
* that a region code has been assigned for it. Some of these are UN M.49 codes that don't fall into
* the world/continent/sub-continent hierarchy, while others are just well-known groupings that have
* their own region code. Region "EU" (European Union) is one such region code that is a grouping.
* Groupings will never be returned by the uregion_getContainingRegion, since a different type of region
* (WORLD, CONTINENT, or SUBCONTINENT) will always be the containing region instead.
*
* URegion objects are const/immutable, owned and maintained by ICU itself, so there are no functions
* to open or close them.
*/
/**
* URegionType enumerates the kinds of region a URegion can represent.
* The values currently defined are URGN_WORLD, URGN_CONTINENT,
* URGN_SUBCONTINENT, URGN_TERRITORY, URGN_GROUPING, URGN_DEPRECATED,
* and URGN_UNKNOWN. Values are consecutive, starting at 0.
*
* @stable ICU 51
*/
typedef enum URegionType {
/**
* The unknown region.
* @stable ICU 51
*/
URGN_UNKNOWN = 0,
/**
* A territory.
* @stable ICU 51
*/
URGN_TERRITORY = 1,
/**
* The whole world.
* @stable ICU 51
*/
URGN_WORLD = 2,
/**
* A continent.
* @stable ICU 51
*/
URGN_CONTINENT = 3,
/**
* A sub-continent.
* @stable ICU 51
*/
URGN_SUBCONTINENT = 4,
/**
* A grouping of territories that does not take part in the normal
* WORLD/CONTINENT/SUBCONTINENT/TERRITORY containment tree.
* @stable ICU 51
*/
URGN_GROUPING = 5,
/**
* A region whose code has been deprecated, usually because a country
* split into multiple territories or changed its name.
* @stable ICU 51
*/
URGN_DEPRECATED = 6,
/**
* Upper limit for this enumeration (one more than the last region type).
* @stable ICU 51
*/
URGN_LIMIT = 7
} URegionType;
#if !UCONFIG_NO_FORMATTING
/**
* Opaque URegion object for use in C programs.
* Only the incomplete struct type is declared here, so C code handles
* URegion exclusively through pointers.
* @stable ICU 52
*/
struct URegion;
typedef struct URegion URegion; /**< @stable ICU 52 */
/**
* Returns a pointer to a URegion for the specified region code: A 2-letter or 3-letter ISO 3166
* code, UN M.49 numeric code (superset of ISO 3166 numeric codes), or other valid Unicode Region
* Code as defined by the LDML specification. The code will be canonicalized internally. If the
* region code is NULL or not recognized, the appropriate error code will be set
* (U_ILLEGAL_ARGUMENT_ERROR).
* @param regionCode The region code to look up, in one of the forms listed above.
* @param status A reference to a UErrorCode to receive any errors; set to
* U_ILLEGAL_ARGUMENT_ERROR if regionCode is NULL or not recognized.
* @return A pointer to the URegion for the specified region code.
* @stable ICU 52
*/
U_STABLE const URegion* U_EXPORT2
uregion_getRegionFromCode(const char *regionCode, UErrorCode *status);
/**
* Returns a pointer to a URegion for the specified numeric region code. If the numeric region
* code is not recognized, the appropriate error code will be set (U_ILLEGAL_ARGUMENT_ERROR).
* @param code The numeric region code to look up.
* @param status A reference to a UErrorCode to receive any errors; set to
* U_ILLEGAL_ARGUMENT_ERROR if the code is not recognized.
* @return A pointer to the URegion for the specified numeric region code.
* @stable ICU 52
*/
U_STABLE const URegion* U_EXPORT2
uregion_getRegionFromNumericCode (int32_t code, UErrorCode *status);
/**
* Returns an enumeration over the canonical codes of all known regions that match the given type.
* The enumeration must be closed with uenum_close().
* @param type The region type to match.
* @param status A reference to a UErrorCode to receive any errors.
* @return An enumeration over the canonical codes of the matching regions.
* @stable ICU 52
*/
U_STABLE UEnumeration* U_EXPORT2
uregion_getAvailable(URegionType type, UErrorCode *status);
/**
* Returns true if the specified uregion is equal to the specified otherRegion.
* @param uregion The first region to compare.
* @param otherRegion The region to compare with uregion.
* @return true if the two regions are equal.
* @stable ICU 52
*/
U_STABLE UBool U_EXPORT2
uregion_areEqual(const URegion* uregion, const URegion* otherRegion);
/**
* Returns a pointer to the URegion that contains the specified uregion. Returns NULL if the
* specified uregion is code "001" (World) or "ZZ" (Unknown region). For example, calling
* this method with region "IT" (Italy) returns the URegion for "039" (Southern Europe).
* @param uregion The region whose containing region is requested.
* @return The containing URegion, or NULL for "001" (World) and "ZZ" (Unknown region).
* @stable ICU 52
*/
U_STABLE const URegion* U_EXPORT2
uregion_getContainingRegion(const URegion* uregion);
/**
* Returns a pointer to the URegion that geographically contains this uregion and matches the
* specified type, moving multiple steps up the containment chain if necessary. Returns NULL if no
* containing region can be found that matches the specified type. Will return NULL if URegionType
* is URGN_GROUPING, URGN_DEPRECATED, or URGN_UNKNOWN, which are not appropriate for this API.
* For example, calling this method with uregion "IT" (Italy) for type URGN_CONTINENT returns the
* URegion "150" (Europe).
* @param uregion The region whose containing region is requested.
* @param type The type the containing region must have.
* @return The containing URegion of the specified type, or NULL if there is none.
* @stable ICU 52
*/
U_STABLE const URegion* U_EXPORT2
uregion_getContainingRegionOfType(const URegion* uregion, URegionType type);
/**
* Returns an enumeration over the canonical codes of all the regions that are immediate children
* of the specified uregion in the region hierarchy. These returned regions could be either macro
* regions, territories, or a mixture of the two, depending on the containment data as defined in
* CLDR. This API returns NULL if this uregion doesn't have any sub-regions. For example, calling
* this function for uregion "150" (Europe) returns an enumeration containing the various
* sub-regions of Europe: "039" (Southern Europe), "151" (Eastern Europe), "154" (Northern Europe),
* and "155" (Western Europe). The enumeration must be closed with uenum_close().
* @param uregion The region whose immediate children are requested.
* @param status A reference to a UErrorCode to receive any errors.
* @return An enumeration over the canonical codes of the immediate sub-regions,
* or NULL if uregion has no sub-regions.
* @stable ICU 52
*/
U_STABLE UEnumeration* U_EXPORT2
uregion_getContainedRegions(const URegion* uregion, UErrorCode *status);
/**
* Returns an enumeration over the canonical codes of all the regions that are children of the
* specified uregion anywhere in the region hierarchy and match the given type. This API may return
* an empty enumeration if this uregion doesn't have any sub-regions that match the given type.
* For example, calling this method with region "150" (Europe) and type URGN_TERRITORY returns an
* enumeration containing all the territories in Europe: "FR" (France), "IT" (Italy), "DE" (Germany),
* etc. The enumeration must be closed with uenum_close().
* @param uregion The region whose descendants are requested.
* @param type The type the returned regions must have.
* @param status A reference to a UErrorCode to receive any errors.
* @return An enumeration over the canonical codes of the matching sub-regions; it may be empty.
* @stable ICU 52
*/
U_STABLE UEnumeration* U_EXPORT2
uregion_getContainedRegionsOfType(const URegion* uregion, URegionType type, UErrorCode *status);
/**
* Returns true if the specified uregion contains the specified otherRegion anywhere in the region
* hierarchy.
* @param uregion The candidate containing region.
* @param otherRegion The region to look for inside uregion.
* @return true if uregion contains otherRegion anywhere in the region hierarchy.
* @stable ICU 52
*/
U_STABLE UBool U_EXPORT2
uregion_contains(const URegion* uregion, const URegion* otherRegion);
/**
* If the specified uregion is deprecated, returns an enumeration over the canonical codes of the
* regions that are the preferred replacement regions for the specified uregion. If the specified
* uregion is not deprecated, returns NULL. For example, calling this method with uregion
* "SU" (Soviet Union) returns a list of the regions containing "RU" (Russia), "AM" (Armenia),
* "AZ" (Azerbaijan), etc. The enumeration must be closed with uenum_close().
* @param uregion The region whose preferred replacements are requested.
* @param status A reference to a UErrorCode to receive any errors.
* @return An enumeration over the canonical codes of the preferred replacement regions,
* or NULL if uregion is not deprecated.
* @stable ICU 52
*/
U_STABLE UEnumeration* U_EXPORT2
uregion_getPreferredValues(const URegion* uregion, UErrorCode *status);
/**
* Returns the specified uregion's canonical code.
* @param uregion The region whose canonical code is requested.
* @return The canonical code of uregion.
* @stable ICU 52
*/
U_STABLE const char* U_EXPORT2
uregion_getRegionCode(const URegion* uregion);
/**
* Returns the specified uregion's numeric code, or a negative value if there is no numeric code
* for the specified uregion.
* @param uregion The region whose numeric code is requested.
* @return The numeric code of uregion, or a negative value if it has none.
* @stable ICU 52
*/
U_STABLE int32_t U_EXPORT2
uregion_getNumericCode(const URegion* uregion);
/**
* Returns the URegionType of the specified uregion.
* @param uregion The region whose type is requested.
* @return The URegionType of uregion.
* @stable ICU 52
*/
U_STABLE URegionType U_EXPORT2
uregion_getType(const URegion* uregion);
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif
// ureldatefmt.h
/*
*****************************************************************************************
* Copyright (C) 2016, International Business Machines
* Corporation and others. All Rights Reserved.
*****************************************************************************************
*/
#ifndef URELDATEFMT_H
#define URELDATEFMT_H
#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_BREAK_ITERATION
/**
* \file
* \brief C API: URelativeDateTimeFormatter, relative date formatting of unit + numeric offset.
*
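* Provides simple formatting of relative dates, in two ways:
* - relative dates with a quantity, e.g. "in 5 days"
* - relative dates without a quantity, e.g. "next Tuesday"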
*
* This does not provide compound formatting for multiple units,
* other than the ability to combine a time string with a relative date,
* as in "next Tuesday at 3:45 PM". It also does not provide support
* for determining which unit to use, such as deciding between "in 7 days"
* and "in 1 week".
*
* @draft ICU 57
*/
/**
* Formatting style for relative date/time output.
* Values are consecutive, starting at 0.
* @stable ICU 54
*/
typedef enum UDateRelativeDateTimeFormatterStyle {
/**
* Long form: everything is spelled out.
* @stable ICU 54
*/
UDAT_STYLE_LONG = 0,
/**
* Short form: abbreviations are used when possible.
* @stable ICU 54
*/
UDAT_STYLE_SHORT = 1,
/**
* Narrow form: the shortest possible form is used.
* @stable ICU 54
*/
UDAT_STYLE_NARROW = 2,
/**
* Number of styles defined above.
* @stable ICU 54
*/
UDAT_STYLE_COUNT = 3
} UDateRelativeDateTimeFormatterStyle;
#endif /* !UCONFIG_NO_FORMATTING && !UCONFIG_NO_BREAK_ITERATION */
#endif
// reldatefmt.h
/*
*****************************************************************************
* Copyright (C) 2014-2016, International Business Machines Corporation and
* others.
* All Rights Reserved.
*****************************************************************************
*
* File RELDATEFMT.H
*****************************************************************************
*/
#ifndef __RELDATEFMT_H
#define __RELDATEFMT_H
/**
* \file
* \brief C++ API: Formats relative dates such as "1 day ago" or "tomorrow"
*/
#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_BREAK_ITERATION
/**
* Unit used when formatting a relative date, e.g. "in 5 days"
* or "in 3 months". Values are consecutive, starting at 0.
* @stable ICU 53
*/
typedef enum UDateRelativeUnit {
/**
* Unit: seconds.
* @stable ICU 53
*/
UDAT_RELATIVE_SECONDS = 0,
/**
* Unit: minutes.
* @stable ICU 53
*/
UDAT_RELATIVE_MINUTES = 1,
/**
* Unit: hours.
* @stable ICU 53
*/
UDAT_RELATIVE_HOURS = 2,
/**
* Unit: days.
* @stable ICU 53
*/
UDAT_RELATIVE_DAYS = 3,
/**
* Unit: weeks.
* @stable ICU 53
*/
UDAT_RELATIVE_WEEKS = 4,
/**
* Unit: months.
* @stable ICU 53
*/
UDAT_RELATIVE_MONTHS = 5,
/**
* Unit: years.
* @stable ICU 53
*/
UDAT_RELATIVE_YEARS = 6,
/**
* Number of units defined above.
* @stable ICU 53
*/
UDAT_RELATIVE_UNIT_COUNT = 7
} UDateRelativeUnit;
/**
* Absolute unit used when formatting a relative date without a quantity.
* Values are consecutive, starting at 0.
* @stable ICU 53
*/
typedef enum UDateAbsoluteUnit {
/* The days of the week must stay contiguous and in order,
   Sunday through Saturday. */
/**
* Day of week: Sunday.
* @stable ICU 53
*/
UDAT_ABSOLUTE_SUNDAY = 0,
/**
* Day of week: Monday.
* @stable ICU 53
*/
UDAT_ABSOLUTE_MONDAY = 1,
/**
* Day of week: Tuesday.
* @stable ICU 53
*/
UDAT_ABSOLUTE_TUESDAY = 2,
/**
* Day of week: Wednesday.
* @stable ICU 53
*/
UDAT_ABSOLUTE_WEDNESDAY = 3,
/**
* Day of week: Thursday.
* @stable ICU 53
*/
UDAT_ABSOLUTE_THURSDAY = 4,
/**
* Day of week: Friday.
* @stable ICU 53
*/
UDAT_ABSOLUTE_FRIDAY = 5,
/**
* Day of week: Saturday.
* @stable ICU 53
*/
UDAT_ABSOLUTE_SATURDAY = 6,
/**
* Unit: day.
* @stable ICU 53
*/
UDAT_ABSOLUTE_DAY = 7,
/**
* Unit: week.
* @stable ICU 53
*/
UDAT_ABSOLUTE_WEEK = 8,
/**
* Unit: month.
* @stable ICU 53
*/
UDAT_ABSOLUTE_MONTH = 9,
/**
* Unit: year.
* @stable ICU 53
*/
UDAT_ABSOLUTE_YEAR = 10,
/**
* Now.
* @stable ICU 53
*/
UDAT_ABSOLUTE_NOW = 11,
/**
* Number of units defined above.
* @stable ICU 53
*/
UDAT_ABSOLUTE_UNIT_COUNT = 12
} UDateAbsoluteUnit;
/**
* Direction qualifier applied to an absolute unit, e.g. "Next Tuesday"
* or "Last Tuesday". Values are consecutive, starting at 0.
* @stable ICU 53
*/
typedef enum UDateDirection {
/**
* Two before. Not fully supported in every locale.
* @stable ICU 53
*/
UDAT_DIRECTION_LAST_2 = 0,
/**
* Last.
* @stable ICU 53
*/
UDAT_DIRECTION_LAST = 1,
/**
* This.
* @stable ICU 53
*/
UDAT_DIRECTION_THIS = 2,
/**
* Next.
* @stable ICU 53
*/
UDAT_DIRECTION_NEXT = 3,
/**
* Two after. Not fully supported in every locale.
* @stable ICU 53
*/
UDAT_DIRECTION_NEXT_2 = 4,
/**
* Plain: no qualifier is present.
* @stable ICU 53
*/
UDAT_DIRECTION_PLAIN = 5,
/**
* Number of directions defined above.
* @stable ICU 53
*/
UDAT_DIRECTION_COUNT = 6
} UDateDirection;
#endif /* !UCONFIG_NO_FORMATTING && !UCONFIG_NO_BREAK_ITERATION*/
#endif
// usearch.h
/*
**********************************************************************
* Copyright (C) 2001-2011,2014 IBM and others. All rights reserved.
**********************************************************************
* Date Name Description
* 06/28/2001 synwee Creation.
**********************************************************************
*/
#ifndef USEARCH_H
#define USEARCH_H
#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
/**
* \file
* \brief C API: StringSearch
*
* C APIs for an engine that provides language-sensitive text searching based
* on the comparison rules defined in a UCollator data struct,
* see ucol.h. This ensures that language eccentricity can be
* handled, e.g. for the German collator, characters ß and SS will be matched
* if case is chosen to be ignored.
* See the
* "ICU Collation Design Document" for more information.
*
* The implementation may use a linear search or a modified form of the Boyer-Moore
* search; for more information on the latter see
*
* "Efficient Text Searching in Java", published in Java Report
* in February, 1999.
*
* There are 2 match options for selection:
* NOTE(review): the descriptions of the two match options are missing from this
* merged header; consult the upstream ICU usearch.h.
*
* This search has APIs similar to that of other text iteration mechanisms
* such as the break iterators in ubrk.h. Using these
* APIs, it is easy to scan through text looking for all occurrences of
* a given pattern. This search iterator allows changing of direction by
* calling a reset followed by a next or previous.
* Though a direction change can occur without calling reset first,
* this operation comes with some speed penalty.
* Generally, match results in the forward direction will match the result
* matches in the backwards direction in the reverse order.
*
* usearch.h provides APIs to specify the starting position
* within the text string to be searched, e.g. usearch_setOffset,
* usearch_preceding and usearch_following. Since the
* starting position will be set as it is specified, please take note that
* there are some dangerous positions at which the search may render incorrect
* results:
* NOTE(review): the list of such positions is missing from this merged header;
* consult the upstream ICU usearch.h.
*
* A breakiterator can be used if only matches at logical breaks are desired.
* Using a breakiterator will only give you results that exactly match the
* boundaries given by the breakiterator. For instance the pattern "e" will
* not be found in the string "\u00e9" if a character break iterator is used.
*
* Options are provided to handle overlapping matches.
* E.g. In English, overlapping matches produce the result 0 and 2
* for the pattern "abab" in the text "ababab", whereas mutually
* exclusive matches only produce the result of 0.
*
* Options are also provided to implement "asymmetric search" as described in
*
* UTS #10 Unicode Collation Algorithm, specifically the USearchAttribute
* USEARCH_ELEMENT_COMPARISON and its values.
*
* Though collator attributes will be taken into consideration while
* performing matches, there are no APIs here for setting and getting the
* attributes. These attributes can be set by getting the collator
* from usearch_getCollator and using the APIs in ucol.h.
* Lastly to update String Search to the new collator attributes,
* usearch_reset() has to be called.
*
* Restriction:
* Example of use:
* NOTE(review): the contents of the "Restriction" and "Example of use" sections
* are missing from this merged header; consult the upstream ICU usearch.h.
*/
/**
* Returns the index of the current match within the text string that is
* being searched.
* Use usearch_getMatchedLength to get the matched string length.
* @param strsrch search iterator data struct
* @return index to a substring within the text string that is being
* searched.
* @see #usearch_first
* @see #usearch_next
* @see #usearch_previous
* @see #usearch_last
* @see #USEARCH_DONE
* @stable ICU 2.4
*/
U_STABLE int32_t U_EXPORT2 usearch_getMatchedStart(
const UStringSearch *strsrch);
/**
* Returns the length of text in the string which matches the search pattern.
* This call returns a valid result only after a successful call to
* usearch_first, usearch_next, usearch_previous,
* or usearch_last.
* Just after construction, or after a searching method returns
* USEARCH_DONE, this method will return 0.
* @param strsrch search iterator data struct
* @return The length of the match in the string text, or 0 if there is
* currently no match.
* @see #usearch_getMatchedStart
* @see #usearch_first
* @see #usearch_next
* @see #usearch_previous
* @see #usearch_last
* @see #USEARCH_DONE
* @stable ICU 2.4
*/
U_STABLE int32_t U_EXPORT2 usearch_getMatchedLength(
const UStringSearch *strsrch);
/**
* Returns the text that was matched by the most recent call to
* usearch_first, usearch_next, usearch_previous,
* or usearch_last.
* If the iterator is not pointing at a valid match (e.g. just after
* construction or after USEARCH_DONE has been returned), returns
* an empty string. If result is not large enough to store the matched text,
* result will be filled with the partial text and a U_BUFFER_OVERFLOW_ERROR
* will be returned in status. result will be null-terminated whenever
* possible. If the buffer fits the matched text exactly, null-termination
* is not possible, and a U_STRING_NOT_TERMINATED_ERROR is set in status.
* Pre-flighting can be done either with a resultCapacity of 0 or with the API
* usearch_getMatchedLength.
* @param strsrch search iterator data struct
* @param result UChar buffer to store the matched string
* @param resultCapacity length of the result buffer
* @param status error returned if result is not large enough
* @return exact length of the matched text, not counting the null-termination
* @see #usearch_getMatchedLength
* @see #usearch_first
* @see #usearch_next
* @see #usearch_previous
* @see #usearch_last
* @see #USEARCH_DONE
* @stable ICU 2.4
*/
U_STABLE int32_t U_EXPORT2 usearch_getMatchedText(const UStringSearch *strsrch,
UChar *result,
int32_t resultCapacity,
UErrorCode *status);
#if !UCONFIG_NO_BREAK_ITERATION
/**
* Set the BreakIterator that will be used to restrict the points at which
* matches are detected.
* @param strsrch search iterator data struct
* @param breakiter A BreakIterator that will be used to restrict the points
* at which matches are detected. If a match is found, but
* the match's start or end index is not a boundary as
* determined by the BreakIterator, the match will
* be rejected and another will be searched for.
* If this parameter is NULL, no break detection is
* attempted.
* @param status receives an error code if an error occurs
* @see #usearch_getBreakIterator
* @stable ICU 2.4
*/
U_STABLE void U_EXPORT2 usearch_setBreakIterator(UStringSearch *strsrch,
UBreakIterator *breakiter,
UErrorCode *status);
/**
* Returns the BreakIterator that is used to restrict the points at which
* matches are detected. This will be the same object that was passed to the
* constructor or to usearch_setBreakIterator. Note that NULL
* is a legal value; it means that break detection should not be attempted.
* @param strsrch search iterator data struct
* @return break iterator used, possibly NULL
* @see #usearch_setBreakIterator
* @stable ICU 2.4
*/
U_STABLE const UBreakIterator * U_EXPORT2 usearch_getBreakIterator(
const UStringSearch *strsrch);
#endif
/**
* Set the string text to be searched. Text iteration will hence begin at the
* start of the text string. This method is useful if you want to re-use an
* iterator to search for the same pattern within a different body of text.
* @param strsrch search iterator data struct
* @param text new string in which to look for matches
* @param textlength length of the new string, -1 for null-termination
* @param status receives an error code if an error occurs. If text is NULL,
* or textlength is 0, then a U_ILLEGAL_ARGUMENT_ERROR is returned
* with no change done to strsrch.
* @see #usearch_getText
* @stable ICU 2.4
*/
U_STABLE void U_EXPORT2 usearch_setText( UStringSearch *strsrch,
const UChar *text,
int32_t textlength,
UErrorCode *status);
/**
* Return the string text to be searched.
* @param strsrch search iterator data struct
* @param length out parameter, receives the length of the string text
* @return string text
* @see #usearch_setText
* @stable ICU 2.4
*/
U_STABLE const UChar * U_EXPORT2 usearch_getText(const UStringSearch *strsrch,
int32_t *length);
/**
* Gets the collator used for the language rules.
*
* Deleting the returned UCollator before calling
* usearch_close would cause the string search to fail.
* usearch_close will delete the collator if this search owns it.
* @param strsrch search iterator data struct
* @return collator
* @see #usearch_setCollator
* @stable ICU 2.4
*/
U_STABLE UCollator * U_EXPORT2 usearch_getCollator(
const UStringSearch *strsrch);
/**
* Sets the collator used for the language rules. The user retains ownership
* of this collator, thus the responsibility of deletion lies with the user.
* This method causes internal data such as Boyer-Moore shift tables to
* be recalculated, but the iterator's position is unchanged.
* @param strsrch search iterator data struct
* @param collator to be used
* @param status receives an error code if an error occurs
* @see #usearch_getCollator
* @stable ICU 2.4
*/
U_STABLE void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch,
const UCollator *collator,
UErrorCode *status);
/**
* Sets the pattern used for matching.
* Internal data like the Boyer-Moore table will be recalculated, but the
* iterator's position is unchanged.
* @param strsrch search iterator data struct
* @param pattern string
* @param patternlength pattern length, -1 for null-terminated string
* @param status receives an error code if an error occurs. If pattern is NULL,
* or patternlength is 0, then a U_ILLEGAL_ARGUMENT_ERROR is returned
* with no change done to strsrch.
* @see #usearch_getPattern
* @stable ICU 2.4
*/
U_STABLE void U_EXPORT2 usearch_setPattern( UStringSearch *strsrch,
const UChar *pattern,
int32_t patternlength,
UErrorCode *status);
/**
* Gets the search pattern.
* @param strsrch search iterator data struct
* @param length out parameter, receives the length of the pattern; -1 indicates
* that the pattern is null-terminated
* @return pattern string
* @see #usearch_setPattern
* @stable ICU 2.4
*/
U_STABLE const UChar * U_EXPORT2 usearch_getPattern(
const UStringSearch *strsrch,
int32_t *length);
/* methods ------------------------------------------------------------- */
/**
* Returns the first index at which the string text matches the search
* pattern.
* The iterator is adjusted so that its current index (as returned by
* usearch_getOffset) is the match position if one was found.
* If a match is not found, USEARCH_DONE will be returned and
* the iterator will be adjusted to the index USEARCH_DONE.
* @param strsrch search iterator data struct
* @param status for errors if any occur
* @return The character index of the first match, or
* USEARCH_DONE if there are no matches.
* @see #usearch_getOffset
* @see #USEARCH_DONE
* @stable ICU 2.4
*/
U_STABLE int32_t U_EXPORT2 usearch_first(UStringSearch *strsrch,
UErrorCode *status);
/**
* Returns the first index equal to or greater than position at which
* the string text matches the search pattern.
* The iterator is adjusted so that its current
* index (as returned by usearch_getOffset) is the match position if
* one was found.
* If a match is not found, USEARCH_DONE will be returned and
* the iterator will be adjusted to the index USEARCH_DONE.
*
* Search positions that may render incorrect results are highlighted in the
* header comments. If position is less than or greater than the text range
* for searching, a U_INDEX_OUTOFBOUNDS_ERROR will be returned.
* @param strsrch search iterator data struct
* @param position index position to start the search at
* @param status for errors if any occur
* @return The character index of the first match following position,
* or USEARCH_DONE if there are no matches.
* @see #usearch_getOffset
* @see #USEARCH_DONE
* @stable ICU 2.4
*/
U_STABLE int32_t U_EXPORT2 usearch_following(UStringSearch *strsrch,
int32_t position,
UErrorCode *status);
/**
* Returns the last index in the target text at which it matches the search
* pattern. The iterator is adjusted so that its current
* index (as returned by usearch_getOffset) is the match position if
* one was found.
* If a match is not found, USEARCH_DONE will be returned and
* the iterator will be adjusted to the index USEARCH_DONE.
* @param strsrch search iterator data struct
* @param status for errors if any occur
* @return The index of the last match, or USEARCH_DONE if there
* are no matches.
* @see #usearch_getOffset
* @see #USEARCH_DONE
* @stable ICU 2.4
*/
U_STABLE int32_t U_EXPORT2 usearch_last(UStringSearch *strsrch,
UErrorCode *status);
/**
* Returns the first index less than position at which the string text
* matches the search pattern. The iterator is adjusted so that its current
* index (as returned by usearch_getOffset) is the match position if
* one was found.
* If a match is not found, USEARCH_DONE will be returned and
* the iterator will be adjusted to the index USEARCH_DONE.
*
* Search positions that may render incorrect results are highlighted in the
* header comments. If position is less than or greater than the text range
* for searching, a U_INDEX_OUTOFBOUNDS_ERROR will be returned.
*
* When the USEARCH_OVERLAP option is off, the last index of the
* result match is always less than position.
* When USEARCH_OVERLAP is on, the result match may span across
* position.
* @param strsrch search iterator data struct
* @param position index position the search is to begin at
* @param status for errors if any occur
* @return The character index of the first match preceding position,
* or USEARCH_DONE if there are no matches.
* @see #usearch_getOffset
* @see #USEARCH_DONE
* @stable ICU 2.4
*/
U_STABLE int32_t U_EXPORT2 usearch_preceding(UStringSearch *strsrch,
int32_t position,
UErrorCode *status);
/**
* Returns the index of the next point at which the string text matches the
* search pattern, starting from the current position.
* The iterator is adjusted so that its current
* index (as returned by usearch_getOffset) is the match position if
* one was found.
* If a match is not found, USEARCH_DONE will be returned and
* the iterator will be adjusted to the index USEARCH_DONE.
* @param strsrch search iterator data struct
* @param status for errors if any occur
* @return The index of the next match after the current position, or
* USEARCH_DONE if there are no more matches.
* @see #usearch_first
* @see #usearch_getOffset
* @see #USEARCH_DONE
* @stable ICU 2.4
*/
U_STABLE int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch,
UErrorCode *status);
/**
* Returns the index of the previous point at which the string text matches
* the search pattern, starting at the current position.
* The iterator is adjusted so that its current
* index (as returned by usearch_getOffset) is the match position if
* one was found.
* If a match is not found, USEARCH_DONE will be returned and
* the iterator will be adjusted to the index USEARCH_DONE.
* @param strsrch search iterator data struct
* @param status for errors if any occur
* @return The index of the previous match before the current position,
* or USEARCH_DONE if there are no more matches.
* @see #usearch_last
* @see #usearch_getOffset
* @see #USEARCH_DONE
* @stable ICU 2.4
*/
U_STABLE int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch,
UErrorCode *status);
/**
* Resets the iteration.
* After a reset, the starting point of the search depends on the direction
* of the first iteration that follows: if a forward iteration is initiated
* first, the search will begin at the start of the text string; if a
* backwards iteration is initiated first, the search will begin at the end
* of the text string.
* @param strsrch search iterator data struct
* @see #usearch_first
* @stable ICU 2.4
*/
U_STABLE void U_EXPORT2 usearch_reset(UStringSearch *strsrch);
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
#endif
// search.h
/*
**********************************************************************
* Copyright (C) 2001-2011 IBM and others. All rights reserved.
**********************************************************************
* Date Name Description
* 03/22/2000 helena Creation.
**********************************************************************
*/
#ifndef SEARCH_H
#define SEARCH_H
/**
* \file
* \brief C++ API: SearchIterator object.
*/
#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
/**
* Incomplete struct type USearch together with its same-named typedef.
* Only the tag is introduced here; no members are declared at this point,
* so the type can be used through pointers only.
* @stable ICU 2.0
*/
typedef struct USearch USearch;
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
#endif
// uspoof.h
/*
***************************************************************************
* Copyright (C) 2008-2015, International Business Machines Corporation
* and others. All Rights Reserved.
***************************************************************************
* file name: uspoof.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2008Feb13
* created by: Andy Heninger
*
* Unicode Spoof Detection
*/
#ifndef USPOOF_H
#define USPOOF_H
#if !UCONFIG_NO_NORMALIZATION
/**
* \file
* \brief Unicode Security and Spoofing Detection, C API.
*
* These functions are intended to check strings, typically
* identifiers of some type, such as URLs, for the presence of
* characters that are likely to be visually confusing -
* for cases where the displayed form of an identifier may
* not be what it appears to be.
*
* Unicode Technical Report #36, http://unicode.org/reports/tr36, and
* Unicode Technical Standard #39, http://unicode.org/reports/tr39
* "Unicode security considerations", give more background on
* security and spoofing issues with Unicode identifiers.
* The tests and checks provided by this module implement the recommendations
* from those Unicode documents.
*
* The tests available on identifiers fall into two general categories:
* -# Single identifier tests. Check whether an identifier is
* potentially confusable with any other string, or is suspicious
* for other reasons.
* -# Two identifier tests. Check whether two specific identifiers are confusable.
* This does not consider whether either of strings is potentially
* confusable with any string other than the exact one specified.
*
* The steps to perform confusability testing are
* -# Open a USpoofChecker.
* -# Configure the USpoofChecker for the desired set of tests. The tests that will
* be performed are specified by a set of USpoofChecks flags.
* -# Perform the checks using the pre-configured USpoofChecker. The results indicate
* which (if any) of the selected tests have identified possible problems with the identifier.
* Results are reported as a set of USpoofChecks flags; this mirrors the form in which
* the set of tests to perform was originally specified to the USpoofChecker.
*
* A USpoofChecker may be used repeatedly to perform checks on any number of identifiers.
*
* Thread Safety: The test functions for checking a single identifier, or for testing
* whether two identifiers are possibly confusable, are thread safe.
* They may be called concurrently, from multiple threads, using the same USpoofChecker instance.
*
* More generally, the standard ICU thread safety rules apply: functions that take a
* const USpoofChecker parameter are thread safe. Those that take a non-const
* USpoofChecker are not thread safe.
*
*
* Descriptions of the available checks.
*
* When testing whether pairs of identifiers are confusable, with the uspoof_areConfusable()
* family of functions, the relevant tests are
*
* -# USPOOF_SINGLE_SCRIPT_CONFUSABLE: All of the characters from the two identifiers are
* from a single script, and the two identifiers are visually confusable.
* -# USPOOF_MIXED_SCRIPT_CONFUSABLE: At least one of the identifiers contains characters
* from more than one script, and the two identifiers are visually confusable.
* -# USPOOF_WHOLE_SCRIPT_CONFUSABLE: Each of the two identifiers is of a single script, but
* the two identifiers are from different scripts, and they are visually confusable.
*
* The safest approach is to enable all three of these checks as a group.
*
* USPOOF_ANY_CASE is a modifier for the above tests. If the identifiers being checked can
* be of mixed case and are used in a case-sensitive manner, this option should be specified.
*
* If the identifiers being checked are used in a case-insensitive manner, and if they are
* displayed to users in lower-case form only, the USPOOF_ANY_CASE option should not be
* specified. Confusability issues involving upper case letters will not be reported.
*
* When performing tests on a single identifier, with the uspoof_check() family of functions,
* the relevant tests are:
*
* -# USPOOF_MIXED_SCRIPT_CONFUSABLE: the identifier contains characters from multiple
* scripts, and there exists an identifier of a single script that is visually confusable.
* -# USPOOF_WHOLE_SCRIPT_CONFUSABLE: the identifier consists of characters from a single
* script, and there exists a visually confusable identifier.
* The visually confusable identifier also consists of characters from a single script,
* but not the same script as the identifier being checked.
* -# USPOOF_ANY_CASE: modifies the mixed script and whole script confusables tests. If
* specified, the checks will consider confusable characters of any case. If this flag is not
* set, the test is performed assuming case folded identifiers.
* -# USPOOF_SINGLE_SCRIPT: check that the identifier contains only characters from a
* single script. (Characters from the 'common' and 'inherited' scripts are ignored.)
* This is not a test for confusable identifiers.
* -# USPOOF_INVISIBLE: check an identifier for the presence of invisible characters,
* such as zero-width spaces, or character sequences that are
* likely not to display, such as multiple occurrences of the same
* non-spacing mark. This check does not test the input string as a whole
* for conformance to any particular syntax for identifiers.
* -# USPOOF_CHAR_LIMIT: check that an identifier contains only characters from a specified set
* of acceptable characters. See uspoof_setAllowedChars() and
* uspoof_setAllowedLocales().
*
* Note on Scripts:
* Characters from the Unicode Scripts "Common" and "Inherited" are ignored when considering
* the script of an identifier. Common characters include digits and symbols that
* are normally used with text from more than one script.
*
* Identifier Skeletons: A skeleton is a transformation of an identifier, such that
* all identifiers that are confusable with each other have the same skeleton.
* Using skeletons, it is possible to build a dictionary data structure for
* a set of identifiers, and then quickly test whether a new identifier is
* confusable with an identifier already in the set. The uspoof_getSkeleton()
* family of functions will produce the skeleton from an identifier.
*
* Note that skeletons are not guaranteed to be stable between versions
* of Unicode or ICU, so applications should not rely on creating a permanent,
* or difficult to update, database of skeletons. Instabilities result from
* identifying new pairs or sequences of characters that are visually
* confusable, and thus must be mapped to the same skeleton character(s).
*
* Skeletons are computed using the algorithm and data described in Unicode UAX 39.
* The latest proposed update, UAX 39 Version 8 draft 1, says "the tables SL, SA, and ML
* were still problematic, and discouraged from use in [Unicode] 7.0.
* They were thus removed from version 8.0"
*
* In light of this, the default mapping data included with ICU 55 uses the
* Unicode 7 MA (Multi script Any case) table data for the other type options
* (Single Script, Any Case), (Single Script, Lower Case) and (Multi Script, Lower Case).
*/
/**
* Incomplete struct type USpoofChecker and its typedef for C.
* No members are declared here; the uspoof_ functions in this header
* take and return it by pointer only.
*/
typedef struct USpoofChecker USpoofChecker;
/**
* Bit flags naming the kinds of checks that a USpoofChecker can perform.
* The same constants are used in two places: OR-ed together to select the
* set of checks that will be performed, and in the value returned by the
* check function to report results.
*
* @stable ICU 4.2
*/
typedef enum USpoofChecks {
/** Single script confusable test.
* For a pair of identifiers: report them as confusable if both are from
* the same script and they are visually confusable.
* Note: this test is not applicable to a check of a single identifier.
*/
USPOOF_SINGLE_SCRIPT_CONFUSABLE = (1 << 0),
/** Mixed script confusable test.
* For a single identifier: report a problem if the identifier contains
* multiple scripts and is confusable with some other identifier in a
* single script.
* For a pair of identifiers: report them as confusable if the two IDs
* are visually confusable and at least one contains characters from
* more than one script.
*/
USPOOF_MIXED_SCRIPT_CONFUSABLE = (1 << 1),
/** Whole script confusable test.
* For a single identifier: report a problem if the identifier is of a
* single script and there exists a confusable identifier in another
* script.
* For a pair of identifiers: report them as confusable if each is of a
* single script, the scripts of the two identifiers are different, and
* the identifiers are visually confusable.
*/
USPOOF_WHOLE_SCRIPT_CONFUSABLE = (1 << 2),
/** Any Case Modifier for confusable identifier tests.
* If specified, consider all characters, of any case, when looking for
* confusables.
* If USPOOF_ANY_CASE is not specified, identifiers being checked are
* assumed to have been case folded, and upper case confusable characters
* will not be checked.
* Selects between Lower Case Confusable and Any Case Confusable.
*/
USPOOF_ANY_CASE = (1 << 3),
/**
* Check that an identifier is no looser than the specified RestrictionLevel.
* The default if uspoof_setRestrictionLevel() is not called is HIGHLY_RESTRICTIVE.
*
* If USPOOF_AUX_INFO is enabled the actual restriction level of the
* identifier being tested will also be returned by uspoof_check().
*
* @see URestrictionLevel
* @see uspoof_setRestrictionLevel
* @see USPOOF_AUX_INFO
*
* @stable ICU 51
*/
USPOOF_RESTRICTION_LEVEL = (1 << 4),
/** Check an identifier for the presence of invisible characters,
* such as zero-width spaces, or character sequences that are
* likely not to display, such as multiple occurrences of the same
* non-spacing mark. This check does not test the input string as a whole
* for conformance to any particular syntax for identifiers.
*/
USPOOF_INVISIBLE = (1 << 5),
/** Check that an identifier contains only characters from a specified set
* of acceptable characters. See uspoof_setAllowedChars() and
* uspoof_setAllowedLocales().
*/
USPOOF_CHAR_LIMIT = (1 << 6),
/**
* Check that an identifier does not include decimal digits from
* more than one numbering system.
*
* @stable ICU 51
*/
USPOOF_MIXED_NUMBERS = (1 << 7),
/**
* Enable all spoof checks.
*
* @stable ICU 4.6
*/
USPOOF_ALL_CHECKS = 0xFFFF,
/**
* Enable the return of auxiliary (non-error) information in the
* upper bits of the check results value.
*
* If this "check" is not enabled, the results of uspoof_check() will be zero when an
* identifier passes all of the enabled checks.
*
* If this "check" is enabled, (uspoof_check() & USPOOF_ALL_CHECKS) will be zero
* when an identifier passes all checks.
*
* @stable ICU 51
*/
USPOOF_AUX_INFO = (1 << 30)
} USpoofChecks;
/**
* Restriction level constants from UAX #39.
* They are passed to setRestrictionLevel(), and they are also the form in
* which identifier restriction levels are returned in check results.
* @stable ICU 51
*/
typedef enum URestrictionLevel {
/**
* Only ASCII characters: U+0000..U+007F
*
* @stable ICU 51
*/
USPOOF_ASCII = (1 << 28),
/**
* All characters in each identifier must be from a single script.
*
* @stable ICU 53
*/
USPOOF_SINGLE_SCRIPT_RESTRICTIVE = (2 << 28),
/**
* All characters in each identifier must be from a single script, or from the combinations: Latin + Han +
* Hiragana + Katakana; Latin + Han + Bopomofo; or Latin + Han + Hangul. Note that this level will satisfy the
* vast majority of Latin-script users; also that TR36 has ASCII instead of Latin.
*
* @stable ICU 51
*/
USPOOF_HIGHLY_RESTRICTIVE = (3 << 28),
/**
* Allow Latin with other scripts except Cyrillic, Greek and Cherokee.
* Otherwise, the same as Highly Restrictive.
*
* @stable ICU 51
*/
USPOOF_MODERATELY_RESTRICTIVE = (4 << 28),
/**
* Allow arbitrary mixtures of scripts. Otherwise, the same as Moderately Restrictive.
*
* @stable ICU 51
*/
USPOOF_MINIMALLY_RESTRICTIVE = (5 << 28),
/**
* Any valid identifiers, including characters outside of the Identifier Profile.
*
* @stable ICU 51
*/
USPOOF_UNRESTRICTIVE = (6 << 28),
/**
* Mask for selecting the Restriction Level bits from the return value of uspoof_check().
*
* @stable ICU 53
*/
USPOOF_RESTRICTION_LEVEL_MASK = (0x7F << 24)
} URestrictionLevel;
/**
* Create a Unicode Spoof Checker, configured to perform all
* checks except for USPOOF_LOCALE_LIMIT and USPOOF_CHAR_LIMIT.
* Note that additional checks may be added in the future,
* resulting in changes to the default checking behavior.
*
* NOTE(review): USPOOF_LOCALE_LIMIT is not among the USpoofChecks constants
* declared in this header; the mention looks like a leftover - confirm
* against upstream ICU.
*
* @param status The error code, set if this function encounters a problem.
* @return the newly created Spoof Checker
* @stable ICU 4.2
*/
U_STABLE USpoofChecker * U_EXPORT2
uspoof_open(UErrorCode *status);
/**
* Open a Spoof checker from its serialized form, stored in 32-bit-aligned memory.
* Inverse of uspoof_serialize().
* The memory containing the serialized data must remain valid and unchanged
* as long as the spoof checker, or any cloned copies of the spoof checker,
* are in use. Ownership of the memory remains with the caller.
* The spoof checker (and any clones) must be closed prior to deleting the
* serialized data.
*
* @param data a pointer to 32-bit-aligned memory containing the serialized form of spoof data
* @param length the number of bytes available at data;
* can be more than necessary
* @param pActualLength receives the actual number of bytes at data taken up by the data;
* can be NULL
* @param pErrorCode ICU error code
* @return the spoof checker.
*
* @see uspoof_open
* @see uspoof_serialize
* @stable ICU 4.2
*/
U_STABLE USpoofChecker * U_EXPORT2
uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
UErrorCode *pErrorCode);
/**
* Open a Spoof Checker from the source form of the spoof data.
* The two inputs correspond to the Unicode data files confusables.txt
* and confusablesWholeScript.txt as described in Unicode UAX #39.
* The syntax of the source data is as described in UAX #39 for
* these files, and the content of these files is acceptable input.
*
* The character encoding of the (char *) input text is UTF-8.
*
* @param confusables a pointer to the confusable characters definitions,
* as found in file confusables.txt from unicode.org.
* @param confusablesLen The length of the confusables text, or -1 if the
* input string is zero terminated.
* @param confusablesWholeScript
* a pointer to the whole script confusables definitions,
* as found in the file confusablesWholeScript.txt from unicode.org.
* @param confusablesWholeScriptLen The length of the whole script confusables text, or
* -1 if the input string is zero terminated.
* @param errType An out parameter. In the event of an error in the input,
* indicates which of the input files contains the error.
* The value is one of USPOOF_SINGLE_SCRIPT_CONFUSABLE or
* USPOOF_WHOLE_SCRIPT_CONFUSABLE, or
* zero if no errors are found.
* @param pe An out parameter. In the event of an error in the input,
* receives the position in the input text (line, offset) of the error.
* @param status an in/out ICU UErrorCode. Among the possible errors is
* U_PARSE_ERROR, which is used to report syntax errors
* in the input.
* @return A spoof checker that uses the rules from the input files.
* @stable ICU 4.2
*/
U_STABLE USpoofChecker * U_EXPORT2
uspoof_openFromSource(const char *confusables, int32_t confusablesLen,
const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,
int32_t *errType, UParseError *pe, UErrorCode *status);
/**
* Close a Spoof Checker, freeing any memory that was being held by
* its implementation.
* @param sc The USpoofChecker to close.
* @stable ICU 4.2
*/
U_STABLE void U_EXPORT2
uspoof_close(USpoofChecker *sc);
/**
* Clone a Spoof Checker. The clone will be set to perform the same checks
* as the original source.
*
* @param sc The source USpoofChecker
* @param status The error code, set if this function encounters a problem.
* @return The cloned USpoofChecker.
* @stable ICU 4.2
*/
U_STABLE USpoofChecker * U_EXPORT2
uspoof_clone(const USpoofChecker *sc, UErrorCode *status);
/**
* Specify the set of checks that will be performed by the check
* functions of this Spoof Checker.
*
* @param sc The USpoofChecker
* @param checks The set of checks that this spoof checker will perform.
* The value is a bit set, obtained by OR-ing together
* values from enum USpoofChecks.
* @param status The error code, set if this function encounters a problem.
* @stable ICU 4.2
*/
U_STABLE void U_EXPORT2
uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status);
/**
* Get the set of checks that this Spoof Checker has been configured to perform.
*
* @param sc The USpoofChecker
* @param status The error code, set if this function encounters a problem.
* @return The set of checks that this spoof checker will perform.
* The value is a bit set, obtained by OR-ing together
* values from enum USpoofChecks.
* @stable ICU 4.2
*/
U_STABLE int32_t U_EXPORT2
uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status);
/**
* Set the loosest restriction level allowed. The default if this function
* is not called is HIGHLY_RESTRICTIVE.
* Calling this function also enables the RESTRICTION_LEVEL check.
* @param sc The USpoofChecker
* @param restrictionLevel The loosest restriction level allowed.
* @see URestrictionLevel
* @stable ICU 51
*/
U_STABLE void U_EXPORT2
uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel);
/**
* Get the Restriction Level that will be tested if the checks include RESTRICTION_LEVEL.
*
* @param sc The USpoofChecker
* @return The restriction level
* @see URestrictionLevel
* @stable ICU 51
*/
U_STABLE URestrictionLevel U_EXPORT2
uspoof_getRestrictionLevel(const USpoofChecker *sc);
/**
* Limit characters that are acceptable in identifiers being checked to those
* normally used with the languages associated with the specified locales.
* Any previously specified list of locales is replaced by the new settings.
*
* A set of languages is determined from the locale(s), and
* from those a set of acceptable Unicode scripts is determined.
* Characters from this set of scripts, along with characters from
* the "common" and "inherited" Unicode Script categories
* will be permitted.
*
* Supplying an empty string removes all restrictions;
* characters from any script will be allowed.
*
* The USPOOF_CHAR_LIMIT test is automatically enabled for this
* USpoofChecker when calling this function with a non-empty list
* of locales.
*
* The Unicode Set of characters that will be allowed is accessible
* via the uspoof_getAllowedChars() function. uspoof_setAllowedLocales()
* will replace any previously applied set of allowed characters.
*
* Adjustments, such as additions or deletions of certain classes of characters,
* can be made to the result of uspoof_setAllowedLocales() by
* fetching the resulting set with uspoof_getAllowedChars(),
* manipulating it with the Unicode Set API, then resetting the
* spoof detector's limits with uspoof_setAllowedChars().
*
* @param sc The USpoofChecker
* @param localesList A list of locales, from which the language
* and associated script are extracted. The locales
* are comma-separated if there is more than one.
* White space may not appear within an individual locale,
* but is ignored otherwise.
* The locales are syntactically like those from the
* HTTP Accept-Language header.
* If the localesList is empty, no restrictions will be placed on
* the allowed characters.
*
* @param status The error code, set if this function encounters a problem.
* @stable ICU 4.2
*/
U_STABLE void U_EXPORT2
uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status);
/**
* Get a list of locales for the scripts that are acceptable in strings
* to be checked. If no limitations on scripts have been specified,
* an empty string will be returned.
*
* uspoof_setAllowedChars() will reset the list of allowed locales to be empty.
*
* The format of the returned list is the same as that supplied to
* uspoof_setAllowedLocales(), but the returned list may not be identical
* to the originally specified string; the string may be reformatted,
* and information other than languages from
* the originally specified locales may be omitted.
*
* @param sc The USpoofChecker
* @param status The error code, set if this function encounters a problem.
* @return A string containing a list of locales corresponding
* to the acceptable scripts, formatted like an
* HTTP Accept-Language value.
*
* @stable ICU 4.2
*/
U_STABLE const char * U_EXPORT2
uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status);
/**
* Limit the acceptable characters to those specified by a Unicode Set.
* Any previously specified character limit
* is replaced by the new settings. This includes limits on
* characters that were set with the uspoof_setAllowedLocales() function.
*
* The USPOOF_CHAR_LIMIT test is automatically enabled for this
* USpoofChecker by this function.
*
* @param sc The USpoofChecker
* @param chars A Unicode Set containing the list of
* characters that are permitted. Ownership of the set
* remains with the caller. The incoming set is cloned by
* this function, so there are no restrictions on modifying
* or deleting the USet after calling this function.
* @param status The error code, set if this function encounters a problem.
* @stable ICU 4.2
*/
U_STABLE void U_EXPORT2
uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status);
/**
* Get a USet for the characters permitted in an identifier.
* This corresponds to the limits imposed by the Set Allowed Characters
* functions. Limitations imposed by other checks will not be
* reflected in the set returned by this function.
*
* The returned set will be frozen, meaning that it cannot be modified
* by the caller.
*
* Ownership of the returned set remains with the Spoof Detector. The
* returned set will become invalid if the spoof detector is closed,
* or if a new set of allowed characters is specified.
*
* @param sc The USpoofChecker
* @param status The error code, set if this function encounters a problem.
* @return A USet containing the characters that are permitted by
* the USPOOF_CHAR_LIMIT test.
* @stable ICU 4.2
*/
U_STABLE const USet * U_EXPORT2
uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status);
/**
* Check the specified string for possible security issues.
* The text to be checked will typically be an identifier of some sort.
* The set of checks to be performed is specified with uspoof_setChecks().
*
* @param sc The USpoofChecker
* @param id The identifier to be checked for possible security issues,
* in UTF-16 format.
* @param length the length of the string to be checked, expressed in
* 16 bit UTF-16 code units, or -1 if the string is
* zero terminated.
* @param position An out parameter.
* Originally, the index of the first string position that failed a check.
* Now, always returns zero.
* This parameter may be null.
* @param status The error code, set if an error occurred while attempting to
* perform the check.
* Spoofing or security issues detected with the input string are
* not reported here, but through the function's return value.
* @return An integer value with bits set for any potential security
* or spoofing issues detected. The bits are defined by
* enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
* will be zero if the input string passes all of the
* enabled checks.
* @stable ICU 4.2
*/
U_STABLE int32_t U_EXPORT2
uspoof_check(const USpoofChecker *sc,
const UChar *id, int32_t length,
int32_t *position,
UErrorCode *status);
/**
* Check the specified string for possible security issues.
* The text to be checked will typically be an identifier of some sort.
* The set of checks to be performed is specified with uspoof_setChecks().
*
* @param sc The USpoofChecker
* @param id An identifier to be checked for possible security issues, in UTF-8 format.
* @param length the length of the string to be checked, or -1 if the string is
* zero terminated.
* @param position An out parameter.
* Originally, the index of the first string position that failed a check.
* Now, always returns zero.
* This parameter may be null.
* NOTE(review): the "deprecated ICU 51" marker that follows sits among the
* parameter descriptions and appears to refer to the original behavior of
* position rather than to the function, which is tagged stable below -
* confirm against upstream ICU.
* @deprecated ICU 51
* @param status The error code, set if an error occurred while attempting to
* perform the check.
* Spoofing or security issues detected with the input string are
* not reported here, but through the function's return value.
* If the input contains invalid UTF-8 sequences,
* a status of U_INVALID_CHAR_FOUND will be returned.
* @return An integer value with bits set for any potential security
* or spoofing issues detected. The bits are defined by
* enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
* will be zero if the input string passes all of the
* enabled checks.
* @stable ICU 4.2
*/
U_STABLE int32_t U_EXPORT2
uspoof_checkUTF8(const USpoofChecker *sc,
const char *id, int32_t length,
int32_t *position,
UErrorCode *status);
/**
* Check whether two specified strings are visually confusable.
* The types of confusability to be tested - single script, mixed script,
* or whole script - are determined by the check options set for the
* USpoofChecker.
*
* The tests to be performed are controlled by the flags
* USPOOF_SINGLE_SCRIPT_CONFUSABLE
* USPOOF_MIXED_SCRIPT_CONFUSABLE
* USPOOF_WHOLE_SCRIPT_CONFUSABLE
* At least one of these tests must be selected.
*
* USPOOF_ANY_CASE is a modifier for the tests. Select it if the identifiers
* may be of mixed case.
* If identifiers are case folded for comparison and
* display to the user, do not select the USPOOF_ANY_CASE option.
*
*
* @param sc The USpoofChecker
* @param id1 The first of the two identifiers to be compared for
* confusability. The strings are in UTF-16 format.
* @param length1 the length of the first identifier, expressed in
* 16 bit UTF-16 code units, or -1 if the string is
* nul terminated.
* @param id2 The second of the two identifiers to be compared for
* confusability. The identifiers are in UTF-16 format.
* @param length2 The length of the second identifier, expressed in
* 16 bit UTF-16 code units, or -1 if the string is
* nul terminated.
* @param status The error code, set if an error occurred while attempting to
* perform the check.
* Confusability of the identifiers is not reported here,
* but through this function's return value.
* @return An integer value with bit(s) set corresponding to
* the type of confusability found, as defined by
* enum USpoofChecks. Zero is returned if the identifiers
* are not confusable.
* @stable ICU 4.2
*/
U_STABLE int32_t U_EXPORT2
uspoof_areConfusable(const USpoofChecker *sc,
const UChar *id1, int32_t length1,
const UChar *id2, int32_t length2,
UErrorCode *status);
/**
* Check whether two specified strings are visually confusable.
* The types of confusability to be tested - single script, mixed script,
* or whole script - are determined by the check options set for the
* USpoofChecker.
*
* This is the UTF-8 counterpart of uspoof_areConfusable(); see that
* function for the flags that control the tests.
*
* @param sc The USpoofChecker
* @param id1 The first of the two identifiers to be compared for
* confusability. The strings are in UTF-8 format.
* @param length1 The length of the first identifier, in bytes, or -1
* if the string is nul terminated.
* @param id2 The second of the two identifiers to be compared for
* confusability. The strings are in UTF-8 format.
* @param length2 The length of the second identifier, in bytes, or -1
* if the string is nul terminated.
* @param status The error code, set if an error occurred while attempting to
* perform the check.
* Confusability of the strings is not reported here,
* but through this function's return value.
* @return An integer value with bit(s) set corresponding to
* the type of confusability found, as defined by
* enum USpoofChecks. Zero is returned if the strings
* are not confusable.
* @stable ICU 4.2
*/
U_STABLE int32_t U_EXPORT2
uspoof_areConfusableUTF8(const USpoofChecker *sc,
const char *id1, int32_t length1,
const char *id2, int32_t length2,
UErrorCode *status);
/**
* Get the "skeleton" for an identifier.
* Skeletons are a transformation of the input identifier;
* two identifiers are confusable if their skeletons are identical.
* See Unicode UAX #39 for additional information.
*
* Using skeletons directly makes it possible to quickly check
* whether an identifier is confusable with any of some large
* set of existing identifiers, by creating an efficiently
* searchable collection of the skeletons.
*
* @param sc The USpoofChecker
* @param type The type of skeleton, corresponding to which
* of the Unicode confusable data tables to use.
* The default is Mixed-Script, Lowercase.
* Allowed options are USPOOF_SINGLE_SCRIPT_CONFUSABLE and
* USPOOF_ANY_CASE. The two flags may be ORed.
* @param id The input identifier whose skeleton will be computed
* (UTF-16, as its length is given in UTF-16 code units).
* @param length The length of the input identifier, expressed in 16 bit
* UTF-16 code units, or -1 if the string is zero terminated.
* @param dest The output buffer, to receive the skeleton string.
* @param destCapacity The length of the output buffer, in 16 bit units.
* The destCapacity may be zero, in which case the function will
* return the actual length of the skeleton.
* @param status The error code, set if an error occurred while attempting to
* perform the check.
* @return The length of the skeleton string. The returned length
* is always that of the complete skeleton, even when the
* supplied buffer is too small (or of zero length).
*
* @stable ICU 4.2
*/
U_STABLE int32_t U_EXPORT2
uspoof_getSkeleton(const USpoofChecker *sc,
uint32_t type,
const UChar *id, int32_t length,
UChar *dest, int32_t destCapacity,
UErrorCode *status);
/**
* Get the "skeleton" for an identifier.
* Skeletons are a transformation of the input identifier;
* two identifiers are confusable if their skeletons are identical.
* See Unicode UAX #39 for additional information.
*
* Using skeletons directly makes it possible to quickly check
* whether an identifier is confusable with any of some large
* set of existing identifiers, by creating an efficiently
* searchable collection of the skeletons.
*
* This is the UTF-8 counterpart of uspoof_getSkeleton(): both the input
* and output lengths are measured in bytes.
*
* @param sc The USpoofChecker
* @param type The type of skeleton, corresponding to which
* of the Unicode confusable data tables to use.
* The default is Mixed-Script, Lowercase.
* Allowed options are USPOOF_SINGLE_SCRIPT_CONFUSABLE and
* USPOOF_ANY_CASE. The two flags may be ORed.
* @param id The UTF-8 format identifier whose skeleton will be computed.
* @param length The length of the input string, in bytes,
* or -1 if the string is zero terminated.
* @param dest The output buffer, to receive the skeleton string.
* @param destCapacity The length of the output buffer, in bytes.
* The destCapacity may be zero, in which case the function will
* return the actual length of the skeleton.
* @param status The error code, set if an error occurred while attempting to
* perform the check. Possible errors include U_INVALID_CHAR_FOUND
* for invalid UTF-8 sequences, and
* U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small
* to hold the complete skeleton.
* @return The length of the skeleton string, in bytes. The returned length
* is always that of the complete skeleton, even when the
* supplied buffer is too small (or of zero length).
*
* @stable ICU 4.2
*/
U_STABLE int32_t U_EXPORT2
uspoof_getSkeletonUTF8(const USpoofChecker *sc,
uint32_t type,
const char *id, int32_t length,
char *dest, int32_t destCapacity,
UErrorCode *status);
/**
* Get the set of Candidate Characters for Inclusion in Identifiers, as defined
* in Unicode UAX #31, http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Inclusion_in_Identifiers
*
* The returned set is frozen. Ownership of the set remains with the ICU library; it must not
* be deleted by the caller.
*
* @param status The error code, set if a problem occurs while creating the set.
* @return The frozen set of candidate characters; it remains owned by the
* ICU library.
*
* @stable ICU 51
*/
U_STABLE const USet * U_EXPORT2
uspoof_getInclusionSet(UErrorCode *status);
/**
* Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined
* in Unicode UAX #31, http://www.unicode.org/reports/tr31/#Table_Recommended_Scripts
*
* The returned set is frozen. Ownership of the set remains with the ICU library; it must not
* be deleted by the caller.
*
* @param status The error code, set if a problem occurs while creating the set.
* @return The frozen set of characters from recommended scripts; it remains
* owned by the ICU library.
*
* @stable ICU 51
*/
U_STABLE const USet * U_EXPORT2
uspoof_getRecommendedSet(UErrorCode *status);
/**
* Serialize the data for a spoof detector into a chunk of memory.
* The flattened spoof detection tables can later be used to efficiently
* instantiate a new Spoof Detector.
*
* The serialized spoof checker includes only the data compiled from the
* Unicode data tables by uspoof_openFromSource(); it does not
* include any other state or configuration that may have been set.
*
* @param sc the Spoof Detector whose data is to be serialized.
* @param data a pointer to 32-bit-aligned memory to be filled with the data,
* can be NULL if capacity==0
* @param capacity the number of bytes available at data,
* or 0 for preflighting
* @param status an in/out ICU UErrorCode; possible errors include:
* - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization
* - U_ILLEGAL_ARGUMENT_ERROR the data or capacity parameters are bad
* @return the number of bytes written or needed for the spoof data
*
* @see utrie2_openFromSerialized()
* @stable ICU 4.2
*/
U_STABLE int32_t U_EXPORT2
uspoof_serialize(USpoofChecker *sc,
void *data, int32_t capacity,
UErrorCode *status);
#endif
#endif /* USPOOF_H */
// utmscale.h
/*
*******************************************************************************
* Copyright (C) 2004 - 2008, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
#ifndef UTMSCALE_H
#define UTMSCALE_H
#if !UCONFIG_NO_FORMATTING
/**
* \file
* \brief C API: Universal Time Scale
*
* There are quite a few different conventions for binary datetime, depending on different
* platforms and protocols. Some of these have severe drawbacks. For example, people using
* Unix time (seconds since Jan 1, 1970) think that they are safe until near the year 2038.
* But cases can and do arise where arithmetic manipulations cause serious problems. Consider
* the computation of the average of two datetimes, for example: if one calculates them with
* averageTime = (time1 + time2)/2, there will be overflow even with dates around the present.
*
* Binary datetimes differ in a number of ways: the datatype, the unit,
* and the epoch (origin). We'll refer to these as time scales. For example:
*
*
* All of the epochs start at 00:00 am (the earliest possible time on the day in question),
* and are assumed to be UTC.
*
*
* The ranges for different datatypes are given in the following table (all values in years).
* The range of years includes the entire range expressible with positive and negative
* values of the datatype. The range of years for double is the range that would be allowed
* without losing precision to the corresponding unit.
*
*
* These functions implement a universal time scale which can be used as a 'pivot',
* and provide conversion functions to and from all other major time scales.
* This allows datetimes to be converted to the pivot time, safely manipulated,
* and converted back to any other datetime time scale.
*
*
* So what to use for this pivot? Java time has plenty of range, but cannot represent
* .NET
* The Unix extended time uses a structure with two components: time in seconds and a
* fractional field (microseconds). However, this is clumsy, slow, and
* prone to error (you always have to keep track of overflow and underflow in the
* fractional field).
* Because of these issues, we ended up concluding that the .NET framework's
* After a transliteration operation, some of the indices in this
* structure will be modified. See the field descriptions for
* details.
*
* contextStart <= start <= limit <= contextLimit
*
* Note: All index values in this structure must be at code point
* boundaries. That is, none of them may occur between two code units
* of a surrogate pair. If any index does split a surrogate pair,
* results are unspecified.
*
* @stable ICU 2.0
*/
typedef struct UTransPosition {
/**
* Beginning index, inclusive, of the context to be considered for
* a transliteration operation. The transliterator will ignore
* anything before this index. INPUT/OUTPUT parameter: This parameter
* is updated by a transliteration operation to reflect the maximum
* amount of antecontext needed by a transliterator.
* @stable ICU 2.4
*/
int32_t contextStart;
/**
* Ending index, exclusive, of the context to be considered for a
* transliteration operation. The transliterator will ignore
* anything at or after this index. INPUT/OUTPUT parameter: This
* parameter is updated to reflect changes in the length of the
* text, but points to the same logical position in the text.
* @stable ICU 2.4
*/
int32_t contextLimit;
/**
* Beginning index, inclusive, of the text to be transliterated.
* INPUT/OUTPUT parameter: This parameter is advanced past
* characters that have already been transliterated by a
* transliteration operation.
* @stable ICU 2.4
*/
int32_t start;
/**
* Ending index, exclusive, of the text to be transliterated.
* INPUT/OUTPUT parameter: This parameter is updated to reflect
* changes in the length of the text, but points to the same
* logical position in the text.
* @stable ICU 2.4
*/
int32_t limit;
} UTransPosition;
/********************************************************************
* General API
********************************************************************/
/**
* Open a custom transliterator, given a custom rules string
* OR
* a system transliterator, given its ID.
* The rules argument selects between the two: when rules is NULL, the
* system transliterator matching id is returned.
* Any non-NULL result from this function should later be closed with
* utrans_close().
*
* @param id a valid transliterator ID
* @param idLength the length of the ID string, or -1 if NUL-terminated
* @param dir the desired direction
* @param rules the transliterator rules. See the C++ header rbt.h for
* rules syntax. If NULL then a system transliterator matching
* the ID is returned.
* @param rulesLength the length of the rules, or -1 if the rules
* are NUL-terminated.
* @param parseError a pointer to a UParseError struct to receive the details
* of any parsing errors. This parameter may be NULL if no
* parsing error details are desired.
* @param pErrorCode a pointer to the UErrorCode
* @return a transliterator pointer that may be passed to other
* utrans_xxx() functions, or NULL if the open call fails.
* @stable ICU 2.8
*/
U_STABLE UTransliterator* U_EXPORT2
utrans_openU(const UChar *id,
int32_t idLength,
UTransDirection dir,
const UChar *rules,
int32_t rulesLength,
UParseError *parseError,
UErrorCode *pErrorCode);
/**
* Open an inverse of an existing transliterator. For this to work,
* the inverse must be registered with the system. For example, if
* the Transliterator "A-B" is opened, and then its inverse is opened,
* the result is the Transliterator "B-A", if such a transliterator is
* registered with the system. Otherwise the result is NULL and a
* failing UErrorCode is set. Any non-NULL result from this function
* should later be closed with utrans_close().
*
* @param trans the transliterator to open the inverse of.
* @param status a pointer to the UErrorCode; set to a failing code when
* no inverse is registered with the system.
* @return a pointer to a newly-opened transliterator that is the
* inverse of trans, or NULL if the open call fails.
* @stable ICU 2.0
*/
U_STABLE UTransliterator* U_EXPORT2
utrans_openInverse(const UTransliterator* trans,
UErrorCode* status);
/**
* Create a copy of a transliterator. Any non-NULL result from this
* function should later be closed with utrans_close().
*
* @param trans the transliterator to be copied.
* @param status a pointer to the UErrorCode
* @return a transliterator pointer that may be passed to other
* utrans_xxx() functions, or NULL if the clone call fails.
* A non-NULL result is closed separately via utrans_close().
* @stable ICU 2.0
*/
U_STABLE UTransliterator* U_EXPORT2
utrans_clone(const UTransliterator* trans,
UErrorCode* status);
/**
* Close a transliterator. Any non-NULL pointer returned by
* utrans_openXxx() or utrans_clone() should eventually be closed
* with this function.
* @param trans the transliterator to be closed.
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
utrans_close(UTransliterator* trans);
/**
* Return the programmatic identifier for this transliterator.
* If this identifier is passed to utrans_openU(), it will open
* a transliterator equivalent to this one, if the ID has been
* registered.
*
* @param trans the transliterator to return the ID of.
* @param resultLength pointer to an output variable receiving the length
* of the ID string; can be NULL
* @return the NUL-terminated ID string. This pointer remains
* valid until utrans_close() is called on this transliterator,
* so it must not be used after that call.
*
* @stable ICU 2.8
*/
U_STABLE const UChar * U_EXPORT2
utrans_getUnicodeID(const UTransliterator *trans,
int32_t *resultLength);
/**
* Register an open transliterator with the system. When
* utrans_open() is called with an ID string that is equal to that
* returned by utrans_getID(adoptedTrans,...), then
* utrans_clone(adoptedTrans,...) is returned.
*
* NOTE(review): utrans_open(), utrans_getID() and utrans_openRules() are
* not declared in the visible part of this header; the declarations that
* are visible are utrans_openU() and utrans_getUnicodeID(). Confirm which
* names apply to this SDK.
*
* NOTE: After this call the system owns the adoptedTrans and will
* close it. The user must not call utrans_close() on adoptedTrans.
*
* @param adoptedTrans a transliterator, typically the result of
* utrans_openRules(), to be registered with the system.
* @param status a pointer to the UErrorCode
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
utrans_register(UTransliterator* adoptedTrans,
UErrorCode* status);
/**
* Unregister a transliterator from the system. After this call the
* system will no longer recognize the given ID when passed to
* utrans_open(). If the ID is invalid then nothing is done.
*
* NOTE(review): utrans_open() is not declared in the visible part of this
* header (utrans_openU() is); confirm which opener is meant here.
*
* @param id an ID to unregister
* @param idLength the length of id, or -1 if id is zero-terminated
* @stable ICU 2.8
*/
U_STABLE void U_EXPORT2
utrans_unregisterID(const UChar* id, int32_t idLength);
/**
* Set the filter used by a transliterator. A filter can be used to
* make the transliterator pass certain characters through untouched.
* The filter is expressed using a UnicodeSet pattern. If the
* filterPattern is NULL or the empty string, then the transliterator
* will be reset to use no filter.
*
* @param trans the transliterator
* @param filterPattern a pattern string, in the form accepted by
* UnicodeSet, specifying which characters to apply the
* transliteration to (characters outside the pattern pass through
* untouched). May be NULL or the empty string to indicate no
* filter.
* @param filterPatternLen the length of filterPattern, or -1 if
* filterPattern is zero-terminated
* @param status a pointer to the UErrorCode
* @see UnicodeSet
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
utrans_setFilter(UTransliterator* trans,
const UChar* filterPattern,
int32_t filterPatternLen,
UErrorCode* status);
/**
* Return the number of system transliterators.
* It is recommended to use utrans_openIDs() instead, which returns a
* UEnumeration for the available transliterators.
*
* @return the number of system transliterators.
* @see utrans_openIDs
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
utrans_countAvailableIDs(void);
/**
* Return a UEnumeration for the available transliterators.
*
* @param pErrorCode Pointer to the UErrorCode in/out parameter.
* @return UEnumeration for the available transliterators.
* The caller closes it with uenum_close().
*
* @stable ICU 2.8
*/
U_STABLE UEnumeration * U_EXPORT2
utrans_openIDs(UErrorCode *pErrorCode);
/********************************************************************
* Transliteration API
********************************************************************/
/**
* Transliterate a segment of a UReplaceable string. The string is
* passed in as a UReplaceable pointer rep and a UReplaceableCallbacks
* function pointer struct repFunc. Functions in the repFunc struct
* will be called in order to modify the rep string.
*
* NOTE(review): part of this comment appears to have been lost when this
* header was generated. It previously carried a second parameter list
* including an "@param start" (which this function does not take) that
* ran into the sentence fragments "Upon return, values in" and "Typical
* usage of this method begins with an initial call with". Compare against
* the upstream ICU utrans.h and restore the missing description.
*
* This method assumes that future calls may be made that will
* insert new text into the buffer. As a result, it only performs
* unambiguous transliterations. After the last call to this method,
* there may be untransliterated text that is waiting for more input
* to resolve an ambiguity. In order to perform these pending
* transliterations, clients should call utrans_trans() with a start
* of index.start and a limit of index.end after the last call to this
* method has been made.
*
* @param trans the transliterator
* @param rep a pointer to the string. This will be passed to the
* repFunc functions.
* @param repFunc a set of function pointers that will be used to
* modify the string pointed to by rep.
* @param pos a struct containing the start and limit indices of the
* text to be read and the text to be transliterated
* @param status a pointer to the UErrorCode
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
utrans_transIncremental(const UTransliterator* trans,
UReplaceable* rep,
UReplaceableCallbacks* repFunc,
UTransPosition* pos,
UErrorCode* status);
/**
* Transliterate a segment of a UChar* string. The string is passed
* in in a UChar* buffer. The string is modified in place. If the
* result is longer than textCapacity, it is truncated. The actual
* length of the result is returned in *textLength, if textLength is
* non-NULL. *textLength may be greater than textCapacity, but only
* textCapacity UChars will be written to *text, including the zero
* terminator.
*
* @param trans the transliterator
* @param text a pointer to a buffer containing the text to be
* transliterated on input and the result text on output.
* @param textLength a pointer to the length of the string in text.
* If the length is -1 then the string is assumed to be
* zero-terminated. Upon return, the new length is stored in
* *textLength. If textLength is NULL then the string is assumed to
* be zero-terminated.
* @param textCapacity a pointer to the length of the text buffer.
* Upon return,
* @param start the beginning index, inclusive; MessageFormat C API
*
*
* \code
* UChar *result, *tzID, *str;
* UChar pattern[100];
* int32_t resultLengthOut, resultlength;
* UCalendar *cal;
* UDate d1;
* UDateFormat *def1;
* UErrorCode status = U_ZERO_ERROR;
*
* str=(UChar*)malloc(sizeof(UChar) * (strlen("disturbance in force") +1));
* u_uastrcpy(str, "disturbance in force");
* tzID=(UChar*)malloc(sizeof(UChar) * 4);
* u_uastrcpy(tzID, "PST");
* cal=ucal_open(tzID, u_strlen(tzID), "en_US", UCAL_TRADITIONAL, &status);
* ucal_setDateTime(cal, 1999, UCAL_MARCH, 18, 0, 0, 0, &status);
* d1=ucal_getMillis(cal, &status);
* u_uastrcpy(pattern, "On {0, date, long}, there was a {1} on planet {2,number,integer}");
* resultlength=0;
* resultLengthOut=u_formatMessage( "en_US", pattern, u_strlen(pattern), NULL, resultlength, &status, d1, str, 7);
* if(status==U_BUFFER_OVERFLOW_ERROR){
* status=U_ZERO_ERROR;
* resultlength=resultLengthOut+1;
* result=(UChar*)malloc(sizeof(UChar) * resultlength);
* u_formatMessage( "en_US", pattern, u_strlen(pattern), result, resultlength, &status, d1, str, 7);
* }
* printf("%s\n", austrdup(result) );//austrdup( a function used to convert UChar* to char*)
* //output>: "On March 18, 1999, there was a disturbance in force on planet 7"
* \endcode
*
* Typically, the message format will come from resources, and the
* arguments will be dynamically set at runtime.
*
* \code
* UChar* str;
* UErrorCode status = U_ZERO_ERROR;
* UChar *result;
* UChar pattern[100];
* int32_t resultlength, resultLengthOut, i;
* double testArgs[]= { 100.0, 1.0, 0.0};
*
* str=(UChar*)malloc(sizeof(UChar) * 10);
* u_uastrcpy(str, "MyDisk");
* u_uastrcpy(pattern, "The disk {1} contains {0,choice,0#no files|1#one file|1<{0,number,integer} files}");
* for(i=0; i<3; i++){
* resultlength=0;
* resultLengthOut=u_formatMessage( "en_US", pattern, u_strlen(pattern), NULL, resultlength, &status, testArgs[i], str);
* if(status==U_BUFFER_OVERFLOW_ERROR){
* status=U_ZERO_ERROR;
* resultlength=resultLengthOut+1;
* result=(UChar*)malloc(sizeof(UChar) * resultlength);
* u_formatMessage( "en_US", pattern, u_strlen(pattern), result, resultlength, &status, testArgs[i], str);
* }
* printf("%s\n", austrdup(result) ); //austrdup( a function used to convert UChar* to char*)
* free(result);
* }
* // output, with different testArgs:
* // output: The disk "MyDisk" contains 100 files.
* // output: The disk "MyDisk" contains one file.
* // output: The disk "MyDisk" contains no files.
* \endcode
*
*
*
* Example 3:
*
* \code
* UChar* str;
* UChar* str1;
* UErrorCode status = U_ZERO_ERROR;
* UChar *result;
* UChar pattern[100];
* UChar expected[100];
* int32_t resultlength,resultLengthOut;
* str=(UChar*)malloc(sizeof(UChar) * 25);
* u_uastrcpy(str, "Kirti");
* str1=(UChar*)malloc(sizeof(UChar) * 25);
* u_uastrcpy(str1, "female");
* log_verbose("Testing message format with Select test #1\n:");
* u_uastrcpy(pattern, "{0} est {1, select, female {all\\u00E9e} other {all\\u00E9}} \\u00E0 Paris.");
* u_uastrcpy(expected, "Kirti est all\\u00E9e \\u00E0 Paris.");
* resultlength=0;
* resultLengthOut=u_formatMessage( "fr", pattern, u_strlen(pattern), NULL, resultlength, &status, str , str1);
* if(status==U_BUFFER_OVERFLOW_ERROR)
* {
* status=U_ZERO_ERROR;
* resultlength=resultLengthOut+1;
* result=(UChar*)malloc(sizeof(UChar) * resultlength);
* u_formatMessage( "fr", pattern, u_strlen(pattern), result, resultlength, &status, str , str1);
* if(u_strcmp(result, expected)==0)
* log_verbose("PASS: MessageFormat successful on Select test#1\n");
* else{
* log_err("FAIL: Error in MessageFormat on Select test#1\n GOT %s EXPECTED %s\n", austrdup(result),
* austrdup(expected) );
* }
* free(result);
* }
* \endcode
*
*/
/**
* Format a message for a locale.
* This function may perform re-ordering of the arguments depending on the
* locale. For all numeric arguments, double is assumed unless the type is
* explicitly integer. All choice format arguments must be of type double.
* @param locale The locale for which the message will be formatted
* @param pattern The pattern specifying the message's format
* @param patternLength The length of pattern
* @param result A pointer to a buffer to receive the formatted message.
* @param resultLength The maximum size of result.
* @param status A pointer to an UErrorCode to receive any errors
* @param ... A variable-length argument list containing the arguments specified
* in pattern.
* @return The total buffer size needed; if greater than resultLength, the
* output was truncated.
* @see u_parseMessage
* @see u_vformatMessage
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
u_formatMessage(const char *locale,
const UChar *pattern,
int32_t patternLength,
UChar *result,
int32_t resultLength,
UErrorCode *status,
...);
/**
* Format a message for a locale.
* This function may perform re-ordering of the arguments depending on the
* locale. For all numeric arguments, double is assumed unless the type is
* explicitly integer. All choice format arguments must be of type double.
* This is the va_list counterpart of u_formatMessage().
* @param locale The locale for which the message will be formatted
* @param pattern The pattern specifying the message's format
* @param patternLength The length of pattern
* @param result A pointer to a buffer to receive the formatted message.
* @param resultLength The maximum size of result.
* @param ap A variable-length argument list containing the arguments specified
* in pattern.
* @param status A pointer to an UErrorCode to receive any errors
* @return The total buffer size needed; if greater than resultLength, the
* output was truncated.
* @see u_parseMessage
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
u_vformatMessage( const char *locale,
const UChar *pattern,
int32_t patternLength,
UChar *result,
int32_t resultLength,
va_list ap,
UErrorCode *status);
/**
* Parse a message.
* For numeric arguments, this function will always use doubles. Integer types
* should not be passed.
* This function is not able to parse all output from {@link #u_formatMessage }.
* @param locale The locale for which the message is formatted
* @param pattern The pattern specifying the message's format
* @param patternLength The length of pattern
* @param source The text to parse.
* @param sourceLength The length of source, or -1 if null-terminated.
* @param status A pointer to an UErrorCode to receive any errors
* @param ... A variable-length argument list containing the arguments
* specified in pattern.
* @see u_formatMessage
* @see u_vparseMessage
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
u_parseMessage( const char *locale,
const UChar *pattern,
int32_t patternLength,
const UChar *source,
int32_t sourceLength,
UErrorCode *status,
...);
/**
* Parse a message.
* For numeric arguments, this function will always use doubles. Integer types
* should not be passed.
* This function is not able to parse all output from {@link #u_formatMessage }.
* This is the va_list counterpart of u_parseMessage().
* @param locale The locale for which the message is formatted
* @param pattern The pattern specifying the message's format
* @param patternLength The length of pattern
* @param source The text to parse.
* @param sourceLength The length of source, or -1 if null-terminated.
* @param ap A variable-length argument list containing the arguments
* specified in pattern.
* @param status A pointer to an UErrorCode to receive any errors
* @see u_formatMessage
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
u_vparseMessage(const char *locale,
const UChar *pattern,
int32_t patternLength,
const UChar *source,
int32_t sourceLength,
va_list ap,
UErrorCode *status);
/**
* Format a message for a locale.
* This function may perform re-ordering of the arguments depending on the
* locale. For all numeric arguments, double is assumed unless the type is
* explicitly integer. All choice format arguments must be of type double.
* @param locale The locale for which the message will be formatted
* @param pattern The pattern specifying the message's format
* @param patternLength The length of pattern
* @param result A pointer to a buffer to receive the formatted message.
* @param resultLength The maximum size of result.
* @param parseError A pointer to UParseError to receive information about errors
* that occurred during parsing.
* @param status A pointer to an UErrorCode to receive any errors
* @param ... A variable-length argument list containing the arguments specified
* in pattern.
* @return The total buffer size needed; if greater than resultLength, the
* output was truncated.
* @see u_parseMessage
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
u_formatMessageWithError( const char *locale,
const UChar *pattern,
int32_t patternLength,
UChar *result,
int32_t resultLength,
UParseError *parseError,
UErrorCode *status,
...);
/**
* Format a message for a locale.
* This function may perform re-ordering of the arguments depending on the
* locale. For all numeric arguments, double is assumed unless the type is
* explicitly integer. All choice format arguments must be of type double.
* This is the va_list counterpart of u_formatMessageWithError().
* @param locale The locale for which the message will be formatted
* @param pattern The pattern specifying the message's format
* @param patternLength The length of pattern
* @param result A pointer to a buffer to receive the formatted message.
* @param resultLength The maximum size of result.
* @param parseError A pointer to UParseError to receive information about errors
* that occurred during parsing.
* @param ap A variable-length argument list containing the arguments specified
* in pattern.
* @param status A pointer to an UErrorCode to receive any errors
* @return The total buffer size needed; if greater than resultLength, the
* output was truncated.
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
u_vformatMessageWithError( const char *locale,
const UChar *pattern,
int32_t patternLength,
UChar *result,
int32_t resultLength,
UParseError* parseError,
va_list ap,
UErrorCode *status);
/**
* Parse a message.
* For numeric arguments, this function will always use doubles. Integer types
* should not be passed.
* This function is not able to parse all output from {@link #u_formatMessage }.
* @param locale The locale for which the message is formatted
* @param pattern The pattern specifying the message's format
* @param patternLength The length of pattern
* @param source The text to parse.
* @param sourceLength The length of source, or -1 if null-terminated.
* @param parseError A pointer to UParseError to receive information about errors
* that occurred during parsing.
* @param status A pointer to an UErrorCode to receive any errors
* @param ... A variable-length argument list containing the arguments
* specified in pattern.
* @see u_formatMessage
* @see u_vparseMessageWithError
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
u_parseMessageWithError(const char *locale,
const UChar *pattern,
int32_t patternLength,
const UChar *source,
int32_t sourceLength,
UParseError *parseError,
UErrorCode *status,
...);
/**
* Parse a message.
* For numeric arguments, this function will always use doubles. Integer types
* should not be passed.
* This function is not able to parse all output from {@link #u_formatMessage }.
* This is the va_list counterpart of u_parseMessageWithError(). Note that
* here ap precedes parseError in the parameter list.
* @param locale The locale for which the message is formatted
* @param pattern The pattern specifying the message's format
* @param patternLength The length of pattern
* @param source The text to parse.
* @param sourceLength The length of source, or -1 if null-terminated.
* @param ap A variable-length argument list containing the arguments
* specified in pattern.
* @param parseError A pointer to UParseError to receive information about errors
* that occurred during parsing.
* @param status A pointer to an UErrorCode to receive any errors
* @see u_formatMessage
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
u_vparseMessageWithError(const char *locale,
const UChar *pattern,
int32_t patternLength,
const UChar *source,
int32_t sourceLength,
va_list ap,
UParseError *parseError,
UErrorCode* status);
/*----------------------- New experimental API --------------------------- */
/**
* The message format object.
* Declared as an untyped pointer (void*); the umsg_xxx() functions take
* it by address, as UMessageFormat*.
* @stable ICU 2.0
*/
typedef void* UMessageFormat;
/**
* Open a message formatter with given pattern and for the given locale.
* @param pattern A pattern specifying the format to use.
* @param patternLength Length of the pattern to use
* @param locale The locale for which the messages are formatted.
* @param parseError A pointer to UParseError struct to receive any errors
* that occurred during parsing. Can be NULL.
* @param status A pointer to an UErrorCode to receive any errors.
* @return A pointer to a UMessageFormat to use for formatting
* messages, or 0 if an error occurred.
* @see umsg_close
* @stable ICU 2.0
*/
U_STABLE UMessageFormat* U_EXPORT2
umsg_open( const UChar *pattern,
int32_t patternLength,
const char *locale,
UParseError *parseError,
UErrorCode *status);
/**
* Close a UMessageFormat.
* Once closed, a UMessageFormat may no longer be used.
* @param format The formatter to close.
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
umsg_close(UMessageFormat* format);
/**
* Open a copy of a UMessageFormat.
* This function performs a deep copy.
* @param fmt The formatter to copy
* @param status A pointer to an UErrorCode to receive any errors.
* @return A UMessageFormat identical to fmt.
* @stable ICU 2.0
*/
U_STABLE UMessageFormat U_EXPORT2
umsg_clone(const UMessageFormat *fmt,
UErrorCode *status);
/**
* Sets the locale. This locale is used for fetching default number or date
* format information.
* @param fmt The formatter to set
* @param locale The locale the formatter should use.
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
umsg_setLocale(UMessageFormat *fmt,
const char* locale);
/**
* Gets the locale. This locale is used for fetching default number or date
* format information.
* @param fmt The formatter to query
* @return the locale.
* @stable ICU 2.0
*/
U_STABLE const char* U_EXPORT2
umsg_getLocale(const UMessageFormat *fmt);
/**
* Sets the pattern.
* @param fmt The formatter to use
* @param pattern The pattern to be applied.
* @param patternLength Length of the pattern to use
* @param parseError Struct to receive information on position
* of error if an error is encountered. Can be NULL.
* @param status Output param set to success/failure code on
* exit. If the pattern is invalid, this will be
* set to a failure result.
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
umsg_applyPattern( UMessageFormat *fmt,
const UChar* pattern,
int32_t patternLength,
UParseError* parseError,
UErrorCode* status);
/**
* Gets the pattern.
* @param fmt The formatter to use
* @param result A pointer to a buffer to receive the pattern.
* @param resultLength The maximum size of result.
* @param status Output param set to success/failure code on
* exit. If the pattern is invalid, this will be
* set to a failure result.
* @return the pattern of the format
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
umsg_toPattern(const UMessageFormat *fmt,
UChar* result,
int32_t resultLength,
UErrorCode* status);
/**
* Format a message for a locale.
* This function may perform re-ordering of the arguments depending on the
* locale. For all numeric arguments, double is assumed unless the type is
* explicitly integer. All choice format arguments must be of type double.
* @param fmt The formatter to use
* @param result A pointer to a buffer to receive the formatted message.
* @param resultLength The maximum size of result.
* @param status A pointer to an UErrorCode to receive any errors
* @param ... A variable-length argument list containing the arguments
* specified in pattern.
* @return The total buffer size needed; if greater than resultLength,
* the output was truncated.
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
umsg_format( const UMessageFormat *fmt,
UChar *result,
int32_t resultLength,
UErrorCode *status,
...);
/**
* Format a message for a locale.
* This function may perform re-ordering of the arguments depending on the
* locale. For all numeric arguments, double is assumed unless the type is
* explicitly integer. All choice format arguments must be of type double.
* @param fmt The formatter to use
* @param result A pointer to a buffer to receive the formatted message.
* @param resultLength The maximum size of result.
* @param ap A variable-length argument list containing the arguments
* specified in pattern.
* @param status A pointer to an UErrorCode to receive any errors
* @return The total buffer size needed; if greater than resultLength,
* the output was truncated.
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
umsg_vformat( const UMessageFormat *fmt,
UChar *result,
int32_t resultLength,
va_list ap,
UErrorCode *status);
/**
* Parse a message.
* For numeric arguments, this function will always use doubles. Integer types
* should not be passed.
* This function is not able to parse all output from {@link #umsg_format }.
* @param fmt The formatter to use
* @param source The text to parse.
* @param sourceLength The length of source, or -1 if null-terminated.
* @param count Output param to receive number of elements returned.
* @param status A pointer to an UErrorCode to receive any errors
* @param ... A variable-length argument list containing the arguments
* specified in pattern.
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
umsg_parse( const UMessageFormat *fmt,
const UChar *source,
int32_t sourceLength,
int32_t *count,
UErrorCode *status,
...);
/**
* Parse a message.
* For numeric arguments, this function will always use doubles. Integer types
* should not be passed.
* This function is not able to parse all output from {@link #umsg_format }.
* @param fmt The formatter to use
* @param source The text to parse.
* @param sourceLength The length of source, or -1 if null-terminated.
* @param count Output param to receive number of elements returned.
* @param ap A variable-length argument list containing the arguments
* specified in pattern.
* @param status A pointer to an UErrorCode to receive any errors
* @see u_formatMessage
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
umsg_vparse(const UMessageFormat *fmt,
const UChar *source,
int32_t sourceLength,
int32_t *count,
va_list ap,
UErrorCode *status);
/**
* Convert an 'apostrophe-friendly' pattern into a standard
* pattern. Standard patterns treat all apostrophes as
* quotes, which is problematic in some languages, e.g.
* French, where apostrophe is commonly used. This utility
* assumes that only an unpaired apostrophe immediately before
* a brace is a true quote. Other unpaired apostrophes are paired,
* and the resulting standard pattern string is returned.
*
* Number Format C API
*
* Number Format C API Provides functions for
* formatting and parsing a number. Also provides methods for
* determining which locales have number formats, and what their names
* are.
*
* \code
* UChar myString[20];
* double myNumber = 7.0;
* UErrorCode status = U_ZERO_ERROR;
* UNumberFormat* nf = unum_open(UNUM_DEFAULT, NULL, -1, NULL, NULL, &status);
* unum_formatDouble(nf, myNumber, myString, 20, NULL, &status);
* printf(" Example 1: %s\n", austrdup(myString) ); //austrdup( a function used to convert UChar* to char*)
* \endcode
*
* If you are formatting multiple numbers, it is more efficient to get
* the format and use it multiple times so that the system doesn't
* have to fetch the information about the local language and country
* conventions multiple times.
*
* \code
* uint32_t i, resultlength, reslenneeded;
* UErrorCode status = U_ZERO_ERROR;
* UFieldPosition pos;
* uint32_t a[] = { 123, 3333, -1234567 };
* const uint32_t a_len = sizeof(a) / sizeof(a[0]);
* UNumberFormat* nf;
* UChar* result = NULL;
*
* nf = unum_open(UNUM_DEFAULT, NULL, -1, NULL, NULL, &status);
* for (i = 0; i < a_len; i++) {
* resultlength=0;
* reslenneeded=unum_format(nf, a[i], NULL, resultlength, &pos, &status);
* result = NULL;
* if(status==U_BUFFER_OVERFLOW_ERROR){
* status=U_ZERO_ERROR;
* resultlength=reslenneeded+1;
* result=(UChar*)malloc(sizeof(UChar) * resultlength);
* unum_format(nf, a[i], result, resultlength, &pos, &status);
* }
* printf( " Example 2: %s\n", austrdup(result));
* free(result);
* }
* \endcode
*
* To format a number for a different Locale, specify it in the
* call to unum_open().
*
* \code
* UNumberFormat* nf = unum_open(UNUM_DEFAULT, NULL, -1, "fr_FR", NULL, &status);
* \endcode
*
* You can use a NumberFormat API unum_parse() to parse.
*
* \code
* UErrorCode status = U_ZERO_ERROR;
* int32_t pos=0;
* int32_t num;
* num = unum_parse(nf, str, u_strlen(str), &pos, &status);
* \endcode
*
* Use UNUM_DECIMAL to get the normal number format for that country.
* There are other static options available. Use UNUM_CURRENCY
* to get the currency number format for that country. Use UNUM_PERCENT
* to get a format for displaying percentages. With this format, a
* fraction from 0.53 is displayed as 53%.
*
*
* format() is padded. */
UNUM_FORMAT_WIDTH,
/** The position at which padding will take place. */
UNUM_PADDING_POSITION,
/** Secondary grouping size */
UNUM_SECONDARY_GROUPING_SIZE,
/** Use significant digits
* @stable ICU 3.0 */
UNUM_SIGNIFICANT_DIGITS_USED,
/** Minimum significant digits
* @stable ICU 3.0 */
UNUM_MIN_SIGNIFICANT_DIGITS,
/** Maximum significant digits
* @stable ICU 3.0 */
UNUM_MAX_SIGNIFICANT_DIGITS,
/** Lenient parse mode used by rule-based formats.
* @stable ICU 3.0
*/
UNUM_LENIENT_PARSE,
#if UCONFIG_HAVE_PARSEALLINPUT
/** Consume all input. (may use fastpath). Set to UNUM_YES (require fastpath), UNUM_NO (skip fastpath), or UNUM_MAYBE (heuristic).
* This is an internal ICU API. Do not use.
* @internal
*/
UNUM_PARSE_ALL_INPUT = 20,
#endif
/**
* Scale, which adjusts the position of the
* decimal point when formatting. Amounts will be multiplied by 10 ^ (scale)
* before they are formatted. The default value for the scale is 0 ( no adjustment ).
*
* length>=size
* @see unum_setSymbol
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
unum_getSymbol(const UNumberFormat *fmt,
UNumberFormatSymbol symbol,
UChar *buffer,
int32_t size,
UErrorCode *status);
/**
* Set a symbol associated with a UNumberFormat.
* A UNumberFormat uses symbols to represent the special locale-dependent
* characters in a number, for example the percent sign. This API is not
* supported for rule-based formatters.
* @param fmt The formatter to set.
* @param symbol The UNumberFormatSymbol constant for the symbol to set
* @param value The string to set the symbol to
* @param length The length of the string, or -1 for a zero-terminated string
* @param status A pointer to an UErrorCode to receive any errors.
* @see unum_getSymbol
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
unum_setSymbol(UNumberFormat *fmt,
UNumberFormatSymbol symbol,
const UChar *value,
int32_t length,
UErrorCode *status);
/**
* Get the locale for this number format object.
* You can choose between valid and actual locale.
* @param fmt The formatter to get the locale from
* @param type type of the locale we're looking for (valid or actual)
* @param status error code for the operation
* @return the locale name
* @stable ICU 2.8
*/
U_STABLE const char* U_EXPORT2
unum_getLocaleByType(const UNumberFormat *fmt,
ULocDataLocaleType type,
UErrorCode* status);
/**
* Set a particular UDisplayContext value in the formatter, such as
* UDISPCTX_CAPITALIZATION_FOR_STANDALONE.
* @param fmt The formatter for which to set a UDisplayContext value.
* @param value The UDisplayContext value to set.
* @param status A pointer to an UErrorCode to receive any errors
* @stable ICU 53
*/
U_STABLE void U_EXPORT2
unum_setContext(UNumberFormat* fmt, UDisplayContext value, UErrorCode* status);
/**
* Get the formatter's UDisplayContext value for the specified UDisplayContextType,
* such as UDISPCTX_TYPE_CAPITALIZATION.
* @param fmt The formatter to query.
* @param type The UDisplayContextType whose value to return
* @param status A pointer to an UErrorCode to receive any errors
* @return The UDisplayContextValue for the specified type.
* @stable ICU 53
*/
U_STABLE UDisplayContext U_EXPORT2
unum_getContext(const UNumberFormat *fmt, UDisplayContextType type, UErrorCode* status);
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif
// udat.h
/*
*******************************************************************************
* Copyright (C) 1996-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
*/
#ifndef UDAT_H
#define UDAT_H
#if !UCONFIG_NO_FORMATTING
/**
* \file
* \brief C API: DateFormat
*
* Date Format C API
*
* Date Format C API consists of functions that convert dates and
* times from their internal representations to textual form and back again in a
* language-independent manner. Converting from the internal representation (milliseconds
* since midnight, January 1, 1970) to text is known as "formatting," and converting
* from text to millis is known as "parsing." We currently define only one concrete
* structure UDateFormat, which can handle pretty much all normal
* date formatting and parsing actions.
*
* \code
* UErrorCode status = U_ZERO_ERROR;
* UChar *myString;
* int32_t myStrlen = 0;
* UDateFormat* dfmt = udat_open(UDAT_DEFAULT, UDAT_DEFAULT, NULL, NULL, -1, NULL, -1, &status);
* myStrlen = udat_format(dfmt, myDate, NULL, myStrlen, NULL, &status);
* if (status==U_BUFFER_OVERFLOW_ERROR){
* status=U_ZERO_ERROR;
* myString=(UChar*)malloc(sizeof(UChar) * (myStrlen+1) );
* udat_format(dfmt, myDate, myString, myStrlen+1, NULL, &status);
* }
* \endcode
*
* If you are formatting multiple dates, it is more efficient to get the
* format and use it multiple times so that the system doesn't have to fetch the
* information about the local language and country conventions multiple times.
*
* \code
* UErrorCode status = U_ZERO_ERROR;
* int32_t i, myStrlen = 0;
* UChar* myString;
* char buffer[1024];
* UDate myDateArr[] = { 0.0, 100000000.0, 2000000000.0 }; // test values
* UDateFormat* df = udat_open(UDAT_DEFAULT, UDAT_DEFAULT, NULL, NULL, -1, NULL, 0, &status);
* for (i = 0; i < 3; i++) {
* myStrlen = udat_format(df, myDateArr[i], NULL, myStrlen, NULL, &status);
* if(status == U_BUFFER_OVERFLOW_ERROR){
* status = U_ZERO_ERROR;
* myString = (UChar*)malloc(sizeof(UChar) * (myStrlen+1) );
* udat_format(df, myDateArr[i], myString, myStrlen+1, NULL, &status);
* printf("%s\n", u_austrcpy(buffer, myString) );
* free(myString);
* }
* }
* \endcode
*
* To get specific fields of a date, you can use UFieldPosition to
* get specific fields.
*
* \code
* UErrorCode status = U_ZERO_ERROR;
* UFieldPosition pos;
* UChar *myString;
* int32_t myStrlen = 0;
* char buffer[1024];
*
* pos.field = 1; // Same as the DateFormat::EField enum
* UDateFormat* dfmt = udat_open(UDAT_DEFAULT, UDAT_DEFAULT, NULL, NULL, -1, NULL, 0, &status);
* myStrlen = udat_format(dfmt, myDate, NULL, myStrlen, &pos, &status);
* if (status==U_BUFFER_OVERFLOW_ERROR){
* status=U_ZERO_ERROR;
* myString=(UChar*)malloc(sizeof(UChar) * (myStrlen+1) );
* udat_format(dfmt, myDate, myString, myStrlen+1, &pos, &status);
* }
* printf("date format: %s\n", u_austrcpy(buffer, myString));
* buffer[pos.endIndex] = 0; // NULL terminate the string.
* printf("UFieldPosition position equals %s\n", &buffer[pos.beginIndex]);
* \endcode
*
* To format a date for a different Locale, specify it in the call to
* udat_open()
*
* \code
* UDateFormat* df = udat_open(UDAT_SHORT, UDAT_SHORT, "fr_FR", NULL, -1, NULL, 0, &status);
* \endcode
*
* You can use a DateFormat API udat_parse() to parse.
*
* \code
* UErrorCode status = U_ZERO_ERROR;
* int32_t parsepos=0;
* UDate myDate = udat_parse(df, myString, u_strlen(myString), &parsepos, &status);
* \endcode
*
* You can pass in different options for the arguments for date and time style
* to control the length of the result; from SHORT to MEDIUM to LONG to FULL.
* The exact result depends on the locale, but generally:
* see UDateFormatStyle for more details
*
*
* You can also set the time zone on the format if you wish.
*
*
* UDateFormat supports
* the date and time formatting algorithm and pattern letters defined by
* UTS#35
* Unicode Locale Data Markup Language (LDML) and further documented for ICU in the
* ICU
* User Guide.enum URegexpFlag. All desired flags
* are bitwise-ORed together.
* @param pe Receives the position (line and column numbers) of any syntax
* error within the source regular expression string. If this
* information is not wanted, pass NULL for this parameter.
* @param status Receives error detected by this function.
* @stable ICU 3.0
*
*/
U_STABLE URegularExpression * U_EXPORT2
uregex_open( const UChar *pattern,
int32_t patternLength,
uint32_t flags,
UParseError *pe,
UErrorCode *status);
/**
* Open (compile) an ICU regular expression. Compiles the regular expression in
* string form into an internal representation using the specified match mode flags.
* The resulting regular expression handle can then be used to perform various
* matching operations.
*
* @param pattern The regular expression pattern to be compiled, supplied as a UText.
* @param flags Match mode flags; see enum URegexpFlag. All desired flags
* are bitwise-ORed together.
* @param pe Receives the position (line and column numbers) of any syntax
* error within the source regular expression string. If this
* information is not wanted, pass NULL for this parameter.
* @param status Receives errors detected by this function.
*
* @stable ICU 4.6
*/
U_STABLE URegularExpression * U_EXPORT2
uregex_openUText(UText *pattern,
uint32_t flags,
UParseError *pe,
UErrorCode *status);
/**
* Open (compile) an ICU regular expression. The resulting regular expression
* handle can then be used to perform various matching operations.
*
* @param pattern The regular expression pattern to be compiled, supplied as a
* char * string.
* @param flags Match mode flags; see enum URegexpFlag. All desired flags
* are bitwise-ORed together.
* @param pe Receives the position (line and column numbers) of any syntax
* error within the source regular expression string. If this
* information is not wanted, pass NULL for this parameter.
* @param status Receives errors detected by this function.
* @return The URegularExpression object representing the compiled
* pattern.
*
* @stable ICU 3.0
*/
#if !UCONFIG_NO_CONVERSION
U_STABLE URegularExpression * U_EXPORT2
uregex_openC( const char *pattern,
uint32_t flags,
UParseError *pe,
UErrorCode *status);
#endif
/**
* Close the regular expression, recovering all resources (memory) it
* was holding.
*
* @param regexp The regular expression to be closed.
* @stable ICU 3.0
*/
U_STABLE void U_EXPORT2
uregex_close(URegularExpression *regexp);
/**
* Find the first matching substring of the input string that matches the pattern.
* If startIndex is >= zero the search for a match begins at the specified index,
* and any match region is reset. This corresponds directly with
* Matcher.find(startIndex) in Java.
*
* If startIndex == -1 the search begins at the start of the input region,
* or at the start of the full string if no region has been specified.
*
* If a match is found, uregex_start(), uregex_end(), and
* uregex_group() will provide more information regarding the match.
*
* @param regexp The compiled regular expression.
* @param startIndex The position (native) in the input string to begin the search, or
* -1 to search within the Input Region.
* @param status A reference to a UErrorCode to receive any errors.
* @return TRUE if a match is found.
* @stable ICU 3.0
*/
U_STABLE UBool U_EXPORT2
uregex_find(URegularExpression *regexp,
int32_t startIndex,
UErrorCode *status);
/**
* 64bit version of uregex_find.
* Find the first matching substring of the input string that matches the pattern.
* If startIndex is >= zero the search for a match begins at the specified index,
* and any match region is reset. This corresponds directly with
* Matcher.find(startIndex) in Java.
*
* If startIndex == -1 the search begins at the start of the input region,
* or at the start of the full string if no region has been specified.
*
* If a match is found, uregex_start(), uregex_end(), and
* uregex_group() will provide more information regarding the match.
*
* @param regexp The compiled regular expression.
* @param startIndex The position (native) in the input string to begin the search, or
* -1 to search within the Input Region.
* @param status A reference to a UErrorCode to receive any errors.
* @return TRUE if a match is found.
* @stable ICU 4.6
*/
U_STABLE UBool U_EXPORT2
uregex_find64(URegularExpression *regexp,
int64_t startIndex,
UErrorCode *status);
/**
* Find the next pattern match in the input string. Begin searching
* the input at the location following the end of the previous match,
* or at the start of the string (or region) if there is no
* previous match. If a match is found, uregex_start(), uregex_end(), and
* uregex_group() will provide more information regarding the match.
*
* @param regexp The compiled regular expression.
* @param status A reference to a UErrorCode to receive any errors.
* @return TRUE if a match is found.
* @see uregex_reset
* @stable ICU 3.0
*/
U_STABLE UBool U_EXPORT2
uregex_findNext(URegularExpression *regexp,
UErrorCode *status);
/**
* Get the number of capturing groups in this regular expression's pattern.
* @param regexp The compiled regular expression.
* @param status A reference to a UErrorCode to receive any errors.
* @return the number of capture groups
* @stable ICU 3.0
*/
U_STABLE int32_t U_EXPORT2
uregex_groupCount(URegularExpression *regexp,
UErrorCode *status);
/**
* Get the group number corresponding to a named capture group.
* The returned number can be used with any function that accesses
* capture groups by number.
*
* The function returns an error status if the specified name does not
* appear in the pattern.
*
* @param regexp The compiled regular expression.
* @param groupName The capture group name.
* @param nameLength The length of the name, or -1 if the name is a
* nul-terminated string.
* @param status A pointer to a UErrorCode to receive any errors.
*
* @stable ICU 55
*/
U_STABLE int32_t U_EXPORT2
uregex_groupNumberFromName(URegularExpression *regexp,
const UChar *groupName,
int32_t nameLength,
UErrorCode *status);
/**
* Get the group number corresponding to a named capture group.
* The returned number can be used with any function that accesses
* capture groups by number.
*
* The function returns an error status if the specified name does not
* appear in the pattern.
*
* @param regexp The compiled regular expression.
* @param groupName The capture group name,
* platform invariant characters only.
* @param nameLength The length of the name, or -1 if the name is
* nul-terminated.
* @param status A pointer to a UErrorCode to receive any errors.
*
* @stable ICU 55
*/
U_STABLE int32_t U_EXPORT2
uregex_groupNumberFromCName(URegularExpression *regexp,
const char *groupName,
int32_t nameLength,
UErrorCode *status);
/** Extract the string for the specified matching expression or subexpression.
* Group #0 is the complete string of matched text.
* Group #1 is the text matched by the first set of capturing parentheses.
*
* @param regexp The compiled regular expression.
* @param groupNum The capture group to extract. Group 0 is the complete
* match. The value of this parameter must be
* less than or equal to the number of capture groups in
* the pattern.
* @param dest Buffer to receive the matching string data
* @param destCapacity Capacity of the dest buffer.
* @param status A reference to a UErrorCode to receive any errors.
* @return Length of matching data,
* or -1 if no applicable match.
* @stable ICU 3.0
*/
U_STABLE int32_t U_EXPORT2
uregex_group(URegularExpression *regexp,
int32_t groupNum,
UChar *dest,
int32_t destCapacity,
UErrorCode *status);
/** Returns a shallow immutable clone of the entire input string with the current index set
* to the beginning of the requested capture group. The capture group length is also
* returned via groupLength.
* Group #0 is the complete string of matched text.
* Group #1 is the text matched by the first set of capturing parentheses.
*
* @param regexp The compiled regular expression.
* @param groupNum The capture group to extract. Group 0 is the complete
* match. The value of this parameter must be
* less than or equal to the number of capture groups in
* the pattern.
* @param dest A mutable UText in which to store the current input.
* If NULL, a new UText will be created as an immutable shallow clone
* of the entire input string.
* @param groupLength The group length of the desired capture group. Output parameter.
* @param status A reference to a UErrorCode to receive any errors.
* @return The subject text currently associated with this regular expression.
* If a pre-allocated UText was provided, it will always be used and returned.
*
* @stable ICU 4.6
*/
U_STABLE UText * U_EXPORT2
uregex_groupUText(URegularExpression *regexp,
int32_t groupNum,
UText *dest,
int64_t *groupLength,
UErrorCode *status);
/**
* Returns the index in the input string of the start of the text matched by the
* specified capture group during the previous match operation. Return -1 if
* the capture group was not part of the last match.
* Group #0 refers to the complete range of matched text.
* Group #1 refers to the text matched by the first set of capturing parentheses.
*
* @param regexp The compiled regular expression.
* @param groupNum The capture group number
* @param status A reference to a UErrorCode to receive any errors.
* @return the starting (native) position in the input of the text matched
* by the specified group.
* @stable ICU 3.0
*/
U_STABLE int32_t U_EXPORT2
uregex_start(URegularExpression *regexp,
int32_t groupNum,
UErrorCode *status);
/**
* 64bit version of uregex_start.
* Returns the index in the input string of the start of the text matched by the
* specified capture group during the previous match operation. Return -1 if
* the capture group was not part of the last match.
* Group #0 refers to the complete range of matched text.
* Group #1 refers to the text matched by the first set of capturing parentheses.
*
* @param regexp The compiled regular expression.
* @param groupNum The capture group number
* @param status A reference to a UErrorCode to receive any errors.
* @return the starting (native) position in the input of the text matched
* by the specified group.
* @stable ICU 4.6
*/
U_STABLE int64_t U_EXPORT2
uregex_start64(URegularExpression *regexp,
int32_t groupNum,
UErrorCode *status);
/**
* Returns the index in the input string of the position following the end
* of the text matched by the specified capture group.
* Return -1 if the capture group was not part of the last match.
* Group #0 refers to the complete range of matched text.
* Group #1 refers to the text matched by the first set of capturing parentheses.
*
* @param regexp The compiled regular expression.
* @param groupNum The capture group number
* @param status A reference to a UErrorCode to receive any errors.
* @return the (native) index of the position following the last matched character.
* @stable ICU 3.0
*/
U_STABLE int32_t U_EXPORT2
uregex_end(URegularExpression *regexp,
int32_t groupNum,
UErrorCode *status);
/**
* 64bit version of uregex_end.
* Returns the index in the input string of the position following the end
* of the text matched by the specified capture group.
* Return -1 if the capture group was not part of the last match.
* Group #0 refers to the complete range of matched text.
* Group #1 refers to the text matched by the first set of capturing parentheses.
*
* @param regexp The compiled regular expression.
* @param groupNum The capture group number
* @param status A reference to a UErrorCode to receive any errors.
* @return the (native) index of the position following the last matched character.
* @stable ICU 4.6
*/
U_STABLE int64_t U_EXPORT2
uregex_end64(URegularExpression *regexp,
int32_t groupNum,
UErrorCode *status);
/**
* Reset any saved state from the previous match. Has the effect of
* causing uregex_findNext to begin at the specified index, and causing
* uregex_start(), uregex_end() and uregex_group() to return an error
* indicating that there is no match information available. Clears any
* match region that may have been set.
*
* @param regexp The compiled regular expression.
* @param index The position (native) in the text at which a
* uregex_findNext() should begin searching.
* @param status A reference to a UErrorCode to receive any errors.
* @stable ICU 3.0
*/
U_STABLE void U_EXPORT2
uregex_reset(URegularExpression *regexp,
int32_t index,
UErrorCode *status);
/**
* 64bit version of uregex_reset.
* Reset any saved state from the previous match. Has the effect of
* causing uregex_findNext to begin at the specified index, and causing
* uregex_start(), uregex_end() and uregex_group() to return an error
* indicating that there is no match information available. Clears any
* match region that may have been set.
*
* @param regexp The compiled regular expression.
* @param index The position (native) in the text at which a
* uregex_findNext() should begin searching.
* @param status A reference to a UErrorCode to receive any errors.
* @stable ICU 4.6
*/
U_STABLE void U_EXPORT2
uregex_reset64(URegularExpression *regexp,
int64_t index,
UErrorCode *status);
/**
* Sets the limits of the matching region for this URegularExpression.
* The region is the part of the input string that will be considered when matching.
* Invoking this method resets any saved state from the previous match,
* then sets the region to start at the index specified by the start parameter
* and end at the index specified by the end parameter.
*
* Depending on the transparency and anchoring being used (see useTransparentBounds
* and useAnchoringBounds), certain constructs such as anchors may behave differently
* at or around the boundaries of the region
*
* The function will fail if start is greater than limit, or if either index
* is less than zero or greater than the length of the string being matched.
*
* @param regexp The compiled regular expression.
* @param regionStart The (native) index to begin searches at.
* @param regionLimit The (native) index to end searches at (exclusive).
* @param status A pointer to a UErrorCode to receive any errors.
* @stable ICU 4.0
*/
U_STABLE void U_EXPORT2
uregex_setRegion(URegularExpression *regexp,
int32_t regionStart,
int32_t regionLimit,
UErrorCode *status);
/**
* 64bit version of uregex_setRegion.
* Sets the limits of the matching region for this URegularExpression.
* The region is the part of the input string that will be considered when matching.
* Invoking this method resets any saved state from the previous match,
* then sets the region to start at the index specified by the start parameter
* and end at the index specified by the end parameter.
*
* Depending on the transparency and anchoring being used (see useTransparentBounds
* and useAnchoringBounds), certain constructs such as anchors may behave differently
* at or around the boundaries of the region
*
* The function will fail if start is greater than limit, or if either index
* is less than zero or greater than the length of the string being matched.
*
* @param regexp The compiled regular expression.
* @param regionStart The (native) index to begin searches at.
* @param regionLimit The (native) index to end searches at (exclusive).
* @param status A pointer to a UErrorCode to receive any errors.
* @stable ICU 4.6
*/
U_STABLE void U_EXPORT2
uregex_setRegion64(URegularExpression *regexp,
int64_t regionStart,
int64_t regionLimit,
UErrorCode *status);
/**
* Set the matching region and the starting index for subsequent matches
* in a single operation.
* This is useful because the usual function for setting the starting
* index, uregex_reset(), also resets any region limits.
*
* @param regexp The compiled regular expression.
* @param regionStart The (native) index to begin searches at.
* @param regionLimit The (native) index to end searches at (exclusive).
* @param startIndex The index in the input text at which the next
* match operation should begin.
* @param status A pointer to a UErrorCode to receive any errors.
* @stable ICU 4.6
*/
U_STABLE void U_EXPORT2
uregex_setRegionAndStart(URegularExpression *regexp,
int64_t regionStart,
int64_t regionLimit,
int64_t startIndex,
UErrorCode *status);
/**
* Reports the start index of the matching region. Any matches found are limited to
* the region bounded by regionStart (inclusive) and regionEnd (exclusive).
*
* @param regexp The compiled regular expression.
* @param status A pointer to a UErrorCode to receive any errors.
* @return The starting (native) index of this matcher's region.
* @stable ICU 4.0
*/
U_STABLE int32_t U_EXPORT2
uregex_regionStart(const URegularExpression *regexp,
UErrorCode *status);
/**
* 64bit version of uregex_regionStart.
* Reports the start index of the matching region. Any matches found are limited to
* the region bounded by regionStart (inclusive) and regionEnd (exclusive).
*
* @param regexp The compiled regular expression.
* @param status A pointer to a UErrorCode to receive any errors.
* @return The starting (native) index of this matcher's region.
* @stable ICU 4.6
*/
U_STABLE int64_t U_EXPORT2
uregex_regionStart64(const URegularExpression *regexp,
UErrorCode *status);
/**
 * Returns the end index (exclusive) of the matching region for this
 * URegularExpression. Any matches found are limited to the region bounded
 * by regionStart (inclusive) and regionEnd (exclusive).
 *
 * @param regexp  The compiled regular expression.
 * @param status  A pointer to a UErrorCode to receive any errors.
 * @return        The ending point (native) of this matcher's region.
 * @stable ICU 4.0
 */
U_STABLE int32_t U_EXPORT2
uregex_regionEnd(const URegularExpression *regexp,
                 UErrorCode               *status);
/**
 * 64-bit version of uregex_regionEnd.
 * Returns the end index (exclusive) of the matching region for this
 * URegularExpression. Any matches found are limited to the region bounded
 * by regionStart (inclusive) and regionEnd (exclusive).
 *
 * @param regexp  The compiled regular expression.
 * @param status  A pointer to a UErrorCode to receive any errors.
 * @return        The ending point (native) of this matcher's region.
 * @stable ICU 4.6
 */
U_STABLE int64_t U_EXPORT2
uregex_regionEnd64(const URegularExpression *regexp,
                   UErrorCode               *status);
/**
 * Queries the transparency of region bounds for this URegularExpression.
 * See uregex_useTransparentBounds for a description of transparent and
 * opaque bounds.
 * By default, matching boundaries are opaque.
 *
 * @param regexp  The compiled regular expression.
 * @param status  A pointer to a UErrorCode to receive any errors.
 * @return        TRUE if this matcher is using transparent bounds,
 *                FALSE if it is using opaque bounds.
 * @stable ICU 4.0
 */
U_STABLE UBool U_EXPORT2
uregex_hasTransparentBounds(const URegularExpression *regexp,
                            UErrorCode               *status);
/**
 * Selects transparent or opaque region bounds for this URegularExpression.
 * Calling this function with TRUE makes matches use transparent bounds;
 * calling it with FALSE makes them use opaque bounds.
 *
 * With transparent bounds, the boundaries of the matching region are
 * transparent to lookahead, lookbehind, and boundary matching constructs:
 * those constructs can see text beyond the boundaries of the region while
 * checking for a match.
 *
 * With opaque bounds, no text outside of the matching region is visible to
 * lookahead, lookbehind, and boundary matching constructs.
 *
 * Opaque bounds are used by default.
 *
 * @param regexp  The compiled regular expression.
 * @param b       TRUE for transparent bounds; FALSE for opaque bounds.
 * @param status  A pointer to a UErrorCode to receive any errors.
 * @stable ICU 4.0
 */
U_STABLE void U_EXPORT2
uregex_useTransparentBounds(URegularExpression *regexp,
                            UBool               b,
                            UErrorCode         *status);
/**
 * Reports whether this URegularExpression is using anchoring bounds.
 * Anchoring region bounds are used by default.
 *
 * @param regexp  The compiled regular expression.
 * @param status  A pointer to a UErrorCode to receive any errors.
 * @return        TRUE if this matcher is using anchoring bounds.
 * @stable ICU 4.0
 */
U_STABLE UBool U_EXPORT2
uregex_hasAnchoringBounds(const URegularExpression *regexp,
                          UErrorCode               *status);
/**
 * Chooses whether this URegularExpression uses anchoring bounds for its
 * region.
 * With anchoring bounds, pattern anchors such as ^ and $ match at the start
 * and end of the region. Without anchoring bounds, anchors only match at
 * the positions they would in the complete text.
 *
 * Anchoring bounds are the default for regions.
 *
 * @param regexp  The compiled regular expression.
 * @param b       TRUE to enable anchoring bounds; FALSE to disable them.
 * @param status  A pointer to a UErrorCode to receive any errors.
 * @stable ICU 4.0
 */
U_STABLE void U_EXPORT2
uregex_useAnchoringBounds(URegularExpression *regexp,
                          UBool               b,
                          UErrorCode         *status);
/**
 * Reports whether the most recent matching operation touched the end of
 * the text being processed. When it did, additional input text could
 * change the results of that match.
 *
 * @param regexp  The compiled regular expression.
 * @param status  A pointer to a UErrorCode to receive any errors.
 * @return        TRUE if the most recent match hit the end of input.
 * @stable ICU 4.0
 */
U_STABLE UBool U_EXPORT2
uregex_hitEnd(const URegularExpression *regexp,
              UErrorCode               *status);
/**
 * Returns TRUE if the most recent match succeeded and additional input
 * could cause it to fail. If this function returns FALSE and a match was
 * found, then more input might change the match but the match won't be
 * lost. If a match was not found, then requireEnd has no meaning.
 *
 * @param regexp  The compiled regular expression.
 * @param status  A pointer to a UErrorCode to receive any errors.
 * @return        TRUE if more input could cause the most recent match to
 *                no longer match.
 * @stable ICU 4.0
 */
U_STABLE UBool U_EXPORT2
uregex_requireEnd(const URegularExpression *regexp,
                  UErrorCode               *status);
/**
 * Convenience function providing a complete find-and-replace-all
 * operation: every substring of the input that matches the pattern is
 * replaced with the given replacement string.
 *
 * The input string is scanned for matches of the pattern. Input that is
 * not part of any match is copied unchanged to the destination buffer,
 * and each matched region is replaced in the output buffer by the
 * replacement string. The replacement string may contain references to
 * capture groups; these take the form of $1, $2, etc.
 *
 * @param regexp             The compiled regular expression.
 * @param replacementText    A string containing the replacement text.
 * @param replacementLength  The length of the replacement string, or
 *                           -1 if it is NUL terminated.
 * @param destBuf            A (UChar *) buffer that will receive the result.
 * @param destCapacity       The capacity of the destination buffer.
 * @param status             A reference to a UErrorCode to receive any errors.
 * @return  The length of the string resulting from the find and replace
 *          operation. In the event that the destination capacity is
 *          inadequate, the return value is still the full length of the
 *          untruncated string.
 * @stable ICU 3.0
 */
U_STABLE int32_t U_EXPORT2
uregex_replaceAll(URegularExpression *regexp,
                  const UChar        *replacementText,
                  int32_t             replacementLength,
                  UChar              *destBuf,
                  int32_t             destCapacity,
                  UErrorCode         *status);
/**
 * UText flavor of the complete find-and-replace-all convenience
 * operation: every substring of the input that matches the pattern is
 * replaced with the given replacement string.
 *
 * The input string is scanned for matches of the pattern. Input that is
 * not part of any match is copied unchanged to the destination, and each
 * matched region is replaced in the output by the replacement string.
 * The replacement string may contain references to capture groups; these
 * take the form of $1, $2, etc.
 *
 * @param regexp       The compiled regular expression.
 * @param replacement  A string containing the replacement text.
 * @param dest         A mutable UText that will receive the result.
 *                     If NULL, a new UText will be created (which may not
 *                     be mutable).
 * @param status       A reference to a UErrorCode to receive any errors.
 * @return  A UText containing the results of the find and replace.
 *          If a pre-allocated UText was provided, it will always be used
 *          and returned.
 *
 * @stable ICU 4.6
 */
U_STABLE UText * U_EXPORT2
uregex_replaceAllUText(URegularExpression *regexp,
                       UText              *replacement,
                       UText              *dest,
                       UErrorCode         *status);
/**
 * Convenience function providing a complete find-and-replace operation:
 * the first substring of the input that matches the pattern is replaced
 * with the given replacement string.
 *
 * The input string is scanned for a match of the pattern. All input that
 * is not part of the match is copied unchanged to the destination buffer,
 * and the matched region is replaced in the output buffer by the
 * replacement string. The replacement string may contain references to
 * capture groups; these take the form of $1, $2, etc.
 *
 * @param regexp             The compiled regular expression.
 * @param replacementText    A string containing the replacement text.
 * @param replacementLength  The length of the replacement string, or
 *                           -1 if it is NUL terminated.
 * @param destBuf            A (UChar *) buffer that will receive the result.
 * @param destCapacity       The capacity of the destination buffer.
 * @param status             A reference to a UErrorCode to receive any errors.
 * @return  The length of the string resulting from the find and replace
 *          operation. In the event that the destination capacity is
 *          inadequate, the return value is still the full length of the
 *          untruncated string.
 * @stable ICU 3.0
 */
U_STABLE int32_t U_EXPORT2
uregex_replaceFirst(URegularExpression *regexp,
                    const UChar        *replacementText,
                    int32_t             replacementLength,
                    UChar              *destBuf,
                    int32_t             destCapacity,
                    UErrorCode         *status);
/**
 * UText flavor of the complete find-and-replace convenience operation:
 * the first substring of the input that matches the pattern is replaced
 * with the given replacement string.
 *
 * The input string is scanned for a match of the pattern. All input that
 * is not part of the match is copied unchanged to the destination, and
 * the matched region is replaced in the output by the replacement string.
 * The replacement string may contain references to capture groups; these
 * take the form of $1, $2, etc.
 *
 * @param regexp       The compiled regular expression.
 * @param replacement  A string containing the replacement text.
 * @param dest         A mutable UText that will receive the result.
 *                     If NULL, a new UText will be created (which may not
 *                     be mutable).
 * @param status       A reference to a UErrorCode to receive any errors.
 * @return  A UText containing the results of the find and replace.
 *          If a pre-allocated UText was provided, it will always be used
 *          and returned.
 *
 * @stable ICU 4.6
 */
U_STABLE UText * U_EXPORT2
uregex_replaceFirstUText(URegularExpression *regexp,
                         UText              *replacement,
                         UText              *dest,
                         UErrorCode         *status);
/**
 * Final step of an incremental find-and-replace operation.
 *
 * uregex_appendTail() is intended to be invoked after one or more
 * invocations of the uregex_appendReplacement() function.
 *
 * @param regexp        The regular expression object. This is needed to
 *                      obtain the input string and the position of the
 *                      last match within it.
 * @param destBuf       The buffer in which the results of the
 *                      find-and-replace are placed. On return, the pointer
 *                      will be updated to refer to the beginning of the
 *                      unused portion of buffer.
 * @param destCapacity  The size of the output buffer. On return, this
 *                      value will be updated to reflect the space remaining
 *                      unused in the output buffer.
 * @param status        A reference to a UErrorCode to receive any errors.
 * @return  The length of the result string. In the event that
 *          destCapacity is inadequate, the full length of the
 *          untruncated output string is returned.
 *
 * @stable ICU 3.0
 */
U_STABLE int32_t U_EXPORT2
uregex_appendTail(URegularExpression  *regexp,
                  UChar              **destBuf,
                  int32_t             *destCapacity,
                  UErrorCode          *status);
/**
 * Final step in a find-and-replace operation: appends the remainder of
 * the input string, starting at the position following the last match,
 * to the destination string. uregex_appendTailUText() is intended to be
 * invoked after one or more invocations of the
 * uregex_appendReplacementUText() function.
 *
 * @param regexp  The regular expression object. This is needed to obtain
 *                the input string and the position of the last match
 *                within it.
 * @param dest    A mutable UText that will receive the result.
 *                Must not be NULL.
 * @param status  Error code.
 *
 * @return        The destination UText.
 *
 * @stable ICU 4.6
 */
U_STABLE UText * U_EXPORT2
uregex_appendTailUText(URegularExpression *regexp,
                       UText              *dest,
                       UErrorCode         *status);
/**
 * Splits a string into fields, somewhat like split() from Perl.
 * Matches of the pattern identify the delimiters that separate the input
 * into fields; the input data between the matches becomes the fields
 * themselves.
 *
 * Each field is copied from the input string to the destination buffer
 * and NUL terminated. The position of each field within the destination
 * buffer is returned in the destFields array.
 *
 * If the delimiter pattern includes capture groups, the captured text
 * also appears in the destination array of output strings, interspersed
 * with the fields. This is similar to Perl, but differs from Java, which
 * ignores the presence of capture groups in the pattern.
 *
 * Trailing empty fields are always returned, assuming sufficient
 * destination capacity. This differs from the default behavior for Java
 * and Perl, where trailing empty fields are not returned.
 *
 * The number of strings produced by the split operation is returned.
 * This count includes the strings from capture groups in the delimiter
 * pattern. This behavior differs from Java, which ignores capture groups.
 *
 * @param regexp              The compiled regular expression.
 * @param destBuf             A (UChar *) buffer to receive the fields that
 *                            are extracted from the input string. These
 *                            field pointers will refer to positions within
 *                            the destination buffer supplied by the caller.
 *                            Any extra positions within the destFields
 *                            array will be set to NULL.
 * @param destCapacity        The capacity of the destBuf.
 * @param requiredCapacity    The actual capacity required of the destBuf.
 *                            If destCapacity is too small, requiredCapacity
 *                            will return the total capacity required to
 *                            hold all of the output, and a
 *                            U_BUFFER_OVERFLOW_ERROR will be returned.
 * @param destFields          An array to be filled with the position of
 *                            each of the extracted fields within destBuf.
 * @param destFieldsCapacity  The number of elements in the destFields array.
 *                            If the number of fields found is less than
 *                            destFieldsCapacity, the extra destFields
 *                            elements are set to zero.
 *                            If destFieldsCapacity is too small, the
 *                            trailing part of the input, including any
 *                            field delimiters, is treated as if it were
 *                            the last field - it is copied to the destBuf,
 *                            and its position in the destBuf is stored in
 *                            the last element of destFields. This behavior
 *                            mimics that of Perl. It is not an error
 *                            condition, and no error status is returned
 *                            when all destField positions are used.
 * @param status              A reference to a UErrorCode to receive any
 *                            errors.
 * @return  The number of fields into which the input string was split.
 * @stable ICU 3.0
 */
U_STABLE int32_t U_EXPORT2
uregex_split(URegularExpression *regexp,
             UChar              *destBuf,
             int32_t             destCapacity,
             int32_t            *requiredCapacity,
             UChar              *destFields[],
             int32_t             destFieldsCapacity,
             UErrorCode         *status);
/**
 * Split a string into fields. Somewhat like split() from Perl.
 * The pattern matches identify delimiters that separate the input
 * into fields. The input data between the matches becomes the
 * fields themselves.
 *
 * NOTE(review): the paragraph above appears to be the start of a separate
 * doc comment whose remainder is missing here; the text below describes
 * string-search matching instead. Confirm against the upstream ICU
 * uregex.h and usearch.h headers.
 *
 * Let S' be the sub-string of a text string S between the offsets start and
 * end.
 * A pattern string P matches a text string S at the offsets start and end
 * if either:
 *   option 1. Some canonical equivalent of P matches some canonical
 *             equivalent of S'
 *   option 2. P matches S' and if P starts or ends with a combining mark,
 *             there exists no non-ignorable combining mark before or after
 *             S' in S respectively.
 *
 * Option 2 will be the default.
 *
 * Currently there are no composite characters that consist of a
 * character with combining class > 0 before a character with combining
 * class == 0. However, if such a character exists in the future, the
 * search mechanism does not guarantee the results for option 1.
 *
 * @stable ICU 2.4
 */
/**
 * Sentinel meaning "no (more) matches".
 * DONE is returned by previous() and next() after all valid matches have
 * been returned, and by first() and last() if there are no matches at all.
 * @stable ICU 2.4
 */
#define USEARCH_DONE -1
/**
 * Data structure for searching.
 * Only the struct tag is declared here, together with the UStringSearch
 * typedef name for it; no members are visible in this header.
 * @stable ICU 2.4
 */
typedef struct UStringSearch UStringSearch;
/**
 * Text searching attributes.
 * NOTE(review): the value 1 is not declared in this header.
 * @stable ICU 2.4
 */
typedef enum {
    /**
     * Option for overlapping matches
     * @stable ICU 2.4
     */
    USEARCH_OVERLAP = 0,

    /**
     * Option controlling how collation elements are compared.
     * The default value is USEARCH_STANDARD_ELEMENT_COMPARISON.
     * @stable ICU 4.4
     */
    USEARCH_ELEMENT_COMPARISON = 2,

    /**
     * Count of attribute types
     * @stable ICU 2.4
     */
    USEARCH_ATTRIBUTE_COUNT = 3
} USearchAttribute;
/**
 * Values that can be assigned to the attributes in USearchAttribute.
 * @stable ICU 2.4
 */
typedef enum {
    /**
     * Default value for any USearchAttribute
     * @stable ICU 2.4
     */
    USEARCH_DEFAULT = -1,
    /**
     * Value for USEARCH_OVERLAP and USEARCH_CANONICAL_MATCH
     * @stable ICU 2.4
     */
    USEARCH_OFF,
    /**
     * Value for USEARCH_OVERLAP and USEARCH_CANONICAL_MATCH
     * @stable ICU 2.4
     */
    USEARCH_ON,
    /**
     * Value (default) for USEARCH_ELEMENT_COMPARISON;
     * standard collation element comparison at the specified collator
     * strength.
     * @stable ICU 4.4
     */
    USEARCH_STANDARD_ELEMENT_COMPARISON,
    /**
     * Value for USEARCH_ELEMENT_COMPARISON;
     * collation element comparison is modified to effectively provide
     * behavior between the specified strength and strength - 1. Collation
     * elements in the pattern that have the base weight for the specified
     * strength are treated as "wildcards" that match an element with any
     * other weight at that collation level in the searched text. For
     * example, with a secondary-strength English collator, a plain 'e' in
     * the pattern will match a plain e or an e with any diacritic in the
     * searched text, but an e with diacritic in the pattern will only
     * match an e with the same diacritic in the searched text.
     *
     * This supports "asymmetric search" as described in
     * UTS #10 Unicode Collation Algorithm.
     *
     * @stable ICU 4.4
     */
    USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD,
    /**
     * Value for USEARCH_ELEMENT_COMPARISON.
     * collation element comparison is modified to effectively provide
     * behavior between the specified strength and strength - 1. Collation
     * elements in either the pattern or the searched text that have the
     * base weight for the specified strength are treated as "wildcards"
     * that match an element with any other weight at that collation level.
     * For example, with a secondary-strength English collator, a plain 'e'
     * in the pattern will match a plain e or an e with any diacritic in the
     * searched text, but an e with diacritic in the pattern will only
     * match an e with the same diacritic or a plain e in the searched text.
     *
     * This option is similar to "asymmetric search" as described in
     * UTS #10 Unicode Collation Algorithm.
     *
     * NOTE(review): this enumerator, the count below and the closing of the
     * enum were missing from this copy of the header (the text had been
     * truncated mid-comment); they are restored from the upstream ICU
     * usearch.h declaration. Confirm against the shipped SDK header.
     *
     * @stable ICU 4.4
     */
    USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD,
    /**
     * Count of attribute values
     * @stable ICU 2.4
     */
    USEARCH_ATTRIBUTE_VALUE_COUNT
} USearchAttributeValue;
/* open and close ------------------------------------------------------ */
/**
 * Creating a search iterator data struct using the argument locale language
 * rule set. A collator will be created in the process, which will be owned by
 * this search and will be deleted in usearch_close.
 * @param pattern for matching
 * @param patternlength length of the pattern, -1 for null-termination
 * @param text text string
 * @param textlength length of the text string, -1 for null-termination
 * @param locale name of locale for the rules to be used
 * @param breakiter A BreakIterator that will be used to restrict the points
 *                  at which matches are detected. If a match is found, but
 *                  the match's start or end index is not a boundary as
 *                  determined by the BreakIterator, the match will
 *                  be rejected and another will be searched for.
 *                  If this parameter is NULL, no break detection is
 *                  attempted.
 * @param status for errors if it occurs. If pattern or text is NULL, or if
 *               patternlength or textlength is 0 then an
 *               U_ILLEGAL_ARGUMENT_ERROR is returned.
 * @return search iterator data structure, or NULL if there is an error.
 * @stable ICU 2.4
 */
U_STABLE UStringSearch * U_EXPORT2
usearch_open(const UChar    *pattern,
             int32_t         patternlength,
             const UChar    *text,
             int32_t         textlength,
             const char     *locale,
             UBreakIterator *breakiter,
             UErrorCode     *status);
/**
 * Creates a search iterator data struct using the language rule set of the
 * argument collator. The user retains ownership of this collator, and so
 * the responsibility for deleting it lies with the user.
 * NOTE: string search cannot be instantiated from a collator that has
 * collate digits as numbers (CODAN) turned on.
 *
 * @param pattern        for matching
 * @param patternlength  length of the pattern, -1 for null-termination
 * @param text           text string
 * @param textlength     length of the text string, -1 for null-termination
 * @param collator       used for the language rules
 * @param breakiter      A BreakIterator that will be used to restrict the
 *                       points at which matches are detected. If a match is
 *                       found, but the match's start or end index is not a
 *                       boundary as determined by the BreakIterator, the
 *                       match will be rejected and another will be searched
 *                       for. If this parameter is NULL, no break detection
 *                       is attempted.
 * @param status         for errors if it occurs. If collator, pattern or
 *                       text is NULL, or if patternlength or textlength is
 *                       0 then an U_ILLEGAL_ARGUMENT_ERROR is returned.
 * @return search iterator data structure, or NULL if there is an error.
 * @stable ICU 2.4
 */
U_STABLE UStringSearch * U_EXPORT2
usearch_openFromCollator(const UChar     *pattern,
                         int32_t          patternlength,
                         const UChar     *text,
                         int32_t          textlength,
                         const UCollator *collator,
                         UBreakIterator  *breakiter,
                         UErrorCode      *status);
/**
 * Destroys and cleans up the search iterator data struct.
 * If a collator was created in usearch_open, it is destroyed here.
 *
 * @param searchiter  data struct to clean up
 * @stable ICU 2.4
 */
U_STABLE void U_EXPORT2
usearch_close(UStringSearch *searchiter);
/* get and set methods -------------------------------------------------- */
/**
 * Sets the position in the text string from which the next search will
 * start, and clears previous states.
 * The argument index is used to set the position in the text string as
 * given, without checking whether it points to a valid starting point to
 * begin searching.
 * Search positions that may render incorrect results are highlighted in
 * the header comments.
 *
 * @param strsrch   search iterator data struct
 * @param position  position to start next search from. If position is less
 *                  than or greater than the text range for searching,
 *                  an U_INDEX_OUTOFBOUNDS_ERROR will be returned
 * @param status    error status if any.
 * @stable ICU 2.4
 */
U_STABLE void U_EXPORT2
usearch_setOffset(UStringSearch *strsrch,
                  int32_t        position,
                  UErrorCode    *status);
/**
 * Returns the current index in the string text being searched.
 * USEARCH_DONE is returned if the iteration has gone past the end of the
 * text (or past the beginning for a backwards search).
 *
 * @param strsrch  search iterator data struct
 * @see #USEARCH_DONE
 * @stable ICU 2.4
 */
U_STABLE int32_t U_EXPORT2
usearch_getOffset(const UStringSearch *strsrch);
/**
 * Sets one of the text searching attributes listed in the enum
 * USearchAttribute to a value from the enum USearchAttributeValue.
 * USEARCH_DEFAULT can be used for all attributes for resetting.
 *
 * @param strsrch    search iterator data struct
 * @param attribute  text attribute to be set
 * @param value      text attribute value
 * @param status     for errors if it occurs
 * @see #usearch_getAttribute
 * @stable ICU 2.4
 */
U_STABLE void U_EXPORT2
usearch_setAttribute(UStringSearch         *strsrch,
                     USearchAttribute       attribute,
                     USearchAttributeValue  value,
                     UErrorCode            *status);
/**
 * Gets the value of a text searching attribute.
 *
 * @param strsrch    search iterator data struct
 * @param attribute  text attribute to be retrieved
 * @return text attribute value
 * @see #usearch_setAttribute
 * @stable ICU 2.4
 */
U_STABLE USearchAttributeValue U_EXPORT2
usearch_getAttribute(const UStringSearch *strsrch,
                     USearchAttribute     attribute);
/**
* Returns the index to the match in the text string that was searched.
* This call returns a valid result only after a successful call to
* usearch_first, usearch_next, usearch_previous,
* or usearch_last.
* Just after construction, or after a searching method returns
* USEARCH_DONE, this method will return USEARCH_DONE.
*
* char *tgtstr = "The quick brown fox jumped over the lazy fox";
* char *patstr = "fox";
* UChar target[64];
* UChar pattern[16];
* UErrorCode status = U_ZERO_ERROR;
* u_uastrcpy(target, tgtstr);
* u_uastrcpy(pattern, patstr);
*
* UStringSearch *search = usearch_open(pattern, -1, target, -1, "en_US",
* NULL, &status);
* if (U_SUCCESS(status)) {
* for (int pos = usearch_first(search, &status);
* pos != USEARCH_DONE;
* pos = usearch_next(search, &status))
* {
* printf("Found match at %d pos, length is %d\n", pos,
* usearch_getMatchLength(search));
* }
* }
*
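* usearch_close(search);
*
* NOTE(review): text appears to be missing at this point. The example above
* belongs to the string search API, while everything below discusses time
* scales (see UDateTimeScale); the beginning of the following sentence is
* lost. Restore from the upstream ICU usearch.h and utmscale.h documentation.
*
* ... averageTime = (time1 + time2)/2, there will be overflow even with dates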
* around the present. Moreover, even if these problems don't occur, there is the issue of
* conversion back and forth between different systems.
*
* Source                       Datatype            Unit                     Epoch
* ---------------------------  ------------------  -----------------------  ------------
* UDTS_JAVA_TIME               int64_t             milliseconds             Jan 1, 1970
* UDTS_UNIX_TIME               int32_t or int64_t  seconds                  Jan 1, 1970
* UDTS_ICU4C_TIME              double              milliseconds             Jan 1, 1970
* UDTS_WINDOWS_FILE_TIME       int64_t             ticks (100 nanoseconds)  Jan 1, 1601
* UDTS_DOTNET_DATE_TIME        int64_t             ticks (100 nanoseconds)  Jan 1, 0001
* UDTS_MAC_OLD_TIME            int32_t or int64_t  seconds                  Jan 1, 1904
* UDTS_MAC_TIME                double              seconds                  Jan 1, 2001
* UDTS_EXCEL_TIME              ?                   days                     Dec 31, 1899
* UDTS_DB2_TIME                ?                   days                     Dec 31, 1899
* UDTS_UNIX_MICROSECONDS_TIME  int64_t             microseconds             Jan 1, 1970
*
*
* Units                   int64_t         double          int32_t
* ----------------------  --------------  --------------  -------
* 1 sec                   5.84542x10^11   285,420,920.94  136.10
* 1 millisecond           584,542,046.09  285,420.92      0.14
* 1 microsecond           584,542.05      285.42          0.00
* 100 nanoseconds (tick)  58,454.20       28.54           0.00
* 1 nanosecond            584.5420461     0.2854          0.00
*
* System.DateTime values without severe loss of precision. ICU4C time addresses this by using a
* double that is otherwise equivalent to the Java time. However, there are disadvantages
* with doubles. They provide for much more graceful degradation in arithmetic operations.
* But they only have 53 bits of accuracy, which means that they will lose precision when
* converting back and forth to ticks. What would really be nice would be a
* long double (80 bits -- 64 bit mantissa), but that is not supported on most systems.
*
* BigDecimal would allow for arbitrary precision and arbitrary range,
* but we do not want to use this as the normal type, because it is slow and does not
* have a fixed size.
*
* System.DateTime would be the best pivot. However, we use the full range
* allowed by the datatype, allowing for datetimes back to 29,000 BC and up to 29,000 AD.
* This time scale is very fine grained, does not lose precision, and covers a range that
* will meet almost all requirements. It will not handle the range that Java times do,
* but frankly, being able to handle dates before 29,000 BC or after 29,000 AD is of very limited interest.
*
*/
/**
 * UDateTimeScale values are used to specify the time scale used for
 * conversion into or out of the universal time scale.
 *
 * @stable ICU 3.2
 */
typedef enum UDateTimeScale {
    /**
     * Used in the JDK. Data is a Java long (int64_t).
     * Value is milliseconds since January 1, 1970.
     *
     * @stable ICU 3.2
     */
    UDTS_JAVA_TIME = 0,

    /**
     * Used on Unix systems. Data is int32_t or int64_t.
     * Value is seconds since January 1, 1970.
     *
     * @stable ICU 3.2
     */
    UDTS_UNIX_TIME,

    /**
     * Used in ICU4C. Data is a double.
     * Value is milliseconds since January 1, 1970.
     *
     * @stable ICU 3.2
     */
    UDTS_ICU4C_TIME,

    /**
     * Used in Windows for file times. Data is an int64_t.
     * Value is ticks (1 tick == 100 nanoseconds) since January 1, 1601.
     *
     * @stable ICU 3.2
     */
    UDTS_WINDOWS_FILE_TIME,

    /**
     * Used in the .NET framework's System.DateTime structure.
     * Data is an int64_t.
     * Value is ticks (1 tick == 100 nanoseconds) since January 1, 0001.
     *
     * @stable ICU 3.2
     */
    UDTS_DOTNET_DATE_TIME,

    /**
     * Used in older Macintosh systems. Data is int32_t or int64_t.
     * Value is seconds since January 1, 1904.
     *
     * @stable ICU 3.2
     */
    UDTS_MAC_OLD_TIME,

    /**
     * Used in newer Macintosh systems. Data is a double.
     * Value is seconds since January 1, 2001.
     *
     * @stable ICU 3.2
     */
    UDTS_MAC_TIME,

    /**
     * Used in Excel. Data is an ?unknown?.
     * Value is days since December 31, 1899.
     *
     * @stable ICU 3.2
     */
    UDTS_EXCEL_TIME,

    /**
     * Used in DB2. Data is an ?unknown?.
     * Value is days since December 31, 1899.
     *
     * @stable ICU 3.2
     */
    UDTS_DB2_TIME,

    /**
     * Data is a long. Value is microseconds since January 1, 1970.
     * Similar to Unix time (linear value from 1970) and struct timeval
     * (microseconds resolution).
     *
     * @stable ICU 3.8
     */
    UDTS_UNIX_MICROSECONDS_TIME,

    /**
     * The first unused time scale value. The limit of this enum.
     */
    UDTS_MAX_SCALE
} UDateTimeScale;
/**
 * UTimeScaleValue values are used to specify the time scale values
 * to utmscale_getTimeScaleValue.
 *
 * @see utmscale_getTimeScaleValue
 *
 * @stable ICU 3.2
 */
typedef enum UTimeScaleValue {
    /**
     * Selects the units value for a time scale.
     *
     * @see utmscale_getTimeScaleValue
     *
     * @stable ICU 3.2
     */
    UTSV_UNITS_VALUE = 0,

    /**
     * Selects the epoch offset value for a time scale.
     *
     * @see utmscale_getTimeScaleValue
     *
     * @stable ICU 3.2
     */
    UTSV_EPOCH_OFFSET_VALUE = 1,

    /**
     * Selects the minimum from value for a time scale.
     *
     * @see utmscale_getTimeScaleValue
     *
     * @stable ICU 3.2
     */
    UTSV_FROM_MIN_VALUE = 2,

    /**
     * Selects the maximum from value for a time scale.
     *
     * @see utmscale_getTimeScaleValue
     *
     * @stable ICU 3.2
     */
    UTSV_FROM_MAX_VALUE = 3,

    /**
     * Selects the minimum to value for a time scale.
     *
     * @see utmscale_getTimeScaleValue
     *
     * @stable ICU 3.2
     */
    UTSV_TO_MIN_VALUE = 4,

    /**
     * Selects the maximum to value for a time scale.
     *
     * @see utmscale_getTimeScaleValue
     *
     * @stable ICU 3.2
     */
    UTSV_TO_MAX_VALUE = 5,

    /**
     * The number of time scale values, in other words limit of this enum.
     * NOTE(review): values 6 through 10 are not declared in this header;
     * presumably they are reserved by the library - confirm.
     *
     * @see utmscale_getTimeScaleValue
     */
    UTSV_MAX_SCALE_VALUE = 11
} UTimeScaleValue;
/**
 * Gets a value associated with a particular time scale.
 *
 * @param timeScale  The time scale
 * @param value      A constant representing the value to get
 * @param status     The status code. Set to U_ILLEGAL_ARGUMENT_ERROR if
 *                   arguments are invalid.
 * @return           The value.
 *
 * @stable ICU 3.2
 */
U_STABLE int64_t U_EXPORT2
utmscale_getTimeScaleValue(UDateTimeScale   timeScale,
                           UTimeScaleValue  value,
                           UErrorCode      *status);
/* Conversion to 'universal time scale' */
/**
 * Converts an int64_t datetime from the given time scale to the universal
 * time scale.
 *
 * @param otherTime  The int64_t datetime
 * @param timeScale  The time scale to convert from
 * @param status     The status code. Set to U_ILLEGAL_ARGUMENT_ERROR if
 *                   the conversion is out of range.
 *
 * @return The datetime converted to the universal time scale
 *
 * @stable ICU 3.2
 */
U_STABLE int64_t U_EXPORT2
utmscale_fromInt64(int64_t         otherTime,
                   UDateTimeScale  timeScale,
                   UErrorCode     *status);
/* Conversion from 'universal time scale' */
/**
 * Converts a datetime from the universal time scale to an int64_t in the
 * given time scale.
 *
 * @param universalTime  The datetime in the universal time scale
 * @param timeScale      The time scale to convert to
 * @param status         The status code. Set to U_ILLEGAL_ARGUMENT_ERROR
 *                       if the conversion is out of range.
 *
 * @return The datetime converted to the given time scale
 *
 * @stable ICU 3.2
 */
U_STABLE int64_t U_EXPORT2
utmscale_toInt64(int64_t         universalTime,
                 UDateTimeScale  timeScale,
                 UErrorCode     *status);
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif
// utrans.h
/*
*******************************************************************************
* Copyright (C) 1997-2011,2014-2015 International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* Date Name Description
* 06/21/00 aliu Creation.
*******************************************************************************
*/
#ifndef UTRANS_H
#define UTRANS_H
#if !UCONFIG_NO_TRANSLITERATION
/********************************************************************
* General Notes
********************************************************************
*/
/**
* \file
* \brief C API: Transliterator
*
* Transliteration
* The data structures and functions described in this header provide
* transliteration services. Transliteration services are implemented
* as C++ classes. The comments and documentation in this header
* assume the reader is familiar with the C++ headers translit.h and
* associated documentation.
*
* A significant but incomplete subset of the C++ transliteration
* services is available to C code through this header. In order to
* access more complex transliteration services, refer to the C++
* headers and documentation.
*
* There are two sets of functions for working with transliterator IDs:
*
* An old, deprecated set uses char * IDs, which works for true and pure
* identifiers that these APIs were designed for,
* for example "Cyrillic-Latin".
* It does not work when the ID contains filters ("[:Script=Cyrl:]")
* or even a complete set of rules because then the ID string contains more
* than just "invariant" characters (see utypes.h).
*
* A new set of functions replaces the old ones and uses UChar * IDs,
* paralleling the UnicodeString IDs in the C++ API. (New in ICU 2.8.)
*/
/********************************************************************
* Data Structures
********************************************************************/
/**
* Opaque handle type for a transliterator used from C. It is the C
* counterpart of the C++ class Transliterator and its subclasses.
* Obtain one with utrans_openxxx() and release it with utrans_close()
* once it is no longer needed.
* @see Transliterator
* @stable ICU 2.0
*/
typedef void *UTransliterator;
/**
* Selects which way a transliterator runs, e.g. the forward or the
* reverse rules of a RuleBasedTransliterator. The direction is given
* when the transliterator is opened: an "A-B" transliterator maps A to
* B in the forward direction and B to A in the reverse direction.
* @stable ICU 2.0
*/
typedef enum UTransDirection {
/**
* Forward: from <source> to <target> for a transliterator whose ID
* is <source>-<target>. For a transliterator opened from a rule,
* this selects the forward direction rules, e.g., "A > B".
*/
UTRANS_FORWARD = 0,
/**
* Reverse: from <target> to <source> for a transliterator whose ID
* is <source>-<target>. For a transliterator opened from a rule,
* this selects the reverse direction rules, e.g., "A < B".
*/
UTRANS_REVERSE = 1
} UTransDirection;
/*
* Position structure for utrans_transIncremental() incremental
* transliteration. This structure defines two substrings of the text
* being transliterated. The first region, [contextStart,
* contextLimit), defines what characters the transliterator will read
* as context. The second region, [start, limit), defines what
* characters will actually be transliterated. The second region
* should be a subset of the first.
*
* NOTE(review): the paragraph above is the start of the description of
* the UTransPosition structure. The rest of that description, the
* structure declaration itself, and the opening of the utrans_trans()
* comment appear to have been lost when this header was generated;
* none of them is present in the visible utrans.h section. Confirm
* against the upstream ICU utrans.h.
*/
/**
* Transliterate the text in [start, *limit) of a UReplaceable string.
*
* NOTE(review): the summary line and the descriptions of trans, rep
* and repFunc are reconstructed from the signature and from the
* surviving parameter text; confirm against the upstream ICU utrans.h.
*
* @param trans the transliterator
* @param rep a pointer to the text to transliterate; it is the argument
* given to the repFunc callbacks (e.g. repFunc->length(rep)).
* @param repFunc the callbacks used to access rep.
* @param start the beginning index, inclusive; 0 <= start <=
* limit.
* @param limit pointer to the ending index, exclusive; start <=
* limit <= repFunc->length(rep). Upon return, *limit will
* contain the new limit index. The text previously occupying
* [start, limit) has been transliterated, possibly to a
* string of a different length, at [start,
* new-limit), where new-limit
* is the value stored in *limit on return (the function itself
* returns void).
* @param status a pointer to the UErrorCode
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
utrans_trans(const UTransliterator* trans,
UReplaceable* rep,
UReplaceableCallbacks* repFunc,
int32_t start,
int32_t* limit,
UErrorCode* status);
/*
* NOTE(review): the text below describes utrans_transIncremental(),
* which is not declared in the visible utrans.h section. Several
* phrases appear to have been lost when this header was generated; the
* text is kept here, minimally repaired, so that nothing is dropped.
* Confirm against the upstream ICU utrans.h.
*
* Transliterate the portion of the UReplaceable text buffer that can
* be transliterated unambiguously. This method is typically called
* after new text has been inserted, e.g. as a result of a keyboard
* event. The transliterator will try to transliterate characters of
* rep between index.cursor and
* index.limit. Characters before
* index.cursor will not be changed.
*
* On return, index will be updated.
* index.start will be advanced to the first
* character that future calls to this method will read.
* index.cursor and index.limit will
* be adjusted to delimit the range of text that future calls to
* this method may change.
*
* Typical usage starts with index.start and index.limit
* set to indicate the portion of text to be
* transliterated, and index.cursor == index.start.
* Thereafter, index can be used without
* modification in future calls, provided that all changes to
* text are made via this method.
*/
/**
* Transliterate the text in [start, *limit) of a UChar* string.
*
* NOTE(review): the summary line and the descriptions of trans, text,
* textLength and textCapacity are missing from the generated comment.
* They are reconstructed here from the signature and from the parallel
* documentation of utrans_transIncrementalUChars(); confirm against
* the upstream ICU utrans.h.
*
* @param trans the transliterator
* @param text a pointer to a buffer containing the text to be
* transliterated on input and (presumably, as for
* utrans_transIncrementalUChars()) the result text on output.
* @param textLength a pointer to the length of the string in text;
* presumably follows the same conventions as the textLength parameter
* of utrans_transIncrementalUChars() - confirm.
* @param textCapacity the length of the text buffer
* @param start the beginning index, inclusive; 0 <= start <=
* limit.
* @param limit pointer to the ending index, exclusive; start <=
* limit <= repFunc->length(rep). (This function has no rep or
* repFunc parameter; presumably the upper bound is the length of
* the string in text - confirm.) Upon return, *limit will
* contain the new limit index. The text previously occupying
* [start, limit) has been transliterated, possibly to a
* string of a different length, at [start,
* new-limit), where new-limit
* is the value stored in *limit on return (the function itself
* returns void).
* @param status a pointer to the UErrorCode
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
utrans_transUChars(const UTransliterator* trans,
UChar* text,
int32_t* textLength,
int32_t textCapacity,
int32_t start,
int32_t* limit,
UErrorCode* status);
/**
* Transliterate the portion of the UChar* text buffer that can be
* transliterated unambiguously. See utrans_transIncremental(). The
* string is passed in a UChar* buffer. The string is modified in
* place. If the result is longer than textCapacity, it is truncated.
* The actual length of the result is returned in *textLength, if
* textLength is non-NULL. *textLength may be greater than
* textCapacity, but only textCapacity UChars will be written to
* *text, including the zero terminator. See utrans_transIncremental()
* for usage details.
*
* NOTE(review): neither UTransPosition nor utrans_transIncremental()
* is declared in the visible utrans.h section of this header; confirm
* that they are available to code that includes it.
*
* @param trans the transliterator
* @param text a pointer to a buffer containing the text to be
* transliterated on input and the result text on output.
* @param textLength a pointer to the length of the string in text.
* If the length is -1 then the string is assumed to be
* zero-terminated. Upon return, the new length is stored in
* *textLength. If textLength is NULL then the string is assumed to
* be zero-terminated.
* @param textCapacity the length of the text buffer
* @param pos a struct containing the start and limit indices of the
* text to be read and the text to be transliterated
* @param status a pointer to the UErrorCode
* @see utrans_transIncremental
* @stable ICU 2.0
*/
U_STABLE void U_EXPORT2
utrans_transIncrementalUChars(const UTransliterator* trans,
UChar* text,
int32_t* textLength,
int32_t textCapacity,
UTransPosition* pos,
UErrorCode* status);
/**
* Create a rule string that can be passed to utrans_openU to recreate this
* transliterator.
*
* NOTE(review): utrans_openU is not declared in the visible utrans.h
* section of this header; confirm that it is available.
*
* @param trans The transliterator
* @param escapeUnprintable if TRUE then convert unprintable characters to their
* hex escape representations, \\uxxxx or \\Uxxxxxxxx.
* Unprintable characters are those other than
* U+000A, U+0020..U+007E.
* @param result A pointer to a buffer to receive the rules.
* @param resultLength The maximum size of result.
* @param status A pointer to the UErrorCode. In case of error status, the
* contents of result are undefined.
* @return int32_t The length of the rule string. This may be greater than
* resultLength, in which case an error is reported.
* @stable ICU 53
*/
U_STABLE int32_t U_EXPORT2
utrans_toRules( const UTransliterator* trans,
UBool escapeUnprintable,
UChar* result, int32_t resultLength,
UErrorCode* status);
/**
* Returns the set of all characters that may be modified in the input text by
* this UTransliterator, optionally ignoring the transliterator's current filter.
* @param trans The transliterator.
* @param ignoreFilter If FALSE, the returned set incorporates the
* UTransliterator's current filter; if the filter is changed,
* the return value of this function will change. If TRUE, the
* returned set ignores the effect of the UTransliterator's
* current filter.
* @param fillIn Pointer to a USet object to receive the modifiable characters
* set. Previous contents of fillIn are lost. If fillIn is
* NULL, then a new USet is created and returned. The caller
* owns the result and must dispose of it by calling uset_close.
* @param status A pointer to the UErrorCode.
* @return USet* Either fillIn, or if fillIn is NULL, a pointer to a
* newly-allocated USet that the caller must close with
* uset_close. In case of error, NULL is returned.
* @stable ICU 53
*/
U_STABLE USet* U_EXPORT2
utrans_getSourceSet(const UTransliterator* trans,
UBool ignoreFilter,
USet* fillIn,
UErrorCode* status);
/* deprecated API ----------------------------------------------------------- */
#endif /* #if !UCONFIG_NO_TRANSLITERATION */
#endif
// vtzone.h
// No supported content
#endif // (NTDDI_VERSION >= NTDDI_WIN10_RS2)