ICU 69.1  69.1
unorm2.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2009-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: unorm2.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2009dec15
16 * created by: Markus W. Scherer
17 */
18 
19 #ifndef __UNORM2_H__
20 #define __UNORM2_H__
21 
33 #include "unicode/utypes.h"
34 #include "unicode/stringoptions.h"
35 #include "unicode/uset.h"
36 
37 #if U_SHOW_CPLUSPLUS_API
38 #include "unicode/localpointer.h"
39 #endif // U_SHOW_CPLUSPLUS_API
40 
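48 typedef enum {
57  UNORM2_COMPOSE,
66  UNORM2_DECOMPOSE,
79  UNORM2_FCD,
89  UNORM2_COMPOSE_CONTIGUOUS
90 } UNormalization2Mode;
91 
97 typedef enum UNormalizationCheckResult {
102  UNORM_NO,
107  UNORM_YES,
115  UNORM_MAYBE
116 } UNormalizationCheckResult;
117 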
122 struct UNormalizer2;
123 typedef struct UNormalizer2 UNormalizer2;
125 #if !UCONFIG_NO_NORMALIZATION
126 
138 U_CAPI const UNormalizer2 * U_EXPORT2
139 unorm2_getNFCInstance(UErrorCode *pErrorCode);
140 
152 U_CAPI const UNormalizer2 * U_EXPORT2
153 unorm2_getNFDInstance(UErrorCode *pErrorCode);
154 
166 U_CAPI const UNormalizer2 * U_EXPORT2
167 unorm2_getNFKCInstance(UErrorCode *pErrorCode);
168 
180 U_CAPI const UNormalizer2 * U_EXPORT2
181 unorm2_getNFKDInstance(UErrorCode *pErrorCode);
182 
194 U_CAPI const UNormalizer2 * U_EXPORT2
195 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
196 
218 U_CAPI const UNormalizer2 * U_EXPORT2
219 unorm2_getInstance(const char *packageName,
220  const char *name,
221  UNormalization2Mode mode,
222  UErrorCode *pErrorCode);
223 
239 U_CAPI UNormalizer2 * U_EXPORT2
240 unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode);
241 
248 U_CAPI void U_EXPORT2
249 unorm2_close(UNormalizer2 *norm2);
250 
251 #if U_SHOW_CPLUSPLUS_API
252 
253 U_NAMESPACE_BEGIN
254 
265 U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close);
266 U_NAMESPACE_END
267 
268 #endif
269 
286 U_CAPI int32_t U_EXPORT2
287 unorm2_normalize(const UNormalizer2 *norm2,
288  const UChar *src, int32_t length,
289  UChar *dest, int32_t capacity,
290  UErrorCode *pErrorCode);
309 U_CAPI int32_t U_EXPORT2
310 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
311  UChar *first, int32_t firstLength, int32_t firstCapacity,
312  const UChar *second, int32_t secondLength,
313  UErrorCode *pErrorCode);
332 U_CAPI int32_t U_EXPORT2
333 unorm2_append(const UNormalizer2 *norm2,
334  UChar *first, int32_t firstLength, int32_t firstCapacity,
335  const UChar *second, int32_t secondLength,
336  UErrorCode *pErrorCode);
337 
357 U_CAPI int32_t U_EXPORT2
358 unorm2_getDecomposition(const UNormalizer2 *norm2,
359  UChar32 c, UChar *decomposition, int32_t capacity,
360  UErrorCode *pErrorCode);
361 
391 U_CAPI int32_t U_EXPORT2
392 unorm2_getRawDecomposition(const UNormalizer2 *norm2,
393  UChar32 c, UChar *decomposition, int32_t capacity,
394  UErrorCode *pErrorCode);
395 
411 U_CAPI UChar32 U_EXPORT2
412 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b);
413 
423 U_CAPI uint8_t U_EXPORT2
424 unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c);
425 
442 U_CAPI UBool U_EXPORT2
443 unorm2_isNormalized(const UNormalizer2 *norm2,
444  const UChar *s, int32_t length,
445  UErrorCode *pErrorCode);
446 
464 U_CAPI UNormalizationCheckResult U_EXPORT2
465 unorm2_quickCheck(const UNormalizer2 *norm2,
466  const UChar *s, int32_t length,
467  UErrorCode *pErrorCode);
468 
493 U_CAPI int32_t U_EXPORT2
494 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
495  const UChar *s, int32_t length,
496  UErrorCode *pErrorCode);
497 
507 U_CAPI UBool U_EXPORT2
508 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
509 
519 U_CAPI UBool U_EXPORT2
520 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
521 
530 U_CAPI UBool U_EXPORT2
531 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
532 
599 U_CAPI int32_t U_EXPORT2
600 unorm_compare(const UChar *s1, int32_t length1,
601  const UChar *s2, int32_t length2,
602  uint32_t options,
603  UErrorCode *pErrorCode);
604 
605 #endif /* !UCONFIG_NO_NORMALIZATION */
606 #endif /* __UNORM2_H__ */
U_CAPI int32_t unorm2_getRawDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)
Gets the raw decomposition mapping of c.
U_CAPI int32_t unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
Returns the end of the normalized substring of the input string.
UNORM2_FCD
"Fast C or D" form.
Definition: unorm2.h:79
UNORM_NO
The input string is not in the normalization form.
Definition: unorm2.h:102
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
UNORM2_DECOMPOSE
Map, and reorder canonically.
Definition: unorm2.h:66
stringoptions.h
C API: Bit set option bit constants for various string and character processing functions.
U_CAPI int32_t unorm2_getDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)
Gets the decomposition mapping of c.
U_CAPI UNormalizationCheckResult unorm2_quickCheck(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
Tests if the string is normalized.
U_CAPI int32_t unorm2_normalize(const UNormalizer2 *norm2, const UChar *src, int32_t length, UChar *dest, int32_t capacity, UErrorCode *pErrorCode)
Writes the normalized form of the source string to the destination string (replacing its contents) an...
U_CAPI void unorm2_close(UNormalizer2 *norm2)
Closes a UNormalizer2 instance from unorm2_openFiltered().
UNORM_YES
The input string is in the normalization form.
Definition: unorm2.h:107
U_CAPI int32_t unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)
Appends the normalized form of the second string to the first string (merging them at the boundary) a...
UNORM_MAYBE
The input string may or may not be in the normalization form.
Definition: unorm2.h:115
U_CAPI UBool unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c)
Tests if the character always has a normalization boundary after it, regardless of context...
UNORM2_COMPOSE_CONTIGUOUS
Compose only contiguously.
Definition: unorm2.h:89
U_CAPI const UNormalizer2 * unorm2_getInstance(const char *packageName, const char *name, UNormalization2Mode mode, UErrorCode *pErrorCode)
Returns a UNormalizer2 instance which uses the specified data file (packageName/name similar to ucnv_...
uset.h
C API: Unicode Set.
U_CAPI UBool unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c)
Tests if the character always has a normalization boundary before it, regardless of context...
U_CAPI UBool unorm2_isNormalized(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
Tests if the string is normalized.
U_CAPI const UNormalizer2 * unorm2_getNFKDInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFKD normalization.
U_CAPI const UNormalizer2 * unorm2_getNFDInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFD normalization.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:550
localpointer.h
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
U_CAPI int32_t unorm2_append(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)
Appends the second string to the first string (merging them at the boundary) and returns the length o...
U_CAPI const UNormalizer2 * unorm2_getNFCInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFC normalization.
struct UNormalizer2 UNormalizer2
C typedef for struct UNormalizer2.
Definition: unorm2.h:123
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:467
U_CAPI UChar32 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b)
Performs pairwise composition of a & b and returns the composite if there is one. ...
U_CAPI uint8_t unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c)
Gets the combining class of c.
U_CAPI UNormalizer2 * unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode)
Constructs a filtered normalizer wrapping any UNormalizer2 instance and a filter set.
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:418
U_CAPI const UNormalizer2 * unorm2_getNFKCInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFKC normalization.
UNormalization2Mode
Constants for normalization modes.
Definition: unorm2.h:48
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition: uset.h:50
U_CAPI const UNormalizer2 * unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization.
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
U_CAPI int32_t unorm_compare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
Compares two strings for canonical equivalence.
U_CAPI UBool unorm2_isInert(const UNormalizer2 *norm2, UChar32 c)
Tests if the character is normalization-inert.
UNORM2_COMPOSE
Decomposition followed by composition.
Definition: unorm2.h:57
LocalUNormalizer2Pointer
"Smart pointer" class, closes a UNormalizer2 via unorm2_close().
UNormalizationCheckResult
Result values for normalization quick check functions.
Definition: unorm2.h:97
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:269