ICU 69.1  69.1
uset.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2002-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: uset.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2002mar07
16 * created by: Markus W. Scherer
17 *
18 * C version of UnicodeSet.
19 */
20 
21 
29 #ifndef __USET_H__
30 #define __USET_H__
31 
32 #include "unicode/utypes.h"
33 #include "unicode/uchar.h"
34 
35 #if U_SHOW_CPLUSPLUS_API
36 #include "unicode/localpointer.h"
37 #endif // U_SHOW_CPLUSPLUS_API
38 
39 #ifndef USET_DEFINED
40 
41 #ifndef U_IN_DOXYGEN
42 #define USET_DEFINED
43 #endif
44 
50 typedef struct USet USet;
51 #endif
52 
58 enum {
64 
92 
102 };
103 
159 typedef enum USetSpanCondition {
208 #ifndef U_HIDE_DEPRECATED_API
209 
214 #endif // U_HIDE_DEPRECATED_API
216 
217 enum {
225 };
226 
232 typedef struct USerializedSet {
237  const uint16_t *array;
242  int32_t bmpLength;
247  int32_t length;
252  uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
253 } USerializedSet;
254 
255 /*********************************************************************
256  * USet API
257  *********************************************************************/
258 
266 U_CAPI USet* U_EXPORT2
267 uset_openEmpty(void);
268 
279 U_CAPI USet* U_EXPORT2
280 uset_open(UChar32 start, UChar32 end);
281 
291 U_CAPI USet* U_EXPORT2
292 uset_openPattern(const UChar* pattern, int32_t patternLength,
293  UErrorCode* ec);
294 
306 U_CAPI USet* U_EXPORT2
307 uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
308  uint32_t options,
309  UErrorCode* ec);
310 
317 U_CAPI void U_EXPORT2
318 uset_close(USet* set);
319 
320 #if U_SHOW_CPLUSPLUS_API
321 
322 U_NAMESPACE_BEGIN
323 
334 
335 U_NAMESPACE_END
336 
337 #endif
338 
348 U_CAPI USet * U_EXPORT2
349 uset_clone(const USet *set);
350 
360 U_CAPI UBool U_EXPORT2
361 uset_isFrozen(const USet *set);
362 
377 U_CAPI void U_EXPORT2
378 uset_freeze(USet *set);
379 
390 U_CAPI USet * U_EXPORT2
391 uset_cloneAsThawed(const USet *set);
392 
402 U_CAPI void U_EXPORT2
403 uset_set(USet* set,
404  UChar32 start, UChar32 end);
405 
427 U_CAPI int32_t U_EXPORT2
428 uset_applyPattern(USet *set,
429  const UChar *pattern, int32_t patternLength,
430  uint32_t options,
431  UErrorCode *status);
432 
455 U_CAPI void U_EXPORT2
456 uset_applyIntPropertyValue(USet* set,
457  UProperty prop, int32_t value, UErrorCode* ec);
458 
494 U_CAPI void U_EXPORT2
495 uset_applyPropertyAlias(USet* set,
496  const UChar *prop, int32_t propLength,
497  const UChar *value, int32_t valueLength,
498  UErrorCode* ec);
499 
509 U_CAPI UBool U_EXPORT2
510 uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
511  int32_t pos);
512 
528 U_CAPI int32_t U_EXPORT2
529 uset_toPattern(const USet* set,
530  UChar* result, int32_t resultCapacity,
531  UBool escapeUnprintable,
532  UErrorCode* ec);
533 
542 U_CAPI void U_EXPORT2
543 uset_add(USet* set, UChar32 c);
544 
557 U_CAPI void U_EXPORT2
558 uset_addAll(USet* set, const USet *additionalSet);
559 
569 U_CAPI void U_EXPORT2
570 uset_addRange(USet* set, UChar32 start, UChar32 end);
571 
581 U_CAPI void U_EXPORT2
582 uset_addString(USet* set, const UChar* str, int32_t strLen);
583 
593 U_CAPI void U_EXPORT2
594 uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
595 
604 U_CAPI void U_EXPORT2
605 uset_remove(USet* set, UChar32 c);
606 
616 U_CAPI void U_EXPORT2
617 uset_removeRange(USet* set, UChar32 start, UChar32 end);
618 
628 U_CAPI void U_EXPORT2
629 uset_removeString(USet* set, const UChar* str, int32_t strLen);
630 
631 #ifndef U_HIDE_DRAFT_API
632 
641 U_CAPI void U_EXPORT2
642 uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
643 #endif // U_HIDE_DRAFT_API
644 
656 U_CAPI void U_EXPORT2
657 uset_removeAll(USet* set, const USet* removeSet);
658 
671 U_CAPI void U_EXPORT2
672 uset_retain(USet* set, UChar32 start, UChar32 end);
673 
674 #ifndef U_HIDE_DRAFT_API
675 
686 U_CAPI void U_EXPORT2
687 uset_retainString(USet *set, const UChar *str, int32_t length);
688 
698 U_CAPI void U_EXPORT2
699 uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
700 #endif // U_HIDE_DRAFT_API
701 
714 U_CAPI void U_EXPORT2
715 uset_retainAll(USet* set, const USet* retain);
716 
725 U_CAPI void U_EXPORT2
726 uset_compact(USet* set);
727 
736 U_CAPI void U_EXPORT2
737 uset_complement(USet* set);
738 
739 #ifndef U_HIDE_DRAFT_API
740 
753 U_CAPI void U_EXPORT2
754 uset_complementRange(USet *set, UChar32 start, UChar32 end);
755 
766 U_CAPI void U_EXPORT2
767 uset_complementString(USet *set, const UChar *str, int32_t length);
768 
778 U_CAPI void U_EXPORT2
779 uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
780 #endif // U_HIDE_DRAFT_API
781 
793 U_CAPI void U_EXPORT2
794 uset_complementAll(USet* set, const USet* complement);
795 
803 U_CAPI void U_EXPORT2
804 uset_clear(USet* set);
805 
832 U_CAPI void U_EXPORT2
833 uset_closeOver(USet* set, int32_t attributes);
834 
841 U_CAPI void U_EXPORT2
842 uset_removeAllStrings(USet* set);
843 
851 U_CAPI UBool U_EXPORT2
852 uset_isEmpty(const USet* set);
853 
862 U_CAPI UBool U_EXPORT2
863 uset_contains(const USet* set, UChar32 c);
864 
874 U_CAPI UBool U_EXPORT2
875 uset_containsRange(const USet* set, UChar32 start, UChar32 end);
876 
885 U_CAPI UBool U_EXPORT2
886 uset_containsString(const USet* set, const UChar* str, int32_t strLen);
887 
898 U_CAPI int32_t U_EXPORT2
899 uset_indexOf(const USet* set, UChar32 c);
900 
911 U_CAPI UChar32 U_EXPORT2
912 uset_charAt(const USet* set, int32_t charIndex);
913 
922 U_CAPI int32_t U_EXPORT2
923 uset_size(const USet* set);
924 
933 U_CAPI int32_t U_EXPORT2
934 uset_getItemCount(const USet* set);
935 
954 U_CAPI int32_t U_EXPORT2
955 uset_getItem(const USet* set, int32_t itemIndex,
956  UChar32* start, UChar32* end,
957  UChar* str, int32_t strCapacity,
958  UErrorCode* ec);
959 
968 U_CAPI UBool U_EXPORT2
969 uset_containsAll(const USet* set1, const USet* set2);
970 
981 U_CAPI UBool U_EXPORT2
982 uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
983 
992 U_CAPI UBool U_EXPORT2
993 uset_containsNone(const USet* set1, const USet* set2);
994 
1003 U_CAPI UBool U_EXPORT2
1004 uset_containsSome(const USet* set1, const USet* set2);
1005 
1025 U_CAPI int32_t U_EXPORT2
1026 uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1027 
1046 U_CAPI int32_t U_EXPORT2
1047 uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1048 
1068 U_CAPI int32_t U_EXPORT2
1069 uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1070 
1089 U_CAPI int32_t U_EXPORT2
1090 uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1091 
1100 U_CAPI UBool U_EXPORT2
1101 uset_equals(const USet* set1, const USet* set2);
1102 
1103 /*********************************************************************
1104  * Serialized set API
1105  *********************************************************************/
1106 
1156 U_CAPI int32_t U_EXPORT2
1157 uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1158 
1167 U_CAPI UBool U_EXPORT2
1168 uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1169 
1177 U_CAPI void U_EXPORT2
1178 uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
1179 
1188 U_CAPI UBool U_EXPORT2
1189 uset_serializedContains(const USerializedSet* set, UChar32 c);
1190 
1200 U_CAPI int32_t U_EXPORT2
1201 uset_getSerializedRangeCount(const USerializedSet* set);
1202 
1216 U_CAPI UBool U_EXPORT2
1217 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1218  UChar32* pStart, UChar32* pEnd);
1219 
1220 #endif
U_CAPI USet * uset_clone(const USet *set)
Returns a copy of this object.
U_CAPI void uset_clear(USet *set)
Removes all of the elements from this set.
U_CAPI void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
U_CAPI void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition: uset.h:252
U_CAPI void uset_complementString(USet *set, const UChar *str, int32_t length)
Complements the specified string in this set.
U_CAPI void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
U_CAPI void uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length)
Removes EACH of the characters in this string.
U_CAPI UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
U_CAPI int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code ...
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
U_CAPI void uset_close(USet *set)
Disposes of the storage used by a USet object.
U_CAPI void uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length)
Retains EACH of the characters in this string.
U_CAPI int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
Spans the longest substring that is a concatenation of set elements (characters or strings)...
Definition: uset.h:187
One more than the last span condition.
Definition: uset.h:213
U_CAPI void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
const uint16_t * array
The serialized Unicode Set.
Definition: uset.h:237
U_CAPI void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
U_CAPI void uset_freeze(USet *set)
Freeze the set (make it immutable).
U_CAPI UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
U_CAPI void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
U_CAPI USet * uset_openEmpty(void)
Create an empty USet object.
"Smart pointer" class, closes a USet via uset_close().
U_CAPI void uset_removeAllStrings(USet *set)
Remove all strings from this set.
U_CAPI void uset_complement(USet *set)
Inverts this set.
U_CAPI void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
U_CAPI UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
U_CAPI UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
U_CAPI void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
U_CAPI void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
Ignore white space within patterns unless quoted or escaped.
Definition: uset.h:63
U_CAPI void uset_complementRange(USet *set, UChar32 start, UChar32 end)
Complements the specified range in this set.
struct USerializedSet USerializedSet
A serialized form of a Unicode set.
U_CAPI UBool uset_contains(const USet *set, UChar32 c)
Returns true if the given USet contains the given character.
U_CAPI USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
U_CAPI UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns true if the given USet contains the given string.
U_CAPI void uset_retainString(USet *set, const UChar *str, int32_t length)
Retains only the specified string from this set if it is present.
U_CAPI int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
U_CAPI UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
U_CAPI UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:550
U_CAPI UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns true if the given USet contains all characters c where start <= c && c <= end...
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
U_CAPI UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
U_CAPI USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
U_CAPI int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in the given USet.
U_CAPI int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
U_CAPI int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:467
U_CAPI void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
U_CAPI UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
U_CAPI void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
U_CAPI void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition: uset.h:159
C API: Unicode Properties.
U_CAPI int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:418
U_CAPI int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
U_CAPI void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property...
int32_t length
The total length of the array.
Definition: uset.h:247
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:195
U_CAPI USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
U_CAPI UBool uset_isEmpty(const USet *set)
Returns true if the given USet contains no characters and no strings.
U_CAPI void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing thi...
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition: uset.h:50
U_CAPI int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
U_CAPI UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns true if the given USerializedSet contains the given character.
Capacity of USerializedSet::staticArray.
Definition: uset.h:224
Enable case insensitive matching.
Definition: uset.h:91
Enable case insensitive matching.
Definition: uset.h:101
Basic definitions for ICU, for both C and C++ APIs.
int32_t bmpLength
The length of the array that contains BMP characters.
Definition: uset.h:242
U_CAPI UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet patt...
A serialized form of a Unicode set.
Definition: uset.h:232
U_CAPI int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
Continues a span() while there is a set element at the current position.
Definition: uset.h:207
U_CAPI void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
U_CAPI void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
U_CAPI UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code ...
U_CAPI int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set...
U_CAPI void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
U_CAPI void uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length)
Complements EACH of the characters in this string.
U_CAPI void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
Continues a span() while there is no set element at the current position.
Definition: uset.h:172
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:269