00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef MAGICKCORE_TOKEN_PRIVATE_H
00019 #define MAGICKCORE_TOKEN_PRIVATE_H
00020
00021 #if defined(__cplusplus) || defined(c_plusplus)
00022 extern "C" {
00023 #endif
00024
/*
  Some platforms do not provide EILSEQ in <errno.h>; substitute ENOENT there
  so that illegal byte sequences can still be reported through errno.
*/
#if !defined(EILSEQ)
#  define EILSEQ  ENOENT
#endif

/*
  Number of rows in the utf_info table, i.e. the longest multibyte sequence
  (in bytes) the decoder has a pattern for.
*/
#define MaxMultibyteCodes  6
00030
/*
  UTFInfo describes one multibyte sequence length for the UTF decoder.  The
  member order matters: the utf_info table uses positional initializers.
*/
typedef struct
{
  int code_mask;   /* bits of the lead byte that identify the sequence length */
  int code_value;  /* value those bits must have for this row to match */
  int utf_mask;    /* bits of the accumulated value that carry the code */
  int utf_value;   /* smallest code accepted for this sequence length */
} UTFInfo;
00039
00040 static UTFInfo
00041 utf_info[MaxMultibyteCodes] =
00042 {
00043 { 0x80, 0x00, 0x000007f, 0x0000000 },
00044 { 0xE0, 0xC0, 0x00007ff, 0x0000080 },
00045 { 0xF0, 0xE0, 0x000ffff, 0x0000800 },
00046 { 0xF8, 0xF0, 0x01fffff, 0x0010000 },
00047 { 0xFC, 0xF8, 0x03fffff, 0x0200000 },
00048 { 0xFE, 0xFC, 0x7ffffff, 0x4000000 },
00049 };
00050
/*
  ConvertLatin1ToUTF8() returns a newly allocated, NUL-terminated UTF-8 copy
  of the NUL-terminated Latin-1 string `content`.  Bytes below 0x80 are copied
  unchanged; bytes 0x80..0xFF are expanded to the two-byte UTF-8 encoding of
  the same code point.

  The buffer is obtained from AcquireQuantumMemory() and is handed to the
  caller.  NULL is returned when `content` is NULL, when the required size
  would overflow, or when the allocation fails.
*/
static inline unsigned char *ConvertLatin1ToUTF8(const unsigned char *content)
{
  const unsigned char
    *p;

  size_t
    length;

  unsigned char
    *q,
    *utf8;

  unsigned int
    c;

  /*
    Nothing to convert: fail the same way as an allocation error instead of
    dereferencing a NULL pointer.
  */
  if (content == (const unsigned char *) NULL)
    return((unsigned char *) NULL);
  /*
    First pass: compute the encoded length (high-bit bytes need two bytes).
  */
  length=0;
  for (p=content; *p != '\0'; p++)
    length+=(*p & 0x80) != 0 ? 2 : 1;
  utf8=(unsigned char *) NULL;
  if (~length >= 1)  /* false only if length+1 would wrap around */
    utf8=(unsigned char *) AcquireQuantumMemory(length+1UL,sizeof(*utf8));
  if (utf8 == (unsigned char *) NULL)
    return((unsigned char *) NULL);
  /*
    Second pass: emit the UTF-8 bytes.
  */
  q=utf8;
  for (p=content; *p != '\0'; p++)
  {
    c=(*p);
    if ((c & 0x80) == 0)
      *q++=(unsigned char) c;
    else
      {
        *q++=(unsigned char) (0xc0 | ((c >> 6) & 0x3f));
        *q++=(unsigned char) (0x80 | (c & 0x3f));
      }
  }
  *q='\0';
  return(utf8);
}
00091
00092 static inline int GetNextUTFCode(const char *text,unsigned int *octets)
00093 {
00094 int
00095 code;
00096
00097 register ssize_t
00098 i;
00099
00100 register int
00101 c,
00102 unicode;
00103
00104 *octets=1;
00105 if (text == (const char *) NULL)
00106 {
00107 errno=EINVAL;
00108 return(-1);
00109 }
00110 code=(int) (*text++) & 0xff;
00111 unicode=code;
00112 for (i=0; i < MaxMultibyteCodes; i++)
00113 {
00114 if ((code & utf_info[i].code_mask) == utf_info[i].code_value)
00115 {
00116 unicode&=utf_info[i].utf_mask;
00117 if (unicode < utf_info[i].utf_value)
00118 break;
00119 *octets=(unsigned int) (i+1);
00120 return(unicode);
00121 }
00122 c=(int) (*text++ ^ 0x80) & 0xff;
00123 if ((c & 0xc0) != 0)
00124 break;
00125 if (unicode > 0x10FFFF)
00126 break;
00127 unicode=(unicode << 6) | c;
00128 }
00129 errno=EILSEQ;
00130 return(-1);
00131 }
00132
/*
  GetUTFCode() returns the code decoded by GetNextUTFCode() for the sequence
  at `text`, discarding the byte count.  The result is -1 when decoding fails.
*/
static inline int GetUTFCode(const char *text)
{
  unsigned int
    unused_octets;

  return GetNextUTFCode(text,&unused_octets);
}
00140
/*
  GetUTFOctets() returns the byte count reported by GetNextUTFCode() for the
  sequence at `text`; the decoded code itself is ignored.
*/
static inline unsigned int GetUTFOctets(const char *text)
{
  unsigned int
    count;

  (void) GetNextUTFCode(text,&count);
  return count;
}
00149
00150 static inline MagickBooleanType IsUTFSpace(int code)
00151 {
00152 if (((code >= 0x0009) && (code <= 0x000d)) || (code == 0x0020) ||
00153 (code == 0x0085) || (code == 0x00a0) || (code == 0x1680) ||
00154 (code == 0x180e) || ((code >= 0x2000) && (code <= 0x200a)) ||
00155 (code == 0x2028) || (code == 0x2029) || (code == 0x202f) ||
00156 (code == 0x205f) || (code == 0x3000))
00157 return(MagickTrue);
00158 return(MagickFalse);
00159 }
00160
00161 static inline MagickBooleanType IsUTFValid(int code)
00162 {
00163 int
00164 mask;
00165
00166 mask=(int) 0x7fffffff;
00167 if (((code & ~mask) != 0) && ((code < 0xd800) || (code > 0xdfff)) &&
00168 (code != 0xfffe) && (code != 0xffff))
00169 return(MagickFalse);
00170 return(MagickTrue);
00171 }
00172
00173 static inline MagickBooleanType IsUTFAscii(int code)
00174 {
00175 int
00176 mask;
00177
00178 mask=(int) 0x7f;
00179 if ((code & ~mask) != 0)
00180 return(MagickFalse);
00181 return(MagickTrue);
00182 }
00183
00184 #if defined(__cplusplus) || defined(c_plusplus)
00185 }
00186 #endif
00187
00188 #endif