utf8.c
Go to the documentation of this file.00001
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include "utf8.h"
00027 #include <string.h>
00028
/**
 * @brief Remove one UTF-8 character from a NUL-terminated string, in place.
 * @param[in,out] s The string to edit.
 * @param[in] pos Byte offset of the character to remove. It may point into
 * the middle of a multi-byte sequence.
 * @return The byte offset at which the removed character started. If @c pos
 * addresses the terminating NUL on a character boundary, nothing is removed
 * and @c pos is returned.
 */
int UTF8_delete_char (char *s, int pos)
{
	int first = pos;
	int after = pos;

	/* Step backwards over bytes flagged by UTF8_CONTINUATION_BYTE until the
	 * first byte of the character (or the start of the string) is reached. */
	for (; first > 0 && UTF8_CONTINUATION_BYTE(s[first]); first--) {
	}

	/* Step forwards past the byte at pos and every continuation byte that
	 * follows it, but never past the terminating NUL. */
	if (s[after] != 0) {
		after++;
		while (s[after] != 0 && UTF8_CONTINUATION_BYTE(s[after]))
			after++;
	}

	/* Close the gap; the +1 moves the terminating NUL along with the tail. */
	memmove(s + first, s + after, strlen(s + after) + 1);
	return first;
}
00051
/**
 * @brief Insert the UTF-8 encoding of a code point into a NUL-terminated string.
 * @param[in,out] s The string to edit.
 * @param[in] n Size limit used for the room check: the insertion is refused
 * when (pos + bytes from pos through the NUL + encoded length) >= n.
 * NOTE(review): n is presumably the buffer size in bytes; if so, the >= test
 * leaves one byte unused - confirm against callers before tightening it.
 * @param[in] pos Byte offset at which the character is inserted.
 * @param[in] c The code point to insert.
 * @return The number of bytes written, or 0 if @c c cannot be encoded
 * (UTF8_encoded_len returned 0) or there is not enough room.
 */
int UTF8_insert_char (char *s, int n, int pos, int c)
{
	const int utf8len = UTF8_encoded_len(c);
	char *dst = s + pos;
	int tail;

	if (utf8len == 0)
		return 0;

	/* Bytes from the insertion point up to and including the NUL. */
	tail = strlen(dst) + 1;
	if (pos + tail + utf8len >= n)
		return 0;

	/* Open a gap of utf8len bytes; the regions overlap, hence memmove. */
	memmove(dst + utf8len, dst, tail);

	/* Write the lead byte followed by 6-bit continuation groups. */
	switch (utf8len) {
	case 1:
		dst[0] = c;
		break;
	case 2:
		dst[0] = 0xc0 | (c >> 6);
		dst[1] = 0x80 | (c & 0x3f);
		break;
	case 3:
		dst[0] = 0xe0 | (c >> 12);
		dst[1] = 0x80 | ((c >> 6) & 0x3f);
		dst[2] = 0x80 | (c & 0x3f);
		break;
	case 4:
		dst[0] = 0xf0 | (c >> 18);
		dst[1] = 0x80 | ((c >> 12) & 0x3f);
		dst[2] = 0x80 | ((c >> 6) & 0x3f);
		dst[3] = 0x80 | (c & 0x3f);
		break;
	default:
		break;
	}

	return utf8len;
}
00092
/**
 * @brief Length in bytes of the UTF-8 sequence introduced by a given first byte.
 * @param[in] c The first byte of the sequence.
 * @return 1 for 0xxxxxxx, 2 for 110xxxxx, 3 for 1110xxxx, 4 for 11110xxx,
 * and 0 for any other byte (10xxxxxx continuation bytes and 0xf8..0xff).
 */
int UTF8_char_len (unsigned char c)
{
	/* 0xxxxxxx: single-byte character */
	if ((c & 0x80) == 0x00)
		return 1;
	/* 110xxxxx: lead byte of a two-byte sequence */
	if ((c & 0xe0) == 0xc0)
		return 2;
	/* 1110xxxx: lead byte of a three-byte sequence */
	if ((c & 0xf0) == 0xe0)
		return 3;
	/* 11110xxx: lead byte of a four-byte sequence */
	if ((c & 0xf8) == 0xf0)
		return 4;

	/* Continuation byte (10xxxxxx) or a byte that starts no sequence. */
	return 0;
}
00119
/**
 * @brief Number of bytes needed to encode a code point in UTF-8.
 * @param[in] c The code point.
 * @return 1 to 4 for code points in 0..0x10FFFF, or 0 if @c c is negative or
 * greater than 0x10FFFF and therefore cannot be encoded.
 * NOTE(review): the surrogate range 0xD800..0xDFFF is still reported as
 * 3 bytes; reject it here if strict UTF-8 output is required.
 */
int UTF8_encoded_len (int c)
{
	/* A negative value is not a code point; without this check it would
	 * pass the <= 0x7F test below and be reported as a one-byte character. */
	if (c < 0)
		return 0;
	if (c <= 0x7F)
		return 1;
	if (c <= 0x07FF)
		return 2;
	if (c <= 0xFFFF)
		return 3;
	if (c <= 0x10FFFF)
		return 4;
	return 0;
}
00136
/**
 * @brief Count the characters (not bytes) in a NUL-terminated UTF-8 string.
 *
 * A character is one starting byte plus every 10xxxxxx continuation byte
 * that directly follows it. For well-formed UTF-8 this equals the number of
 * code points. The scan always advances by at least one byte and never reads
 * beyond the terminating NUL, so malformed input (a stray continuation byte,
 * an invalid lead byte, or a sequence cut short by the end of the string)
 * cannot cause an endless loop or an out-of-bounds read.
 *
 * @param[in] str The string to measure.
 * @return The number of characters in @c str.
 */
size_t UTF8_strlen (const char *str)
{
	const unsigned char *p = (const unsigned char *)str;
	size_t result = 0;

	while (*p != '\0') {
		/* Consume the starting byte, whatever it is ... */
		p++;
		/* ... then its continuation bytes. NUL is not of the form
		 * 10xxxxxx, so this loop stops at the terminator. */
		while ((*p & 0xC0) == 0x80)
			p++;
		result++;
	}
	return result;
}
00154
/**
 * @brief Copy a UTF-8 string into a bounded buffer without splitting a
 * multi-byte character, always NUL-terminating the result.
 * @param[out] dest Destination buffer.
 * @param[in] src NUL-terminated source string.
 * @param[in] limit Maximum number of bytes written to @c dest, including the
 * terminating NUL. If it is 0, nothing is written.
 * @return @c dest.
 */
char *UTF8_strncpyz (char *dest, const char *src, size_t limit)
{
	size_t length;

	/* With no room at all, not even the NUL may be written. This also keeps
	 * limit - 1 below from wrapping around to SIZE_MAX. */
	if (limit == 0)
		return dest;

	length = strlen(src);
	if (length > limit - 1) {
		length = limit - 1;
		/* If the last byte that fits belongs to a multi-byte sequence,
		 * check that the whole sequence fits. */
		if (length > 0 && (unsigned char) src[length - 1] >= 0x80) {
			size_t i = length - 1;
			/* Walk back over 10xxxxxx continuation bytes to the lead byte. */
			while ((i > 0) && ((unsigned char) src[i] & 0xC0) == 0x80)
				i--;
			/* Drop the sequence if it would extend past the cut. */
			if (UTF8_char_len((unsigned char) src[i]) + i > length)
				length = i;
		}
	}

	memcpy(dest, src, length);
	dest[length] = '\0';

	return dest;
}
00183