| 1 | /** |
| 2 | \file obexftp/unicode.c |
| 3 | Unicode charset and encoding conversions. |
| 4 | ObexFTP library - language bindings for OBEX file transfer. |
| 5 | |
| 6 | Copyright (c) 2007 Christian W. Zuckschwerdt <zany@triq.net> |
| 7 | |
| 8 | ObexFTP is free software; you can redistribute it and/or modify |
| 9 | it under the terms of the GNU Lesser General Public License as |
| 10 | published by the Free Software Foundation; either version 2 of |
| 11 | the License, or (at your option) any later version. |
| 12 | |
| 13 | This program is distributed in the hope that it will be useful, |
| 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 | GNU Lesser General Public License for more details. |
| 17 | |
| 18 | You should have received a copy of the GNU Lesser General Public |
| 19 | License along with ObexFTP. If not, see <http://www.gnu.org/>. |
| 20 | */ |
| 21 | |
| 22 | #ifdef HAVE_CONFIG_H |
| 23 | #include <config.h> |
| 24 | #endif |
| 25 | |
| 26 | #include <stdio.h> |
| 27 | #include <stdlib.h> |
| 28 | #include <string.h> |
| 29 | |
| 30 | #ifdef _WIN32 /* no need for iconv */ |
| 31 | #include <windows.h> /* pulls in Winnls.h */ |
| 32 | #else |
| 33 | #ifdef HAVE_ICONV |
| 34 | #include <iconv.h> |
| 35 | #include <locale.h> |
| 36 | #ifndef ICONV_CONST |
| 37 | #define ICONV_CONST |
| 38 | #endif |
| 39 | #ifdef HAVE_LANGINFO_H |
| 40 | #include <langinfo.h> |
| 41 | #define locale_charset nl_langinfo(CODESET) |
| 42 | #else |
| 43 | #define locale_charset "" |
| 44 | #endif /* HAVE_LANGINFO_H */ |
| 45 | #endif /* HAVE_ICONV */ |
| 46 | #endif /* _WIN32 */ |
| 47 | |
| 48 | #include "unicode.h" |
| 49 | |
| 50 | #include <common.h> |
| 51 | |
| 52 | |
#if !defined(_WIN32) && defined(HAVE_ICONV)
/**
	Convert a NUL-terminated string from the given charset to UTF-16BE.

	\param from  iconv name of the source charset
	\param uc    destination buffer
	\param c     NUL-terminated source string
	\param size  capacity of \a uc in bytes, must not be negative
	\param rest  receives the position in \a c where the conversion stopped
	\return number of bytes written to \a uc (terminator included), or -1
	        if the converter could not be opened or the conversion failed
 */
static int to_utf16be(const char *from, uint8_t *uc, const uint8_t *c,
		      int size, const char **rest)
{
	/* separate char pointers avoid type-punned dereferencing */
	ICONV_CONST char *in = (ICONV_CONST char *)c;
	char *out = (char *)uc;
	size_t ni = strlen((const char *)c) + 1; /* convert the terminator too */
	size_t no = (size_t)size;
	size_t nrc;
	iconv_t cd;

	*rest = (const char *)c;

	cd = iconv_open("UTF-16BE", from);
	if (cd == (iconv_t)(-1))
		return -1; /* charset unknown to iconv */

	nrc = iconv(cd, &in, &ni, &out, &no);
	(void)iconv_close(cd);
	*rest = in;
	if (nrc == (size_t)(-1))
		return -1;

	return size - (int)no;
}
#endif /* !_WIN32 && HAVE_ICONV */

/**
	Convert a string to UTF-16BE, tries to guess charset and encoding.

	As a lib we can't be sure what the input charset and encoding is.
	Try to read the input as UTF-8, this will also work for plain ASCII (7bit).
	On errors fall back to the environment locale, which again could be UTF-8.
	As last resort try to copy verbatim, i.e. as ISO-8859-1.

	Without iconv (and not on Windows) the input is always copied verbatim
	as ISO-8859-1.

	\param uc    destination buffer for the UTF-16BE string
	\param c     NUL-terminated source string
	\param size  capacity of \a uc in bytes
	\return number of bytes written to \a uc including the two-byte
	        terminator; -1 on invalid arguments, on a too small buffer or
	        if every conversion attempt failed. The Windows variant returns
	        0 if the conversion itself fails.

	\note This is a quick hack until OpenOBEX is iconv-ready.
 */
int CharToUnicode(uint8_t *uc, const uint8_t *c, int size)
{
#ifdef _WIN32 /* no need for iconv */
	int ret, i;
	uint8_t tmp;

	return_val_if_fail(uc != NULL, -1);
	return_val_if_fail(c != NULL, -1);

	/* A wide-char capacity of 0 would turn the call below into a pure
	   size query that writes nothing, so bail out before swapping bytes. */
	if (size < 2)
		return 0;

	/* ANSI to UTF-16LE; the API wants the capacity in wide chars, not bytes */
	ret = MultiByteToWideChar(CP_ACP, 0, (LPCSTR)c, -1, (LPWSTR)uc, size / 2);
	/* turn the eggs the right way around now */
	for (i = 0; i < ret; i++) {
		tmp = uc[2*i];
		uc[2*i] = uc[2*i+1];
		uc[2*i+1] = tmp;
	}
	return ret * 2; /* 0 on error */
#else /* _WIN32 */

#ifdef HAVE_ICONV
	const char *rest;
	int ret;

	return_val_if_fail(uc != NULL, -1);
	return_val_if_fail(c != NULL, -1);
	/* a negative size would become a huge size_t inside iconv */
	return_val_if_fail(size >= 0, -1);

	/* try UTF-8 to UTF-16BE */
	ret = to_utf16be("UTF-8", uc, c, size, &rest);
	if (ret >= 0)
		return ret;
	DEBUG(3, "Iconv from UTF-8 conversion error: '%s'\n", rest);

	/* try current locale charset to UTF-16BE */
	setlocale(LC_CTYPE, "");
	DEBUG(2, "Iconv from locale \"%s\"\n", locale_charset);
	ret = to_utf16be(locale_charset, uc, c, size, &rest);
	if (ret >= 0)
		return ret;
	DEBUG(3, "Iconv from locale conversion error: '%s'\n", rest);

	/* fallback to ISO-8859-1 to UTF-16BE (every byte is valid here) */
	ret = to_utf16be("ISO-8859-1", uc, c, size, &rest);
	if (ret < 0) {
		DEBUG(2, "Iconv internal conversion error: '%s'\n", rest);
		return -1;
	}

	return ret;
#else /* HAVE_ICONV */
	size_t len, i;

	if (uc == NULL || c == NULL)
		return -1;

	len = strlen((const char *)c);
	/* two bytes per character plus the two-byte terminator must fit */
	if (size < 2 || len > ((size_t)size - 2) / 2)
		return -1;

	uc[len*2+1] = 0;
	uc[len*2] = 0;

	/* copy back to front, exactly like the original loop did */
	for (i = len; i-- > 0; ) {
		uc[i*2+1] = c[i];
		uc[i*2] = 0;
	}

	return (int)(len * 2 + 2);
#endif /* HAVE_ICONV */

#endif /* _WIN32 */
}
| 158 | |
| 159 | |
/**
	Convert a string from UTF-16BE to locale charset.

	Plain ASCII (7bit) and basic ISO-8859-1 will always work.
	This conversion supports UTF-8 and single byte locales.

	\param c     destination buffer
	\param uc    source string in UTF-16BE, terminated by a 16-bit zero
	\param size  capacity of \a c in bytes
	\return Windows: number of bytes written including the terminator,
	        0 if the conversion fails.
	        iconv: number of bytes written; after a conversion error this
	        is the count of what could be converted so far.
	        Without iconv: 0 on success.
	        All variants return -1 on invalid arguments; the plain variant
	        also returns -1 if the buffer is too small, the Windows variant
	        if memory runs out, and the iconv variant if the converter
	        cannot be opened.

	\note This is a quick hack until OpenOBEX is iconv-ready.
 */
int UnicodeToChar(uint8_t *c, const uint8_t *uc, int size)
{
#ifdef _WIN32 /* no need for iconv */
	int ret, n, i;
	uint8_t *le;

	return_val_if_fail(uc != NULL, -1);
	return_val_if_fail(c != NULL, -1);

	/* turn the eggs around, pointy side up */
	for (n = 0; uc[2*n] != 0 || uc[2*n+1] != 0; n++)
		;
	le = malloc(2*n+2);
	if (le == NULL)
		return -1;
	/* i <= n so that the terminator is swapped (copied) as well */
	for (i = 0; i <= n; i++) {
		le[2*i] = uc[2*i+1];
		le[2*i+1] = uc[2*i];
	}
	/* UTF-16LE to ANSI */
	ret = WideCharToMultiByte(CP_ACP, 0, (LPCWSTR)le, -1, (LPSTR)c, size, NULL, NULL);
	free(le);
	return ret; /* 0 on error */
#else /* _WIN32 */

#ifdef HAVE_ICONV
	iconv_t utf16;
	size_t ni, no, nrc;
	/* avoid type-punned dereferencing (breaks strict aliasing) */
	char *cc = (char *)c;
	ICONV_CONST char *ucc = (ICONV_CONST char *)uc;

	return_val_if_fail(uc != NULL, -1);
	return_val_if_fail(c != NULL, -1);
	/* a negative size would become a huge size_t inside iconv */
	return_val_if_fail(size >= 0, -1);

	/* UTF-16BE to current locale charset */
	setlocale(LC_CTYPE, "");
	DEBUG(3, "Iconv to locale \"%s\"\n", locale_charset);
	/* count 16-bit units up to the terminator, then convert it as well */
	for (ni = 0; ucc[2*ni] != 0 || ucc[2*ni+1] != 0; ni++)
		;
	ni = 2*ni+2;
	no = size;
	utf16 = iconv_open(locale_charset, "UTF-16BE");
	if (utf16 == (iconv_t)(-1)) {
		DEBUG(2, "Iconv to locale \"%s\" not available\n", locale_charset);
		return -1;
	}
	nrc = iconv(utf16, &ucc, &ni, &cc, &no);
	(void)iconv_close(utf16);
	if (nrc == (size_t)(-1)) {
		/* Best effort: keep what was converted. The terminator was not
		   reached, so add one if there is room; it is not counted in
		   the return value. */
		if (no > 0)
			*cc = '\0';
		DEBUG(2, "Iconv to locale conversion error: '%.*s'\n",
		      (int)((size_t)size - no), (char *)c);
	}
	return size-no;
#else /* HAVE_ICONV */
	int n, i;

	if (uc == NULL || c == NULL)
		return -1;

	/* Make sure buffer is big enough! The string ends at a 16-bit zero,
	   a zero low byte alone (e.g. U+0100) is not the terminator. */
	for (n = 0; uc[n*2] != 0 || uc[n*2+1] != 0; n++)
		;

	if (n >= size)
		return -1;

	/* only ISO-8859-1 can be represented, replace everything else */
	for (i = 0; i < n; i++)
		c[i] = (uc[i*2] == 0) ? uc[i*2+1] : '?';
	c[n] = 0;

	return 0;
#endif /* HAVE_ICONV */

#endif /* _WIN32 */
}
| 234 | |
| 235 | |
/**
	Convert a (xml) string from UTF-8 to locale charset.

	Plain ASCII (7bit) and basic ISO-8859-1 will always work.
	This conversion supports UTF-8 and single byte locales.

	Without iconv (and not on Windows) the string is copied unchanged and
	cut off if it does not fit; the cut may split a multi-byte sequence.

	\param c     destination buffer
	\param uc    NUL-terminated source string in UTF-8
	\param size  capacity of \a c in bytes
	\return Windows: number of bytes written including the terminator,
	        0 if the conversion fails.
	        iconv and plain variant: number of bytes stored in \a c, not
	        counting the terminator.
	        All variants return -1 on invalid arguments; the Windows
	        variant also if memory runs out, and the iconv variant if the
	        converter cannot be opened.

	\note This is a quick hack until OpenOBEX is iconv-ready.
 */
int Utf8ToChar(uint8_t *c, const uint8_t *uc, int size)
{
#ifdef _WIN32 /* no need for iconv */
	int ret;
	size_t wlen;
	LPWSTR le;

	return_val_if_fail(uc != NULL, -1);
	return_val_if_fail(c != NULL, -1);

	/* UTF-16 never needs more code units than UTF-8 needs bytes */
	wlen = strlen((const char *)uc) + 1;
	le = malloc(wlen * sizeof(WCHAR));
	if (le == NULL)
		return -1;

	/* UTF-8 to UTF-16LE; the capacity is given in wide chars, not bytes */
	ret = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)uc, -1, le, (int)wlen);

	/* UTF-16LE to ANSI, only if the first step produced something */
	if (ret != 0)
		ret = WideCharToMultiByte(CP_ACP, 0, le, -1, (LPSTR)c, size, NULL, NULL);
	free(le);
	return ret; /* 0 on error */
#else /* _WIN32 */

#ifdef HAVE_ICONV
	iconv_t utf8;
	size_t ni, no, nrc;
	/* avoid type-punned dereferencing (breaks strict aliasing) */
	char *cc = (char *)c;
	ICONV_CONST char *ucc = (ICONV_CONST char *)uc;

	return_val_if_fail(uc != NULL, -1);
	return_val_if_fail(c != NULL, -1);
	/* a negative size would become a huge size_t inside iconv */
	return_val_if_fail(size >= 0, -1);

	setlocale(LC_CTYPE, "");
	DEBUG(2, "Iconv to \"%s\"\n", locale_charset);
	ni = strlen(ucc); /* terminator is not converted, see below */
	no = size;
	utf8 = iconv_open(locale_charset, "UTF-8");
	if (utf8 == (iconv_t)(-1)) {
		DEBUG(2, "Iconv to \"%s\" not available\n", locale_charset);
		return -1;
	}
	nrc = iconv(utf8, &ucc, &ni, &cc, &no);
	(void)iconv_close(utf8);
	if (nrc == (size_t)(-1)) {
		/* ucc now points at the input that could not be converted */
		DEBUG(2, "Iconv to locale conversion error: '%s'\n", ucc);
	}
	/* Terminate the result if there is room. The terminator is not
	   counted, so the return value stays what it always was. */
	if (no > 0)
		*cc = '\0';
	return size-no;
#else /* HAVE_ICONV */
	size_t len, copy;

	if (uc == NULL || c == NULL || size <= 0)
		return -1;

	len = strlen((const char *)uc);
	/* always leave room for the terminator inside the buffer */
	copy = (len < (size_t)size) ? len : (size_t)size - 1;
	memmove(c, uc, copy);
	c[copy] = '\0';

	return (int)copy;
#endif /* HAVE_ICONV */

#endif /* _WIN32 */
}