[2679] | 1 | --- file-5.05/src/Makefile.am.vinejtext 2010-07-22 00:56:10.000000000 +0900 |
---|
| 2 | +++ file-5.05/src/Makefile.am 2011-02-11 16:53:06.000000000 +0900 |
---|
| 3 | @@ -4,11 +4,11 @@ |
---|
| 4 | |
---|
| 5 | bin_PROGRAMS = file |
---|
| 6 | |
---|
| 7 | -AM_CPPFLAGS = -DMAGIC='"$(MAGIC)"' |
---|
| 8 | +AM_CPPFLAGS = -DMAGIC='"$(MAGIC)"' -DDETECT_JAPANESE |
---|
| 9 | AM_CFLAGS = @WARNINGS@ |
---|
| 10 | |
---|
| 11 | libmagic_la_SOURCES = magic.c apprentice.c softmagic.c ascmagic.c \ |
---|
| 12 | - encoding.c compress.c is_tar.c readelf.c print.c fsmagic.c \ |
---|
| 13 | + encoding.c compress.c is_tar.c readelf.c print.c jcode.c fsmagic.c \ |
---|
| 14 | funcs.c file.h names.h patchlevel.h readelf.h tar.h apptype.c \ |
---|
| 15 | file_opts.h elfclass.h mygetopt.h cdf.c cdf_time.c readcdf.c cdf.h |
---|
| 16 | libmagic_la_LDFLAGS = -no-undefined -version-info 1:0:0 |
---|
| 17 | --- file-5.05/src/encoding.c.vinejtext 2010-07-22 01:47:17.000000000 +0900 |
---|
| 18 | +++ file-5.05/src/encoding.c 2011-02-11 17:26:00.000000000 +0900 |
---|
| 19 | @@ -42,7 +42,7 @@ FILE_RCSID("@(#)$File: encoding.c,v 1.5 |
---|
| 20 | #include <string.h> |
---|
| 21 | #include <memory.h> |
---|
| 22 | #include <stdlib.h> |
---|
| 23 | - |
---|
| 24 | +#include "jcode.h" |
---|
| 25 | |
---|
| 26 | private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *); |
---|
| 27 | private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *, |
---|
| 28 | @@ -68,7 +68,7 @@ protected int |
---|
| 29 | file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, unichar **ubuf, size_t *ulen, const char **code, const char **code_mime, const char **type) |
---|
| 30 | { |
---|
| 31 | size_t mlen; |
---|
| 32 | - int rv = 1, ucs_type; |
---|
| 33 | + int rv = 1, ucs_type, jcode; |
---|
| 34 | unsigned char *nbuf = NULL; |
---|
| 35 | |
---|
| 36 | mlen = (nbytes + 1) * sizeof(nbuf[0]); |
---|
| 37 | @@ -83,10 +83,27 @@ file_encoding(struct magic_set *ms, cons |
---|
| 38 | } |
---|
| 39 | |
---|
| 40 | *type = "text"; |
---|
| 41 | - if (looks_ascii(buf, nbytes, *ubuf, ulen)) { |
---|
| 42 | + jcode = detect_kcode(buf, nbytes, *ubuf, ulen); |
---|
| 43 | + if (jcode == ASCII) { |
---|
| 44 | DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen)); |
---|
| 45 | *code = "ASCII"; |
---|
| 46 | *code_mime = "us-ascii"; |
---|
| 47 | + } else if (jcode == JIS) { |
---|
| 48 | + DPRINTF(("jis %" SIZE_T_FORMAT "u\n", *ulen)); |
---|
[2730] | 49 | + *code = "7-bit JIS [ESC$B, ESC(B]"; |
---|
| 50 | + *code_mime = "jis"; |
---|
[2679] | 51 | + } else if (jcode == SJIS){ |
---|
| 52 | + DPRINTF(("sjis %" SIZE_T_FORMAT "u\n", *ulen)); |
---|
[2730] | 53 | + *code = "SJIS"; |
---|
| 54 | + *code_mime = "sjis"; |
---|
[2679] | 55 | + } else if (jcode == EUC){ |
---|
| 56 | + DPRINTF(("euc %" SIZE_T_FORMAT "u\n", *ulen)); |
---|
[2730] | 57 | + *code = "EUC"; |
---|
| 58 | + *code_mime = "euc-jp"; |
---|
[2679] | 59 | + } else if (jcode == EUCORSJIS){ |
---|
| 60 | + DPRINTF(("euc or sjis %" SIZE_T_FORMAT "u\n", *ulen)); |
---|
[2730] | 61 | + *code = "EUC or SJIS"; |
---|
| 62 | + *code_mime = "unknown"; |
---|
[2679] | 63 | } else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) { |
---|
| 64 | DPRINTF(("utf8/bom %" SIZE_T_FORMAT "u\n", *ulen)); |
---|
| 65 | *code = "UTF-8 Unicode (with BOM)"; |
---|
| 66 | --- /dev/null 2011-02-06 21:11:58.373999997 +0900 |
---|
| 67 | +++ file-5.05/src/jcode.c 2011-02-11 17:14:29.000000000 +0900 |
---|
| 68 | @@ -0,0 +1,205 @@ |
---|
| 69 | +/* |
---|
| 70 | +jcode.c: Kanji-code detection routine by Jun Nishii <jun@vinelinux.org> |
---|
| 71 | + modified by Ryoichi INAGAKI <inagaki@vinelinux.org> |
---|
| 72 | + */ |
---|
| 73 | +#include <stdio.h> |
---|
| 74 | +#include <unistd.h> |
---|
| 75 | +#include <signal.h> |
---|
| 76 | +#include <sys/types.h> |
---|
| 77 | +#include <sys/wait.h> |
---|
| 78 | +#include <jcode.h> |
---|
| 79 | + |
---|
| 80 | +typedef unsigned long unichar; |
---|
| 81 | + |
---|
| 82 | +#define F 0 /* character never appears in text */ |
---|
| 83 | +#define T 1 /* character appears in plain ASCII text */ |
---|
| 84 | +#define I 2 /* character appears in ISO-8859 text */ |
---|
| 85 | +#define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */ |
---|
| 86 | +#define J 4 /* character appears in JIS or plain ASCII */ |
---|
| 87 | +#define S 5 /* character appears in SJIS */ |
---|
| 88 | +#define E 6 /* character appears in EUC */ |
---|
| 89 | +#define O 7 /* character appears in EUC or SJIS */ |
---|
| 90 | + |
---|
| 91 | +#define ESC 27 |
---|
| 92 | + |
---|
| 93 | +static char jp_chars1[256] = { |
---|
| 94 | + F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */ |
---|
| 95 | + F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */ |
---|
| 96 | + T, J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, /* 0x2X */ |
---|
| 97 | + J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, /* 0x3X */ |
---|
| 98 | + J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, /* 0x4X */ |
---|
| 99 | + J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, /* 0x5X */ |
---|
| 100 | + J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, /* 0x6X */ |
---|
| 101 | + J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, F, /* 0x7X */ |
---|
| 102 | + /* NEL */ |
---|
| 103 | + X, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x8X */ |
---|
| 104 | + S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x9X */ |
---|
| 105 | + I, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, /* 0xaX */ |
---|
| 106 | + E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, /* 0xbX */ |
---|
| 107 | + E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, /* 0xcX */ |
---|
| 108 | + E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, /* 0xdX */ |
---|
| 109 | + O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 0xeX */ |
---|
| 110 | + E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, I /* 0xfX */ |
---|
| 111 | +}; |
---|
| 112 | + |
---|
| 113 | +static char jp_chars2[256] = { |
---|
| 114 | + /* BEL BS HT LF FF CR */ |
---|
| 115 | + F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */ |
---|
| 116 | + /* ESC */ |
---|
| 117 | + F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */ |
---|
| 118 | + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */ |
---|
| 119 | + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */ |
---|
| 120 | + S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x4X */ |
---|
| 121 | + S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x5X */ |
---|
| 122 | + S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x6X */ |
---|
| 123 | + S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, F, /* 0x7X */ |
---|
| 124 | + /* NEL */ |
---|
| 125 | + S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x8X */ |
---|
| 126 | + S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x9X */ |
---|
| 127 | + S, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 0xaX */ |
---|
| 128 | + O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 0xbX */ |
---|
| 129 | + O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 0xcX */ |
---|
| 130 | + O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 0xdX */ |
---|
| 131 | + O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 0xeX */ |
---|
| 132 | + O, O, O, O, O, O, O, O, O, O, O, O, O, E, E, I /* 0xfX */ |
---|
| 133 | +}; |
---|
| 134 | + |
---|
| 135 | + |
---|
| 136 | +int |
---|
| 137 | +check_asc_jis(buf, nbytes, ubuf, ulen) |
---|
| 138 | + const unsigned char *buf; |
---|
| 139 | + size_t nbytes; |
---|
| 140 | + unichar *ubuf; |
---|
| 141 | + size_t *ulen; |
---|
| 142 | +{ |
---|
| 143 | + size_t i; |
---|
| 144 | + int jflag; |
---|
| 145 | + |
---|
| 146 | + *ulen = 0; jflag=0; |
---|
| 147 | + |
---|
| 148 | + for (i = 0; i < nbytes; i++) { |
---|
| 149 | + int t = jp_chars1[buf[i]]; |
---|
| 150 | + |
---|
| 151 | + if (t != T && t != J ) |
---|
| 152 | + return 0; |
---|
| 153 | + |
---|
| 154 | + if (buf[i] == ESC && i+2<nbytes) { |
---|
| 155 | + if ((buf[i+1]=='$' && buf[i+2]=='B')|| |
---|
| 156 | + (buf[i+1]=='$' && buf[i+2]=='@')) jflag=1; |
---|
| 157 | + } |
---|
| 158 | + |
---|
| 159 | + ubuf[(*ulen)++] = buf[i]; |
---|
| 160 | + } |
---|
| 161 | + |
---|
| 162 | + if (jflag==1) return JIS; |
---|
| 163 | + else return ASCII; |
---|
| 164 | +} |
---|
| 165 | + |
---|
| 166 | +int |
---|
| 167 | +check_sjis(buf, nbytes, ubuf, ulen) |
---|
| 168 | + const unsigned char *buf; |
---|
| 169 | + size_t nbytes; |
---|
| 170 | + unichar *ubuf; |
---|
| 171 | + size_t *ulen; |
---|
| 172 | +{ |
---|
| 173 | + size_t i; |
---|
| 174 | + int jflag; |
---|
| 175 | + |
---|
| 176 | + *ulen = 0; |
---|
| 177 | + jflag = ASCII; |
---|
| 178 | + for (i = 0; i < nbytes; i++) { |
---|
| 179 | + int t = jp_chars1[buf[i]]; |
---|
| 180 | + |
---|
| 181 | + if (t != T && t != J && t != S && t!= O) |
---|
| 182 | + return 0; |
---|
| 183 | + |
---|
| 184 | + if (t == S && i<nbytes-1){ |
---|
| 185 | + ubuf[(*ulen)++] = buf[i]; |
---|
| 186 | + ++i; |
---|
| 187 | + t=jp_chars2[buf[i]]; |
---|
| 188 | + if(t != S && t != O ) return 0; |
---|
| 189 | + jflag=SJIS; |
---|
| 190 | + } else if (t == O && i<nbytes-1){ |
---|
| 191 | + ubuf[(*ulen)++] = buf[i]; |
---|
| 192 | + ++i; |
---|
| 193 | + t=jp_chars2[buf[i]]; |
---|
| 194 | + if( t == S ){ jflag=SJIS; } |
---|
| 195 | + else if( t == O ){ if(jflag==ASCII) jflag=EUCORSJIS; } |
---|
| 196 | + else return 0; |
---|
| 197 | + } |
---|
| 198 | + |
---|
| 199 | + ubuf[(*ulen)++] = buf[i]; |
---|
| 200 | + } |
---|
| 201 | +#ifdef Z |
---|
| 202 | + if (jflag==SJIS) {ckfputs("SJIS text", stdout); return SJIS;} |
---|
| 203 | + if (jflag==EUCORSJIS) {ckfputs("EUCorSJIS", stdout); return EUCORSJIS;} |
---|
| 204 | +#else |
---|
| 205 | + if (jflag==SJIS) {return SJIS;} |
---|
| 206 | + if (jflag==EUCORSJIS) {return EUCORSJIS;} |
---|
| 207 | +#endif |
---|
| 208 | +} |
---|
| 209 | + |
---|
| 210 | +int |
---|
| 211 | +check_euc(buf, nbytes, ubuf, ulen) |
---|
| 212 | + const unsigned char *buf; |
---|
| 213 | + size_t nbytes; |
---|
| 214 | + unichar *ubuf; |
---|
| 215 | + size_t *ulen; |
---|
| 216 | +{ |
---|
| 217 | + size_t i; |
---|
| 218 | + int jflag; |
---|
| 219 | + |
---|
| 220 | + *ulen = 0; |
---|
| 221 | + jflag = ASCII; |
---|
| 222 | + |
---|
| 223 | + for (i = 0; i < nbytes; i++) { |
---|
| 224 | + int t = jp_chars1[buf[i]]; |
---|
| 225 | + |
---|
| 226 | + if (t != T && t != J && t != E && t!= O) |
---|
| 227 | + return 0; |
---|
| 228 | + |
---|
| 229 | + if (t == E && i<nbytes-1){ |
---|
| 230 | + ubuf[(*ulen)++] = buf[i]; |
---|
| 231 | + ++i; |
---|
| 232 | + t= jp_chars2[buf[i]]; |
---|
| 233 | + if( t != E && t != O) return 0; |
---|
| 234 | + jflag=EUC; |
---|
| 235 | + } else if (t == O && i<nbytes-1){ |
---|
| 236 | + ubuf[(*ulen)++] = buf[i]; |
---|
| 237 | + ++i; |
---|
| 238 | + t=jp_chars2[buf[i]]; |
---|
| 239 | + if( t == E ){ jflag=EUC; } |
---|
| 240 | + else if( t == O ){ if(jflag==ASCII) jflag=EUCORSJIS; } |
---|
| 241 | + else return 0; |
---|
| 242 | + } |
---|
| 243 | + |
---|
| 244 | + ubuf[(*ulen)++] = buf[i]; |
---|
| 245 | + } |
---|
| 246 | +#ifdef Z |
---|
| 247 | + if (jflag==EUC) {ckfputs("EUC text", stdout); return EUC;} |
---|
| 248 | + if (jflag==EUCORSJIS) {ckfputs("EUCorSJIS", stdout); return EUCORSJIS;} |
---|
| 249 | +#else |
---|
| 250 | + if (jflag==EUC) { return EUC;} |
---|
| 251 | + if (jflag==EUCORSJIS) {return EUCORSJIS;} |
---|
| 252 | +#endif |
---|
| 253 | +} |
---|
| 254 | + |
---|
| 255 | +int |
---|
| 256 | +detect_kcode(buf, nbytes, ubuf, ulen) |
---|
| 257 | + const unsigned char *buf; |
---|
| 258 | + size_t nbytes; |
---|
| 259 | + unichar *ubuf; |
---|
| 260 | + size_t *ulen; |
---|
| 261 | +{ |
---|
| 262 | + int ret; |
---|
| 263 | + ret=check_asc_jis(buf, nbytes, ubuf, ulen); |
---|
| 264 | + if(ret==ASCII) return ASCII; |
---|
| 265 | + if(ret==JIS) return JIS; |
---|
| 266 | + |
---|
| 267 | + ret=check_sjis(buf, nbytes, ubuf, ulen); |
---|
| 268 | + if(ret==SJIS) return SJIS; |
---|
| 269 | + if(ret==EUCORSJIS) return EUCORSJIS; |
---|
| 270 | + ret=check_euc(buf, nbytes, ubuf, ulen); |
---|
| 271 | + if(ret==EUC) return EUC; |
---|
| 272 | + if(ret==EUCORSJIS) return EUCORSJIS; |
---|
| 273 | +} |
---|
| 274 | --- /dev/null 2011-02-06 21:11:58.373999997 +0900 |
---|
| 275 | +++ file-5.05/src/jcode.h 2011-02-11 17:12:11.000000000 +0900 |
---|
| 276 | @@ -0,0 +1,15 @@ |
---|
| 277 | +/* |
---|
| 278 | + jcode.h - for jcode.c by Jun Nishii <jun@vinelinux.org> |
---|
| 279 | + modified by Ryoichi INAGAKI <inagaki@vinelinux.org> |
---|
| 280 | + */ |
---|
| 281 | + |
---|
| 282 | +#define ASCII 1 |
---|
| 283 | +#define JIS 2 |
---|
| 284 | +#define EUC 3 |
---|
| 285 | +#define SJIS 4 |
---|
| 286 | +#define EUCORSJIS 5 |
---|
| 287 | + |
---|
| 288 | +extern int detect_kcode (const unsigned char *, size_t, unichar *, size_t *); |
---|
| 289 | +extern int looks_jis (const unsigned char *, size_t, unichar *, size_t *); |
---|
| 290 | +extern int looks_sjis (const unsigned char *, size_t, unichar *, size_t *); |
---|
| 291 | +extern int looks_euc (const unsigned char *, size_t, unichar *, size_t *); |
---|
| 292 | --- file-5.05/src/names.h.vinejtext 2010-10-09 06:58:44.000000000 +0900 |
---|
| 293 | +++ file-5.05/src/names.h 2011-02-11 17:28:18.000000000 +0900 |
---|
| 294 | @@ -135,8 +135,6 @@ |
---|
| 295 | {"/*", L_C, 2 }, /* must precede "The", "the", etc. */ |
---|
| 296 | {"#include", L_C, 2 }, |
---|
| 297 | {"char", L_C, 2 }, |
---|
| 298 | - {"The", L_ENG, 2 }, |
---|
| 299 | - {"the", L_ENG, 2 }, |
---|
| 300 | {"double", L_C, 1 }, |
---|
| 301 | {"extern", L_C, 2 }, |
---|
| 302 | {"float", L_C, 1 }, |
---|