about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Christian Grothoff <christian@grothoff.org> 2009-12-19 12:58:35 +0000
committer Christian Grothoff <christian@grothoff.org> 2009-12-19 12:58:35 +0000
commit 01e3e60dd64caeb9a441248d602b4c8a60e9413b (patch)
tree 7b861acf0d3098d5d93fa99effdb3e468952ff20
parent 4db7e805a2c0082b95b1cd2a735e687897adcf3b (diff)
download libextractor-01e3e60dd64caeb9a441248d602b4c8a60e9413b.tar.gz
download libextractor-01e3e60dd64caeb9a441248d602b4c8a60e9413b.zip
dos2unix
-rw-r--r-- src/plugins/translitextractor.c 326
1 files changed, 163 insertions, 163 deletions
diff --git a/src/plugins/translitextractor.c b/src/plugins/translitextractor.c
index 8e8d525..0453156 100644
--- a/src/plugins/translitextractor.c
+++ b/src/plugins/translitextractor.c
@@ -1,128 +1,128 @@
1/* 1/*
2 This file is part of libextractor. 2 This file is part of libextractor.
3 (C) 2002 - 2005 Vidyut Samanta and Christian Grothoff 3 (C) 2002 - 2005 Vidyut Samanta and Christian Grothoff
4 4
5 libextractor is free software; you can redistribute it and/or modify 5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published 6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your 7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version. 8 option) any later version.
9 9
10 libextractor is distributed in the hope that it will be useful, but 10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of 11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details. 13 General Public License for more details.
14 14
15 You should have received a copy of the GNU General Public License 15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the 16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330, 17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. 18 Boston, MA 02111-1307, USA.
19 */ 19 */
20 20
21/** 21/**
22 * @brief Transliterate keywords that contain international characters 22 * @brief Transliterate keywords that contain international characters
23 * @author Nils Durner 23 * @author Nils Durner
24 */ 24 */
25 25
26#include "platform.h" 26#include "platform.h"
27#include "extractor.h" 27#include "extractor.h"
28#include "convert.h" 28#include "convert.h"
29 29
30/* Language independent chars were taken from glibc's locale/C-translit.h.in 30/* Language independent chars were taken from glibc's locale/C-translit.h.in
31 * 31 *
32 * This extractor uses two tables: one contains the Unicode 32 * This extractor uses two tables: one contains the Unicode
33 * characters and the other one contains the transliterations (since 33 * characters and the other one contains the transliterations (since
34 * transliterations are often used more than once: ä -> ae, æ -> ae). 34 * transliterations are often used more than once: ä -> ae, æ -> ae).
35 * The first table points to an appropriate transliteration stored in the 35 * The first table points to an appropriate transliteration stored in the
36 * second table. 36 * second table.
37 * 37 *
38 * To generate the two tables, a relational database was prepared: 38 * To generate the two tables, a relational database was prepared:
39 * create table TBL(UNI varchar(20), TRANSL varchar(10), TRANSLID integer); 39 * create table TBL(UNI varchar(20), TRANSL varchar(10), TRANSLID integer);
40 * create table TRANSL (TRANSL varchar(20) primary key, TRANSLID integer); 40 * create table TRANSL (TRANSL varchar(20) primary key, TRANSLID integer);
41 * 41 *
42 * After that, the data from glibc was converted to a SQL script using 42 * After that, the data from glibc was converted to a SQL script using
43 * "awk -F '\t'": 43 * "awk -F '\t'":
44 * { 44 * {
45 * transl = $2; 45 * transl = $2;
46 * gsub(/'/, "''", transl); 46 * gsub(/'/, "''", transl);
47 * print "insert into TBL(UNI, TRANSL) values ('0x" substr($3, 6, index($3, ">") - 6) "', '" transl "');"; 47 * print "insert into TBL(UNI, TRANSL) values ('0x" substr($3, 6, index($3, ">") - 6) "', '" transl "');";
48 * print "insert into TRANSL(TRANSL, TRANSLID) values ('" transl "', (Select count(*) from TRANSL));"; 48 * print "insert into TRANSL(TRANSL, TRANSLID) values ('" transl "', (Select count(*) from TRANSL));";
49 * } 49 * }
50 * 50 *
51 * Then the SQL script was executed, "commit"ted and the relation between the 51 * Then the SQL script was executed, "commit"ted and the relation between the
52 * two tables established using: 52 * two tables established using:
53 * update TBL Set TRANSLID = (Select TRANSLID from TRANSL where TRANSL.TRANSL = TBL.TRANSL); 53 * update TBL Set TRANSLID = (Select TRANSLID from TRANSL where TRANSL.TRANSL = TBL.TRANSL);
54 * commit; 54 * commit;
55 * 55 *
56 * The C arrays were then created with: 56 * The C arrays were then created with:
57 * Select '{' || UNI || ', ' || TRANSLID || '},' from TBL order by UNI; 57 * Select '{' || UNI || ', ' || TRANSLID || '},' from TBL order by UNI;
58 * Select TRANSL || ', ' from TRANSL order by TRANSLID; 58 * Select TRANSL || ', ' from TRANSL order by TRANSLID;
59 * and reformatted with: 59 * and reformatted with:
60 * { 60 * {
61 * a = $0; 61 * a = $0;
62 * getline; 62 * getline;
63 * b = $0; 63 * b = $0;
64 * getline; 64 * getline;
65 * c = $0; 65 * c = $0;
66 * getline; 66 * getline;
67 * printf("%s %s %s %s\n", a, b, c, $0); 67 * printf("%s %s %s %s\n", a, b, c, $0);
68 * } 68 * }
69 * 69 *
70 * The unicode values for the other characters were taken from 70 * The unicode values for the other characters were taken from
71 * http://bigfield.ddo.jp/unicode/unicode0.html 71 * http://bigfield.ddo.jp/unicode/unicode0.html
72 */ 72 */
73 unsigned int chars[][2] = { 73 unsigned int chars[][2] = {
74 {0x00C4, 444}, {0x00D6, 445}, {0x00DC, 446}, {0x00DF, 13}, 74 {0x00C4, 444}, {0x00D6, 445}, {0x00DC, 446}, {0x00DF, 13},
75 /* Ä, Ö, Ü, ß */ 75 /* Ä, Ö, Ü, ß */
76{0x00E4, 14}, {0x00F6, 19}, {0x00FC, 447}, {0x00C5, 448}, /* ä, ö, ü, Å */ 76{0x00E4, 14}, {0x00F6, 19}, {0x00FC, 447}, {0x00C5, 448}, /* ä, ö, ü, Å */
77{0x00E5, 449}, {0x00C6, 444}, {0x00E6, 14}, {0x00D8, 445}, /* å, Æ, æ, Ø */ 77{0x00E5, 449}, {0x00C6, 444}, {0x00E6, 14}, {0x00D8, 445}, /* å, Æ, æ, Ø */
78{0x00F8, 19}, {0x00C0, 419}, {0x00C8, 77}, {0x00D9, 426}, /* ø, À, È, Ù */ 78{0x00F8, 19}, {0x00C0, 419}, {0x00C8, 77}, {0x00D9, 426}, /* ø, À, È, Ù */
79{0x00E0, 431}, {0x00E8, 76}, {0x00F9, 5}, {0x00C9, 77}, /* à, è, ù, É */ 79{0x00E0, 431}, {0x00E8, 76}, {0x00F9, 5}, {0x00C9, 77}, /* à, è, ù, É */
80{0x00E9, 76}, {0x00C2, 419}, {0x00CA, 77}, {0x00CE, 63}, /* é, Â, Ê, Î */ 80{0x00E9, 76}, {0x00C2, 419}, {0x00CA, 77}, {0x00CE, 63}, /* é, Â, Ê, Î */
81{0x00D4, 423}, {0x00DB, 426}, {0x00E2, 431}, {0x00EA, 76}, /* Ô, Û, â, ê */ 81{0x00D4, 423}, {0x00DB, 426}, {0x00E2, 431}, {0x00EA, 76}, /* Ô, Û, â, ê */
82{0x00EE, 80}, {0x00F4, 41}, {0x00FB, 5}, {0x00CB, 77}, /* î, ô, û, Ë */ 82{0x00EE, 80}, {0x00F4, 41}, {0x00FB, 5}, {0x00CB, 77}, /* î, ô, û, Ë */
83{0x00CF, 63}, {0x00EB, 76}, {0x00EF, 80}, {0x00C7, 57}, /* Ï, ë, ï, Ç */ 83{0x00CF, 63}, {0x00EB, 76}, {0x00EF, 80}, {0x00C7, 57}, /* Ï, ë, ï, Ç */
84{0x00E7, 118}, {0x0152, 445}, {0x0053, 19}, {0x0080, 66}, /* ç, Œ, œ, € */ 84{0x00E7, 118}, {0x0152, 445}, {0x0053, 19}, {0x0080, 66}, /* ç, Œ, œ, € */
85 85
86 /* Language independent */ 86 /* Language independent */
87{0xFB00, 391}, {0xFB01, 392}, {0xFB02, 393}, {0xFB03, 394}, 87{0xFB00, 391}, {0xFB01, 392}, {0xFB02, 393}, {0xFB03, 394},
88 {0xFB04, 395}, {0xFB06, 396}, {0xFB29, 40}, {0xFEFF, 36}, 88 {0xFB04, 395}, {0xFB06, 396}, {0xFB29, 40}, {0xFEFF, 36},
89 {0xFE4D, 33}, {0xFE4E, 33}, {0xFE4F, 33}, {0xFE5A, 401}, 89 {0xFE4D, 33}, {0xFE4E, 33}, {0xFE4F, 33}, {0xFE5A, 401},
90 {0xFE5B, 402}, {0xFE5C, 403}, {0xFE5F, 404}, {0xFE50, 6}, 90 {0xFE5B, 402}, {0xFE5C, 403}, {0xFE5F, 404}, {0xFE50, 6},
91 {0xFE52, 42}, {0xFE54, 397}, {0xFE55, 34}, {0xFE56, 398}, 91 {0xFE52, 42}, {0xFE54, 397}, {0xFE55, 34}, {0xFE56, 398},
92 {0xFE57, 399}, {0xFE59, 400}, {0xFE6A, 407}, {0xFE6B, 408}, 92 {0xFE57, 399}, {0xFE59, 400}, {0xFE6A, 407}, {0xFE6B, 408},
93 {0xFE60, 405}, {0xFE61, 128}, {0xFE62, 40}, {0xFE63, 3}, 93 {0xFE60, 405}, {0xFE61, 128}, {0xFE62, 40}, {0xFE63, 3},
94 {0xFE64, 47}, {0xFE65, 48}, {0xFE66, 262}, {0xFE68, 127}, 94 {0xFE64, 47}, {0xFE65, 48}, {0xFE66, 262}, {0xFE68, 127},
95 {0xFE69, 406}, {0xFF0A, 128}, {0xFF0B, 40}, {0xFF0C, 6}, 95 {0xFE69, 406}, {0xFF0A, 128}, {0xFF0B, 40}, {0xFF0C, 6},
96 {0xFF0D, 3}, {0xFF0E, 42}, {0xFF0F, 126}, {0xFF01, 399}, 96 {0xFF0D, 3}, {0xFF0E, 42}, {0xFF0F, 126}, {0xFF01, 399},
97 {0xFF02, 38}, {0xFF03, 404}, {0xFF04, 406}, {0xFF05, 407}, 97 {0xFF02, 38}, {0xFF03, 404}, {0xFF04, 406}, {0xFF05, 407},
98 {0xFF06, 405}, {0xFF07, 30}, {0xFF08, 400}, {0xFF09, 401}, 98 {0xFF06, 405}, {0xFF07, 30}, {0xFF08, 400}, {0xFF09, 401},
99 {0xFF1A, 34}, {0xFF1B, 397}, {0xFF1C, 47}, {0xFF1D, 262}, 99 {0xFF1A, 34}, {0xFF1B, 397}, {0xFF1C, 47}, {0xFF1D, 262},
100 {0xFF1E, 48}, {0xFF1F, 398}, {0xFF10, 409}, {0xFF11, 410}, 100 {0xFF1E, 48}, {0xFF1F, 398}, {0xFF10, 409}, {0xFF11, 410},
101 {0xFF12, 411}, {0xFF13, 412}, {0xFF14, 413}, {0xFF15, 414}, 101 {0xFF12, 411}, {0xFF13, 412}, {0xFF14, 413}, {0xFF15, 414},
102 {0xFF16, 415}, {0xFF17, 416}, {0xFF18, 417}, {0xFF19, 418}, 102 {0xFF16, 415}, {0xFF17, 416}, {0xFF18, 417}, {0xFF19, 418},
103 {0xFF2A, 421}, {0xFF2B, 422}, {0xFF2C, 64}, {0xFF2D, 79}, 103 {0xFF2A, 421}, {0xFF2B, 422}, {0xFF2C, 64}, {0xFF2D, 79},
104 {0xFF2E, 66}, {0xFF2F, 423}, {0xFF20, 408}, {0xFF21, 419}, 104 {0xFF2E, 66}, {0xFF2F, 423}, {0xFF20, 408}, {0xFF21, 419},
105 {0xFF22, 75}, {0xFF23, 57}, {0xFF24, 81}, {0xFF25, 77}, 105 {0xFF22, 75}, {0xFF23, 57}, {0xFF24, 81}, {0xFF25, 77},
106 {0xFF26, 78}, {0xFF27, 420}, {0xFF28, 61}, {0xFF29, 63}, 106 {0xFF26, 78}, {0xFF27, 420}, {0xFF28, 61}, {0xFF29, 63},
107 {0xFF3A, 73}, {0xFF3B, 429}, {0xFF3C, 127}, {0xFF3D, 430}, 107 {0xFF3A, 73}, {0xFF3B, 429}, {0xFF3C, 127}, {0xFF3D, 430},
108 {0xFF3E, 31}, {0xFF3F, 33}, {0xFF30, 68}, {0xFF31, 69}, 108 {0xFF3E, 31}, {0xFF3F, 33}, {0xFF30, 68}, {0xFF31, 69},
109 {0xFF32, 70}, {0xFF33, 424}, {0xFF34, 425}, {0xFF35, 426}, 109 {0xFF32, 70}, {0xFF33, 424}, {0xFF34, 425}, {0xFF35, 426},
110 {0xFF36, 100}, {0xFF37, 427}, {0xFF38, 105}, {0xFF39, 428}, 110 {0xFF36, 100}, {0xFF37, 427}, {0xFF38, 105}, {0xFF39, 428},
111 {0xFF4A, 83}, {0xFF4B, 434}, {0xFF4C, 65}, {0xFF4D, 119}, 111 {0xFF4A, 83}, {0xFF4B, 434}, {0xFF4C, 65}, {0xFF4D, 119},
112 {0xFF4E, 435}, {0xFF4F, 41}, {0xFF40, 32}, {0xFF41, 431}, 112 {0xFF4E, 435}, {0xFF4F, 41}, {0xFF40, 32}, {0xFF41, 431},
113 {0xFF42, 432}, {0xFF43, 118}, {0xFF44, 82}, {0xFF45, 76}, 113 {0xFF42, 432}, {0xFF43, 118}, {0xFF44, 82}, {0xFF45, 76},
114 {0xFF46, 433}, {0xFF47, 60}, {0xFF48, 62}, {0xFF49, 80}, 114 {0xFF46, 433}, {0xFF47, 60}, {0xFF48, 62}, {0xFF49, 80},
115 {0xFF5A, 442}, {0xFF5B, 402}, {0xFF5C, 129}, {0xFF5D, 403}, 115 {0xFF5A, 442}, {0xFF5B, 402}, {0xFF5C, 129}, {0xFF5D, 403},
116 {0xFF5E, 35}, {0xFF50, 436}, {0xFF51, 437}, {0xFF52, 438}, 116 {0xFF5E, 35}, {0xFF50, 436}, {0xFF51, 437}, {0xFF52, 438},
117 {0xFF53, 20}, {0xFF54, 439}, {0xFF55, 5}, {0xFF56, 111}, 117 {0xFF53, 20}, {0xFF54, 439}, {0xFF55, 5}, {0xFF56, 111},
118 {0xFF57, 440}, {0xFF58, 12}, {0xFF59, 441}, {0x00AB, 2}, 118 {0xFF57, 440}, {0xFF58, 12}, {0xFF59, 441}, {0x00AB, 2},
119 {0x00AD, 3}, {0x00AE, 4}, {0x00A0, 0}, {0x00A9, 1}, 119 {0x00AD, 3}, {0x00AE, 4}, {0x00A0, 0}, {0x00A9, 1},
120 {0x00BB, 7}, {0x00BC, 8}, {0x00BD, 9}, {0x00BE, 10}, 120 {0x00BB, 7}, {0x00BC, 8}, {0x00BD, 9}, {0x00BE, 10},
121 {0x00B5, 5}, {0x00B8, 6}, {0x00C6, 11}, {0x00DF, 13}, 121 {0x00B5, 5}, {0x00B8, 6}, {0x00C6, 11}, {0x00DF, 13},
122 {0x00D7, 12}, {0x00E6, 14}, {0x0001D4AA, 423}, {0x0001D4AB, 68}, 122 {0x00D7, 12}, {0x00E6, 14}, {0x0001D4AA, 423}, {0x0001D4AB, 68},
123 {0x0001D4AC, 69}, {0x0001D4AE, 424}, {0x0001D4AF, 425}, {0x0001D4A2, 420}, 123 {0x0001D4AC, 69}, {0x0001D4AE, 424}, {0x0001D4AF, 425}, {0x0001D4A2, 420},
124 {0x0001D4A5, 421}, {0x0001D4A6, 422}, {0x0001D4A9, 66}, {0x0001D4BB, 433}, 124 {0x0001D4A5, 421}, {0x0001D4A6, 422}, {0x0001D4A9, 66}, {0x0001D4BB, 433},
125 {0x0001D4BD, 62}, {0x0001D4BE, 80}, {0x0001D4BF, 83}, {0x0001D4B0, 426}, 125 {0x0001D4BD, 62}, {0x0001D4BE, 80}, {0x0001D4BF, 83}, {0x0001D4B0, 426},
126 {0x0001D4B1, 100}, {0x0001D4B2, 427}, {0x0001D4B3, 105}, {0x0001D4B4, 428}, 126 {0x0001D4B1, 100}, {0x0001D4B2, 427}, {0x0001D4B3, 105}, {0x0001D4B4, 428},
127 {0x0001D4B5, 73}, {0x0001D4B6, 431}, {0x0001D4B7, 432}, {0x0001D4B8, 118}, 127 {0x0001D4B5, 73}, {0x0001D4B6, 431}, {0x0001D4B7, 432}, {0x0001D4B8, 118},
128 {0x0001D4B9, 82}, {0x0001D4CA, 5}, {0x0001D4CB, 111}, {0x0001D4CC, 440}, 128 {0x0001D4B9, 82}, {0x0001D4CA, 5}, {0x0001D4CB, 111}, {0x0001D4CC, 440},
@@ -430,9 +430,9 @@
430"log", "lx", "mb", "mil", "mol", "PH", "p.m.", "PPM", "PR", "sr", "Sv", "Wb", "ff", "fi", 430"log", "lx", "mb", "mil", "mol", "PH", "p.m.", "PPM", "PR", "sr", "Sv", "Wb", "ff", "fi",
431"fl", "ffi", "ffl", "st", ";", "?", "!", "(", ")", "{", "}", "#", "&", "$", "%", "@", 431"fl", "ffi", "ffl", "st", ";", "?", "!", "(", ")", "{", "}", "#", "&", "$", "%", "@",
432"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "G", "J", "K", "O", "S", "T", "U", 432"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "G", "J", "K", "O", "S", "T", "U",
433"W", "Y", "[", "]", "a", "b", "f", "k", "n", "p", "q", "r", "t", "w", "y", "z", "z", 433"W", "Y", "[", "]", "a", "b", "f", "k", "n", "p", "q", "r", "t", "w", "y", "z", "z",
434 /* German */ "Ae", "Oe", "Ue", "ue", 434 /* German */ "Ae", "Oe", "Ue", "ue",
435 /* Scandinavian */ "Aa", "aa" 435 /* Scandinavian */ "Aa", "aa"
436}; 436};
437 static void 437 static void
438addKeyword (struct EXTRACTOR_Keywords **list, char *keyword, 438addKeyword (struct EXTRACTOR_Keywords **list, char *keyword,
@@ -445,7 +445,7 @@ addKeyword (struct EXTRACTOR_Keywords **list, char *keyword,
445 next->keywordType = type; 445 next->keywordType = type;
446 *list = next; 446 *list = next;
447 } 447 }
448 struct EXTRACTOR_Keywords * 448 struct EXTRACTOR_Keywords *
449libextractor_translit_extract (const char *filename, const char *data, 449libextractor_translit_extract (const char *filename, const char *data,
450 size_t size, struct EXTRACTOR_Keywords *prev) 450 size_t size, struct EXTRACTOR_Keywords *prev)
451{ 451{
@@ -456,7 +456,7 @@ libextractor_translit_extract (const char *filename, const char *data,
456 mem = 256; 456 mem = 256;
457 transl = malloc (mem + 1); 457 transl = malloc (mem + 1);
458 while (pos != NULL) 458 while (pos != NULL)
459 459
460 { 460 {
461 int charlen = 0; 461 int charlen = 0;
462 char *srcdata = pos->keyword; 462 char *srcdata = pos->keyword;
@@ -468,67 +468,67 @@ libextractor_translit_extract (const char *filename, const char *data,
468 long long unicode; 468 long long unicode;
469 int idx; 469 int idx;
470 char *tr; 470 char *tr;
471 471
472 /* Get length of character */ 472 /* Get length of character */
473 c = srcdata[src]; 473 c = srcdata[src];
474 if ((c & 0xC0) == 0xC0) 474 if ((c & 0xC0) == 0xC0)
475 475
476 /* UTF-8 char */ 476 /* UTF-8 char */
477 if ((c & 0xE0) == 0xE0) 477 if ((c & 0xE0) == 0xE0)
478 if ((c & 0xF0) == 0xF0) 478 if ((c & 0xF0) == 0xF0)
479 charlen = 4; 479 charlen = 4;
480 480
481 else 481 else
482 charlen = 3; 482 charlen = 3;
483 483
484 else 484 else
485 charlen = 2; 485 charlen = 2;
486 486
487 else 487 else
488 charlen = 1; 488 charlen = 1;
489 if (src + charlen - 1 > len) 489 if (src + charlen - 1 > len)
490 { 490 {
491 491
492 /* incomplete UTF-8 */ 492 /* incomplete UTF-8 */
493 src = len; 493 src = len;
494 continue; 494 continue;
495 } 495 }
496 496
497 /* Copy character to destination */ 497 /* Copy character to destination */
498 if (charlen > 1) 498 if (charlen > 1)
499 { 499 {
500 unicode = 0; 500 unicode = 0;
501 if (charlen == 2) 501 if (charlen == 2)
502 { 502 {
503 503
504 /* 5 bits from the first byte and 6 bits from the second. 504 /* 5 bits from the first byte and 6 bits from the second.
505 64 = 2^6 */ 505 64 = 2^6 */
506 unicode = 506 unicode =
507 ((srcdata[src] & 0x1F) * 64) | (srcdata[src + 1] & 0x3F); 507 ((srcdata[src] & 0x1F) * 64) | (srcdata[src + 1] & 0x3F);
508 } 508 }
509 509
510 else if (charlen == 3) 510 else if (charlen == 3)
511 { 511 {
512 512
513 /* 4 bits from the first byte and 6 bits from the second and third 513 /* 4 bits from the first byte and 6 bits from the second and third
514 byte. 4096 = 2^12 */ 514 byte. 4096 = 2^12 */
515 unicode = ((srcdata[src] & 0xF) * 4096) | 515 unicode = ((srcdata[src] & 0xF) * 4096) |
516 ((srcdata[src + 1] & 0x3F) * 516 ((srcdata[src + 1] & 0x3F) *
517 64) | (srcdata[src + 2] & 0x3F); 517 64) | (srcdata[src + 2] & 0x3F);
518 } 518 }
519 519
520 else if (charlen == 4) 520 else if (charlen == 4)
521 { 521 {
522 522
523 /* 3 bits from the first byte and 6 bits from the second, third 523 /* 3 bits from the first byte and 6 bits from the second, third
524 and fourth byte. 262144 = 2^18 */ 524 and fourth byte. 262144 = 2^18 */
525 unicode = ((srcdata[src] & 7) * 262144) | 525 unicode = ((srcdata[src] & 7) * 262144) |
526 ((srcdata[src] & 0xF) * 4096) | 526 ((srcdata[src] & 0xF) * 4096) |
527 ((srcdata[src + 1] & 0x3F) * 527 ((srcdata[src + 1] & 0x3F) *
528 64) | (srcdata[src + 2] & 0x3F); 528 64) | (srcdata[src + 2] & 0x3F);
529 } 529 }
530 530
531 /* Look it up */ 531 /* Look it up */
532 idx = 0; 532 idx = 0;
533 tr = srcdata + src; 533 tr = srcdata + src;
534 trlen = charlen; 534 trlen = charlen;
@@ -536,8 +536,8 @@ libextractor_translit_extract (const char *filename, const char *data,
536 { 536 {
537 if (unicode == chars[idx][0]) 537 if (unicode == chars[idx][0])
538 { 538 {
539 539
540 /* Found it */ 540 /* Found it */
541 tr = translit[chars[idx][1]]; 541 tr = translit[chars[idx][1]];
542 trlen = strlen (tr); 542 trlen = strlen (tr);
543 break; 543 break;
@@ -545,7 +545,7 @@ libextractor_translit_extract (const char *filename, const char *data,
545 idx++; 545 idx++;
546 } 546 }
547 } 547 }
548 548
549 else 549 else
550 trlen = 1; 550 trlen = 1;
551 if (dest + trlen > mem) 551 if (dest + trlen > mem)
@@ -555,11 +555,11 @@ libextractor_translit_extract (const char *filename, const char *data,
555 } 555 }
556 if (charlen > 1) 556 if (charlen > 1)
557 { 557 {
558 558
559 /* Copy character to destination string */ 559 /* Copy character to destination string */
560 memcpy (transl + dest, tr, trlen); 560 memcpy (transl + dest, tr, trlen);
561 } 561 }
562 562
563 else 563 else
564 transl[dest] = c; 564 transl[dest] = c;
565 dest += trlen; 565 dest += trlen;
@@ -573,4 +573,4 @@ libextractor_translit_extract (const char *filename, const char *data,
573 return prev; 573 return prev;
574 } 574 }
575 575
576 576