1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
/*
This file is part of libextractor.
Copyright (C) 2004 Vidyut Samanta and Christian Grothoff
libextractor is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
by the Free Software Foundation; either version 3, or (at your
option) any later version.
libextractor is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with libextractor; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
#include "platform.h"
#include "extractor.h"
#include "convert.h"
/**
 * Convert the len bytes long character sequence
 * given in input that is in the given charset
 * to UTF-8.
 *
 * The caller owns the returned buffer and must release it with free().
 *
 * @param input string to convert
 * @param len number of bytes in input
 * @param charset input character set
 * @return the converted string (0-terminated); if the character set
 *         cannot be opened, the conversion fails, or iconv support is
 *         not compiled in, a 0-terminated copy of the original
 *         input is returned instead; NULL if the input is larger
 *         than 1 MiB (iconv builds only) or if memory allocation fails
 */
char *
EXTRACTOR_common_convert_to_utf8 (const char *input,
                                  size_t len,
                                  const char *charset)
{
#if HAVE_ICONV
  /* iconv() advances 'input' and decrements 'len' as it consumes bytes,
     so remember the original values for the fallback copy. */
  const char *const orig_input = input;
  const size_t orig_len = len;
  size_t out_size;
  size_t out_left;
  size_t used;
  char *buf;
  char *out;
  char *ret;
  iconv_t cd;

  cd = iconv_open ("UTF-8", charset);
  if (cd == (iconv_t) -1)
    return strndup (orig_input, orig_len);
  if (len > 1024 * 1024)
  {
    iconv_close (cd);
    return NULL; /* too big for meta data */
  }
  /* Worst-case scratch buffer: three output bytes per input byte, plus slack. */
  out_size = 3 * len + 4;
  buf = malloc (out_size);
  if (buf == NULL)
  {
    iconv_close (cd);
    return NULL;
  }
  out = buf;
  out_left = out_size;
  if (iconv (cd, (char **) &input, &len, &out, &out_left) == ((size_t) -1))
  {
    iconv_close (cd);
    free (buf);
    /* Fall back to the complete original input, not just the
       bytes iconv() had left unconverted when it failed. */
    return strndup (orig_input, orig_len);
  }
  iconv_close (cd);
  /* Shrink the result to exactly the number of bytes produced. */
  used = out_size - out_left;
  ret = malloc (used + 1);
  if (ret != NULL)
  {
    memcpy (ret, buf, used);
    ret[used] = '\0';
  }
  free (buf);
  return ret;
#else
  /* No iconv available: hand back a 0-terminated copy of the input. */
  char *ret;

  (void) charset;
  ret = malloc (len + 1);
  if (ret == NULL)
    return NULL;
  memcpy (ret, input, len);
  ret[len] = '\0';
  return ret;
#endif
}
/* end of convert.c */
|