libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

convert.c (2526B)


      1 /*
      2      This file is part of libextractor.
      3      Copyright (C) 2004 Vidyut Samanta and Christian Grothoff
      4 
      5      libextractor is free software; you can redistribute it and/or modify
      6      it under the terms of the GNU General Public License as published
      7      by the Free Software Foundation; either version 3, or (at your
      8      option) any later version.
      9 
     10      libextractor is distributed in the hope that it will be useful, but
     11      WITHOUT ANY WARRANTY; without even the implied warranty of
     12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13      General Public License for more details.
     14 
     15      You should have received a copy of the GNU General Public License
     16      along with libextractor; see the file COPYING.  If not, write to the
     17      Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18      Boston, MA 02110-1301, USA.
     19  */
     20 
     21 #include "platform.h"
     22 #include "extractor.h"
     23 #include "convert.h"
     24 
     25 /**
     26  * Convert the len characters long character sequence
     27  * given in input that is in the given charset
     28  * to UTF-8.
     29  *
     30  * @param input string to convert
     31  * @param len number of bytes in input
     32  * @param charset input character set
     33  * @return the converted string (0-terminated), NULL on error
     34  * @return the converted string (0-terminated),
     35  *  if conversion fails, a copy of the original
     36  *  string is returned.
     37  */
     38 char *
     39 EXTRACTOR_common_convert_to_utf8 (const char *input,
     40                                   size_t len,
     41                                   const char *charset)
     42 {
     43 #if HAVE_ICONV
     44   size_t tmpSize;
     45   size_t finSize;
     46   char *tmp;
     47   char *ret;
     48   char *itmp;
     49   const char *i;
     50   iconv_t cd;
     51 
     52   i = input;
     53   cd = iconv_open ("UTF-8", charset);
     54   if (cd == (iconv_t) -1)
     55     return strndup (i, len);
     56   if (len > 1024 * 1024)
     57   {
     58     iconv_close (cd);
     59     return NULL;   /* too big for meta data */
     60   }
     61   tmpSize = 3 * len + 4;
     62   tmp = malloc (tmpSize);
     63   if (tmp == NULL)
     64   {
     65     iconv_close (cd);
     66     return NULL;
     67   }
     68   itmp = tmp;
     69   finSize = tmpSize;
     70   if (iconv (cd, (char **) &input, &len, &itmp, &finSize) == ((size_t) -1))
     71   {
     72     iconv_close (cd);
     73     free (tmp);
     74     return strndup (i, len);
     75   }
     76   ret = malloc (tmpSize - finSize + 1);
     77   if (ret == NULL)
     78   {
     79     iconv_close (cd);
     80     free (tmp);
     81     return NULL;
     82   }
     83   memcpy (ret, tmp, tmpSize - finSize);
     84   ret[tmpSize - finSize] = '\0';
     85   free (tmp);
     86   iconv_close (cd);
     87   return ret;
     88 #else
     89   char *ret;
     90 
     91   ret = malloc (len + 1);
     92   memcpy (ret, input, len);
     93   ret[len] = '\0';
     94   return ret;
     95 #endif
     96 }
     97 
     98 
     99 /* end of convert.c */