1 file changed, 20 insertions, 20 deletions
diff --git a/src/plugins/html_extractor.c b/src/plugins/html_extractor.c
index 6ac0809..8cd4aba 100644
--- a/src/plugins/html_extractor.c
+++ b/src/plugins/html_extractor.c
@@ -27,7 +27,7 @@
 #include "extractor.h"
 #include <magic.h>
 #include <tidy/tidy.h>
-#include <tidy/buffio.h>
+#include <tidy/tidybuffio.h>
 /**
 * Mapping of HTML META names to LE types.
@@ -59,7 +59,7 @@ static struct
  { "rights", EXTRACTOR_METATYPE_RIGHTS },
  { "dc.rights", EXTRACTOR_METATYPE_RIGHTS },
  { "copyright", EXTRACTOR_METATYPE_COPYRIGHT },
-  { "language", EXTRACTOR_METATYPE_LANGUAGE },  
+  { "language", EXTRACTOR_METATYPE_LANGUAGE },
  { "keywords", EXTRACTOR_METATYPE_KEYWORDS },
  { "abstract", EXTRACTOR_METATYPE_ABSTRACT },
  { "formatter", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE },
@@ -82,7 +82,7 @@ static magic_t magic;
 * @param tag tag to map
 * @return EXTRACTOR_METATYPE_RESERVED if the type was not found
 */
-static enum EXTRACTOR_MetaType 
+static enum EXTRACTOR_MetaType
 tag_to_type (const char *tag)
 {
  unsigned int i;
@@ -146,7 +146,7 @@ static void TIDY_CALL
 unget_byte_cb (void *sourceData, byte bt)
 {
  struct EXTRACTOR_ExtractContext *ec = sourceData;
-  
+
  (void) ec->seek (ec->cls, -1, SEEK_CUR);
 }
@@ -167,11 +167,11 @@ eof_cb (void *sourceData)
 /**
- * Main entry method for the 'text/html' extraction plugin.  
+ * Main entry method for the 'text/html' extraction plugin.
 *
 * @param ec extraction context provided to the plugin
 */
-void 
+void
 EXTRACTOR_html_extract_method (struct EXTRACTOR_ExtractContext *ec)
 {
  TidyDoc doc;
@@ -250,9 +250,9 @@ EXTRACTOR_html_extract_method (struct EXTRACTOR_ExtractContext *ec)
        case TidyNode_Php:
          break;
        case TidyNode_XmlDecl:
-          break;          
+          break;
        case TidyNode_Start:
-        case TidyNode_StartEnd: 
+        case TidyNode_StartEnd:
          name = tidyNodeGetName (child);
          if ( (0 == strcasecmp (name, "title")) &&
               (NULL != (title = tidyGetChild (child))) )
@@ -278,13 +278,13 @@ EXTRACTOR_html_extract_method (struct EXTRACTOR_ExtractContext *ec)
            }
          if (0 == strcasecmp (name, "meta"))
            {
-              if (NULL == (attr = tidyAttrGetById (child, 
+              if (NULL == (attr = tidyAttrGetById (child,
                                                   TidyAttr_NAME)))
                break;
-              if (EXTRACTOR_METATYPE_RESERVED == 
+              if (EXTRACTOR_METATYPE_RESERVED ==
                  (type = tag_to_type (tidyAttrValue (attr))))
                break;
-              if (NULL == (attr = tidyAttrGetById (child, 
+              if (NULL == (attr = tidyAttrGetById (child,
                                                   TidyAttr_CONTENT)))
                break;
              name = tidyAttrValue (attr);
@@ -297,14 +297,14 @@ EXTRACTOR_html_extract_method (struct EXTRACTOR_ExtractContext *ec)
                            name,
                            strlen (name) + 1))
                goto CLEANUP;
-              break;    
+              break;
            }
          break;
        case TidyNode_End:
-          break;          
+          break;
        default:
          break;
-        }      
+        }
    }
 CLEANUP:
  tidyRelease (doc);
@@ -463,7 +463,7 @@ findInTags (struct TagInfo * t,
 /* mimetype = text/html */
-int 
+int
 EXTRACTOR_html_extract (const char *data,
                        size_t size,
                        EXTRACTOR_MetaDataProcessor proc,
@@ -562,7 +562,7 @@ EXTRACTOR_html_extract (const char *data,
         if text/html is present, we take that as the mime-type; if charset=
         is present, we try to use that for character set conversion. */
      if (0 == strncasecmp (tmp, "text/html", strlen ("text/html")))
-        ret = proc (proc_cls, 
+        ret = proc (proc_cls,
                    "html",
                    EXTRACTOR_METATYPE_MIMETYPE,
                    EXTRACTOR_METAFORMAT_UTF8,
@@ -613,7 +613,7 @@ EXTRACTOR_html_extract (const char *data,
        free (tmp);
      i++;
    }
-  while (tags != NULL) 
+  while (tags != NULL)
    {
      t = tags;
      if ( (tagMatch ("title", t->tagStart, t->tagEnd)) &&
@@ -667,7 +667,7 @@ EXTRACTOR_html_extract (const char *data,
 /**
 * Initialize glib and load magic file.
 */
-void __attribute__ ((constructor)) 
+void __attribute__ ((constructor))
 html_gobject_init ()
 {
  magic = magic_open (MAGIC_MIME_TYPE);
@@ -681,8 +681,8 @@ html_gobject_init ()
 /**
 * Destructor for the library, cleans up.
 */
-void __attribute__ ((destructor)) 
+void __attribute__ ((destructor))
-html_ltdl_fini () 
+html_ltdl_fini ()
 {
  if (NULL != magic)
    {

diff --git a/src/plugins/html_extractor.c b/src/plugins/html_extractor.c
index 6ac0809..8cd4aba 100644
--- a/src/plugins/html_extractor.c
+++ b/src/plugins/html_extractor.c
@@ -27,7 +27,7 @@
27	#include "extractor.h"	27	#include "extractor.h"
28	#include <magic.h>	28	#include <magic.h>
29	#include <tidy/tidy.h>	29	#include <tidy/tidy.h>
30	#include <tidy/buffio.h>	30	#include <tidy/tidybuffio.h>
31		31
32	/**	32	/**
33	* Mapping of HTML META names to LE types.	33	* Mapping of HTML META names to LE types.
@@ -59,7 +59,7 @@ static struct
59	{ "rights", EXTRACTOR_METATYPE_RIGHTS },	59	{ "rights", EXTRACTOR_METATYPE_RIGHTS },
60	{ "dc.rights", EXTRACTOR_METATYPE_RIGHTS },	60	{ "dc.rights", EXTRACTOR_METATYPE_RIGHTS },
61	{ "copyright", EXTRACTOR_METATYPE_COPYRIGHT },	61	{ "copyright", EXTRACTOR_METATYPE_COPYRIGHT },
62	{ "language", EXTRACTOR_METATYPE_LANGUAGE },	62	{ "language", EXTRACTOR_METATYPE_LANGUAGE },
63	{ "keywords", EXTRACTOR_METATYPE_KEYWORDS },	63	{ "keywords", EXTRACTOR_METATYPE_KEYWORDS },
64	{ "abstract", EXTRACTOR_METATYPE_ABSTRACT },	64	{ "abstract", EXTRACTOR_METATYPE_ABSTRACT },
65	{ "formatter", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE },	65	{ "formatter", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE },
@@ -82,7 +82,7 @@ static magic_t magic;
82	* @param tag tag to map	82	* @param tag tag to map
83	* @return EXTRACTOR_METATYPE_RESERVED if the type was not found	83	* @return EXTRACTOR_METATYPE_RESERVED if the type was not found
84	*/	84	*/
85	static enum EXTRACTOR_MetaType	85	static enum EXTRACTOR_MetaType
86	tag_to_type (const char *tag)	86	tag_to_type (const char *tag)
87	{	87	{
88	unsigned int i;	88	unsigned int i;
@@ -146,7 +146,7 @@ static void TIDY_CALL
146	unget_byte_cb (void *sourceData, byte bt)	146	unget_byte_cb (void *sourceData, byte bt)
147	{	147	{
148	struct EXTRACTOR_ExtractContext *ec = sourceData;	148	struct EXTRACTOR_ExtractContext *ec = sourceData;
149		149
150	(void) ec->seek (ec->cls, -1, SEEK_CUR);	150	(void) ec->seek (ec->cls, -1, SEEK_CUR);
151	}	151	}
152		152
@@ -167,11 +167,11 @@ eof_cb (void *sourceData)
167		167
168		168
169	/**	169	/**
170	* Main entry method for the 'text/html' extraction plugin.	170	* Main entry method for the 'text/html' extraction plugin.
171	*	171	*
172	* @param ec extraction context provided to the plugin	172	* @param ec extraction context provided to the plugin
173	*/	173	*/
174	void	174	void
175	EXTRACTOR_html_extract_method (struct EXTRACTOR_ExtractContext *ec)	175	EXTRACTOR_html_extract_method (struct EXTRACTOR_ExtractContext *ec)
176	{	176	{
177	TidyDoc doc;	177	TidyDoc doc;
@@ -250,9 +250,9 @@ EXTRACTOR_html_extract_method (struct EXTRACTOR_ExtractContext *ec)
250	case TidyNode_Php:	250	case TidyNode_Php:
251	break;	251	break;
252	case TidyNode_XmlDecl:	252	case TidyNode_XmlDecl:
253	break;	253	break;
254	case TidyNode_Start:	254	case TidyNode_Start:
255	case TidyNode_StartEnd:	255	case TidyNode_StartEnd:
256	name = tidyNodeGetName (child);	256	name = tidyNodeGetName (child);
257	if ( (0 == strcasecmp (name, "title")) &&	257	if ( (0 == strcasecmp (name, "title")) &&
258	(NULL != (title = tidyGetChild (child))) )	258	(NULL != (title = tidyGetChild (child))) )
@@ -278,13 +278,13 @@ EXTRACTOR_html_extract_method (struct EXTRACTOR_ExtractContext *ec)
278	}	278	}
279	if (0 == strcasecmp (name, "meta"))	279	if (0 == strcasecmp (name, "meta"))
280	{	280	{
281	if (NULL == (attr = tidyAttrGetById (child,	281	if (NULL == (attr = tidyAttrGetById (child,
282	TidyAttr_NAME)))	282	TidyAttr_NAME)))
283	break;	283	break;
284	if (EXTRACTOR_METATYPE_RESERVED ==	284	if (EXTRACTOR_METATYPE_RESERVED ==
285	(type = tag_to_type (tidyAttrValue (attr))))	285	(type = tag_to_type (tidyAttrValue (attr))))
286	break;	286	break;
287	if (NULL == (attr = tidyAttrGetById (child,	287	if (NULL == (attr = tidyAttrGetById (child,
288	TidyAttr_CONTENT)))	288	TidyAttr_CONTENT)))
289	break;	289	break;
290	name = tidyAttrValue (attr);	290	name = tidyAttrValue (attr);
@@ -297,14 +297,14 @@ EXTRACTOR_html_extract_method (struct EXTRACTOR_ExtractContext *ec)
297	name,	297	name,
298	strlen (name) + 1))	298	strlen (name) + 1))
299	goto CLEANUP;	299	goto CLEANUP;
300	break;	300	break;
301	}	301	}
302	break;	302	break;
303	case TidyNode_End:	303	case TidyNode_End:
304	break;	304	break;
305	default:	305	default:
306	break;	306	break;
307	}	307	}
308	}	308	}
309	CLEANUP:	309	CLEANUP:
310	tidyRelease (doc);	310	tidyRelease (doc);
@@ -463,7 +463,7 @@ findInTags (struct TagInfo * t,
463		463
464		464
465	/* mimetype = text/html */	465	/* mimetype = text/html */
466	int	466	int
467	EXTRACTOR_html_extract (const char *data,	467	EXTRACTOR_html_extract (const char *data,
468	size_t size,	468	size_t size,
469	EXTRACTOR_MetaDataProcessor proc,	469	EXTRACTOR_MetaDataProcessor proc,
@@ -562,7 +562,7 @@ EXTRACTOR_html_extract (const char *data,
562	if text/html is present, we take that as the mime-type; if charset=	562	if text/html is present, we take that as the mime-type; if charset=
563	is present, we try to use that for character set conversion. */	563	is present, we try to use that for character set conversion. */
564	if (0 == strncasecmp (tmp, "text/html", strlen ("text/html")))	564	if (0 == strncasecmp (tmp, "text/html", strlen ("text/html")))
565	ret = proc (proc_cls,	565	ret = proc (proc_cls,
566	"html",	566	"html",
567	EXTRACTOR_METATYPE_MIMETYPE,	567	EXTRACTOR_METATYPE_MIMETYPE,
568	EXTRACTOR_METAFORMAT_UTF8,	568	EXTRACTOR_METAFORMAT_UTF8,
@@ -613,7 +613,7 @@ EXTRACTOR_html_extract (const char *data,
613	free (tmp);	613	free (tmp);
614	i++;	614	i++;
615	}	615	}
616	while (tags != NULL)	616	while (tags != NULL)
617	{	617	{
618	t = tags;	618	t = tags;
619	if ( (tagMatch ("title", t->tagStart, t->tagEnd)) &&	619	if ( (tagMatch ("title", t->tagStart, t->tagEnd)) &&
@@ -667,7 +667,7 @@ EXTRACTOR_html_extract (const char *data,
667	/**	667	/**
668	* Initialize glib and load magic file.	668	* Initialize glib and load magic file.
669	*/	669	*/
670	void __attribute__ ((constructor))	670	void __attribute__ ((constructor))
671	html_gobject_init ()	671	html_gobject_init ()
672	{	672	{
673	magic = magic_open (MAGIC_MIME_TYPE);	673	magic = magic_open (MAGIC_MIME_TYPE);
@@ -681,8 +681,8 @@ html_gobject_init ()
681	/**	681	/**
682	* Destructor for the library, cleans up.	682	* Destructor for the library, cleans up.
683	*/	683	*/
684	void __attribute__ ((destructor))	684	void __attribute__ ((destructor))
685	html_ltdl_fini ()	685	html_ltdl_fini ()
686	{	686	{
687	if (NULL != magic)	687	if (NULL != magic)
688	{	688	{