From 4779f7d8eb9c7f6eec051a90a358d66f2dcfe9b0 Mon Sep 17 00:00:00 2001
From: Christian Grothoff <christian@grothoff.org>
Date: Sun, 9 Sep 2007 05:46:28 +0000
Subject: better name, code forgotten earlier

---
 src/daemon/daemontest_post.c |   8 +-
 src/daemon/postprocessor.c   | 675 +++++++++++++++++++++++++++++++++++++++++++
 src/include/microhttpd.h     |  23 +-
 3 files changed, 690 insertions(+), 16 deletions(-)
 create mode 100644 src/daemon/postprocessor.c

diff --git a/src/daemon/daemontest_post.c b/src/daemon/daemontest_post.c
index 87eca380..86fd67f9 100644
--- a/src/daemon/daemontest_post.c
+++ b/src/daemon/daemontest_post.c
@@ -72,10 +72,10 @@ static int
 post_iterator (void *cls,
                enum MHD_ValueKind kind,
                const char *key,
-	       const char *filename,
-	       const char *content_type,
-	       const char *transfer_encoding,
-	       const char *value, size_t off, size_t size)
+               const char *filename,
+               const char *content_type,
+               const char *transfer_encoding,
+               const char *value, size_t off, size_t size)
 {
   int *eok = cls;
 
diff --git a/src/daemon/postprocessor.c b/src/daemon/postprocessor.c
new file mode 100644
index 00000000..47d2efa6
--- /dev/null
+++ b/src/daemon/postprocessor.c
@@ -0,0 +1,675 @@
+/*
+     This file is part of libmicrohttpd
+     (C) 2007 Daniel Pittman and Christian Grothoff
+
+     This library is free software; you can redistribute it and/or
+     modify it under the terms of the GNU Lesser General Public
+     License as published by the Free Software Foundation; either
+     version 2.1 of the License, or (at your option) any later version.
+
+     This library is distributed in the hope that it will be useful,
+     but WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+     Lesser General Public License for more details.
+
+     You should have received a copy of the GNU Lesser General Public
+     License along with this library; if not, write to the Free Software
+     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+/**
+ * @file postprocessor.c
+ * @brief  Methods for parsing POST data
+ * @author Christian Grothoff
+ */
+
+#include "internal.h"
+
+/**
+ * States in the PP parser's state machine.
+ */
+enum PP_State
+{
+  /** start: url-encoded reads a key, multipart expects "--" + boundary */
+  PP_Init = 0,
+  /** url-encoded only: key is complete, value bytes are being processed */
+  PP_HaveKey = 1,
+  /** url-encoded only: value ended with '\r' or '\n', expect one more */
+  PP_ExpectNewLine = 2,
+  /** multipart: boundary seen; expect '\r' or the '-' of a final boundary */
+  PP_ExpectNewLineR = 3,
+  /** multipart: expect the '\n' that ends the boundary line */
+  PP_ExpectNewLineN = 4,
+  /** multipart: reading the header lines of a part */
+  PP_Headers = 5,
+  /** multipart: end of headers, expect '\r' (or '\n') */
+  PP_SkipRNRN = 6,
+  /** multipart: expect '\n' */
+  PP_SkipNRN = 7,
+  /** multipart: expect '\r' (or '\n') */
+  PP_SkipRN = 8,
+  /** multipart: expect the last '\n' before the value */
+  PP_SkipN = 9,
+  /** multipart: value bytes are passed on until the next boundary */
+  PP_ValueToBoundary = 10,
+  /** multipart: first '-' after a boundary seen, expect the second */
+  PP_FinalDash = 11,
+  /** failed or finished: any further data yields MHD_NO */
+  PP_Error = 9999,
+
+
+};
+
+/**
+ * Internal state of the post-processor.
+ */
+struct MHD_PostProcessor
+{
+  /* The parse buffer (buffer_size + 1 bytes) is allocated right behind this struct. */
+  /**
+   * The connection for which we are doing
+   * POST processing.
+   */
+  struct MHD_Connection *connection;
+
+  /**
+   * Function to call with POST data.
+   */
+  MHD_PostDataIterator ikvi;
+
+  /**
+   * Extra argument to ikvi.
+   */
+  void *cls;
+
+  /**
+   * Content-Type value as given by the headers of the
+   * connection; only the pointer is kept, it selects the parser.
+   */
+  const char *encoding;
+
+  /**
+   * Name from the Content-Disposition header (strdup'ed); the multipart key.
+   */
+  char *content_disposition;
+
+  /**
+   * Pointer to the (current) content type (strdup'ed, multipart only).
+   */
+  char *content_type;
+
+  /**
+   * Pointer to the (current) filename (strdup'ed, multipart only).
+   */
+  char *filename;
+
+  /**
+   * Pointer to the (current) transfer encoding (strdup'ed, multipart only).
+   */
+  char *transfer_encoding;
+
+  /**
+   * Incomplete escape sequence ("%" or "%X") held back from the
+   * value until more data arrives (URL-encoding only).
+   */
+  char xbuf[8];
+
+  /**
+   * Size of the buffer behind this struct (not counting the extra byte).
+   */
+  unsigned int buffer_size;
+
+  /**
+   * Number of bytes currently stored in that buffer.
+   */
+  unsigned int buffer_pos;
+
+  /**
+   * Number of valid bytes in xbuf.
+   */
+  unsigned int xbuf_pos;
+
+  /**
+   * Offset within the current value; passed to ikvi as "off".
+   */
+  unsigned int value_offset;
+
+  /**
+   * State of the parser.
+   */
+  enum PP_State state;
+
+};
+
+
+/**
+ * Create a PostProcessor.
+ *
+ * A PostProcessor can be used to (incrementally)
+ * parse the data portion of a POST request.
+ *
+ * @param connection the connection on which the POST is happening; its
+ *        Content-Type header selects the POST format.  Must NOT be NULL.
+ * @param buffer_size maximum number of bytes to use for internal
+ *        buffering.  A tiny value (256-1024) should be sufficient.
+ *        Do NOT use a value smaller than 256.
+ * @param ikvi iterator to be called with the parsed data.  Must NOT be NULL.
+ * @param cls first argument to ikvi
+ * @return NULL on error (out of memory, missing or unsupported
+ *         Content-Type), otherwise a PP handle.  Invalid arguments abort().
+ *         NOTE(review): the Content-Type is compared as a whole string, so
+ *         a value carrying parameters ("...; boundary=x") looks rejected.
+ */
+struct MHD_PostProcessor *
+MHD_create_post_processor (struct MHD_Connection *connection,
+                           unsigned int buffer_size,
+                           MHD_PostDataIterator ikvi, void *cls)
+{
+  struct MHD_PostProcessor *ret;
+  const char *encoding;
+  /* invalid arguments are treated as a fatal programming error */
+  if ((buffer_size < 256) || (connection == NULL) || (ikvi == NULL))
+    abort ();
+  encoding = MHD_lookup_connection_value (connection,
+                                          MHD_HEADER_KIND,
+                                          MHD_HTTP_HEADER_CONTENT_TYPE);
+  if (encoding == NULL)
+    return NULL;                /* request has no Content-Type header */
+  if ((0 != strcasecmp (MHD_HTTP_POST_ENCODING_FORM_URLENCODED,
+                        encoding)) &&
+      (0 != strcasecmp (MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA, encoding)))
+    return NULL;                /* neither of the two supported encodings */
+  ret = malloc (sizeof (struct MHD_PostProcessor) + buffer_size + 1);
+  if (ret == NULL)
+    return NULL;
+  memset (ret, 0, sizeof (struct MHD_PostProcessor));   /* buffer part is not cleared */
+  ret->connection = connection;
+  ret->ikvi = ikvi;
+  ret->cls = cls;
+  ret->encoding = encoding;     /* pointer is stored, string is not copied */
+  ret->buffer_size = buffer_size;
+  ret->state = PP_Init;
+  return ret;
+}
+
+/**
+ * Size of the on-stack buffer that we use for un-escaping of the value.
+ */
+#define XBUF_SIZE 1024
+
+/**
+ * Process url-encoded POST data; the key is collected in the buffer
+ * behind pp, the value is un-escaped in pieces of up to XBUF_SIZE bytes.
+ *
+ * @return MHD_YES on success, MHD_NO on error (key too long, iterator
+ *         aborted, or data after the terminating line break)
+ */
+static int
+post_process_urlencoded (struct MHD_PostProcessor *pp,
+                         const char *post_data, unsigned int post_data_len)
+{
+  unsigned int equals;
+  unsigned int amper;
+  unsigned int poff;
+  unsigned int xoff;
+  unsigned int delta;
+  char *buf;
+  char xbuf[XBUF_SIZE + 1];
+
+  buf = (char *) &pp[1];
+  poff = 0;
+  while (poff < post_data_len)
+    {
+      switch (pp->state)
+        {
+        case PP_Init:
+          equals = 0;
+          while ((equals + poff < post_data_len) &&
+                 (post_data[equals + poff] != '='))
+            equals++;
+          if (equals + pp->buffer_pos > pp->buffer_size)
+            {
+              pp->state = PP_Error;     /* out of memory */
+              return MHD_NO;
+            }
+          memcpy (&buf[pp->buffer_pos], &post_data[poff], equals);
+          pp->buffer_pos += equals;
+          if (equals + poff == post_data_len)
+            return MHD_YES;     /* no '=' yet */
+          buf[pp->buffer_pos] = '\0';   /* 0-terminate key */
+          pp->buffer_pos = 0;   /* reset for next key */
+          MHD_http_unescape (buf);
+          poff += equals + 1;
+          pp->state = PP_HaveKey;
+          pp->value_offset = 0;
+          break;
+        case PP_HaveKey:
+          /* obtain rest of value from previous iteration */
+          memcpy (xbuf, pp->xbuf, pp->xbuf_pos);
+          xoff = pp->xbuf_pos;
+          pp->xbuf_pos = 0;
+
+          /* find last position in input buffer that is part of the value */
+          amper = 0;
+          while ((amper + poff < post_data_len) &&
+                 (post_data[amper + poff] != '&') &&
+                 (post_data[amper + poff] != '\n') &&
+                 (post_data[amper + poff] != '\r'))
+            amper++;
+
+          /* delta: bytes we can process right now (amper- or xbuf-size limited) */
+          delta = amper;
+          if (delta > XBUF_SIZE - xoff)
+            delta = XBUF_SIZE - xoff;
+          amper -= delta;       /* value bytes that did not fit into xbuf */
+
+          /* move input into processing buffer */
+          memcpy (&xbuf[xoff], &post_data[poff], delta);
+          xoff += delta;
+          poff += delta;
+
+          /* an escape sequence cut off at the end of the processing buffer
+             is excluded from processing (delta = end of processed region) */
+          delta = xoff;
+          if ((delta > 0) && (xbuf[delta - 1] == '%'))
+            delta--;
+          else if ((delta > 1) && (xbuf[delta - 2] == '%'))
+            delta -= 2;
+          /* save an incomplete escape sequence to pp->xbuf for later */
+          if (delta < xoff)
+            {
+              memcpy (pp->xbuf, &xbuf[delta], xoff - delta);
+              pp->xbuf_pos = xoff - delta;
+              xoff = delta;
+            }
+          /* nothing to do, and not because the value is empty (we are
+             waiting for more data): go for next iteration */
+          if ((xoff == 0) && (poff == post_data_len))
+            continue;
+          /* unescape */
+          xbuf[xoff] = '\0';    /* 0-terminate in preparation */
+          MHD_http_unescape (xbuf);
+          /* finally: call application; MHD_NO from the iterator aborts */
+          if (MHD_NO == pp->ikvi (pp->cls, MHD_POSTDATA_KIND, buf /* key */ ,
+                                  NULL, NULL, NULL, xbuf, pp->value_offset, xoff))
+            {
+              pp->state = PP_Error;
+              return MHD_NO;
+            }
+          pp->value_offset += xoff;
+
+          /* done with the value only if all of it was consumed and a
+             delimiter follows; a full xbuf alone does not end the value */
+          if ((amper == 0) && (poff < post_data_len))
+            {
+              pp->state = PP_Init;
+              poff++;           /* skip '&' or new-lines */
+              if ((post_data[poff - 1] == '\n') ||
+                  (post_data[poff - 1] == '\r'))
+                pp->state = PP_ExpectNewLine;
+            }
+          break;
+        case PP_ExpectNewLine:
+          if ((post_data[poff] == '\n') || (post_data[poff] == '\r'))
+            {
+              poff++;
+              /* we are done, report error if we receive any more... */
+              pp->state = PP_Error;
+              return MHD_YES;
+            }
+          return MHD_NO;
+        case PP_Error:
+          return MHD_NO;
+        default:
+          abort ();             /* should never happen! */
+        }
+    }
+  return MHD_YES;
+}
+
+/**
+ * If the given line matches the prefix (ignoring case), strdup the
+ * rest of the line into the suffix ptr.  A copy stored there earlier
+ * is freed first, so *suffix must be NULL or heap-allocated.
+ *
+ * @return MHD_YES if there was a match, MHD_NO if not
+ */
+static int
+try_match_header (const char *prefix, char *line, char **suffix)
+{
+  if (0 != strncasecmp (prefix, line, strlen (prefix)))
+    return MHD_NO;
+  free (*suffix);               /* repeated header: do not leak old copy */
+  *suffix = strdup (&line[strlen (prefix)]);    /* may be NULL if out of memory */
+  return MHD_YES;
+}
+
+/**
+ * Decode multipart POST data; input is buffered behind pp and each value
+ * is handed to pp->ikvi.  The boundary is taken from pp->encoding.
+ *
+ * TODO: If the content-type is multipart/mixed, we do not do anything
+ * special.  However, we should probably break the individual values
+ * apart and give them to the callback individually (will require some
+ * additional states & state).
+ * TODO: this code has never been tested...
+ * See http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4
+ * @return MHD_YES on success, MHD_NO on error (see NOTE(review) marks)
+ */
+static int
+post_process_multipart (struct MHD_PostProcessor *pp,
+                        const char *post_data, unsigned int post_data_len)
+{
+  char *buf;
+  const char *boundary;
+  unsigned int max;             /* number of bytes to copy in this pass */
+  unsigned int ioff;            /* offset of first unprocessed byte in buf */
+  unsigned int poff;            /* counts the input bytes copied so far */
+  unsigned int newline;
+  unsigned int endquote;
+  size_t blen;
+
+  buf = (char *) &pp[1];
+  ioff = 0;
+  poff = 0;
+  boundary =
+    &pp->encoding[strlen (MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA)];
+  if (NULL != strstr (boundary, "boundary="))
+    boundary = strstr (boundary, "boundary=") + strlen ("boundary=");
+  else
+    return MHD_NO;              /* failed to determine boundary */
+  blen = strlen (boundary);
+  if (blen * 2 + 2 > pp->buffer_size)
+    return MHD_NO;              /* (will be) out of memory */
+  while ((poff < post_data_len) || (pp->buffer_pos > ioff))
+    {
+      /* first, move data to our internal buffer */
+      max = pp->buffer_size - pp->buffer_pos;
+      if ((max < ioff) && (max < post_data_len))
+        {
+          memmove (buf, &buf[ioff], pp->buffer_pos - ioff);
+          pp->buffer_pos -= ioff;
+          ioff = 0;
+          max = pp->buffer_size - pp->buffer_pos;
+        }
+      if (max > post_data_len)  /* NOTE(review): the bound ignores poff */
+        max = post_data_len;
+      memcpy (&buf[pp->buffer_pos], post_data, max);    /* NOTE(review): not &post_data[poff]? */
+      poff += max;
+      pp->buffer_pos += max;
+
+      switch (pp->state)
+        {
+        case PP_Init:
+          /* we're looking for the boundary */
+          if (pp->buffer_pos < 2 + blen + ioff)
+            goto END;
+          if ((0 != memcmp ("--", &buf[ioff], 2)) ||
+              (0 != memcmp (&buf[ioff + 2], boundary, blen)))
+            return MHD_NO;      /* expected boundary */
+
+          /* remove boundary from buffer */
+          ioff += 2 + blen;
+
+          /* next: start with headers */
+          pp->state = PP_ExpectNewLineR;
+          break;
+        case PP_ExpectNewLineR:
+          if (buf[ioff] == '-')
+            {
+              /* last boundary ends with "--" */
+              ioff++;
+              pp->state = PP_FinalDash;
+              break;
+            }
+          if (buf[ioff] == '\r')
+            {
+              ioff++;
+              pp->state = PP_ExpectNewLineN;
+              break;
+            }
+          /* fall through! */
+        case PP_ExpectNewLineN:
+          if (buf[ioff] == '\n')
+            {
+              ioff++;
+              pp->state = PP_Headers;
+              break;
+            }
+          return MHD_NO;
+        case PP_Headers:
+          newline = 0;
+          while ((newline + ioff < pp->buffer_pos) &&
+                 (buf[newline + ioff] != '\r') &&
+                 (buf[newline + ioff] != '\n'))
+            newline++;
+          if (newline == pp->buffer_size)
+            return MHD_NO;      /* out of memory */
+          if (newline + ioff == pp->buffer_pos)
+            {
+              /* try to make more room */
+              memmove (buf, &buf[ioff], pp->buffer_pos - ioff);
+              pp->buffer_pos -= ioff;
+              ioff = 0;
+              break;
+            }
+          if (newline == 0)
+            {
+              pp->state = PP_SkipRNRN;
+              break;
+            }
+          buf[ioff + newline] = '\0';
+          if ((MHD_YES
+               == try_match_header ("Content-Disposition: form-data; name=\"",
+                                    &buf[ioff],
+                                    &pp->content_disposition)) &&
+              (pp->content_disposition != NULL) &&
+              (0 < strlen (pp->content_disposition)))
+            {
+              /* find end-quote; then check if we also have a filename! */
+              endquote = 0;
+              while ((pp->content_disposition[endquote] != '\"') &&
+                     (pp->content_disposition[endquote] != '\0'))
+                endquote++;
+              pp->content_disposition[endquote++] = '\0';       /* remove end-quote */
+              if ((MHD_YES
+                   == try_match_header (" filename=",
+                                        &pp->content_disposition[endquote],
+                                        &pp->filename)) &&
+                  (pp->filename != NULL) && (0 < strlen (pp->filename)))
+                pp->filename[strlen (pp->filename) - 1] = '\0'; /* remove end-quote */
+            }
+          try_match_header ("Content-Type: ", &buf[ioff], &pp->content_type);
+          try_match_header ("Content-Transfer-Encoding: ",
+                            &buf[ioff], &pp->transfer_encoding);
+          break;                /* NOTE(review): ioff still points at this line */
+        case PP_SkipRNRN:
+          if (buf[ioff] == '\r')
+            {
+              ioff++;
+              pp->state = PP_SkipNRN;
+              break;
+            }
+          /* fall through! */
+        case PP_SkipNRN:
+          if (buf[ioff] == '\n')
+            {
+              ioff++;
+              pp->state = PP_SkipRN;
+              break;
+            }
+          return MHD_NO;        /* parse error */
+        case PP_SkipRN:
+          if (buf[ioff] == '\r')
+            {
+              ioff++;
+              pp->state = PP_SkipN;
+              break;
+            }
+          /* fall through! */
+        case PP_SkipN:
+          if (buf[ioff] == '\n')
+            {
+              ioff++;
+              pp->state = PP_ValueToBoundary;
+              pp->value_offset = 0;
+              break;
+            }
+          return MHD_NO;        /* parse error */
+        case PP_ValueToBoundary:
+          /* all data in buf until the boundary
+             (\r\n--+boundary) is part of the value */
+          newline = 0;
+          while (1)
+            {
+              while ((newline + ioff + 4 < pp->buffer_pos) &&
+                     (0 != memcmp ("\r\n--", &buf[newline + ioff], 4)))
+                newline++;
+              if (newline + blen + 4 > pp->buffer_size)
+                {
+                  /* boundary not in sight --
+                     process data, then make room for more! */
+                  if (MHD_NO == pp->ikvi (pp->cls,
+                                          MHD_POSTDATA_KIND,
+                                          pp->content_disposition,
+                                          pp->filename,
+                                          pp->content_type,
+                                          pp->transfer_encoding,
+                                          &buf[ioff],
+                                          pp->value_offset, newline))
+                    {
+                      pp->state = PP_Error;
+                      break;
+                    }
+                  pp->value_offset += newline;
+                  ioff += newline;
+                  memmove (buf, &buf[ioff], pp->buffer_pos - ioff);
+                  pp->buffer_pos -= ioff;       /* NOTE(review): ioff is not reset to 0 */
+                  break;
+                }
+              if (newline + blen + 4 < pp->buffer_pos)
+                {
+                  /* can check for boundary right now! */
+                  if (0 == memcmp (&buf[newline + ioff + 4], boundary, blen))
+                    {
+                      /* found: process data, then look for more */
+                      if (MHD_NO == pp->ikvi (pp->cls,
+                                              MHD_POSTDATA_KIND,
+                                              pp->content_disposition,
+                                              pp->filename,
+                                              pp->content_type,
+                                              pp->transfer_encoding,
+                                              &buf[ioff],
+                                              pp->value_offset, newline))
+                        {
+                          pp->state = PP_Error;
+                          break;
+                        }
+
+                      /* clean up! */
+                      if (pp->content_type != NULL)
+                        {
+                          free (pp->content_type);
+                          pp->content_type = NULL;
+                        }
+                      if (pp->content_disposition != NULL)
+                        {
+                          free (pp->content_disposition);
+                          pp->content_disposition = NULL;
+                        }
+                      if (pp->filename != NULL)
+                        {
+                          free (pp->filename);
+                          pp->filename = NULL;
+                        }
+                      if (pp->transfer_encoding != NULL)
+                        {
+                          free (pp->transfer_encoding);
+                          pp->transfer_encoding = NULL;
+                        }
+                      pp->value_offset = 0;
+                      ioff += newline + 2;      /* skip data + new line */
+                      pp->state = PP_Init;
+                      break;
+                    }
+                  /* not the boundary, look further! */
+                  newline += 4;
+                  continue;
+                }
+              /* NOTE(review): when neither branch above is taken nothing
+                 changes, so this loop appears to repeat forever -- confirm */
+            }
+          break;
+        case PP_FinalDash:
+          if (buf[ioff] == '-')
+            {
+              /* last boundary ends with "--" */
+              ioff++;
+              pp->state = PP_Error;
+              break;
+            }
+          return MHD_NO;        /* parse error */
+        case PP_Error:
+          return MHD_NO;
+        default:
+          abort ();             /* should never happen! */
+
+        }
+    }
+END:
+  memmove (buf, &buf[ioff], pp->buffer_pos - ioff);
+  pp->buffer_pos -= ioff;
+  return MHD_YES;
+}
+
+/**
+ * Parse and process POST data.
+ * Call this function when POST data is available
+ * (usually during an MHD_AccessHandlerCallback)
+ * with the upload_data and upload_data_size.
+ * Whenever possible, this will then cause calls
+ * to the MHD_PostDataIterator given at creation.
+ *
+ * @param pp the post processor
+ * @param post_data post_data_len bytes of POST data
+ * @param post_data_len length of post_data
+ * @return MHD_YES on success (also if post_data_len is 0),
+ *         MHD_NO on error (out-of-memory, iterator aborted, parse error)
+ */
+int
+MHD_post_process (struct MHD_PostProcessor *pp,
+                  const char *post_data, unsigned int post_data_len)
+{
+  if (post_data_len == 0)
+    return MHD_YES;
+  if (0 == strcasecmp (MHD_HTTP_POST_ENCODING_FORM_URLENCODED, pp->encoding))
+    return post_process_urlencoded (pp, post_data, post_data_len);
+  if (0 ==
+      strncasecmp (MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA, pp->encoding,
+                   strlen (MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA)))
+    return post_process_multipart (pp, post_data, post_data_len);
+  /* this should never be reached: the encoding was checked at creation */
+  return MHD_NO;
+}
+
+/**
+ * Release PostProcessor resources.
+ *
+ * @param pp the post processor to destroy; NULL is accepted and ignored
+ */
+void
+MHD_destroy_post_processor (struct MHD_PostProcessor *pp)
+{
+  if (pp == NULL)
+    return;                     /* same convention as free (NULL) */
+  /* These internal strings need cleaning up since the post-processing
+     may have been interrupted at any stage; free (NULL) is a no-op, so
+     the pointers need no individual checks. */
+  free (pp->content_type);
+  free (pp->content_disposition);
+  free (pp->filename);
+  free (pp->transfer_encoding);
+  free (pp);
+}
+
+/* end of postprocessor.c */
diff --git a/src/include/microhttpd.h b/src/include/microhttpd.h
index 9a298abc..4df60a15 100644
--- a/src/include/microhttpd.h
+++ b/src/include/microhttpd.h
@@ -579,14 +579,13 @@ typedef void (*MHD_ContentReaderFreeCallback) (void *cls);
  *         MHD_NO to abort the iteration
  */
 typedef int
-  (*MHD_IncrementalKeyValueIterator) (void *cls,
-                                      enum MHD_ValueKind kind,
-                                      const char *key,
-				      const char *filename,
-				      const char *content_type,
-				      const char *transfer_encoding,
-                                      const char *data,
-                                      size_t off, size_t size);
+  (*MHD_PostDataIterator) (void *cls,
+                           enum MHD_ValueKind kind,
+                           const char *key,
+                           const char *filename,
+                           const char *content_type,
+                           const char *transfer_encoding,
+                           const char *data, size_t off, size_t size);
 
 /**
  * Start a webserver on the given port.
@@ -822,8 +821,8 @@ const char *MHD_get_response_header (struct MHD_Response *response,
  *        internal buffering (used only for the parsing,
  *        specifically the parsing of the keys).  A
  *        tiny value (256-1024) should be sufficient.
- *        Do NOT use 0.
- * @param ikvi iterator to be called with the parsed data,
+ *        Do NOT use a value smaller than 256.
+ * @param iter iterator to be called with the parsed data,
  *        Must NOT be NULL.
  * @param cls first argument to ikvi
  * @return  NULL on error (out of memory, unsupported encoding),
@@ -832,8 +831,8 @@ const char *MHD_get_response_header (struct MHD_Response *response,
 struct MHD_PostProcessor *MHD_create_post_processor (struct MHD_Connection
                                                      *connection,
                                                      unsigned int buffer_size,
-                                                     MHD_IncrementalKeyValueIterator
-                                                     ikvi, void *cls);
+                                                     MHD_PostDataIterator
+                                                     iter, void *cls);
 
 /**
  * Parse and process POST data.
-- 
cgit v1.2.3