From 4779f7d8eb9c7f6eec051a90a358d66f2dcfe9b0 Mon Sep 17 00:00:00 2001 From: Christian Grothoff Date: Sun, 9 Sep 2007 05:46:28 +0000 Subject: better name, code forgotten earlier --- src/daemon/daemontest_post.c | 8 +- src/daemon/postprocessor.c | 675 +++++++++++++++++++++++++++++++++++++++++++ src/include/microhttpd.h | 23 +- 3 files changed, 690 insertions(+), 16 deletions(-) create mode 100644 src/daemon/postprocessor.c diff --git a/src/daemon/daemontest_post.c b/src/daemon/daemontest_post.c index 87eca380..86fd67f9 100644 --- a/src/daemon/daemontest_post.c +++ b/src/daemon/daemontest_post.c @@ -72,10 +72,10 @@ static int post_iterator (void *cls, enum MHD_ValueKind kind, const char *key, - const char *filename, - const char *content_type, - const char *transfer_encoding, - const char *value, size_t off, size_t size) + const char *filename, + const char *content_type, + const char *transfer_encoding, + const char *value, size_t off, size_t size) { int *eok = cls; diff --git a/src/daemon/postprocessor.c b/src/daemon/postprocessor.c new file mode 100644 index 00000000..47d2efa6 --- /dev/null +++ b/src/daemon/postprocessor.c @@ -0,0 +1,675 @@ +/* + This file is part of libmicrohttpd + (C) 2007 Daniel Pittman and Christian Grothoff + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +/** + * @file postprocessor.c + * @brief Methods for parsing POST data + * @author Christian Grothoff + */ + +#include "internal.h" + +/** + * States in the PP parser's state machine. + */ +enum PP_State +{ + + PP_Init = 0, + + PP_HaveKey = 1, + + PP_ExpectNewLine = 2, + + PP_ExpectNewLineR = 3, + + PP_ExpectNewLineN = 4, + + PP_Headers = 5, + + PP_SkipRNRN = 6, + + PP_SkipNRN = 7, + + PP_SkipRN = 8, + + PP_SkipN = 9, + + PP_ValueToBoundary = 10, + + PP_FinalDash = 11, + + PP_Error = 9999, + + +}; + +/** + * Internal state of the post-processor. + */ +struct MHD_PostProcessor +{ + + /** + * The connection for which we are doing + * POST processing. + */ + struct MHD_Connection *connection; + + /** + * Function to call with POST data. + */ + MHD_PostDataIterator ikvi; + + /** + * Extra argument to ikvi. + */ + void *cls; + + /** + * Encoding as given by the headers of the + * connection. + */ + const char *encoding; + + /** + * Pointer to the name given in disposition. + */ + char *content_disposition; + + /** + * Pointer to the (current) content type. + */ + char *content_type; + + /** + * Pointer to the (current) filename. + */ + char *filename; + + /** + * Pointer to the (current) encoding. + */ + char *transfer_encoding; + + /** + * Unprocessed value bytes due to escape + * sequences (URL-encoding only). + */ + char xbuf[8]; + + /** + * Size of our buffer for the key. + */ + unsigned int buffer_size; + + /** + * Current position in the key buffer. + */ + unsigned int buffer_pos; + + /** + * Current position in xbuf. + */ + unsigned int xbuf_pos; + + /** + * Current offset in the value being processed. + */ + unsigned int value_offset; + + /** + * State of the parser. + */ + enum PP_State state; + +}; + + +/** + * Create a PostProcessor. + * + * A PostProcessor can be used to (incrementally) + * parse the data portion of a POST request. + * + * @param connection the connection on which the POST is + * happening (used to determine the POST format) + * @param buffer_size maximum number of bytes to use for + * internal buffering (used only for the parsing, + * specifically the parsing of the keys). A + * tiny value (256-1024) should be sufficient. + * Do NOT use 0. + * @param ikvi iterator to be called with the parsed data + * @param cls first argument to ikvi + * @return NULL on error (out of memory, unsupported encoding), + * otherwise a PP handle + */ +struct MHD_PostProcessor * +MHD_create_post_processor (struct MHD_Connection *connection, + unsigned int buffer_size, + MHD_PostDataIterator ikvi, void *cls) +{ + struct MHD_PostProcessor *ret; + const char *encoding; + + if ((buffer_size < 256) || (connection == NULL) || (ikvi == NULL)) + abort (); + encoding = MHD_lookup_connection_value (connection, + MHD_HEADER_KIND, + MHD_HTTP_HEADER_CONTENT_TYPE); + if (encoding == NULL) + return NULL; + if ((0 != strcasecmp (MHD_HTTP_POST_ENCODING_FORM_URLENCODED, + encoding)) && + (0 != strcasecmp (MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA, encoding))) + return NULL; + ret = malloc (sizeof (struct MHD_PostProcessor) + buffer_size + 1); + if (ret == NULL) + return NULL; + memset (ret, 0, sizeof (struct MHD_PostProcessor)); + ret->connection = connection; + ret->ikvi = ikvi; + ret->cls = cls; + ret->encoding = encoding; + ret->buffer_size = buffer_size; + ret->state = PP_Init; + return ret; +} + +/** + * On-stack buffer that we use for un-escaping of the value. + */ +#define XBUF_SIZE 1024 + +/** + * Process url-encoded POST data. + */ +static int +post_process_urlencoded (struct MHD_PostProcessor *pp, + const char *post_data, unsigned int post_data_len) +{ + unsigned int equals; + unsigned int amper; + unsigned int poff; + unsigned int xoff; + unsigned int delta; + char *buf; + char xbuf[XBUF_SIZE + 1]; + + buf = (char *) &pp[1]; + poff = 0; + while (poff < post_data_len) + { + switch (pp->state) + { + case PP_Init: + equals = 0; + while ((equals + poff < post_data_len) && + (post_data[equals + poff] != '=')) + equals++; + if (equals + pp->buffer_pos > pp->buffer_size) + { + pp->state = PP_Error; /* out of memory */ + return MHD_NO; + } + memcpy (&buf[pp->buffer_pos], &post_data[poff], equals); + pp->buffer_pos += equals; + if (equals + poff == post_data_len) + return MHD_YES; /* no '=' yet */ + buf[pp->buffer_pos] = '\0'; /* 0-terminate key */ + pp->buffer_pos = 0; /* reset for next key */ + MHD_http_unescape (buf); + poff += equals + 1; + pp->state = PP_HaveKey; + pp->value_offset = 0; + break; + case PP_HaveKey: + /* obtain rest of value from previous iteration */ + memcpy (xbuf, pp->xbuf, pp->xbuf_pos); + xoff = pp->xbuf_pos; + pp->xbuf_pos = 0; + + /* find last position in input buffer that is part of the value */ + amper = 0; + while ((amper + poff < post_data_len) && + (post_data[amper + poff] != '&') && + (post_data[amper + poff] != '\n') && + (post_data[amper + poff] != '\r')) + amper++; + + /* compute delta, the maximum number of bytes that we will be able to + process right now (either amper-limited of xbuf-size limited) */ + delta = amper; + if (delta > XBUF_SIZE - xoff) + delta = XBUF_SIZE - xoff; + + /* move input into processing buffer */ + memcpy (&xbuf[xoff], &post_data[poff], delta); + xoff += delta; + poff += delta; + + /* find if escape sequence is at the end of the processing buffer; + if so, exclude those from processing (reduce delta to point at + end of processed region) */ + delta = xoff; + if ((delta > 0) && (xbuf[delta - 1] == '%')) + delta--; + else if ((delta > 1) && (xbuf[delta - 2] == '%')) + delta -= 2; + + /* if we have an incomplete escape sequence, save it to + pp->xbuf for later */ + if (delta < xoff) + { + memcpy (pp->xbuf, &xbuf[delta], xoff - delta); + pp->xbuf_pos = xoff - delta; + xoff = delta; + } + + /* If we have nothing to do (delta == 0) and + not just because the value is empty (are + waiting for more data), go for next iteration */ + if ((xoff == 0) && (poff == post_data_len)) + continue; + + /* unescape */ + xbuf[xoff] = '\0'; /* 0-terminate in preparation */ + MHD_http_unescape (xbuf); + + /* finally: call application! */ + pp->ikvi (pp->cls, MHD_POSTDATA_KIND, (const char *) &pp[1], /* key */ + NULL, NULL, NULL, xbuf, pp->value_offset, xoff); + pp->value_offset += xoff; + + /* are we done with the value? */ + if (poff < post_data_len) + { + /* we found the end of the value! */ + pp->state = PP_Init; + poff++; /* skip '&' or new-lines */ + + if ((post_data[poff - 1] == '\n') || + (post_data[poff - 1] == '\r')) + pp->state = PP_ExpectNewLine; + } + break; + case PP_ExpectNewLine: + if ((post_data[poff] == '\n') || (post_data[poff] == '\r')) + { + poff++; + /* we are done, report error if we receive any more... */ + pp->state = PP_Error; + return MHD_YES; + } + return MHD_NO; + case PP_Error: + return MHD_NO; + default: + abort (); /* should never happen! */ + } + } + return MHD_YES; +} + +/** + * If the given line matches the prefix, strdup the + * rest of the line into the suffix ptr. + * + * @return MHD_YES if there was a match, MHD_NO if not + */ +static int +try_match_header (const char *prefix, char *line, char **suffix) +{ + if (0 == strncasecmp (prefix, line, strlen (prefix))) + { + *suffix = strdup (&line[strlen (prefix)]); + return MHD_YES; + } + return MHD_NO; +} + +/** + * Decode multipart POST data. + * + * TODO: If the content-type is multipart/mixed, we do not do anything + * special. However, we should probably break the individual values + * apart and give them to the callback individually (will require some + * additional states & state). + * + * TODO: this code has never been tested... + * + * See http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4 + */ +static int +post_process_multipart (struct MHD_PostProcessor *pp, + const char *post_data, unsigned int post_data_len) +{ + char *buf; + const char *boundary; + unsigned int max; + unsigned int ioff; + unsigned int poff; + unsigned int newline; + unsigned int endquote; + size_t blen; + + buf = (char *) &pp[1]; + ioff = 0; + poff = 0; + boundary = + &pp->encoding[strlen (MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA)]; + if (NULL != strstr (boundary, "boundary=")) + boundary = strstr (boundary, "boundary=") + strlen ("boundary="); + else + return MHD_NO; /* failed to determine boundary */ + blen = strlen (boundary); + if (blen * 2 + 2 > pp->buffer_size) + return MHD_NO; /* (will be) out of memory */ + while ((poff < post_data_len) || (pp->buffer_pos > ioff)) + { + /* first, move data to our internal buffer */ + max = pp->buffer_size - pp->buffer_pos; + if ((max < ioff) && (max < post_data_len)) + { + memmove (buf, &buf[ioff], pp->buffer_pos - ioff); + pp->buffer_pos -= ioff; + ioff = 0; + max = pp->buffer_size - pp->buffer_pos; + } + if (max > post_data_len) + max = post_data_len; + memcpy (&buf[pp->buffer_pos], post_data, max); + poff += max; + pp->buffer_pos += max; + + switch (pp->state) + { + case PP_Init: + /* we're looking for the boundary */ + if (pp->buffer_pos < 2 + blen + ioff) + goto END; + if ((0 != memcmp ("--", &buf[ioff], 2)) || + (0 != memcmp (&buf[ioff + 2], boundary, blen))) + return MHD_NO; /* expected boundary */ + + /* remove boundary from buffer */ + ioff += 2 + blen; + + /* next: start with headers */ + pp->state = PP_ExpectNewLineR; + break; + case PP_ExpectNewLineR: + if (buf[ioff] == '-') + { + /* last boundary ends with "--" */ + ioff++; + pp->state = PP_FinalDash; + break; + } + if (buf[ioff] == '\r') + { + ioff++; + pp->state = PP_ExpectNewLineN; + break; + } + /* fall through! */ + case PP_ExpectNewLineN: + if (buf[ioff] == '\n') + { + ioff++; + pp->state = PP_Headers; + break; + } + return MHD_NO; + case PP_Headers: + newline = 0; + while ((newline + ioff < pp->buffer_pos) && + (buf[newline + ioff] != '\r') && + (buf[newline + ioff] != '\n')) + newline++; + if (newline == pp->buffer_size) + return MHD_NO; /* out of memory */ + if (newline + ioff == pp->buffer_pos) + { + /* try to make more room */ + memmove (buf, &buf[ioff], pp->buffer_pos - ioff); + pp->buffer_pos -= ioff; + ioff = 0; + break; + } + if (newline == 0) + { + pp->state = PP_SkipRNRN; + break; + } + buf[ioff + newline] = '\0'; + if ((MHD_YES + == try_match_header ("Content-Disposition: form-data; name=\"", + &buf[ioff], + &pp->content_disposition)) && + (pp->content_disposition != NULL) && + (0 < strlen (pp->content_disposition))) + { + /* find end-quote; then check if we also have a filename! */ + endquote = 0; + while ((pp->content_disposition[endquote] != '\"') && + (pp->content_disposition[endquote] != '\0')) + endquote++; + pp->content_disposition[endquote++] = '\0'; /* remove end-quote */ + if ((MHD_YES + == try_match_header (" filename=", + &pp->content_disposition[endquote], + &pp->filename)) && + (pp->filename != NULL) && (0 < strlen (pp->filename))) + pp->filename[strlen (pp->filename) - 1] = '\0'; /* remove end-quote */ + } + try_match_header ("Content-Type: ", &buf[ioff], &pp->content_type); + try_match_header ("Content-Transfer-Encoding: ", + &buf[ioff], &pp->transfer_encoding); + break; + case PP_SkipRNRN: + if (buf[ioff] == '\r') + { + ioff++; + pp->state = PP_SkipNRN; + break; + } + /* fall through! */ + case PP_SkipNRN: + if (buf[ioff] == '\n') + { + ioff++; + pp->state = PP_SkipRN; + break; + } + return MHD_NO; /* parse error */ + case PP_SkipRN: + if (buf[ioff] == '\r') + { + ioff++; + pp->state = PP_SkipN; + break; + } + /* fall through! */ + case PP_SkipN: + if (buf[ioff] == '\n') + { + ioff++; + pp->state = PP_ValueToBoundary; + pp->value_offset = 0; + break; + } + return MHD_NO; /* parse error */ + case PP_ValueToBoundary: + /* all data in buf until the boundary + (\r\n--+boundary) is part of the value */ + newline = 0; + while (1) + { + while ((newline + ioff + 4 < pp->buffer_pos) && + (0 != memcmp ("\r\n--", &buf[newline + ioff], 4))) + newline++; + if (newline + blen + 4 > pp->buffer_size) + { + /* boundary not in sight -- + process data, then make room for more! */ + if (MHD_NO == pp->ikvi (pp->cls, + MHD_POSTDATA_KIND, + pp->content_disposition, + pp->filename, + pp->content_type, + pp->transfer_encoding, + &buf[ioff], + pp->value_offset, newline)) + { + pp->state = PP_Error; + break; + } + pp->value_offset += newline; + ioff += newline; + memmove (buf, &buf[ioff], pp->buffer_pos - ioff); + pp->buffer_pos -= ioff; + break; + } + if (newline + blen + 4 < pp->buffer_pos) + { + /* can check for boundary right now! */ + if (0 == memcmp (&buf[newline + ioff + 4], boundary, blen)) + { + /* found: process data, then look for more */ + if (MHD_NO == pp->ikvi (pp->cls, + MHD_POSTDATA_KIND, + pp->content_disposition, + pp->filename, + pp->content_type, + pp->transfer_encoding, + &buf[ioff], + pp->value_offset, newline)) + { + pp->state = PP_Error; + break; + } + + /* clean up! */ + if (pp->content_type != NULL) + { + free (pp->content_type); + pp->content_type = NULL; + } + if (pp->content_disposition != NULL) + { + free (pp->content_disposition); + pp->content_disposition = NULL; + } + if (pp->filename != NULL) + { + free (pp->filename); + pp->filename = NULL; + } + if (pp->transfer_encoding != NULL) + { + free (pp->transfer_encoding); + pp->transfer_encoding = NULL; + } + pp->value_offset = 0; + ioff += newline + 2; /* skip data + new line */ + pp->state = PP_Init; + break; + } + /* not the boundary, look further! */ + newline += 4; + continue; + } + + + } + break; + case PP_FinalDash: + if (buf[ioff] == '-') + { + /* last boundary ends with "--" */ + ioff++; + pp->state = PP_Error; + break; + } + return MHD_NO; /* parse error */ + case PP_Error: + return MHD_NO; + default: + abort (); /* should never happen! */ + + } + } +END: + memmove (buf, &buf[ioff], pp->buffer_pos - ioff); + pp->buffer_pos -= ioff; + return MHD_YES; +} + +/** + * Parse and process POST data. + * Call this function when POST data is available + * (usually during an MHD_AccessHandlerCallback) + * with the upload_data and upload_data_size. + * Whenever possible, this will then cause calls + * to the MHD_IncrementalKeyValueIterator. + * + * @param pp the post processor + * @param post_data post_data_len bytes of POST data + * @param post_data_len length of post_data + * @return MHD_YES on success, MHD_NO on error + * (out-of-memory, iterator aborted, parse error) + */ +int +MHD_post_process (struct MHD_PostProcessor *pp, + const char *post_data, unsigned int post_data_len) +{ + if (post_data_len == 0) + return MHD_YES; + if (0 == strcasecmp (MHD_HTTP_POST_ENCODING_FORM_URLENCODED, pp->encoding)) + return post_process_urlencoded (pp, post_data, post_data_len); + if (0 == + strncasecmp (MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA, pp->encoding, + strlen (MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA))) + return post_process_multipart (pp, post_data, post_data_len); + /* this should never be reached */ + return MHD_NO; +} + +/** + * Release PostProcessor resources. + */ +void +MHD_destroy_post_processor (struct MHD_PostProcessor *pp) +{ + /* These internal strings need cleaning up since + the post-processing may have been interrupted + at any stage */ + if (pp->content_type != NULL) + free (pp->content_type); + if (pp->content_disposition != NULL) + free (pp->content_disposition); + if (pp->filename != NULL) + free (pp->filename); + if (pp->transfer_encoding != NULL) + free (pp->transfer_encoding); + free (pp); +} + +/* end of postprocessor.c */ diff --git a/src/include/microhttpd.h b/src/include/microhttpd.h index 9a298abc..4df60a15 100644 --- a/src/include/microhttpd.h +++ b/src/include/microhttpd.h @@ -579,14 +579,13 @@ typedef void (*MHD_ContentReaderFreeCallback) (void *cls); * MHD_NO to abort the iteration */ typedef int - (*MHD_IncrementalKeyValueIterator) (void *cls, - enum MHD_ValueKind kind, - const char *key, - const char *filename, - const char *content_type, - const char *transfer_encoding, - const char *data, - size_t off, size_t size); + (*MHD_PostDataIterator) (void *cls, + enum MHD_ValueKind kind, + const char *key, + const char *filename, + const char *content_type, + const char *transfer_encoding, + const char *data, size_t off, size_t size); /** * Start a webserver on the given port. @@ -822,8 +821,8 @@ const char *MHD_get_response_header (struct MHD_Response *response, * internal buffering (used only for the parsing, * specifically the parsing of the keys). A * tiny value (256-1024) should be sufficient. - * Do NOT use 0. - * @param ikvi iterator to be called with the parsed data, + * Do NOT use a value smaller than 256. + * @param iter iterator to be called with the parsed data, * Must NOT be NULL. * @param cls first argument to ikvi * @return NULL on error (out of memory, unsupported encoding), @@ -832,8 +831,8 @@ const char *MHD_get_response_header (struct MHD_Response *response, struct MHD_PostProcessor *MHD_create_post_processor (struct MHD_Connection *connection, unsigned int buffer_size, - MHD_IncrementalKeyValueIterator - ikvi, void *cls); + MHD_PostDataIterator + iter, void *cls); /** * Parse and process POST data. -- cgit v1.2.3