/* This file is part of libmicrohttpd (C) 2007 Daniel Pittman and Christian Grothoff This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /** * @file postprocessor.c * @brief Methods for parsing POST data * @author Christian Grothoff */ #include "internal.h" /** * States in the PP parser's state machine. */ enum PP_State { PP_Init = 0, PP_HaveKey = 1, PP_ExpectNewLine = 2, PP_ExpectNewLineR = 3, PP_ExpectNewLineN = 4, PP_Headers = 5, PP_SkipRNRN = 6, PP_SkipNRN = 7, PP_SkipRN = 8, PP_SkipN = 9, PP_ValueToBoundary = 10, PP_FinalDash = 11, PP_Error = 9999, }; /** * Internal state of the post-processor. */ struct MHD_PostProcessor { /** * The connection for which we are doing * POST processing. */ struct MHD_Connection *connection; /** * Function to call with POST data. */ MHD_PostDataIterator ikvi; /** * Extra argument to ikvi. */ void *cls; /** * Encoding as given by the headers of the * connection. */ const char *encoding; /** * Pointer to the name given in disposition. */ char *content_disposition; /** * Pointer to the (current) content type. */ char *content_type; /** * Pointer to the (current) filename. */ char *filename; /** * Pointer to the (current) encoding. */ char *transfer_encoding; /** * Unprocessed value bytes due to escape * sequences (URL-encoding only). */ char xbuf[8]; /** * Size of our buffer for the key. */ unsigned int buffer_size; /** * Current position in the key buffer. */ unsigned int buffer_pos; /** * Current position in xbuf. */ unsigned int xbuf_pos; /** * Current offset in the value being processed. */ unsigned int value_offset; /** * State of the parser. */ enum PP_State state; }; /** * Create a PostProcessor. * * A PostProcessor can be used to (incrementally) * parse the data portion of a POST request. * * @param connection the connection on which the POST is * happening (used to determine the POST format) * @param buffer_size maximum number of bytes to use for * internal buffering (used only for the parsing, * specifically the parsing of the keys). A * tiny value (256-1024) should be sufficient. * Do NOT use 0. * @param ikvi iterator to be called with the parsed data * @param cls first argument to ikvi * @return NULL on error (out of memory, unsupported encoding), * otherwise a PP handle */ struct MHD_PostProcessor * MHD_create_post_processor (struct MHD_Connection *connection, unsigned int buffer_size, MHD_PostDataIterator ikvi, void *cls) { struct MHD_PostProcessor *ret; const char *encoding; if ((buffer_size < 256) || (connection == NULL) || (ikvi == NULL)) abort (); encoding = MHD_lookup_connection_value (connection, MHD_HEADER_KIND, MHD_HTTP_HEADER_CONTENT_TYPE); if (encoding == NULL) return NULL; if ((0 != strcasecmp (MHD_HTTP_POST_ENCODING_FORM_URLENCODED, encoding)) && (0 != strcasecmp (MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA, encoding))) return NULL; ret = malloc (sizeof (struct MHD_PostProcessor) + buffer_size + 1); if (ret == NULL) return NULL; memset (ret, 0, sizeof (struct MHD_PostProcessor)); ret->connection = connection; ret->ikvi = ikvi; ret->cls = cls; ret->encoding = encoding; ret->buffer_size = buffer_size; ret->state = PP_Init; return ret; } /** * On-stack buffer that we use for un-escaping of the value. */ #define XBUF_SIZE 1024 /** * Process url-encoded POST data. */ static int post_process_urlencoded (struct MHD_PostProcessor *pp, const char *post_data, unsigned int post_data_len) { unsigned int equals; unsigned int amper; unsigned int poff; unsigned int xoff; unsigned int delta; char *buf; char xbuf[XBUF_SIZE + 1]; buf = (char *) &pp[1]; poff = 0; while (poff < post_data_len) { switch (pp->state) { case PP_Init: equals = 0; while ((equals + poff < post_data_len) && (post_data[equals + poff] != '=')) equals++; if (equals + pp->buffer_pos > pp->buffer_size) { pp->state = PP_Error; /* out of memory */ return MHD_NO; } memcpy (&buf[pp->buffer_pos], &post_data[poff], equals); pp->buffer_pos += equals; if (equals + poff == post_data_len) return MHD_YES; /* no '=' yet */ buf[pp->buffer_pos] = '\0'; /* 0-terminate key */ pp->buffer_pos = 0; /* reset for next key */ MHD_http_unescape (buf); poff += equals + 1; pp->state = PP_HaveKey; pp->value_offset = 0; break; case PP_HaveKey: /* obtain rest of value from previous iteration */ memcpy (xbuf, pp->xbuf, pp->xbuf_pos); xoff = pp->xbuf_pos; pp->xbuf_pos = 0; /* find last position in input buffer that is part of the value */ amper = 0; while ((amper + poff < post_data_len) && (post_data[amper + poff] != '&') && (post_data[amper + poff] != '\n') && (post_data[amper + poff] != '\r')) amper++; /* compute delta, the maximum number of bytes that we will be able to process right now (either amper-limited of xbuf-size limited) */ delta = amper; if (delta > XBUF_SIZE - xoff) delta = XBUF_SIZE - xoff; /* move input into processing buffer */ memcpy (&xbuf[xoff], &post_data[poff], delta); xoff += delta; poff += delta; /* find if escape sequence is at the end of the processing buffer; if so, exclude those from processing (reduce delta to point at end of processed region) */ delta = xoff; if ((delta > 0) && (xbuf[delta - 1] == '%')) delta--; else if ((delta > 1) && (xbuf[delta - 2] == '%')) delta -= 2; /* if we have an incomplete escape sequence, save it to pp->xbuf for later */ if (delta < xoff) { memcpy (pp->xbuf, &xbuf[delta], xoff - delta); pp->xbuf_pos = xoff - delta; xoff = delta; } /* If we have nothing to do (delta == 0) and not just because the value is empty (are waiting for more data), go for next iteration */ if ((xoff == 0) && (poff == post_data_len)) continue; /* unescape */ xbuf[xoff] = '\0'; /* 0-terminate in preparation */ MHD_http_unescape (xbuf); /* finally: call application! */ pp->ikvi (pp->cls, MHD_POSTDATA_KIND, (const char *) &pp[1], /* key */ NULL, NULL, NULL, xbuf, pp->value_offset, xoff); pp->value_offset += xoff; /* are we done with the value? */ if (poff < post_data_len) { /* we found the end of the value! */ pp->state = PP_Init; poff++; /* skip '&' or new-lines */ if ((post_data[poff - 1] == '\n') || (post_data[poff - 1] == '\r')) pp->state = PP_ExpectNewLine; } break; case PP_ExpectNewLine: if ((post_data[poff] == '\n') || (post_data[poff] == '\r')) { poff++; /* we are done, report error if we receive any more... */ pp->state = PP_Error; return MHD_YES; } return MHD_NO; case PP_Error: return MHD_NO; default: abort (); /* should never happen! */ } } return MHD_YES; } /** * If the given line matches the prefix, strdup the * rest of the line into the suffix ptr. * * @return MHD_YES if there was a match, MHD_NO if not */ static int try_match_header (const char *prefix, char *line, char **suffix) { if (0 == strncasecmp (prefix, line, strlen (prefix))) { *suffix = strdup (&line[strlen (prefix)]); return MHD_YES; } return MHD_NO; } /** * Decode multipart POST data. * * TODO: If the content-type is multipart/mixed, we do not do anything * special. However, we should probably break the individual values * apart and give them to the callback individually (will require some * additional states & state). * * TODO: this code has never been tested... * * See http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4 */ static int post_process_multipart (struct MHD_PostProcessor *pp, const char *post_data, unsigned int post_data_len) { char *buf; const char *boundary; unsigned int max; unsigned int ioff; unsigned int poff; unsigned int newline; unsigned int endquote; size_t blen; buf = (char *) &pp[1]; ioff = 0; poff = 0; boundary = &pp->encoding[strlen (MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA)]; if (NULL != strstr (boundary, "boundary=")) boundary = strstr (boundary, "boundary=") + strlen ("boundary="); else return MHD_NO; /* failed to determine boundary */ blen = strlen (boundary); if (blen * 2 + 2 > pp->buffer_size) return MHD_NO; /* (will be) out of memory */ while ((poff < post_data_len) || (pp->buffer_pos > ioff)) { /* first, move data to our internal buffer */ max = pp->buffer_size - pp->buffer_pos; if ((max < ioff) && (max < post_data_len)) { memmove (buf, &buf[ioff], pp->buffer_pos - ioff); pp->buffer_pos -= ioff; ioff = 0; max = pp->buffer_size - pp->buffer_pos; } if (max > post_data_len) max = post_data_len; memcpy (&buf[pp->buffer_pos], post_data, max); poff += max; pp->buffer_pos += max; switch (pp->state) { case PP_Init: /* we're looking for the boundary */ if (pp->buffer_pos < 2 + blen + ioff) goto END; if ((0 != memcmp ("--", &buf[ioff], 2)) || (0 != memcmp (&buf[ioff + 2], boundary, blen))) return MHD_NO; /* expected boundary */ /* remove boundary from buffer */ ioff += 2 + blen; /* next: start with headers */ pp->state = PP_ExpectNewLineR; break; case PP_ExpectNewLineR: if (buf[ioff] == '-') { /* last boundary ends with "--" */ ioff++; pp->state = PP_FinalDash; break; } if (buf[ioff] == '\r') { ioff++; pp->state = PP_ExpectNewLineN; break; } /* fall through! */ case PP_ExpectNewLineN: if (buf[ioff] == '\n') { ioff++; pp->state = PP_Headers; break; } return MHD_NO; case PP_Headers: newline = 0; while ((newline + ioff < pp->buffer_pos) && (buf[newline + ioff] != '\r') && (buf[newline + ioff] != '\n')) newline++; if (newline == pp->buffer_size) return MHD_NO; /* out of memory */ if (newline + ioff == pp->buffer_pos) { /* try to make more room */ memmove (buf, &buf[ioff], pp->buffer_pos - ioff); pp->buffer_pos -= ioff; ioff = 0; break; } if (newline == 0) { pp->state = PP_SkipRNRN; break; } buf[ioff + newline] = '\0'; if ((MHD_YES == try_match_header ("Content-Disposition: form-data; name=\"", &buf[ioff], &pp->content_disposition)) && (pp->content_disposition != NULL) && (0 < strlen (pp->content_disposition))) { /* find end-quote; then check if we also have a filename! */ endquote = 0; while ((pp->content_disposition[endquote] != '\"') && (pp->content_disposition[endquote] != '\0')) endquote++; pp->content_disposition[endquote++] = '\0'; /* remove end-quote */ if ((MHD_YES == try_match_header (" filename=", &pp->content_disposition[endquote], &pp->filename)) && (pp->filename != NULL) && (0 < strlen (pp->filename))) pp->filename[strlen (pp->filename) - 1] = '\0'; /* remove end-quote */ } try_match_header ("Content-Type: ", &buf[ioff], &pp->content_type); try_match_header ("Content-Transfer-Encoding: ", &buf[ioff], &pp->transfer_encoding); break; case PP_SkipRNRN: if (buf[ioff] == '\r') { ioff++; pp->state = PP_SkipNRN; break; } /* fall through! */ case PP_SkipNRN: if (buf[ioff] == '\n') { ioff++; pp->state = PP_SkipRN; break; } return MHD_NO; /* parse error */ case PP_SkipRN: if (buf[ioff] == '\r') { ioff++; pp->state = PP_SkipN; break; } /* fall through! */ case PP_SkipN: if (buf[ioff] == '\n') { ioff++; pp->state = PP_ValueToBoundary; pp->value_offset = 0; break; } return MHD_NO; /* parse error */ case PP_ValueToBoundary: /* all data in buf until the boundary (\r\n--+boundary) is part of the value */ newline = 0; while (1) { while ((newline + ioff + 4 < pp->buffer_pos) && (0 != memcmp ("\r\n--", &buf[newline + ioff], 4))) newline++; if (newline + blen + 4 > pp->buffer_size) { /* boundary not in sight -- process data, then make room for more! */ if (MHD_NO == pp->ikvi (pp->cls, MHD_POSTDATA_KIND, pp->content_disposition, pp->filename, pp->content_type, pp->transfer_encoding, &buf[ioff], pp->value_offset, newline)) { pp->state = PP_Error; break; } pp->value_offset += newline; ioff += newline; memmove (buf, &buf[ioff], pp->buffer_pos - ioff); pp->buffer_pos -= ioff; break; } if (newline + blen + 4 < pp->buffer_pos) { /* can check for boundary right now! */ if (0 == memcmp (&buf[newline + ioff + 4], boundary, blen)) { /* found: process data, then look for more */ if (MHD_NO == pp->ikvi (pp->cls, MHD_POSTDATA_KIND, pp->content_disposition, pp->filename, pp->content_type, pp->transfer_encoding, &buf[ioff], pp->value_offset, newline)) { pp->state = PP_Error; break; } /* clean up! */ if (pp->content_type != NULL) { free (pp->content_type); pp->content_type = NULL; } if (pp->content_disposition != NULL) { free (pp->content_disposition); pp->content_disposition = NULL; } if (pp->filename != NULL) { free (pp->filename); pp->filename = NULL; } if (pp->transfer_encoding != NULL) { free (pp->transfer_encoding); pp->transfer_encoding = NULL; } pp->value_offset = 0; ioff += newline + 2; /* skip data + new line */ pp->state = PP_Init; break; } /* not the boundary, look further! */ newline += 4; continue; } } break; case PP_FinalDash: if (buf[ioff] == '-') { /* last boundary ends with "--" */ ioff++; pp->state = PP_Error; break; } return MHD_NO; /* parse error */ case PP_Error: return MHD_NO; default: abort (); /* should never happen! */ } } END: memmove (buf, &buf[ioff], pp->buffer_pos - ioff); pp->buffer_pos -= ioff; return MHD_YES; } /** * Parse and process POST data. * Call this function when POST data is available * (usually during an MHD_AccessHandlerCallback) * with the upload_data and upload_data_size. * Whenever possible, this will then cause calls * to the MHD_IncrementalKeyValueIterator. * * @param pp the post processor * @param post_data post_data_len bytes of POST data * @param post_data_len length of post_data * @return MHD_YES on success, MHD_NO on error * (out-of-memory, iterator aborted, parse error) */ int MHD_post_process (struct MHD_PostProcessor *pp, const char *post_data, unsigned int post_data_len) { if (post_data_len == 0) return MHD_YES; if (0 == strcasecmp (MHD_HTTP_POST_ENCODING_FORM_URLENCODED, pp->encoding)) return post_process_urlencoded (pp, post_data, post_data_len); if (0 == strncasecmp (MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA, pp->encoding, strlen (MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA))) return post_process_multipart (pp, post_data, post_data_len); /* this should never be reached */ return MHD_NO; } /** * Release PostProcessor resources. */ void MHD_destroy_post_processor (struct MHD_PostProcessor *pp) { /* These internal strings need cleaning up since the post-processing may have been interrupted at any stage */ if (pp->content_type != NULL) free (pp->content_type); if (pp->content_disposition != NULL) free (pp->content_disposition); if (pp->filename != NULL) free (pp->filename); if (pp->transfer_encoding != NULL) free (pp->transfer_encoding); free (pp); } /* end of postprocessor.c */