From efcb5fd20fb7cb7dfd93fa4ffc7bbfef8802ba1c Mon Sep 17 00:00:00 2001 From: wang_yue111 <648774160@qq.com> Date: Tue, 27 Oct 2020 11:58:40 +0800 Subject: [PATCH] Fix CVE-2020-12673 CVE-2020-12674 CVE-2020-12100 --- CVE-2020-12100-1.patch | 3283 ++++++++++++++++++++++++++++++++++++++++ CVE-2020-12100-2.patch | 70 + CVE-2020-12673.patch | 31 + CVE-2020-12674.patch | 22 + dovecot.spec | 9 +- 5 files changed, 3414 insertions(+), 1 deletion(-) create mode 100644 CVE-2020-12100-1.patch create mode 100644 CVE-2020-12100-2.patch create mode 100644 CVE-2020-12673.patch create mode 100644 CVE-2020-12674.patch diff --git a/CVE-2020-12100-1.patch b/CVE-2020-12100-1.patch new file mode 100644 index 0000000..59ba23a --- /dev/null +++ b/CVE-2020-12100-1.patch @@ -0,0 +1,3283 @@ +From 0ffcae6adfe1be48124429484a39acdf32979549 Mon Sep 17 00:00:00 2001 +From: Timo Sirainen +Date: Thu, 23 Apr 2020 11:14:04 +0300 +Subject: [PATCH 01/15] lib-mail: test-message-parser - Add another test for + boundary matching + +--- + src/lib-mail/test-message-parser.c | 71 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 71 insertions(+) + +diff --git a/src/lib-mail/test-message-parser.c b/src/lib-mail/test-message-parser.c +index 4f4d684083..529a36d59c 100644 +--- a/src/lib-mail/test-message-parser.c ++++ b/src/lib-mail/test-message-parser.c +@@ -613,6 +613,76 @@ static const char input_msg[] = + test_end(); + } + ++static void test_message_parser_continuing_mime_boundary_reverse(void) ++{ ++static const char input_msg[] = ++"Content-Type: multipart/mixed; boundary=\"ab\"\n" ++"\n" ++"--ab\n" ++"Content-Type: multipart/mixed; boundary=\"a\"\n" ++"\n" ++"--a\n" ++"Content-Type: text/plain\n" ++"\n" ++"body\n" ++"--ab\n" ++"Content-Type: text/html\n" ++"\n" ++"body2\n"; ++ struct message_parser_ctx *parser; ++ struct istream *input; ++ struct message_part *parts; ++ struct message_block block; ++ pool_t pool; ++ int ret; ++ ++ test_begin("message parser continuing mime boundary reverse"); ++ pool = pool_alloconly_create("message parser", 10240); ++ input = test_istream_create(input_msg); ++ ++ parser = message_parser_init(pool, input, 0, 0); ++ while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; ++ test_assert(ret < 0); ++ message_parser_deinit(&parser, &parts); ++ ++ test_assert(parts->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); ++ test_assert(parts->header_size.lines == 2); ++ test_assert(parts->header_size.physical_size == 46); ++ test_assert(parts->header_size.virtual_size == 46+2); ++ test_assert(parts->body_size.lines == 11); ++ test_assert(parts->body_size.physical_size == 121); ++ test_assert(parts->body_size.virtual_size == 121+11); ++ test_assert(parts->children->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); ++ test_assert(parts->children->physical_pos == 51); ++ test_assert(parts->children->header_size.lines == 2); ++ test_assert(parts->children->header_size.physical_size == 45); ++ test_assert(parts->children->header_size.virtual_size == 45+2); ++ test_assert(parts->children->body_size.lines == 3); ++ test_assert(parts->children->body_size.physical_size == 34); ++ test_assert(parts->children->body_size.virtual_size == 34+3); ++ test_assert(parts->children->children->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); ++ test_assert(parts->children->children->physical_pos == 100); ++ test_assert(parts->children->children->header_size.lines == 2); ++ test_assert(parts->children->children->header_size.physical_size == 26); ++ test_assert(parts->children->children->header_size.virtual_size == 26+2); ++ test_assert(parts->children->children->body_size.lines == 0); ++ test_assert(parts->children->children->body_size.physical_size == 4); ++ test_assert(parts->children->children->body_size.virtual_size == 4); ++ test_assert(parts->children->next->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); ++ test_assert(parts->children->next->physical_pos == 136); ++ test_assert(parts->children->next->header_size.lines == 2); ++ test_assert(parts->children->next->header_size.physical_size == 25); ++ test_assert(parts->children->next->header_size.virtual_size == 25+2); ++ test_assert(parts->children->next->body_size.lines == 1); ++ test_assert(parts->children->next->body_size.physical_size == 6); ++ test_assert(parts->children->next->body_size.virtual_size == 6+1); ++ ++ test_parsed_parts(input, parts); ++ i_stream_unref(&input); ++ pool_unref(&pool); ++ test_end(); ++} ++ + static void test_message_parser_no_eoh(void) + { + static const char input_msg[] = "a:b\n"; +@@ -653,6 +723,7 @@ int main(void) + test_message_parser_garbage_suffix_mime_boundary, + test_message_parser_continuing_mime_boundary, + test_message_parser_continuing_truncated_mime_boundary, ++ test_message_parser_continuing_mime_boundary_reverse, + test_message_parser_no_eoh, + NULL + }; +-- +2.11.0 + +From 6defb282bec6b17bd1c0e2f62a4d365b453c39ef Mon Sep 17 00:00:00 2001 +From: Timo Sirainen +Date: Thu, 23 Apr 2020 11:27:14 +0300 +Subject: [PATCH 02/15] lib-mail: test-message-parser - Test that + children_count is correct + +--- + src/lib-mail/test-message-parser.c | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +diff --git a/src/lib-mail/test-message-parser.c b/src/lib-mail/test-message-parser.c +index 529a36d59c..c275707265 100644 +--- a/src/lib-mail/test-message-parser.c ++++ b/src/lib-mail/test-message-parser.c +@@ -59,6 +59,7 @@ static bool msg_parts_cmp(struct message_part *p1, struct message_part *p2) + p1->body_size.physical_size != p2->body_size.physical_size || + p1->body_size.virtual_size != p2->body_size.virtual_size || + p1->body_size.lines != p2->body_size.lines || ++ p1->children_count != p2->children_count || + p1->flags != p2->flags) + return FALSE; + +@@ -195,6 +196,7 @@ static const char input_msg[] = + message_parser_deinit(&parser, &parts); + + test_assert((parts->flags & MESSAGE_PART_FLAG_MULTIPART) != 0); ++ test_assert(parts->children_count == 4); + test_assert(parts->header_size.lines == 2); + test_assert(parts->header_size.physical_size == 48); + test_assert(parts->header_size.virtual_size == 48+2); +@@ -218,6 +220,7 @@ static const char input_msg[] = + test_assert(parts->children->next->next->next->header_size.virtual_size == 23); + test_assert(parts->children->next->next->next->header_size.lines == 0); + for (part = parts->children; part != NULL; part = part->next) { ++ test_assert(part->children_count == 0); + test_assert(part->body_size.physical_size == 0); + test_assert(part->body_size.virtual_size == 0); + } +@@ -258,6 +261,7 @@ static const char input_msg[] = + message_parser_deinit(&parser, &parts); + + test_assert(parts->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); ++ test_assert(parts->children_count == 2); + test_assert(parts->header_size.lines == 2); + test_assert(parts->header_size.physical_size == 46); + test_assert(parts->header_size.virtual_size == 46+2); +@@ -265,6 +269,7 @@ static const char input_msg[] = + test_assert(parts->body_size.physical_size == 86); + test_assert(parts->body_size.virtual_size == 86+8); + ++ test_assert(parts->children->children_count == 0); + test_assert(parts->children->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->children->physical_pos == 51); + test_assert(parts->children->header_size.lines == 1); +@@ -274,6 +279,7 @@ static const char input_msg[] = + test_assert(parts->children->body_size.physical_size == 0); + test_assert(parts->children->children == NULL); + ++ test_assert(parts->children->next->children_count == 0); + test_assert(parts->children->next->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->children->next->physical_pos == 101); + test_assert(parts->children->next->header_size.lines == 2); +@@ -310,6 +316,7 @@ static const char input_msg[] = + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); + ++ test_assert(parts->children_count == 0); + test_assert(parts->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->header_size.lines == 1); + test_assert(parts->header_size.physical_size == 45); +@@ -347,6 +354,7 @@ static const char input_msg[] = + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); + ++ test_assert(parts->children_count == 0); + test_assert(parts->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->header_size.lines == 2); + test_assert(parts->header_size.physical_size == 46); +@@ -391,6 +399,7 @@ static const char input_msg[] = + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); + ++ test_assert(parts->children_count == 2); + test_assert(parts->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->header_size.lines == 2); + test_assert(parts->header_size.physical_size == 45); +@@ -398,6 +407,7 @@ static const char input_msg[] = + test_assert(parts->body_size.lines == 7); + test_assert(parts->body_size.physical_size == 84); + test_assert(parts->body_size.virtual_size == 84+7); ++ test_assert(parts->children->children_count == 1); + test_assert(parts->children->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->children->physical_pos == 49); + test_assert(parts->children->header_size.lines == 2); +@@ -406,6 +416,7 @@ static const char input_msg[] = + test_assert(parts->children->body_size.lines == 4); + test_assert(parts->children->body_size.physical_size == 35); + test_assert(parts->children->body_size.virtual_size == 35+4); ++ test_assert(parts->children->children->children_count == 0); + test_assert(parts->children->children->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->children->children->physical_pos == 98); + test_assert(parts->children->children->header_size.lines == 2); +@@ -449,6 +460,7 @@ static const char input_msg[] = + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); + ++ test_assert(parts->children_count == 2); + test_assert(parts->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->header_size.lines == 2); + test_assert(parts->header_size.physical_size == 45); +@@ -456,6 +468,7 @@ static const char input_msg[] = + test_assert(parts->body_size.lines == 7); + test_assert(parts->body_size.physical_size == 86); + test_assert(parts->body_size.virtual_size == 86+7); ++ test_assert(parts->children->children_count == 1); + test_assert(parts->children->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->children->physical_pos == 50); + test_assert(parts->children->header_size.lines == 2); +@@ -464,6 +477,7 @@ static const char input_msg[] = + test_assert(parts->children->body_size.lines == 4); + test_assert(parts->children->body_size.physical_size == 36); + test_assert(parts->children->body_size.virtual_size == 36+4); ++ test_assert(parts->children->children->children_count == 0); + test_assert(parts->children->children->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->children->children->physical_pos == 100); + test_assert(parts->children->children->header_size.lines == 2); +@@ -507,6 +521,7 @@ static const char input_msg[] = + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); + ++ test_assert(parts->children_count == 2); + test_assert(parts->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->header_size.lines == 2); + test_assert(parts->header_size.physical_size == 45); +@@ -514,6 +529,7 @@ static const char input_msg[] = + test_assert(parts->body_size.lines == 7); + test_assert(parts->body_size.physical_size == 86); + test_assert(parts->body_size.virtual_size == 86+7); ++ test_assert(parts->children->children_count == 1); + test_assert(parts->children->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->children->physical_pos == 49); + test_assert(parts->children->header_size.lines == 2); +@@ -522,6 +538,7 @@ static const char input_msg[] = + test_assert(parts->children->body_size.lines == 4); + test_assert(parts->children->body_size.physical_size == 36); + test_assert(parts->children->body_size.virtual_size == 36+4); ++ test_assert(parts->children->children->children_count == 0); + test_assert(parts->children->children->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->children->children->physical_pos == 100); + test_assert(parts->children->children->header_size.lines == 2); +@@ -567,6 +584,7 @@ static const char input_msg[] = + message_parser_deinit(&parser, &parts); + + part = parts; ++ test_assert(part->children_count == 3); + test_assert(part->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(part->header_size.lines == 2); + test_assert(part->header_size.physical_size == 45); +@@ -576,6 +594,7 @@ static const char input_msg[] = + test_assert(part->body_size.virtual_size == 112+9); + + part = parts->children; ++ test_assert(part->children_count == 0); + test_assert(part->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(part->physical_pos == 49); + test_assert(part->header_size.lines == 1); +@@ -589,6 +608,7 @@ static const char input_msg[] = + we could make it, but it would complicate the message-parser even + more. */ + part = parts->children->next; ++ test_assert(part->children_count == 0); + test_assert(part->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(part->physical_pos == 117); + test_assert(part->header_size.lines == 1); +@@ -599,6 +619,7 @@ static const char input_msg[] = + test_assert(part->children == NULL); + + part = parts->children->next->next; ++ test_assert(part->children_count == 0); + test_assert(part->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(part->header_size.lines == 0); + test_assert(part->header_size.physical_size == 0); +@@ -645,6 +666,7 @@ static const char input_msg[] = + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); + ++ test_assert(parts->children_count == 3); + test_assert(parts->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->header_size.lines == 2); + test_assert(parts->header_size.physical_size == 46); +@@ -652,6 +674,7 @@ static const char input_msg[] = + test_assert(parts->body_size.lines == 11); + test_assert(parts->body_size.physical_size == 121); + test_assert(parts->body_size.virtual_size == 121+11); ++ test_assert(parts->children->children_count == 1); + test_assert(parts->children->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->children->physical_pos == 51); + test_assert(parts->children->header_size.lines == 2); +@@ -660,6 +683,7 @@ static const char input_msg[] = + test_assert(parts->children->body_size.lines == 3); + test_assert(parts->children->body_size.physical_size == 34); + test_assert(parts->children->body_size.virtual_size == 34+3); ++ test_assert(parts->children->children->children_count == 0); + test_assert(parts->children->children->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->children->children->physical_pos == 100); + test_assert(parts->children->children->header_size.lines == 2); +@@ -668,6 +692,7 @@ static const char input_msg[] = + test_assert(parts->children->children->body_size.lines == 0); + test_assert(parts->children->children->body_size.physical_size == 4); + test_assert(parts->children->children->body_size.virtual_size == 4); ++ test_assert(parts->children->next->children_count == 0); + test_assert(parts->children->next->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->children->next->physical_pos == 136); + test_assert(parts->children->next->header_size.lines == 2); +-- +2.11.0 + +From 354d5b43a93b6f2ba80c7fc507b75fb1dc4c8df5 Mon Sep 17 00:00:00 2001 +From: Timo Sirainen +Date: Thu, 23 Apr 2020 11:47:18 +0300 +Subject: [PATCH 03/15] lib-mail: Move message_parser_init_from_parts() + handling to its own file + +This helps to see what code they have in common. +--- + src/lib-mail/Makefile.am | 4 +- + src/lib-mail/message-parser-from-parts.c | 366 +++++++++++++++++++++++++++ + src/lib-mail/message-parser-private.h | 55 +++++ + src/lib-mail/message-parser.c | 410 +------------------------------ + 4 files changed, 428 insertions(+), 407 deletions(-) + create mode 100644 src/lib-mail/message-parser-from-parts.c + create mode 100644 src/lib-mail/message-parser-private.h + +diff --git a/src/lib-mail/Makefile.am b/src/lib-mail/Makefile.am +index 8fe43d69d0..57d9e2b8c4 100644 +--- a/src/lib-mail/Makefile.am ++++ b/src/lib-mail/Makefile.am +@@ -28,6 +28,7 @@ libmail_la_SOURCES = \ + message-header-parser.c \ + message-id.c \ + message-parser.c \ ++ message-parser-from-parts.c \ + message-part.c \ + message-part-data.c \ + message-part-serialize.c \ +@@ -42,7 +43,8 @@ libmail_la_SOURCES = \ + rfc822-parser.c + + noinst_HEADERS = \ +- html-entities.h ++ html-entities.h \ ++ message-parser-private.h + + headers = \ + istream-attachment-connector.h \ +diff --git a/src/lib-mail/message-parser-from-parts.c b/src/lib-mail/message-parser-from-parts.c +new file mode 100644 +index 0000000000..b23055ab9b +--- /dev/null ++++ b/src/lib-mail/message-parser-from-parts.c +@@ -0,0 +1,366 @@ ++/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */ ++ ++#include "lib.h" ++#include "istream.h" ++#include "message-parser-private.h" ++ ++static int preparsed_parse_epilogue_init(struct message_parser_ctx *ctx, ++ struct message_block *block_r); ++static int preparsed_parse_next_header_init(struct message_parser_ctx *ctx, ++ struct message_block *block_r); ++ ++static int preparsed_parse_eof(struct message_parser_ctx *ctx ATTR_UNUSED, ++ struct message_block *block_r ATTR_UNUSED) ++{ ++ return -1; ++} ++ ++static void preparsed_skip_to_next(struct message_parser_ctx *ctx) ++{ ++ ctx->parse_next_block = preparsed_parse_next_header_init; ++ while (ctx->part != NULL) { ++ if (ctx->part->next != NULL) { ++ ctx->part = ctx->part->next; ++ break; ++ } ++ ++ /* parse epilogue of multipart parent if requested */ ++ if (ctx->part->parent != NULL && ++ (ctx->part->parent->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 && ++ (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) != 0) { ++ /* check for presence of epilogue */ ++ uoff_t part_end = ctx->part->physical_pos + ++ ctx->part->header_size.physical_size + ++ ctx->part->body_size.physical_size; ++ uoff_t parent_end = ctx->part->parent->physical_pos + ++ ctx->part->parent->header_size.physical_size + ++ ctx->part->parent->body_size.physical_size; ++ ++ if (parent_end > part_end) { ++ ctx->parse_next_block = preparsed_parse_epilogue_init; ++ break; ++ } ++ } ++ ctx->part = ctx->part->parent; ++ } ++ if (ctx->part == NULL) ++ ctx->parse_next_block = preparsed_parse_eof; ++} ++ ++static int preparsed_parse_body_finish(struct message_parser_ctx *ctx, ++ struct message_block *block_r) ++{ ++ i_stream_skip(ctx->input, ctx->skip); ++ ctx->skip = 0; ++ ++ preparsed_skip_to_next(ctx); ++ return ctx->parse_next_block(ctx, block_r); ++} ++ ++static int preparsed_parse_prologue_finish(struct message_parser_ctx *ctx, ++ struct message_block *block_r) ++{ ++ i_stream_skip(ctx->input, ctx->skip); ++ ctx->skip = 0; ++ ++ ctx->parse_next_block = preparsed_parse_next_header_init; ++ ctx->part = ctx->part->children; ++ return ctx->parse_next_block(ctx, block_r); ++} ++ ++static int preparsed_parse_body_more(struct message_parser_ctx *ctx, ++ struct message_block *block_r) ++{ ++ uoff_t end_offset = ctx->part->physical_pos + ++ ctx->part->header_size.physical_size + ++ ctx->part->body_size.physical_size; ++ bool full; ++ int ret; ++ ++ if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0) ++ return ret; ++ ++ if (ctx->input->v_offset + block_r->size >= end_offset) { ++ block_r->size = end_offset - ctx->input->v_offset; ++ ctx->parse_next_block = preparsed_parse_body_finish; ++ } ++ ctx->skip = block_r->size; ++ return 1; ++} ++ ++static int preparsed_parse_prologue_more(struct message_parser_ctx *ctx, ++ struct message_block *block_r) ++{ ++ uoff_t boundary_min_start, end_offset; ++ const unsigned char *cur; ++ bool full; ++ int ret; ++ ++ i_assert(ctx->part->children != NULL); ++ end_offset = ctx->part->children->physical_pos; ++ ++ if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0) ++ return ret; ++ ++ if (ctx->input->v_offset + block_r->size >= end_offset) { ++ /* we've got the full prologue: clip off the initial boundary */ ++ block_r->size = end_offset - ctx->input->v_offset; ++ cur = block_r->data + block_r->size - 1; ++ ++ /* [\r]\n--boundary[\r]\n */ ++ if (block_r->size < 5 || *cur != '\n') { ++ ctx->broken_reason = "Prologue boundary end not at expected position"; ++ return -1; ++ } ++ ++ cur--; ++ if (*cur == '\r') cur--; ++ ++ /* find newline just before boundary */ ++ for (; cur >= block_r->data; cur--) { ++ if (*cur == '\n') break; ++ } ++ ++ if (cur[0] != '\n' || cur[1] != '-' || cur[2] != '-') { ++ ctx->broken_reason = "Prologue boundary beginning not at expected position"; ++ return -1; ++ } ++ ++ if (cur != block_r->data && cur[-1] == '\r') cur--; ++ ++ /* clip boundary */ ++ block_r->size = cur - block_r->data; ++ ++ ctx->parse_next_block = preparsed_parse_prologue_finish; ++ ctx->skip = block_r->size; ++ return 1; ++ } ++ ++ /* retain enough data in the stream buffer to contain initial boundary */ ++ if (end_offset > BOUNDARY_END_MAX_LEN) ++ boundary_min_start = end_offset - BOUNDARY_END_MAX_LEN; ++ else ++ boundary_min_start = 0; ++ ++ if (ctx->input->v_offset + block_r->size >= boundary_min_start) { ++ if (boundary_min_start <= ctx->input->v_offset) ++ return 0; ++ block_r->size = boundary_min_start - ctx->input->v_offset; ++ } ++ ctx->skip = block_r->size; ++ return 1; ++} ++ ++static int preparsed_parse_epilogue_more(struct message_parser_ctx *ctx, ++ struct message_block *block_r) ++{ ++ uoff_t end_offset = ctx->part->physical_pos + ++ ctx->part->header_size.physical_size + ++ ctx->part->body_size.physical_size; ++ bool full; ++ int ret; ++ ++ if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0) ++ return ret; ++ ++ if (ctx->input->v_offset + block_r->size >= end_offset) { ++ block_r->size = end_offset - ctx->input->v_offset; ++ ctx->parse_next_block = preparsed_parse_body_finish; ++ } ++ ctx->skip = block_r->size; ++ return 1; ++} ++ ++static int preparsed_parse_epilogue_boundary(struct message_parser_ctx *ctx, ++ struct message_block *block_r) ++{ ++ uoff_t end_offset = ctx->part->physical_pos + ++ ctx->part->header_size.physical_size + ++ ctx->part->body_size.physical_size; ++ const unsigned char *data, *cur; ++ size_t size; ++ bool full; ++ int ret; ++ ++ if (end_offset - ctx->input->v_offset < 7) { ++ ctx->broken_reason = "Epilogue position is wrong"; ++ return -1; ++ } ++ ++ if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0) ++ return ret; ++ ++ /* [\r]\n--boundary--[\r]\n */ ++ if (block_r->size < 7) { ++ ctx->want_count = 7; ++ return 0; ++ } ++ ++ data = block_r->data; ++ size = block_r->size; ++ cur = data; ++ ++ if (*cur == '\r') cur++; ++ ++ if (cur[0] != '\n' || cur[1] != '-' || data[2] != '-') { ++ ctx->broken_reason = "Epilogue boundary start not at expected position"; ++ return -1; ++ } ++ ++ /* find the end of the line */ ++ cur += 3; ++ if ((cur = memchr(cur, '\n', size - (cur-data))) == NULL) { ++ if (end_offset < ctx->input->v_offset + size) { ++ ctx->broken_reason = "Epilogue boundary end not at expected position"; ++ return -1; ++ } else if (ctx->input->v_offset + size < end_offset && ++ size < BOUNDARY_END_MAX_LEN && ++ !ctx->input->eof && !full) { ++ ctx->want_count = BOUNDARY_END_MAX_LEN; ++ return 0; ++ } ++ } ++ ++ block_r->size = 0; ++ ctx->parse_next_block = preparsed_parse_epilogue_more; ++ ctx->skip = cur - data + 1; ++ return 0; ++} ++ ++static int preparsed_parse_body_init(struct message_parser_ctx *ctx, ++ struct message_block *block_r) ++{ ++ uoff_t offset = ctx->part->physical_pos + ++ ctx->part->header_size.physical_size; ++ ++ if (offset < ctx->input->v_offset) { ++ /* header was actually larger than the cached size suggested */ ++ ctx->broken_reason = "Header larger than its cached size"; ++ return -1; ++ } ++ i_stream_skip(ctx->input, offset - ctx->input->v_offset); ++ ++ /* multipart messages may begin with --boundary--, which makes them ++ not have any children. */ ++ if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 || ++ ctx->part->children == NULL) ++ ctx->parse_next_block = preparsed_parse_body_more; ++ else ++ ctx->parse_next_block = preparsed_parse_prologue_more; ++ return ctx->parse_next_block(ctx, block_r); ++} ++ ++static int preparsed_parse_epilogue_init(struct message_parser_ctx *ctx, ++ struct message_block *block_r) ++{ ++ uoff_t offset = ctx->part->physical_pos + ++ ctx->part->header_size.physical_size + ++ ctx->part->body_size.physical_size; ++ ++ ctx->part = ctx->part->parent; ++ ++ if (offset < ctx->input->v_offset) { ++ /* last child was actually larger than the cached size ++ suggested */ ++ ctx->broken_reason = "Part larger than its cached size"; ++ return -1; ++ } ++ i_stream_skip(ctx->input, offset - ctx->input->v_offset); ++ ++ ctx->parse_next_block = preparsed_parse_epilogue_boundary; ++ return ctx->parse_next_block(ctx, block_r); ++} ++ ++static int preparsed_parse_finish_header(struct message_parser_ctx *ctx, ++ struct message_block *block_r) ++{ ++ if (ctx->part->children != NULL) { ++ if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 && ++ (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) != 0) ++ ctx->parse_next_block = preparsed_parse_body_init; ++ else { ++ ctx->parse_next_block = preparsed_parse_next_header_init; ++ ctx->part = ctx->part->children; ++ } ++ } else if ((ctx->flags & MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK) == 0) { ++ ctx->parse_next_block = preparsed_parse_body_init; ++ } else { ++ preparsed_skip_to_next(ctx); ++ } ++ return ctx->parse_next_block(ctx, block_r); ++} ++ ++static int preparsed_parse_next_header(struct message_parser_ctx *ctx, ++ struct message_block *block_r) ++{ ++ struct message_header_line *hdr; ++ int ret; ++ ++ ret = message_parse_header_next(ctx->hdr_parser_ctx, &hdr); ++ if (ret == 0 || (ret < 0 && ctx->input->stream_errno != 0)) { ++ ctx->want_count = i_stream_get_data_size(ctx->input) + 1; ++ return ret; ++ } ++ ++ if (hdr != NULL) { ++ block_r->hdr = hdr; ++ block_r->size = 0; ++ return 1; ++ } ++ message_parse_header_deinit(&ctx->hdr_parser_ctx); ++ ++ ctx->parse_next_block = preparsed_parse_finish_header; ++ ++ /* return empty block as end of headers */ ++ block_r->hdr = NULL; ++ block_r->size = 0; ++ ++ i_assert(ctx->skip == 0); ++ if (ctx->input->v_offset != ctx->part->physical_pos + ++ ctx->part->header_size.physical_size) { ++ ctx->broken_reason = "Cached header size mismatch"; ++ return -1; ++ } ++ return 1; ++} ++ ++static int preparsed_parse_next_header_init(struct message_parser_ctx *ctx, ++ struct message_block *block_r) ++{ ++ struct istream *hdr_input; ++ ++ i_assert(ctx->hdr_parser_ctx == NULL); ++ ++ i_assert(ctx->part->physical_pos >= ctx->input->v_offset); ++ i_stream_skip(ctx->input, ctx->part->physical_pos - ++ ctx->input->v_offset); ++ ++ /* the header may become truncated by --boundaries. limit the header ++ stream's size to what it's supposed to be to avoid duplicating (and ++ keeping in sync!) all the same complicated logic as in ++ parse_next_header(). */ ++ hdr_input = i_stream_create_limit(ctx->input, ctx->part->header_size.physical_size); ++ ctx->hdr_parser_ctx = ++ message_parse_header_init(hdr_input, NULL, ctx->hdr_flags); ++ i_stream_unref(&hdr_input); ++ ++ ctx->parse_next_block = preparsed_parse_next_header; ++ return preparsed_parse_next_header(ctx, block_r); ++} ++ ++struct message_parser_ctx * ++message_parser_init_from_parts(struct message_part *parts, ++ struct istream *input, ++ enum message_header_parser_flags hdr_flags, ++ enum message_parser_flags flags) ++{ ++ struct message_parser_ctx *ctx; ++ ++ i_assert(parts != NULL); ++ ++ ctx = message_parser_init_int(input, hdr_flags, flags); ++ ctx->preparsed = TRUE; ++ ctx->parts = ctx->part = parts; ++ ctx->parse_next_block = preparsed_parse_next_header_init; ++ return ctx; ++} +diff --git a/src/lib-mail/message-parser-private.h b/src/lib-mail/message-parser-private.h +new file mode 100644 +index 0000000000..98a576eda4 +--- /dev/null ++++ b/src/lib-mail/message-parser-private.h +@@ -0,0 +1,55 @@ ++#ifndef MESSAGE_PARSER_PRIVATE_H ++#define MESSAGE_PARSER_PRIVATE_H ++ ++#include "message-parser.h" ++ ++/* RFC-2046 requires boundaries are max. 70 chars + "--" prefix + "--" suffix. ++ We'll add a bit more just in case. */ ++#define BOUNDARY_END_MAX_LEN (70 + 2 + 2 + 10) ++ ++struct message_boundary { ++ struct message_boundary *next; ++ ++ struct message_part *part; ++ const char *boundary; ++ size_t len; ++ ++ bool epilogue_found:1; ++}; ++ ++struct message_parser_ctx { ++ pool_t parser_pool, part_pool; ++ struct istream *input; ++ struct message_part *parts, *part; ++ const char *broken_reason; ++ ++ enum message_header_parser_flags hdr_flags; ++ enum message_parser_flags flags; ++ ++ const char *last_boundary; ++ struct message_boundary *boundaries; ++ ++ size_t skip; ++ char last_chr; ++ unsigned int want_count; ++ ++ struct message_header_parser_ctx *hdr_parser_ctx; ++ unsigned int prev_hdr_newline_size; ++ ++ int (*parse_next_block)(struct message_parser_ctx *ctx, ++ struct message_block *block_r); ++ ++ bool part_seen_content_type:1; ++ bool multipart:1; ++ bool preparsed:1; ++ bool eof:1; ++}; ++ ++struct message_parser_ctx * ++message_parser_init_int(struct istream *input, ++ enum message_header_parser_flags hdr_flags, ++ enum message_parser_flags flags); ++int message_parser_read_more(struct message_parser_ctx *ctx, ++ struct message_block *block_r, bool *full_r); ++ ++#endif +diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c +index e7a4f4cc31..653f964118 100644 +--- a/src/lib-mail/message-parser.c ++++ b/src/lib-mail/message-parser.c +@@ -6,49 +6,7 @@ + #include "istream.h" + #include "rfc822-parser.h" + #include "rfc2231-parser.h" +-#include "message-parser.h" +- +-/* RFC-2046 requires boundaries are max. 70 chars + "--" prefix + "--" suffix. +- We'll add a bit more just in case. */ +-#define BOUNDARY_END_MAX_LEN (70 + 2 + 2 + 10) +- +-struct message_boundary { +- struct message_boundary *next; +- +- struct message_part *part; +- const char *boundary; +- size_t len; +- +- bool epilogue_found:1; +-}; +- +-struct message_parser_ctx { +- pool_t parser_pool, part_pool; +- struct istream *input; +- struct message_part *parts, *part; +- const char *broken_reason; +- +- enum message_header_parser_flags hdr_flags; +- enum message_parser_flags flags; +- +- const char *last_boundary; +- struct message_boundary *boundaries; +- +- size_t skip; +- char last_chr; +- unsigned int want_count; +- +- struct message_header_parser_ctx *hdr_parser_ctx; +- unsigned int prev_hdr_newline_size; +- +- int (*parse_next_block)(struct message_parser_ctx *ctx, +- struct message_block *block_r); +- +- bool part_seen_content_type:1; +- bool multipart:1; +- bool preparsed:1; +- bool eof:1; +-}; ++#include "message-parser-private.h" + + message_part_header_callback_t *null_message_part_header_callback = NULL; + +@@ -58,10 +16,6 @@ static int parse_next_body_to_boundary(struct message_parser_ctx *ctx, + struct message_block *block_r); + static int parse_next_body_to_eof(struct message_parser_ctx *ctx, + struct message_block *block_r); +-static int preparsed_parse_epilogue_init(struct message_parser_ctx *ctx, +- struct message_block *block_r); +-static int preparsed_parse_next_header_init(struct message_parser_ctx *ctx, +- struct message_block *block_r); + + static struct message_boundary * + boundary_find(struct message_boundary *boundaries, +@@ -122,8 +76,8 @@ static void parse_body_add_block(struct message_parser_ctx *ctx, + ctx->part->body_size.virtual_size += block->size + missing_cr_count; + } + +-static int message_parser_read_more(struct message_parser_ctx *ctx, +- struct message_block *block_r, bool *full_r) ++int message_parser_read_more(struct message_parser_ctx *ctx, ++ struct message_block *block_r, bool *full_r) + { + int ret; + +@@ -692,346 +646,7 @@ static int parse_next_header_init(struct message_parser_ctx *ctx, + return parse_next_header(ctx, block_r); + } + +-static int preparsed_parse_eof(struct message_parser_ctx *ctx ATTR_UNUSED, +- struct message_block *block_r ATTR_UNUSED) +-{ +- return -1; +-} +- +-static void preparsed_skip_to_next(struct message_parser_ctx *ctx) +-{ +- ctx->parse_next_block = preparsed_parse_next_header_init; +- while (ctx->part != NULL) { +- if (ctx->part->next != NULL) { +- ctx->part = ctx->part->next; +- break; +- } +- +- /* parse epilogue of multipart parent if requested */ +- if (ctx->part->parent != NULL && +- (ctx->part->parent->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 && +- (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) != 0) { +- /* check for presence of epilogue */ +- uoff_t part_end = ctx->part->physical_pos + +- ctx->part->header_size.physical_size + +- ctx->part->body_size.physical_size; +- uoff_t parent_end = ctx->part->parent->physical_pos + +- ctx->part->parent->header_size.physical_size + +- ctx->part->parent->body_size.physical_size; +- +- if (parent_end > part_end) { +- ctx->parse_next_block = preparsed_parse_epilogue_init; +- break; +- } +- } +- ctx->part = ctx->part->parent; +- } +- if (ctx->part == NULL) +- ctx->parse_next_block = preparsed_parse_eof; +-} +- +-static int preparsed_parse_body_finish(struct message_parser_ctx *ctx, +- struct message_block *block_r) +-{ +- i_stream_skip(ctx->input, ctx->skip); +- ctx->skip = 0; +- +- preparsed_skip_to_next(ctx); +- return ctx->parse_next_block(ctx, block_r); +-} +- +-static int preparsed_parse_prologue_finish(struct message_parser_ctx *ctx, +- struct message_block *block_r) +-{ +- i_stream_skip(ctx->input, ctx->skip); +- ctx->skip = 0; +- +- ctx->parse_next_block = preparsed_parse_next_header_init; +- ctx->part = ctx->part->children; +- return ctx->parse_next_block(ctx, block_r); +-} +- +-static int preparsed_parse_body_more(struct message_parser_ctx *ctx, +- struct message_block *block_r) +-{ +- uoff_t end_offset = ctx->part->physical_pos + +- ctx->part->header_size.physical_size + +- ctx->part->body_size.physical_size; +- bool full; +- int ret; +- +- if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0) +- return ret; +- +- if (ctx->input->v_offset + block_r->size >= end_offset) { +- block_r->size = end_offset - ctx->input->v_offset; +- ctx->parse_next_block = preparsed_parse_body_finish; +- } +- ctx->skip = block_r->size; +- return 1; +-} +- +-static int preparsed_parse_prologue_more(struct message_parser_ctx *ctx, +- struct message_block *block_r) +-{ +- uoff_t boundary_min_start, end_offset; +- const unsigned char *cur; +- bool full; +- int ret; +- +- i_assert(ctx->part->children != NULL); +- end_offset = ctx->part->children->physical_pos; +- +- if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0) +- return ret; +- +- if (ctx->input->v_offset + block_r->size >= end_offset) { +- /* we've got the full prologue: clip off the initial boundary */ +- block_r->size = end_offset - ctx->input->v_offset; +- cur = block_r->data + block_r->size - 1; +- +- /* [\r]\n--boundary[\r]\n */ +- if (block_r->size < 5 || *cur != '\n') { +- ctx->broken_reason = "Prologue boundary end not at expected position"; +- return -1; +- } +- +- cur--; +- if (*cur == '\r') cur--; +- +- /* find newline just before boundary */ +- for (; cur >= block_r->data; cur--) { +- if (*cur == '\n') break; +- } +- +- if (cur[0] != '\n' || cur[1] != '-' || cur[2] != '-') { +- ctx->broken_reason = "Prologue boundary beginning not at expected position"; +- return -1; +- } +- +- if (cur != block_r->data && cur[-1] == '\r') cur--; +- +- /* clip boundary */ +- block_r->size = cur - block_r->data; +- +- ctx->parse_next_block = preparsed_parse_prologue_finish; +- ctx->skip = block_r->size; +- return 1; +- } +- +- /* retain enough data in the stream buffer to contain initial boundary */ +- if (end_offset > BOUNDARY_END_MAX_LEN) +- boundary_min_start = end_offset - BOUNDARY_END_MAX_LEN; +- else +- boundary_min_start = 0; +- +- if (ctx->input->v_offset + block_r->size >= boundary_min_start) { +- if (boundary_min_start <= ctx->input->v_offset) +- return 0; +- block_r->size = boundary_min_start - ctx->input->v_offset; +- } +- ctx->skip = block_r->size; +- return 1; +-} +- +-static int preparsed_parse_epilogue_more(struct message_parser_ctx *ctx, +- struct message_block *block_r) +-{ +- uoff_t end_offset = ctx->part->physical_pos + +- ctx->part->header_size.physical_size + +- ctx->part->body_size.physical_size; +- bool full; +- int ret; +- +- if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0) +- return ret; +- +- if (ctx->input->v_offset + block_r->size >= end_offset) { +- block_r->size = end_offset - ctx->input->v_offset; +- ctx->parse_next_block = preparsed_parse_body_finish; +- } +- ctx->skip = block_r->size; +- return 1; +-} +- +-static int preparsed_parse_epilogue_boundary(struct message_parser_ctx *ctx, +- struct message_block *block_r) +-{ +- uoff_t end_offset = ctx->part->physical_pos + +- ctx->part->header_size.physical_size + +- ctx->part->body_size.physical_size; +- const unsigned char *data, *cur; +- size_t size; +- bool full; +- int ret; +- +- if (end_offset - ctx->input->v_offset < 7) { +- ctx->broken_reason = "Epilogue position is wrong"; +- return -1; +- } +- +- if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0) +- return ret; +- +- /* [\r]\n--boundary--[\r]\n */ +- if (block_r->size < 7) { +- ctx->want_count = 7; +- return 0; +- } +- +- data = block_r->data; +- size = block_r->size; +- cur = data; +- +- if (*cur == '\r') cur++; +- +- if (cur[0] != '\n' || cur[1] != '-' || data[2] != '-') { +- ctx->broken_reason = "Epilogue boundary start not at expected position"; +- return -1; +- } +- +- /* find the end of the line */ +- cur += 3; +- if ((cur = memchr(cur, '\n', size - (cur-data))) == NULL) { +- if (end_offset < ctx->input->v_offset + size) { +- ctx->broken_reason = "Epilogue boundary end not at expected position"; +- return -1; +- } else if (ctx->input->v_offset + size < end_offset && +- size < BOUNDARY_END_MAX_LEN && +- !ctx->input->eof && !full) { +- ctx->want_count = BOUNDARY_END_MAX_LEN; +- return 0; +- } +- } +- +- block_r->size = 0; +- ctx->parse_next_block = preparsed_parse_epilogue_more; +- ctx->skip = cur - data + 1; +- return 0; +-} +- +-static int preparsed_parse_body_init(struct message_parser_ctx *ctx, +- struct message_block *block_r) +-{ +- uoff_t offset = ctx->part->physical_pos + +- ctx->part->header_size.physical_size; +- +- if (offset < ctx->input->v_offset) { +- /* header was actually larger than the cached size suggested */ +- ctx->broken_reason = "Header larger than its cached size"; +- return -1; +- } +- i_stream_skip(ctx->input, offset - ctx->input->v_offset); +- +- /* multipart messages may begin with --boundary--, which makes them +- not have any children. */ +- if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 || +- ctx->part->children == NULL) +- ctx->parse_next_block = preparsed_parse_body_more; +- else +- ctx->parse_next_block = preparsed_parse_prologue_more; +- return ctx->parse_next_block(ctx, block_r); +-} +- +-static int preparsed_parse_epilogue_init(struct message_parser_ctx *ctx, +- struct message_block *block_r) +-{ +- uoff_t offset = ctx->part->physical_pos + +- ctx->part->header_size.physical_size + +- ctx->part->body_size.physical_size; +- +- ctx->part = ctx->part->parent; +- +- if (offset < ctx->input->v_offset) { +- /* last child was actually larger than the cached size +- suggested */ +- ctx->broken_reason = "Part larger than its cached size"; +- return -1; +- } +- i_stream_skip(ctx->input, offset - ctx->input->v_offset); +- +- ctx->parse_next_block = preparsed_parse_epilogue_boundary; +- return ctx->parse_next_block(ctx, block_r); +-} +- +-static int preparsed_parse_finish_header(struct message_parser_ctx *ctx, +- struct message_block *block_r) +-{ +- if (ctx->part->children != NULL) { +- if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 && +- (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) != 0) +- ctx->parse_next_block = preparsed_parse_body_init; +- else { +- ctx->parse_next_block = preparsed_parse_next_header_init; +- ctx->part = ctx->part->children; +- } +- } else if ((ctx->flags & MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK) == 0) { +- ctx->parse_next_block = preparsed_parse_body_init; +- } else { +- preparsed_skip_to_next(ctx); +- } +- return ctx->parse_next_block(ctx, block_r); +-} +- +-static int preparsed_parse_next_header(struct message_parser_ctx *ctx, +- struct message_block *block_r) +-{ +- struct message_header_line *hdr; +- int ret; +- +- ret = message_parse_header_next(ctx->hdr_parser_ctx, &hdr); +- if (ret == 0 || (ret < 0 && ctx->input->stream_errno != 0)) { +- ctx->want_count = i_stream_get_data_size(ctx->input) + 1; +- return ret; +- } +- +- if (hdr != NULL) { +- block_r->hdr = hdr; +- block_r->size = 0; +- return 1; +- } +- message_parse_header_deinit(&ctx->hdr_parser_ctx); +- +- ctx->parse_next_block = preparsed_parse_finish_header; +- +- /* return empty block as end of headers */ +- block_r->hdr = NULL; +- block_r->size = 0; +- +- i_assert(ctx->skip == 0); +- if (ctx->input->v_offset != ctx->part->physical_pos + +- ctx->part->header_size.physical_size) { +- ctx->broken_reason = "Cached header size mismatch"; +- return -1; +- } +- return 1; +-} +- +-static int preparsed_parse_next_header_init(struct message_parser_ctx *ctx, +- struct message_block *block_r) +-{ +- struct istream *hdr_input; +- +- i_assert(ctx->hdr_parser_ctx == NULL); +- +- i_assert(ctx->part->physical_pos >= ctx->input->v_offset); +- i_stream_skip(ctx->input, ctx->part->physical_pos - +- ctx->input->v_offset); +- +- /* the header may become truncated by --boundaries. limit the header +- stream's size to what it's supposed to be to avoid duplicating (and +- keeping in sync!) all the same complicated logic as in +- parse_next_header(). */ +- hdr_input = i_stream_create_limit(ctx->input, ctx->part->header_size.physical_size); +- ctx->hdr_parser_ctx = +- message_parse_header_init(hdr_input, NULL, ctx->hdr_flags); +- i_stream_unref(&hdr_input); +- +- ctx->parse_next_block = preparsed_parse_next_header; +- return preparsed_parse_next_header(ctx, block_r); +-} +- +-static struct message_parser_ctx * ++struct message_parser_ctx * + message_parser_init_int(struct istream *input, + enum message_header_parser_flags hdr_flags, + enum message_parser_flags flags) +@@ -1063,23 +678,6 @@ message_parser_init(pool_t part_pool, struct istream *input, + return ctx; + } + +-struct message_parser_ctx * +-message_parser_init_from_parts(struct message_part *parts, +- struct istream *input, +- enum message_header_parser_flags hdr_flags, +- enum message_parser_flags flags) +-{ +- struct message_parser_ctx *ctx; +- +- i_assert(parts != NULL); +- +- ctx = message_parser_init_int(input, hdr_flags, flags); +- ctx->preparsed = TRUE; +- ctx->parts = ctx->part = parts; +- ctx->parse_next_block = preparsed_parse_next_header_init; +- return ctx; +-} +- + void message_parser_deinit(struct message_parser_ctx **_ctx, + struct message_part **parts_r) + { +-- +2.11.0 + +From 1765e533f1172e4823413268493e8d4198004f37 Mon Sep 17 00:00:00 2001 +From: Timo Sirainen +Date: Thu, 23 Apr 2020 11:33:31 +0300 +Subject: [PATCH 04/15] lib-mail: message-parser - Add a message_part_finish() + helper function + +--- + src/lib-mail/message-parser.c | 25 ++++++++++++------------- + 1 file changed, 12 insertions(+), 13 deletions(-) + +diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c +index 653f964118..9a98cc83a3 100644 +--- a/src/lib-mail/message-parser.c ++++ b/src/lib-mail/message-parser.c +@@ -150,6 +150,13 @@ message_part_append(pool_t pool, struct message_part *parent) + return part; + } + ++static void message_part_finish(struct message_parser_ctx *ctx) ++{ ++ message_size_add(&ctx->part->parent->body_size, &ctx->part->body_size); ++ message_size_add(&ctx->part->parent->body_size, &ctx->part->header_size); ++ ctx->part = ctx->part->parent; ++} ++ + static void parse_next_body_multipart_init(struct message_parser_ctx *ctx) + { + struct message_boundary *b; +@@ -267,19 +274,16 @@ static int parse_part_finish(struct message_parser_ctx *ctx, + struct message_boundary *boundary, + struct message_block *block_r, bool first_line) + { +- struct message_part *part; + size_t line_size; + + i_assert(ctx->last_boundary == NULL); + + /* get back to parent MIME part, summing the child MIME part sizes + into parent's body sizes */ +- for (part = ctx->part; part != boundary->part; part = part->parent) { +- message_size_add(&part->parent->body_size, &part->body_size); +- message_size_add(&part->parent->body_size, &part->header_size); ++ while (ctx->part != boundary->part) { ++ message_part_finish(ctx); ++ i_assert(ctx->part != NULL); + } +- i_assert(part != NULL); +- ctx->part = part; + + if (boundary->epilogue_found) { + /* this boundary isn't needed anymore */ +@@ -734,13 +738,8 @@ int message_parser_parse_next_block(struct message_parser_ctx *ctx, + i_assert(ctx->input->eof || ctx->input->closed || + ctx->input->stream_errno != 0 || + ctx->broken_reason != NULL); +- while (ctx->part->parent != NULL) { +- message_size_add(&ctx->part->parent->body_size, +- &ctx->part->body_size); +- message_size_add(&ctx->part->parent->body_size, +- &ctx->part->header_size); +- ctx->part = ctx->part->parent; +- } ++ while (ctx->part->parent != NULL) ++ message_part_finish(ctx); + } + + if (block_r->size == 0) { +-- +2.11.0 + +From 2dd9d73d7ec0e64c8a6fa055ce4bc382639d1826 Mon Sep 17 00:00:00 2001 +From: Timo Sirainen +Date: Thu, 23 Apr 2020 11:34:22 +0300 +Subject: [PATCH 05/15] lib-mail: message-parser - Change message_part_append() + to do all work internally + +--- + src/lib-mail/message-parser.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c +index 9a98cc83a3..0edd4d267c 100644 +--- a/src/lib-mail/message-parser.c ++++ b/src/lib-mail/message-parser.c +@@ -122,16 +122,17 @@ int message_parser_read_more(struct message_parser_ctx *ctx, + return 1; + } + +-static struct message_part * +-message_part_append(pool_t pool, struct message_part *parent) ++static void ++message_part_append(struct message_parser_ctx *ctx) + { ++ struct message_part *parent = ctx->part; + struct message_part *p, *part, **list; + + i_assert(parent != NULL); + i_assert((parent->flags & (MESSAGE_PART_FLAG_MULTIPART | + MESSAGE_PART_FLAG_MESSAGE_RFC822)) != 0); + +- part = p_new(pool, struct message_part, 1); ++ part = p_new(ctx->part_pool, struct message_part, 1); + part->parent = parent; + for (p = parent; p != NULL; p = p->parent) + p->children_count++; +@@ -147,7 +148,7 @@ message_part_append(pool_t pool, struct message_part *parent) + list = &(*list)->next; + + *list = part; +- return part; ++ ctx->part = part; + } + + static void message_part_finish(struct message_parser_ctx *ctx) +@@ -175,7 +176,7 @@ static void parse_next_body_multipart_init(struct message_parser_ctx *ctx) + static int parse_next_body_message_rfc822_init(struct message_parser_ctx *ctx, + struct message_block *block_r) + { +- ctx->part = message_part_append(ctx->part_pool, ctx->part); ++ message_part_append(ctx); + return parse_next_header_init(ctx, block_r); + } + +@@ -225,7 +226,7 @@ boundary_line_find(struct message_parser_ctx *ctx, + static int parse_next_mime_header_init(struct message_parser_ctx *ctx, + struct message_block *block_r) + { +- ctx->part = message_part_append(ctx->part_pool, ctx->part); ++ message_part_append(ctx); + ctx->part->flags |= MESSAGE_PART_FLAG_IS_MIME; + + return parse_next_header_init(ctx, block_r); +-- +2.11.0 + +From 9f565d94ed7962f6c982387c25d093c34edbb5f0 Mon Sep 17 00:00:00 2001 +From: Timo Sirainen +Date: Thu, 23 Apr 2020 11:36:48 +0300 +Subject: [PATCH 06/15] lib-mail: message-parser - Optimize updating + children_count + +--- + src/lib-mail/message-parser.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c +index 0edd4d267c..f19759c257 100644 +--- a/src/lib-mail/message-parser.c ++++ b/src/lib-mail/message-parser.c +@@ -126,7 +126,7 @@ static void + message_part_append(struct message_parser_ctx *ctx) + { + struct message_part *parent = ctx->part; +- struct message_part *p, *part, **list; ++ struct message_part *part, **list; + + i_assert(parent != NULL); + i_assert((parent->flags & (MESSAGE_PART_FLAG_MULTIPART | +@@ -134,8 +134,6 @@ message_part_append(struct message_parser_ctx *ctx) + + part = p_new(ctx->part_pool, struct message_part, 1); + part->parent = parent; +- for (p = parent; p != NULL; p = p->parent) +- p->children_count++; + + /* set child position */ + part->physical_pos = +@@ -155,6 +153,7 @@ static void message_part_finish(struct message_parser_ctx *ctx) + { + message_size_add(&ctx->part->parent->body_size, &ctx->part->body_size); + message_size_add(&ctx->part->parent->body_size, &ctx->part->header_size); ++ ctx->part->parent->children_count += 1 + ctx->part->children_count; + ctx->part = ctx->part->parent; + } + +-- +2.11.0 + +From f543817877ce91e59386eb83144e00e288a19839 Mon Sep 17 00:00:00 2001 +From: Timo Sirainen +Date: Thu, 23 Apr 2020 12:00:38 +0300 +Subject: [PATCH 07/15] lib-mail: message-parser - Optimize appending new part + to linked list + +--- + src/lib-mail/message-parser-private.h | 3 +++ + src/lib-mail/message-parser.c | 24 ++++++++++++++++++------ + 2 files changed, 21 insertions(+), 6 deletions(-) + +diff --git a/src/lib-mail/message-parser-private.h b/src/lib-mail/message-parser-private.h +index 98a576eda4..fd92a48776 100644 +--- a/src/lib-mail/message-parser-private.h ++++ b/src/lib-mail/message-parser-private.h +@@ -29,6 +29,9 @@ struct message_parser_ctx { + const char *last_boundary; + struct message_boundary *boundaries; + ++ struct message_part **next_part; ++ ARRAY(struct message_part **) next_part_stack; ++ + size_t skip; + char last_chr; + unsigned int want_count; +diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c +index f19759c257..0f690ab689 100644 +--- a/src/lib-mail/message-parser.c ++++ b/src/lib-mail/message-parser.c +@@ -1,7 +1,7 @@ + /* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */ + + #include "lib.h" +-#include "buffer.h" ++#include "array.h" + #include "str.h" + #include "istream.h" + #include "rfc822-parser.h" +@@ -126,7 +126,7 @@ static void + message_part_append(struct message_parser_ctx *ctx) + { + struct message_part *parent = ctx->part; +- struct message_part *part, **list; ++ struct message_part *part; + + i_assert(parent != NULL); + i_assert((parent->flags & (MESSAGE_PART_FLAG_MULTIPART | +@@ -141,16 +141,26 @@ message_part_append(struct message_parser_ctx *ctx) + parent->body_size.physical_size + + parent->header_size.physical_size; + +- list = &part->parent->children; +- while (*list != NULL) +- list = &(*list)->next; ++ /* add to parent's linked list */ ++ *ctx->next_part = part; ++ /* update the parent's end-of-linked-list pointer */ ++ struct message_part **next_part = &part->next; ++ array_push_back(&ctx->next_part_stack, &next_part); ++ /* This part is now the new parent for the next message_part_append() ++ call. Its linked list begins with the children pointer. */ ++ ctx->next_part = &part->children; + +- *list = part; + ctx->part = part; + } + + static void message_part_finish(struct message_parser_ctx *ctx) + { ++ struct message_part **const *parent_next_partp; ++ ++ parent_next_partp = array_back(&ctx->next_part_stack); ++ array_pop_back(&ctx->next_part_stack); ++ ctx->next_part = *parent_next_partp; ++ + message_size_add(&ctx->part->parent->body_size, &ctx->part->body_size); + message_size_add(&ctx->part->parent->body_size, &ctx->part->header_size); + ctx->part->parent->children_count += 1 + ctx->part->children_count; +@@ -678,7 +688,9 @@ message_parser_init(pool_t part_pool, struct istream *input, + ctx = message_parser_init_int(input, hdr_flags, flags); + ctx->part_pool = part_pool; + ctx->parts = ctx->part = p_new(part_pool, struct message_part, 1); ++ ctx->next_part = &ctx->part->children; + ctx->parse_next_block = parse_next_header_init; ++ p_array_init(&ctx->next_part_stack, ctx->parser_pool, 4); + return ctx; + } + +-- +2.11.0 + +From f080cb2da2064005f35e1d16cd4efded21552491 Mon Sep 17 00:00:00 2001 +From: Timo Sirainen +Date: Thu, 23 Apr 2020 12:10:07 +0300 +Subject: [PATCH 08/15] lib-mail: message-parser - Minor code cleanup to + finding the end of boundary line + +--- + src/lib-mail/message-parser.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c +index 0f690ab689..88c1b31564 100644 +--- a/src/lib-mail/message-parser.c ++++ b/src/lib-mail/message-parser.c +@@ -211,17 +211,16 @@ boundary_line_find(struct message_parser_ctx *ctx, + } + + /* need to find the end of line */ +- if (memchr(data + 2, '\n', size - 2) == NULL && +- size < BOUNDARY_END_MAX_LEN && ++ data += 2; ++ size -= 2; ++ if (memchr(data, '\n', size) == NULL && ++ size+2 < BOUNDARY_END_MAX_LEN && + !ctx->input->eof && !full) { + /* no LF found */ + ctx->want_count = BOUNDARY_END_MAX_LEN; + return 0; + } + +- data += 2; +- size -= 2; +- + *boundary_r = boundary_find(ctx->boundaries, data, size); + if (*boundary_r == NULL) + return -1; +-- +2.11.0 + +From 6c530af237d5f0486d5fff68d114de905ced9b97 Mon Sep 17 00:00:00 2001 +From: Timo Sirainen +Date: Thu, 23 Apr 2020 12:53:12 +0300 +Subject: [PATCH 09/15] lib-mail: message-parser - Truncate excessively long + MIME boundaries + +RFC 2046 requires that the boundaries are a maximum of 70 characters +(excluding the "--" prefix and suffix). We allow 80 characters for a bit of +extra safety. Anything longer than that is truncated and treated the same +as if it was just 80 characters. +--- + src/lib-mail/message-parser-private.h | 3 +- + src/lib-mail/message-parser.c | 4 +- + src/lib-mail/test-message-parser.c | 95 +++++++++++++++++++++++++++++++++++ + 3 files changed, 100 insertions(+), 2 deletions(-) + +diff --git a/src/lib-mail/message-parser-private.h b/src/lib-mail/message-parser-private.h +index fd92a48776..d8116259ad 100644 +--- a/src/lib-mail/message-parser-private.h ++++ b/src/lib-mail/message-parser-private.h +@@ -5,7 +5,8 @@ + + /* RFC-2046 requires boundaries are max. 70 chars + "--" prefix + "--" suffix. + We'll add a bit more just in case. */ +-#define BOUNDARY_END_MAX_LEN (70 + 2 + 2 + 10) ++#define BOUNDARY_STRING_MAX_LEN (70 + 10) ++#define BOUNDARY_END_MAX_LEN (BOUNDARY_STRING_MAX_LEN + 2 + 2) + + struct message_boundary { + struct message_boundary *next; +diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c +index 88c1b31564..43142491b2 100644 +--- a/src/lib-mail/message-parser.c ++++ b/src/lib-mail/message-parser.c +@@ -477,8 +477,10 @@ static void parse_content_type(struct message_parser_ctx *ctx, + rfc2231_parse(&parser, &results); + for (; *results != NULL; results += 2) { + if (strcasecmp(results[0], "boundary") == 0) { ++ /* truncate excessively long boundaries */ + ctx->last_boundary = +- p_strdup(ctx->parser_pool, results[1]); ++ p_strndup(ctx->parser_pool, results[1], ++ BOUNDARY_STRING_MAX_LEN); + break; + } + } +diff --git a/src/lib-mail/test-message-parser.c b/src/lib-mail/test-message-parser.c +index c275707265..6bf1643e88 100644 +--- a/src/lib-mail/test-message-parser.c ++++ b/src/lib-mail/test-message-parser.c +@@ -736,6 +736,100 @@ static void test_message_parser_no_eoh(void) + test_end(); + } + ++static void test_message_parser_long_mime_boundary(void) ++{ ++ /* Close the boundaries in wrong reverse order. But because all ++ boundaries are actually truncated to the same size (..890) it ++ works the same as if all of them were duplicate boundaries. */ ++static const char input_msg[] = ++"Content-Type: multipart/mixed; boundary=\"1234567890123456789012345678901234567890123456789012345678901234567890123456789012\"\n" ++"\n" ++"--1234567890123456789012345678901234567890123456789012345678901234567890123456789012\n" ++"Content-Type: multipart/mixed; boundary=\"123456789012345678901234567890123456789012345678901234567890123456789012345678901\"\n" ++"\n" ++"--123456789012345678901234567890123456789012345678901234567890123456789012345678901\n" ++"Content-Type: multipart/mixed; boundary=\"12345678901234567890123456789012345678901234567890123456789012345678901234567890\"\n" ++"\n" ++"--12345678901234567890123456789012345678901234567890123456789012345678901234567890\n" ++"Content-Type: text/plain\n" ++"\n" ++"1\n" ++"--1234567890123456789012345678901234567890123456789012345678901234567890123456789012\n" ++"Content-Type: text/plain\n" ++"\n" ++"22\n" ++"--123456789012345678901234567890123456789012345678901234567890123456789012345678901\n" ++"Content-Type: text/plain\n" ++"\n" ++"333\n" ++"--12345678901234567890123456789012345678901234567890123456789012345678901234567890\n" ++"Content-Type: text/plain\n" ++"\n" ++"4444\n"; ++ struct message_parser_ctx *parser; ++ struct istream *input; ++ struct message_part *parts, *part; ++ struct message_block block; ++ pool_t pool; ++ int ret; ++ ++ test_begin("message parser long mime boundary"); ++ pool = pool_alloconly_create("message parser", 10240); ++ input = test_istream_create(input_msg); ++ ++ parser = message_parser_init(pool, input, 0, 0); ++ while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; ++ test_assert(ret < 0); ++ message_parser_deinit(&parser, &parts); ++ ++ part = parts; ++ test_assert(part->children_count == 6); ++ test_assert(part->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); ++ test_assert(part->header_size.lines == 2); ++ test_assert(part->header_size.physical_size == 126); ++ test_assert(part->header_size.virtual_size == 126+2); ++ test_assert(part->body_size.lines == 22); ++ test_assert(part->body_size.physical_size == 871); ++ test_assert(part->body_size.virtual_size == 871+22); ++ ++ part = parts->children; ++ test_assert(part->children_count == 5); ++ test_assert(part->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); ++ test_assert(part->header_size.lines == 2); ++ test_assert(part->header_size.physical_size == 125); ++ test_assert(part->header_size.virtual_size == 125+2); ++ test_assert(part->body_size.lines == 19); ++ test_assert(part->body_size.physical_size == 661); ++ test_assert(part->body_size.virtual_size == 661+19); ++ ++ part = parts->children->children; ++ test_assert(part->children_count == 4); ++ test_assert(part->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); ++ test_assert(part->header_size.lines == 2); ++ test_assert(part->header_size.physical_size == 124); ++ test_assert(part->header_size.virtual_size == 124+2); ++ test_assert(part->body_size.lines == 16); ++ test_assert(part->body_size.physical_size == 453); ++ test_assert(part->body_size.virtual_size == 453+16); ++ ++ part = parts->children->children->children; ++ for (unsigned int i = 1; i <= 3; i++, part = part->next) { ++ test_assert(part->children_count == 0); ++ test_assert(part->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); ++ test_assert(part->header_size.lines == 2); ++ test_assert(part->header_size.physical_size == 26); ++ test_assert(part->header_size.virtual_size == 26+2); ++ test_assert(part->body_size.lines == 0); ++ test_assert(part->body_size.physical_size == i); ++ test_assert(part->body_size.virtual_size == i); ++ } ++ ++ test_parsed_parts(input, parts); ++ i_stream_unref(&input); ++ pool_unref(&pool); ++ test_end(); ++} ++ + int main(void) + { + static void (*const test_functions[])(void) = { +@@ -749,6 +843,7 @@ int main(void) + test_message_parser_continuing_mime_boundary, + test_message_parser_continuing_truncated_mime_boundary, + test_message_parser_continuing_mime_boundary_reverse, ++ test_message_parser_long_mime_boundary, + test_message_parser_no_eoh, + NULL + }; +-- +2.11.0 + +From f6a31b782088d9f626f4718654cb91cf723f3f9c Mon Sep 17 00:00:00 2001 +From: Timo Sirainen +Date: Thu, 23 Apr 2020 13:06:02 +0300 +Subject: [PATCH 10/15] lib-mail: message-parser - Optimize boundary lookups + when exact boundary is found + +When an exact boundary is found, there's no need to continue looking for +more boundaries. +--- + src/lib-mail/message-parser.c | 24 +++++++++++++++++++++--- + 1 file changed, 21 insertions(+), 3 deletions(-) + +diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c +index 43142491b2..f0a0cf41f0 100644 +--- a/src/lib-mail/message-parser.c ++++ b/src/lib-mail/message-parser.c +@@ -31,8 +31,14 @@ boundary_find(struct message_boundary *boundaries, + while (boundaries != NULL) { + if (boundaries->len <= len && + memcmp(boundaries->boundary, data, boundaries->len) == 0 && +- (best == NULL || best->len < boundaries->len)) ++ (best == NULL || best->len < boundaries->len)) { + best = boundaries; ++ if (best->len == len) { ++ /* This is exactly the wanted boundary. There ++ can't be a better one. */ ++ break; ++ } ++ } + + boundaries = boundaries->next; + } +@@ -213,15 +219,27 @@ boundary_line_find(struct message_parser_ctx *ctx, + /* need to find the end of line */ + data += 2; + size -= 2; +- if (memchr(data, '\n', size) == NULL && ++ const unsigned char *lf_pos = memchr(data, '\n', size); ++ if (lf_pos == NULL && + size+2 < BOUNDARY_END_MAX_LEN && + !ctx->input->eof && !full) { + /* no LF found */ + ctx->want_count = BOUNDARY_END_MAX_LEN; + return 0; + } ++ size_t find_size = size; ++ ++ if (lf_pos != NULL) { ++ find_size = lf_pos - data; ++ if (find_size > 0 && data[find_size-1] == '\r') ++ find_size--; ++ if (find_size > 2 && data[find_size-1] == '-' && ++ data[find_size-2] == '-') ++ find_size -= 2; ++ } else if (find_size > BOUNDARY_END_MAX_LEN) ++ find_size = BOUNDARY_END_MAX_LEN; + +- *boundary_r = boundary_find(ctx->boundaries, data, size); ++ *boundary_r = boundary_find(ctx->boundaries, data, find_size); + if (*boundary_r == NULL) + return -1; + +-- +2.11.0 + +From 86adf700cc31775744ed48ff5aebe62b97e52c51 Mon Sep 17 00:00:00 2001 +From: Timo Sirainen +Date: Thu, 23 Apr 2020 14:53:27 +0300 +Subject: [PATCH 11/15] lib-mail: message-parser - Add boundary_remove_until() + helper function + +--- + src/lib-mail/message-parser.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c +index f0a0cf41f0..0fb0f9181a 100644 +--- a/src/lib-mail/message-parser.c ++++ b/src/lib-mail/message-parser.c +@@ -173,6 +173,13 @@ static void message_part_finish(struct message_parser_ctx *ctx) + ctx->part = ctx->part->parent; + } + ++static void ++boundary_remove_until(struct message_parser_ctx *ctx, ++ struct message_boundary *boundary) ++{ ++ ctx->boundaries = boundary; ++} ++ + static void parse_next_body_multipart_init(struct message_parser_ctx *ctx) + { + struct message_boundary *b; +@@ -314,10 +321,10 @@ static int parse_part_finish(struct message_parser_ctx *ctx, + + if (boundary->epilogue_found) { + /* this boundary isn't needed anymore */ +- ctx->boundaries = boundary->next; ++ boundary_remove_until(ctx, boundary->next); + } else { + /* forget about the boundaries we possibly skipped */ +- ctx->boundaries = boundary; ++ boundary_remove_until(ctx, boundary); + } + + /* the boundary itself should already be in buffer. add that. */ +-- +2.11.0 + +From a6f9ff266c1a2458a1762cdb1206b50497cacb09 Mon Sep 17 00:00:00 2001 +From: Timo Sirainen +Date: Thu, 23 Apr 2020 15:00:57 +0300 +Subject: [PATCH 12/15] lib-mail: message-parser - Don't use memory pool for + parser + +This reduces memory usage when parsing many MIME parts where boundaries are +being added and removed constantly. +--- + src/lib-mail/message-parser-private.h | 6 ++--- + src/lib-mail/message-parser.c | 41 ++++++++++++++++++++++++----------- + 2 files changed, 31 insertions(+), 16 deletions(-) + +diff --git a/src/lib-mail/message-parser-private.h b/src/lib-mail/message-parser-private.h +index d8116259ad..fe106819e2 100644 +--- a/src/lib-mail/message-parser-private.h ++++ b/src/lib-mail/message-parser-private.h +@@ -12,14 +12,14 @@ struct message_boundary { + struct message_boundary *next; + + struct message_part *part; +- const char *boundary; ++ char *boundary; + size_t len; + + bool epilogue_found:1; + }; + + struct message_parser_ctx { +- pool_t parser_pool, part_pool; ++ pool_t part_pool; + struct istream *input; + struct message_part *parts, *part; + const char *broken_reason; +@@ -27,7 +27,7 @@ struct message_parser_ctx { + enum message_header_parser_flags hdr_flags; + enum message_parser_flags flags; + +- const char *last_boundary; ++ char *last_boundary; + struct message_boundary *boundaries; + + struct message_part **next_part; +diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c +index 0fb0f9181a..c9ff98576b 100644 +--- a/src/lib-mail/message-parser.c ++++ b/src/lib-mail/message-parser.c +@@ -173,10 +173,24 @@ static void message_part_finish(struct message_parser_ctx *ctx) + ctx->part = ctx->part->parent; + } + ++static void message_boundary_free(struct message_boundary *b) ++{ ++ i_free(b->boundary); ++ i_free(b); ++} ++ + static void + boundary_remove_until(struct message_parser_ctx *ctx, + struct message_boundary *boundary) + { ++ while (ctx->boundaries != boundary) { ++ struct message_boundary *cur = ctx->boundaries; ++ ++ i_assert(cur != NULL); ++ ctx->boundaries = cur->next; ++ message_boundary_free(cur); ++ ++ } + ctx->boundaries = boundary; + } + +@@ -184,15 +198,14 @@ static void parse_next_body_multipart_init(struct message_parser_ctx *ctx) + { + struct message_boundary *b; + +- b = p_new(ctx->parser_pool, struct message_boundary, 1); ++ b = i_new(struct message_boundary, 1); + b->part = ctx->part; + b->boundary = ctx->last_boundary; ++ ctx->last_boundary = NULL; + b->len = strlen(b->boundary); + + b->next = ctx->boundaries; + ctx->boundaries = b; +- +- ctx->last_boundary = NULL; + } + + static int parse_next_body_message_rfc822_init(struct message_parser_ctx *ctx, +@@ -309,6 +322,8 @@ static int parse_part_finish(struct message_parser_ctx *ctx, + struct message_block *block_r, bool first_line) + { + size_t line_size; ++ size_t boundary_len = boundary->len; ++ bool boundary_epilogue_found = boundary->epilogue_found; + + i_assert(ctx->last_boundary == NULL); + +@@ -341,7 +356,7 @@ static int parse_part_finish(struct message_parser_ctx *ctx, + i_assert(block_r->data[0] == '\n'); + line_size = 1; + } +- line_size += 2 + boundary->len + (boundary->epilogue_found ? 2 : 0); ++ line_size += 2 + boundary_len + (boundary_epilogue_found ? 2 : 0); + i_assert(block_r->size >= ctx->skip + line_size); + block_r->size = line_size; + parse_body_add_block(ctx, block_r); +@@ -503,9 +518,9 @@ static void parse_content_type(struct message_parser_ctx *ctx, + for (; *results != NULL; results += 2) { + if (strcasecmp(results[0], "boundary") == 0) { + /* truncate excessively long boundaries */ ++ i_free(ctx->last_boundary); + ctx->last_boundary = +- p_strndup(ctx->parser_pool, results[1], +- BOUNDARY_STRING_MAX_LEN); ++ i_strndup(results[1], BOUNDARY_STRING_MAX_LEN); + break; + } + } +@@ -628,7 +643,7 @@ static int parse_next_header(struct message_parser_ctx *ctx, + i_assert(!ctx->multipart); + part->flags = 0; + } +- ctx->last_boundary = NULL; ++ i_free(ctx->last_boundary); + + if (!ctx->part_seen_content_type || + (part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) { +@@ -692,11 +707,8 @@ message_parser_init_int(struct istream *input, + enum message_parser_flags flags) + { + struct message_parser_ctx *ctx; +- pool_t pool; + +- pool = pool_alloconly_create("Message Parser", 1024); +- ctx = p_new(pool, struct message_parser_ctx, 1); +- ctx->parser_pool = pool; ++ ctx = i_new(struct message_parser_ctx, 1); + ctx->hdr_flags = hdr_flags; + ctx->flags = flags; + ctx->input = input; +@@ -716,7 +728,7 @@ message_parser_init(pool_t part_pool, struct istream *input, + ctx->parts = ctx->part = p_new(part_pool, struct message_part, 1); + ctx->next_part = &ctx->part->children; + ctx->parse_next_block = parse_next_header_init; +- p_array_init(&ctx->next_part_stack, ctx->parser_pool, 4); ++ i_array_init(&ctx->next_part_stack, 4); + return ctx; + } + +@@ -743,8 +755,11 @@ int message_parser_deinit_from_parts(struct message_parser_ctx **_ctx, + + if (ctx->hdr_parser_ctx != NULL) + message_parse_header_deinit(&ctx->hdr_parser_ctx); ++ boundary_remove_until(ctx, NULL); + i_stream_unref(&ctx->input); +- pool_unref(&ctx->parser_pool); ++ array_free(&ctx->next_part_stack); ++ i_free(ctx->last_boundary); ++ i_free(ctx); + i_assert(ret < 0 || *parts_r != NULL); + return ret; + } +-- +2.11.0 + +From 45cd8d8fd39d301607b22ec6f4ac800331a30225 Mon Sep 17 00:00:00 2001 +From: Timo Sirainen +Date: Thu, 23 Apr 2020 16:50:56 +0300 +Subject: [PATCH 13/15] lib-mail, global: message_parser_init*() - Convert + flags to settings structure + +--- + src/doveadm/doveadm-mail-fetch.c | 7 +++-- + src/lib-imap/test-imap-bodystructure.c | 10 +++--- + src/lib-imap/test-imap-envelope.c | 10 +++--- + src/lib-mail/istream-attachment-extractor.c | 8 +++-- + src/lib-mail/istream-binary-converter.c | 8 +++-- + src/lib-mail/message-parser-from-parts.c | 5 ++- + src/lib-mail/message-parser-private.h | 3 +- + src/lib-mail/message-parser.c | 12 +++----- + src/lib-mail/message-parser.h | 11 ++++--- + src/lib-mail/message-search.c | 9 +++--- + src/lib-mail/message-snippet.c | 3 +- + src/lib-mail/test-message-decoder.c | 4 ++- + src/lib-mail/test-message-parser.c | 47 +++++++++++++++++------------ + src/lib-mail/test-message-part.c | 3 +- + src/lib-storage/index/index-mail-headers.c | 22 ++++++-------- + src/plugins/fts/fts-build-mail.c | 7 +++-- + 16 files changed, 95 insertions(+), 74 deletions(-) + +diff --git a/src/doveadm/doveadm-mail-fetch.c b/src/doveadm/doveadm-mail-fetch.c +index 75b69e4242..d8b396a199 100644 +--- a/src/doveadm/doveadm-mail-fetch.c ++++ b/src/doveadm/doveadm-mail-fetch.c +@@ -265,6 +265,9 @@ static int fetch_text(struct fetch_cmd_context *ctx) + + static int fetch_text_utf8(struct fetch_cmd_context *ctx) + { ++ const struct message_parser_settings parser_set = { ++ .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE, ++ }; + struct istream *input; + struct message_parser_ctx *parser; + struct message_decoder_context *decoder; +@@ -275,9 +278,7 @@ static int fetch_text_utf8(struct fetch_cmd_context *ctx) + if (mail_get_stream(ctx->mail, NULL, NULL, &input) < 0) + return -1; + +- parser = message_parser_init(pool_datastack_create(), input, +- MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE, +- 0); ++ parser = message_parser_init(pool_datastack_create(), input, &parser_set); + decoder = message_decoder_init(NULL, 0); + + while ((ret = message_parser_parse_next_block(parser, &raw_block)) > 0) { +diff --git a/src/lib-imap/test-imap-bodystructure.c b/src/lib-imap/test-imap-bodystructure.c +index 6f456a4453..dfc9957488 100644 +--- a/src/lib-imap/test-imap-bodystructure.c ++++ b/src/lib-imap/test-imap-bodystructure.c +@@ -381,6 +381,11 @@ static const unsigned int normalize_tests_count = N_ELEMENTS(normalize_tests); + static struct message_part * + msg_parse(pool_t pool, const char *message, bool parse_bodystructure) + { ++ const struct message_parser_settings parser_set = { ++ .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP | ++ MESSAGE_HEADER_PARSER_FLAG_DROP_CR, ++ .flags = MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK, ++ }; + struct message_parser_ctx *parser; + struct istream *input; + struct message_block block; +@@ -388,10 +393,7 @@ msg_parse(pool_t pool, const char *message, bool parse_bodystructure) + int ret; + + input = i_stream_create_from_data(message, strlen(message)); +- parser = message_parser_init(pool, input, +- MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP | +- MESSAGE_HEADER_PARSER_FLAG_DROP_CR, +- MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK); ++ parser = message_parser_init(pool, input, &parser_set); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) { + if (parse_bodystructure) { + message_part_data_parse_from_header(pool, block.part, +diff --git a/src/lib-imap/test-imap-envelope.c b/src/lib-imap/test-imap-envelope.c +index 0d0891701b..1f295e58ba 100644 +--- a/src/lib-imap/test-imap-envelope.c ++++ b/src/lib-imap/test-imap-envelope.c +@@ -118,6 +118,11 @@ static const unsigned int parse_tests_count = N_ELEMENTS(parse_tests); + static struct message_part_envelope * + msg_parse(pool_t pool, const char *message) + { ++ const struct message_parser_settings parser_set = { ++ .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP | ++ MESSAGE_HEADER_PARSER_FLAG_DROP_CR, ++ .flags = MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK, ++ }; + struct message_parser_ctx *parser; + struct message_part_envelope *envlp = NULL; + struct istream *input; +@@ -126,10 +131,7 @@ msg_parse(pool_t pool, const char *message) + int ret; + + input = i_stream_create_from_data(message, strlen(message)); +- parser = message_parser_init(pool, input, +- MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP | +- MESSAGE_HEADER_PARSER_FLAG_DROP_CR, +- MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK); ++ parser = message_parser_init(pool, input, &parser_set); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) { + i_assert(block.part->parent == NULL); + message_part_envelope_parse_from_header(pool, &envlp, block.hdr); +diff --git a/src/lib-mail/istream-attachment-extractor.c b/src/lib-mail/istream-attachment-extractor.c +index e9655a5a67..7d4ac01072 100644 +--- a/src/lib-mail/istream-attachment-extractor.c ++++ b/src/lib-mail/istream-attachment-extractor.c +@@ -696,6 +696,10 @@ i_stream_create_attachment_extractor(struct istream *input, + struct istream_attachment_settings *set, + void *context) + { ++ const struct message_parser_settings parser_set = { ++ .flags = MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS | ++ MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES, ++ }; + struct attachment_istream *astream; + + i_assert(set->min_size > 0); +@@ -722,9 +726,7 @@ i_stream_create_attachment_extractor(struct istream *input, + astream->istream.istream.seekable = FALSE; + + astream->pool = pool_alloconly_create("istream attachment", 1024); +- astream->parser = message_parser_init(astream->pool, input, 0, +- MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS | +- MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES); ++ astream->parser = message_parser_init(astream->pool, input, &parser_set); + return i_stream_create(&astream->istream, input, + i_stream_get_fd(input), 0); + } +diff --git a/src/lib-mail/istream-binary-converter.c b/src/lib-mail/istream-binary-converter.c +index 201a588152..856b854738 100644 +--- a/src/lib-mail/istream-binary-converter.c ++++ b/src/lib-mail/istream-binary-converter.c +@@ -286,6 +286,10 @@ static void i_stream_binary_converter_close(struct iostream_private *stream, + + struct istream *i_stream_create_binary_converter(struct istream *input) + { ++ const struct message_parser_settings parser_set = { ++ .flags = MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS | ++ MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES, ++ }; + struct binary_converter_istream *bstream; + + bstream = i_new(struct binary_converter_istream, 1); +@@ -299,9 +303,7 @@ struct istream *i_stream_create_binary_converter(struct istream *input) + bstream->istream.istream.seekable = FALSE; + + bstream->pool = pool_alloconly_create("istream binary converter", 128); +- bstream->parser = message_parser_init(bstream->pool, input, 0, +- MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS | +- MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES); ++ bstream->parser = message_parser_init(bstream->pool, input, &parser_set); + return i_stream_create(&bstream->istream, input, + i_stream_get_fd(input), 0); + } +diff --git a/src/lib-mail/message-parser-from-parts.c b/src/lib-mail/message-parser-from-parts.c +index b23055ab9b..8e21ec8f18 100644 +--- a/src/lib-mail/message-parser-from-parts.c ++++ b/src/lib-mail/message-parser-from-parts.c +@@ -351,14 +351,13 @@ static int preparsed_parse_next_header_init(struct message_parser_ctx *ctx, + struct message_parser_ctx * + message_parser_init_from_parts(struct message_part *parts, + struct istream *input, +- enum message_header_parser_flags hdr_flags, +- enum message_parser_flags flags) ++ const struct message_parser_settings *set) + { + struct message_parser_ctx *ctx; + + i_assert(parts != NULL); + +- ctx = message_parser_init_int(input, hdr_flags, flags); ++ ctx = message_parser_init_int(input, set); + ctx->preparsed = TRUE; + ctx->parts = ctx->part = parts; + ctx->parse_next_block = preparsed_parse_next_header_init; +diff --git a/src/lib-mail/message-parser-private.h b/src/lib-mail/message-parser-private.h +index fe106819e2..dbf8464cfb 100644 +--- a/src/lib-mail/message-parser-private.h ++++ b/src/lib-mail/message-parser-private.h +@@ -51,8 +51,7 @@ struct message_parser_ctx { + + struct message_parser_ctx * + message_parser_init_int(struct istream *input, +- enum message_header_parser_flags hdr_flags, +- enum message_parser_flags flags); ++ const struct message_parser_settings *set); + int message_parser_read_more(struct message_parser_ctx *ctx, + struct message_block *block_r, bool *full_r); + +diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c +index c9ff98576b..41b9ed133a 100644 +--- a/src/lib-mail/message-parser.c ++++ b/src/lib-mail/message-parser.c +@@ -703,14 +703,13 @@ static int parse_next_header_init(struct message_parser_ctx *ctx, + + struct message_parser_ctx * + message_parser_init_int(struct istream *input, +- enum message_header_parser_flags hdr_flags, +- enum message_parser_flags flags) ++ const struct message_parser_settings *set) + { + struct message_parser_ctx *ctx; + + ctx = i_new(struct message_parser_ctx, 1); +- ctx->hdr_flags = hdr_flags; +- ctx->flags = flags; ++ ctx->hdr_flags = set->hdr_flags; ++ ctx->flags = set->flags; + ctx->input = input; + i_stream_ref(input); + return ctx; +@@ -718,12 +717,11 @@ message_parser_init_int(struct istream *input, + + struct message_parser_ctx * + message_parser_init(pool_t part_pool, struct istream *input, +- enum message_header_parser_flags hdr_flags, +- enum message_parser_flags flags) ++ const struct message_parser_settings *set) + { + struct message_parser_ctx *ctx; + +- ctx = message_parser_init_int(input, hdr_flags, flags); ++ ctx = message_parser_init_int(input, set); + ctx->part_pool = part_pool; + ctx->parts = ctx->part = p_new(part_pool, struct message_part, 1); + ctx->next_part = &ctx->part->children; +diff --git a/src/lib-mail/message-parser.h b/src/lib-mail/message-parser.h +index 3efd851862..d159b2607d 100644 +--- a/src/lib-mail/message-parser.h ++++ b/src/lib-mail/message-parser.h +@@ -17,6 +17,11 @@ enum message_parser_flags { + MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES = 0x08 + }; + ++struct message_parser_settings { ++ enum message_header_parser_flags hdr_flags; ++ enum message_parser_flags flags; ++}; ++ + struct message_parser_ctx; + + struct message_block { +@@ -45,8 +50,7 @@ extern message_part_header_callback_t *null_message_part_header_callback; + are allocated from. */ + struct message_parser_ctx * + message_parser_init(pool_t part_pool, struct istream *input, +- enum message_header_parser_flags hdr_flags, +- enum message_parser_flags flags); ++ const struct message_parser_settings *set); + /* Deinitialize message parser. The ctx must NOT have been created by + message_parser_init_from_parts(). */ + void message_parser_deinit(struct message_parser_ctx **ctx, +@@ -55,8 +59,7 @@ void message_parser_deinit(struct message_parser_ctx **ctx, + struct message_parser_ctx * + message_parser_init_from_parts(struct message_part *parts, + struct istream *input, +- enum message_header_parser_flags hdr_flags, +- enum message_parser_flags flags); ++ const struct message_parser_settings *set); + /* Same as message_parser_deinit(), but return an error message describing + why the preparsed parts didn't match the message. This can also safely be + called even when preparsed parts weren't used - it'll always just return +diff --git a/src/lib-mail/message-search.c b/src/lib-mail/message-search.c +index 66c043c158..14d1a11470 100644 +--- a/src/lib-mail/message-search.c ++++ b/src/lib-mail/message-search.c +@@ -196,8 +196,9 @@ message_search_msg_real(struct message_search_context *ctx, + struct istream *input, struct message_part *parts, + const char **error_r) + { +- const enum message_header_parser_flags hdr_parser_flags = +- MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE; ++ const struct message_parser_settings parser_set = { ++ .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE, ++ }; + struct message_parser_ctx *parser_ctx; + struct message_block raw_block; + struct message_part *new_parts; +@@ -207,10 +208,10 @@ message_search_msg_real(struct message_search_context *ctx, + + if (parts != NULL) { + parser_ctx = message_parser_init_from_parts(parts, +- input, hdr_parser_flags, 0); ++ input, &parser_set); + } else { + parser_ctx = message_parser_init(pool_datastack_create(), +- input, hdr_parser_flags, 0); ++ input, &parser_set); + } + + while ((ret = message_parser_parse_next_block(parser_ctx, +diff --git a/src/lib-mail/message-snippet.c b/src/lib-mail/message-snippet.c +index 2100b70554..e6965fd707 100644 +--- a/src/lib-mail/message-snippet.c ++++ b/src/lib-mail/message-snippet.c +@@ -137,6 +137,7 @@ int message_snippet_generate(struct istream *input, + unsigned int max_snippet_chars, + string_t *snippet) + { ++ const struct message_parser_settings parser_set = { .flags = 0 }; + struct message_parser_ctx *parser; + struct message_part *parts; + struct message_decoder_context *decoder; +@@ -151,7 +152,7 @@ int message_snippet_generate(struct istream *input, + ctx.snippet.chars_left = max_snippet_chars; + ctx.quoted_snippet.snippet = str_new(pool, max_snippet_chars); + ctx.quoted_snippet.chars_left = max_snippet_chars - 1; /* -1 for '>' */ +- parser = message_parser_init(pool_datastack_create(), input, 0, 0); ++ parser = message_parser_init(pool_datastack_create(), input, &parser_set); + decoder = message_decoder_init(NULL, 0); + while ((ret = message_parser_parse_next_block(parser, &raw_block)) > 0) { + if (!message_decoder_decode_next_block(decoder, &raw_block, &block)) +diff --git a/src/lib-mail/test-message-decoder.c b/src/lib-mail/test-message-decoder.c +index e1faca29b4..3007283cad 100644 +--- a/src/lib-mail/test-message-decoder.c ++++ b/src/lib-mail/test-message-decoder.c +@@ -105,6 +105,7 @@ static void test_message_decoder_multipart(void) + "\n" + "?garbage\n" + "--foo--\n"; ++ const struct message_parser_settings parser_set = { .flags = 0, }; + struct message_parser_ctx *parser; + struct message_decoder_context *decoder; + struct message_part *parts; +@@ -116,7 +117,8 @@ static void test_message_decoder_multipart(void) + test_begin("message decoder multipart"); + + istream = test_istream_create(test_message_input); +- parser = message_parser_init(pool_datastack_create(), istream, 0, 0); ++ parser = message_parser_init(pool_datastack_create(), istream, ++ &parser_set); + decoder = message_decoder_init(NULL, 0); + + test_istream_set_allow_eof(istream, FALSE); +diff --git a/src/lib-mail/test-message-parser.c b/src/lib-mail/test-message-parser.c +index 6bf1643e88..5e496275fe 100644 +--- a/src/lib-mail/test-message-parser.c ++++ b/src/lib-mail/test-message-parser.c +@@ -39,6 +39,8 @@ static const char test_msg[] = + "\n"; + #define TEST_MSG_LEN (sizeof(test_msg)-1) + ++static const struct message_parser_settings set_empty = { .flags = 0 }; ++ + static bool msg_parts_cmp(struct message_part *p1, struct message_part *p2) + { + while (p1 != NULL || p2 != NULL) { +@@ -71,6 +73,9 @@ static bool msg_parts_cmp(struct message_part *p1, struct message_part *p2) + + static void test_parsed_parts(struct istream *input, struct message_part *parts) + { ++ const struct message_parser_settings parser_set = { ++ .flags = MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK, ++ }; + struct message_parser_ctx *parser; + struct message_block block; + struct message_part *parts2; +@@ -81,8 +86,7 @@ static void test_parsed_parts(struct istream *input, struct message_part *parts) + if (i_stream_get_size(input, TRUE, &input_size) < 0) + i_unreached(); + +- parser = message_parser_init_from_parts(parts, input, 0, +- MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK); ++ parser = message_parser_init_from_parts(parts, input, &parser_set); + for (i = 1; i <= input_size*2+1; i++) { + test_istream_set_size(input, i/2); + if (i > TEST_MSG_LEN*2) +@@ -111,9 +115,11 @@ static void test_message_parser_small_blocks(void) + output = t_str_new(128); + + /* full parsing */ +- parser = message_parser_init(pool, input, 0, +- MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS | +- MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES); ++ const struct message_parser_settings full_parser_set = { ++ .flags = MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS | ++ MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES, ++ }; ++ parser = message_parser_init(pool, input, &full_parser_set); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) { + if (block.hdr != NULL) + message_header_line_write(output, block.hdr); +@@ -129,7 +135,7 @@ static void test_message_parser_small_blocks(void) + i_stream_seek(input, 0); + test_istream_set_allow_eof(input, FALSE); + +- parser = message_parser_init(pool, input, 0, 0); ++ parser = message_parser_init(pool, input, &set_empty); + for (i = 1; i <= TEST_MSG_LEN*2+1; i++) { + test_istream_set_size(input, i/2); + if (i > TEST_MSG_LEN*2) +@@ -147,8 +153,11 @@ static void test_message_parser_small_blocks(void) + test_istream_set_allow_eof(input, FALSE); + + end_of_headers_idx = (strstr(test_msg, "\n-----") - test_msg); +- parser = message_parser_init_from_parts(parts, input, 0, +- MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK); ++ const struct message_parser_settings preparsed_parser_set = { ++ .flags = MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK, ++ }; ++ parser = message_parser_init_from_parts(parts, input, ++ &preparsed_parser_set); + for (i = 1; i <= TEST_MSG_LEN*2+1; i++) { + test_istream_set_size(input, i/2); + if (i > TEST_MSG_LEN*2) +@@ -190,7 +199,7 @@ static const char input_msg[] = + pool = pool_alloconly_create("message parser", 10240); + input = test_istream_create(input_msg); + +- parser = message_parser_init(pool, input, 0, 0); ++ parser = message_parser_init(pool, input, &set_empty); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); +@@ -255,7 +264,7 @@ static const char input_msg[] = + pool = pool_alloconly_create("message parser", 10240); + input = test_istream_create(input_msg); + +- parser = message_parser_init(pool, input, 0, 0); ++ parser = message_parser_init(pool, input, &set_empty); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); +@@ -311,7 +320,7 @@ static const char input_msg[] = + pool = pool_alloconly_create("message parser", 10240); + input = test_istream_create(input_msg); + +- parser = message_parser_init(pool, input, 0, 0); ++ parser = message_parser_init(pool, input, &set_empty); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); +@@ -349,7 +358,7 @@ static const char input_msg[] = + pool = pool_alloconly_create("message parser", 10240); + input = test_istream_create(input_msg); + +- parser = message_parser_init(pool, input, 0, 0); ++ parser = message_parser_init(pool, input, &set_empty); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); +@@ -394,7 +403,7 @@ static const char input_msg[] = + pool = pool_alloconly_create("message parser", 10240); + input = test_istream_create(input_msg); + +- parser = message_parser_init(pool, input, 0, 0); ++ parser = message_parser_init(pool, input, &set_empty); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); +@@ -455,7 +464,7 @@ static const char input_msg[] = + pool = pool_alloconly_create("message parser", 10240); + input = test_istream_create(input_msg); + +- parser = message_parser_init(pool, input, 0, 0); ++ parser = message_parser_init(pool, input, &set_empty); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); +@@ -516,7 +525,7 @@ static const char input_msg[] = + pool = pool_alloconly_create("message parser", 10240); + input = test_istream_create(input_msg); + +- parser = message_parser_init(pool, input, 0, 0); ++ parser = message_parser_init(pool, input, &set_empty); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); +@@ -578,7 +587,7 @@ static const char input_msg[] = + pool = pool_alloconly_create("message parser", 10240); + input = test_istream_create(input_msg); + +- parser = message_parser_init(pool, input, 0, 0); ++ parser = message_parser_init(pool, input, &set_empty); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); +@@ -661,7 +670,7 @@ static const char input_msg[] = + pool = pool_alloconly_create("message parser", 10240); + input = test_istream_create(input_msg); + +- parser = message_parser_init(pool, input, 0, 0); ++ parser = message_parser_init(pool, input, &set_empty); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); +@@ -721,7 +730,7 @@ static void test_message_parser_no_eoh(void) + pool = pool_alloconly_create("message parser", 10240); + input = test_istream_create(input_msg); + +- parser = message_parser_init(pool, input, 0, 0); ++ parser = message_parser_init(pool, input, &set_empty); + test_assert(message_parser_parse_next_block(parser, &block) > 0 && + block.hdr != NULL && strcmp(block.hdr->name, "a") == 0 && + block.hdr->value_len == 1 && block.hdr->value[0] == 'b'); +@@ -777,7 +786,7 @@ static const char input_msg[] = + pool = pool_alloconly_create("message parser", 10240); + input = test_istream_create(input_msg); + +- parser = message_parser_init(pool, input, 0, 0); ++ parser = message_parser_init(pool, input, &set_empty); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); +diff --git a/src/lib-mail/test-message-part.c b/src/lib-mail/test-message-part.c +index 6cad8ffbd3..4a51a39f99 100644 +--- a/src/lib-mail/test-message-part.c ++++ b/src/lib-mail/test-message-part.c +@@ -65,6 +65,7 @@ static const char test_msg[] = + + static void test_message_part_idx(void) + { ++ const struct message_parser_settings set = { .flags = 0 }; + struct message_parser_ctx *parser; + struct istream *input; + struct message_part *parts, *part, *prev_part; +@@ -77,7 +78,7 @@ static void test_message_part_idx(void) + pool = pool_alloconly_create("message parser", 10240); + input = i_stream_create_from_data(test_msg, TEST_MSG_LEN); + +- parser = message_parser_init(pool, input, 0, 0); ++ parser = message_parser_init(pool, input, &set); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) { + part_idx = message_part_to_idx(block.part); + test_assert(part_idx >= prev_idx); +diff --git a/src/lib-storage/index/index-mail-headers.c b/src/lib-storage/index/index-mail-headers.c +index 54a5883177..df21b9129e 100644 +--- a/src/lib-storage/index/index-mail-headers.c ++++ b/src/lib-storage/index/index-mail-headers.c +@@ -16,11 +16,11 @@ + #include "index-storage.h" + #include "index-mail.h" + +-static const enum message_header_parser_flags hdr_parser_flags = +- MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP | +- MESSAGE_HEADER_PARSER_FLAG_DROP_CR; +-static const enum message_parser_flags msg_parser_flags = +- MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK; ++static const struct message_parser_settings msg_parser_set = { ++ .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP | ++ MESSAGE_HEADER_PARSER_FLAG_DROP_CR, ++ .flags = MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK, ++}; + + static int header_line_cmp(const struct index_mail_line *l1, + const struct index_mail_line *l2) +@@ -399,7 +399,7 @@ index_mail_cache_parse_init(struct mail *_mail, struct istream *input) + mail->data.parser_input = input; + mail->data.parser_ctx = + message_parser_init(mail->mail.data_pool, input, +- hdr_parser_flags, msg_parser_flags); ++ &msg_parser_set); + i_stream_unref(&input); + return input2; + } +@@ -428,14 +428,12 @@ static void index_mail_init_parser(struct index_mail *mail) + data->parser_input = data->stream; + data->parser_ctx = message_parser_init(mail->mail.data_pool, + data->stream, +- hdr_parser_flags, +- msg_parser_flags); ++ &msg_parser_set); + } else { + data->parser_ctx = + message_parser_init_from_parts(data->parts, + data->stream, +- hdr_parser_flags, +- msg_parser_flags); ++ &msg_parser_set); + } + } + +@@ -468,7 +466,7 @@ int index_mail_parse_headers(struct index_mail *mail, + i_assert(!data->save_bodystructure_body || + data->parser_ctx != NULL); + message_parse_header(data->stream, &data->hdr_size, +- hdr_parser_flags, ++ msg_parser_set.hdr_flags, + index_mail_parse_header_cb, mail); + } + if (index_mail_stream_check_failure(mail) < 0) +@@ -526,7 +524,7 @@ int index_mail_headers_get_envelope(struct index_mail *mail) + if (mail->data.envelope == NULL) { + /* we got the headers from cache - parse them to get the + envelope */ +- message_parse_header(stream, NULL, hdr_parser_flags, ++ message_parse_header(stream, NULL, msg_parser_set.hdr_flags, + imap_envelope_parse_callback, mail); + if (stream->stream_errno != 0) { + index_mail_stream_log_failure_for(mail, stream); +diff --git a/src/plugins/fts/fts-build-mail.c b/src/plugins/fts/fts-build-mail.c +index 3cb4ea657f..e088e7397b 100644 +--- a/src/plugins/fts/fts-build-mail.c ++++ b/src/plugins/fts/fts-build-mail.c +@@ -475,6 +475,9 @@ fts_build_mail_real(struct fts_backend_update_context *update_ctx, + const char **retriable_err_msg_r, + bool *may_need_retry_r) + { ++ const struct message_parser_settings parser_set = { ++ .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE, ++ }; + struct fts_mail_build_context ctx; + struct istream *input; + struct message_parser_ctx *parser; +@@ -503,9 +506,7 @@ fts_build_mail_real(struct fts_backend_update_context *update_ctx, + ctx.pending_input = buffer_create_dynamic(default_pool, 128); + + prev_part = NULL; +- parser = message_parser_init(pool_datastack_create(), input, +- MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE, +- 0); ++ parser = message_parser_init(pool_datastack_create(), input, &parser_set); + + decoder = message_decoder_init(update_ctx->normalizer, 0); + for (;;) { +-- +2.11.0 + +From e6fe2ef5d5a8620308d42a7ed1b35e907c645f72 Mon Sep 17 00:00:00 2001 +From: Timo Sirainen +Date: Thu, 23 Apr 2020 16:59:40 +0300 +Subject: [PATCH 14/15] lib-mail: message-parser - Support limiting max number + of nested MIME parts + +The default is to allow 100 nested MIME parts. When the limit is reached, +the innermost MIME part's body contains all the rest of the inner bodies +until a parent MIME part is reached. +--- + src/lib-mail/message-parser-private.h | 2 + + src/lib-mail/message-parser.c | 39 ++++++-- + src/lib-mail/message-parser.h | 6 ++ + src/lib-mail/test-message-parser.c | 163 ++++++++++++++++++++++++++++++++++ + 4 files changed, 203 insertions(+), 7 deletions(-) + +diff --git a/src/lib-mail/message-parser-private.h b/src/lib-mail/message-parser-private.h +index dbf8464cfb..4bb0c3dbfd 100644 +--- a/src/lib-mail/message-parser-private.h ++++ b/src/lib-mail/message-parser-private.h +@@ -23,9 +23,11 @@ struct message_parser_ctx { + struct istream *input; + struct message_part *parts, *part; + const char *broken_reason; ++ unsigned int nested_parts_count; + + enum message_header_parser_flags hdr_flags; + enum message_parser_flags flags; ++ unsigned int max_nested_mime_parts; + + char *last_boundary; + struct message_boundary *boundaries; +diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c +index 41b9ed133a..ea0154d5ed 100644 +--- a/src/lib-mail/message-parser.c ++++ b/src/lib-mail/message-parser.c +@@ -157,12 +157,17 @@ message_part_append(struct message_parser_ctx *ctx) + ctx->next_part = &part->children; + + ctx->part = part; ++ ctx->nested_parts_count++; ++ i_assert(ctx->nested_parts_count < ctx->max_nested_mime_parts); + } + + static void message_part_finish(struct message_parser_ctx *ctx) + { + struct message_part **const *parent_next_partp; + ++ i_assert(ctx->nested_parts_count > 0); ++ ctx->nested_parts_count--; ++ + parent_next_partp = array_back(&ctx->next_part_stack); + array_pop_back(&ctx->next_part_stack); + ctx->next_part = *parent_next_partp; +@@ -542,6 +547,11 @@ static bool block_is_at_eoh(const struct message_block *block) + return FALSE; + } + ++static bool parse_too_many_nested_mime_parts(struct message_parser_ctx *ctx) ++{ ++ return ctx->nested_parts_count+1 >= ctx->max_nested_mime_parts; ++} ++ + #define MUTEX_FLAGS \ + (MESSAGE_PART_FLAG_MESSAGE_RFC822 | MESSAGE_PART_FLAG_MULTIPART) + +@@ -566,8 +576,12 @@ static int parse_next_header(struct message_parser_ctx *ctx, + "\n--boundary" belongs to us or to a previous boundary. + this is a problem if the boundary prefixes are identical, + because MIME requires only the prefix to match. */ +- parse_next_body_multipart_init(ctx); +- ctx->multipart = TRUE; ++ if (!parse_too_many_nested_mime_parts(ctx)) { ++ parse_next_body_multipart_init(ctx); ++ ctx->multipart = TRUE; ++ } else { ++ part->flags &= ~MESSAGE_PART_FLAG_MULTIPART; ++ } + } + + /* before parsing the header see if we can find a --boundary from here. +@@ -671,12 +685,16 @@ static int parse_next_header(struct message_parser_ctx *ctx, + i_assert(ctx->last_boundary == NULL); + ctx->multipart = FALSE; + ctx->parse_next_block = parse_next_body_to_boundary; +- } else if ((part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) != 0) ++ } else if ((part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) != 0 && ++ !parse_too_many_nested_mime_parts(ctx)) { + ctx->parse_next_block = parse_next_body_message_rfc822_init; +- else if (ctx->boundaries != NULL) +- ctx->parse_next_block = parse_next_body_to_boundary; +- else +- ctx->parse_next_block = parse_next_body_to_eof; ++ } else { ++ part->flags &= ~MESSAGE_PART_FLAG_MESSAGE_RFC822; ++ if (ctx->boundaries != NULL) ++ ctx->parse_next_block = parse_next_body_to_boundary; ++ else ++ ctx->parse_next_block = parse_next_body_to_eof; ++ } + + ctx->want_count = 1; + +@@ -710,6 +728,9 @@ message_parser_init_int(struct istream *input, + ctx = i_new(struct message_parser_ctx, 1); + ctx->hdr_flags = set->hdr_flags; + ctx->flags = set->flags; ++ ctx->max_nested_mime_parts = set->max_nested_mime_parts != 0 ? ++ set->max_nested_mime_parts : ++ MESSAGE_PARSER_DEFAULT_MAX_NESTED_MIME_PARTS; + ctx->input = input; + i_stream_ref(input); + return ctx; +@@ -754,6 +775,10 @@ int message_parser_deinit_from_parts(struct message_parser_ctx **_ctx, + if (ctx->hdr_parser_ctx != NULL) + message_parse_header_deinit(&ctx->hdr_parser_ctx); + boundary_remove_until(ctx, NULL); ++ /* caller might have stopped the parsing early */ ++ i_assert(ctx->nested_parts_count == 0 || ++ i_stream_have_bytes_left(ctx->input)); ++ + i_stream_unref(&ctx->input); + array_free(&ctx->next_part_stack); + i_free(ctx->last_boundary); +diff --git a/src/lib-mail/message-parser.h b/src/lib-mail/message-parser.h +index d159b2607d..7f6ea04936 100644 +--- a/src/lib-mail/message-parser.h ++++ b/src/lib-mail/message-parser.h +@@ -17,9 +17,15 @@ enum message_parser_flags { + MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES = 0x08 + }; + ++#define MESSAGE_PARSER_DEFAULT_MAX_NESTED_MIME_PARTS 100 ++ + struct message_parser_settings { + enum message_header_parser_flags hdr_flags; + enum message_parser_flags flags; ++ ++ /* Maximum nested MIME parts. ++ 0 = MESSAGE_PARSER_DEFAULT_MAX_NESTED_MIME_PARTS. */ ++ unsigned int max_nested_mime_parts; + }; + + struct message_parser_ctx; +diff --git a/src/lib-mail/test-message-parser.c b/src/lib-mail/test-message-parser.c +index 5e496275fe..df2586eddd 100644 +--- a/src/lib-mail/test-message-parser.c ++++ b/src/lib-mail/test-message-parser.c +@@ -175,6 +175,36 @@ static void test_message_parser_small_blocks(void) + test_end(); + } + ++static void test_message_parser_stop_early(void) ++{ ++ struct message_parser_ctx *parser; ++ struct istream *input; ++ struct message_part *parts; ++ struct message_block block; ++ unsigned int i; ++ pool_t pool; ++ int ret; ++ ++ test_begin("message parser in stop early"); ++ pool = pool_alloconly_create("message parser", 10240); ++ input = test_istream_create(test_msg); ++ ++ test_istream_set_allow_eof(input, FALSE); ++ for (i = 1; i <= TEST_MSG_LEN+1; i++) { ++ i_stream_seek(input, 0); ++ test_istream_set_size(input, i); ++ parser = message_parser_init(pool, input, &set_empty); ++ while ((ret = message_parser_parse_next_block(parser, ++ &block)) > 0) ; ++ test_assert(ret == 0); ++ message_parser_deinit(&parser, &parts); ++ } ++ ++ i_stream_unref(&input); ++ pool_unref(&pool); ++ test_end(); ++} ++ + static void test_message_parser_truncated_mime_headers(void) + { + static const char input_msg[] = +@@ -839,10 +869,141 @@ static const char input_msg[] = + test_end(); + } + ++static void test_message_parser_mime_part_nested_limit(void) ++{ ++static const char input_msg[] = ++"Content-Type: multipart/mixed; boundary=\"1\"\n" ++"\n" ++"--1\n" ++"Content-Type: multipart/mixed; boundary=\"2\"\n" ++"\n" ++"--2\n" ++"Content-Type: text/plain\n" ++"\n" ++"1\n" ++"--2\n" ++"Content-Type: text/plain\n" ++"\n" ++"22\n" ++"--1\n" ++"Content-Type: text/plain\n" ++"\n" ++"333\n"; ++ const struct message_parser_settings parser_set = { ++ .max_nested_mime_parts = 2, ++ }; ++ struct message_parser_ctx *parser; ++ struct istream *input; ++ struct message_part *parts, *part; ++ struct message_block block; ++ pool_t pool; ++ int ret; ++ ++ test_begin("message parser mime part nested limit"); ++ pool = pool_alloconly_create("message parser", 10240); ++ input = test_istream_create(input_msg); ++ ++ parser = message_parser_init(pool, input, &parser_set); ++ while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; ++ test_assert(ret < 0); ++ message_parser_deinit(&parser, &parts); ++ ++ part = parts; ++ test_assert(part->children_count == 2); ++ test_assert(part->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); ++ test_assert(part->header_size.lines == 2); ++ test_assert(part->header_size.physical_size == 45); ++ test_assert(part->header_size.virtual_size == 45+2); ++ test_assert(part->body_size.lines == 15); ++ test_assert(part->body_size.physical_size == 148); ++ test_assert(part->body_size.virtual_size == 148+15); ++ ++ part = parts->children; ++ test_assert(part->children_count == 0); ++ test_assert(part->flags == MESSAGE_PART_FLAG_IS_MIME); ++ test_assert(part->header_size.lines == 2); ++ test_assert(part->header_size.physical_size == 45); ++ test_assert(part->header_size.virtual_size == 45+2); ++ test_assert(part->body_size.lines == 7); ++ test_assert(part->body_size.physical_size == 64); ++ test_assert(part->body_size.virtual_size == 64+7); ++ ++ part = parts->children->next; ++ test_assert(part->children_count == 0); ++ test_assert(part->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); ++ test_assert(part->header_size.lines == 2); ++ test_assert(part->header_size.physical_size == 26); ++ test_assert(part->header_size.virtual_size == 26+2); ++ test_assert(part->body_size.lines == 1); ++ test_assert(part->body_size.physical_size == 4); ++ test_assert(part->body_size.virtual_size == 4+1); ++ ++ test_parsed_parts(input, parts); ++ i_stream_unref(&input); ++ pool_unref(&pool); ++ test_end(); ++} ++ ++static void test_message_parser_mime_part_nested_limit_rfc822(void) ++{ ++static const char input_msg[] = ++"Content-Type: message/rfc822\n" ++"\n" ++"Content-Type: message/rfc822\n" ++"\n" ++"Content-Type: text/plain\n" ++"\n" ++"1\n"; ++ const struct message_parser_settings parser_set = { ++ .max_nested_mime_parts = 2, ++ }; ++ struct message_parser_ctx *parser; ++ struct istream *input; ++ struct message_part *parts, *part; ++ struct message_block block; ++ pool_t pool; ++ int ret; ++ ++ test_begin("message parser mime part nested limit rfc822"); ++ pool = pool_alloconly_create("message parser", 10240); ++ input = test_istream_create(input_msg); ++ ++ parser = message_parser_init(pool, input, &parser_set); ++ while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; ++ test_assert(ret < 0); ++ message_parser_deinit(&parser, &parts); ++ ++ part = parts; ++ test_assert(part->children_count == 1); ++ test_assert(part->flags == (MESSAGE_PART_FLAG_MESSAGE_RFC822 | MESSAGE_PART_FLAG_IS_MIME)); ++ test_assert(part->header_size.lines == 2); ++ test_assert(part->header_size.physical_size == 30); ++ test_assert(part->header_size.virtual_size == 30+2); ++ test_assert(part->body_size.lines == 5); ++ test_assert(part->body_size.physical_size == 58); ++ test_assert(part->body_size.virtual_size == 58+5); ++ ++ part = parts->children; ++ test_assert(part->children_count == 0); ++ test_assert(part->flags == MESSAGE_PART_FLAG_IS_MIME); ++ test_assert(part->header_size.lines == 2); ++ test_assert(part->header_size.physical_size == 30); ++ test_assert(part->header_size.virtual_size == 30+2); ++ test_assert(part->body_size.lines == 3); ++ test_assert(part->body_size.physical_size == 28); ++ test_assert(part->body_size.virtual_size == 28+3); ++ ++ test_parsed_parts(input, parts); ++ i_stream_unref(&input); ++ pool_unref(&pool); ++ test_end(); ++} ++ + int main(void) + { + static void (*const test_functions[])(void) = { + test_message_parser_small_blocks, ++ test_message_parser_stop_early, + test_message_parser_truncated_mime_headers, + test_message_parser_truncated_mime_headers2, + test_message_parser_truncated_mime_headers3, +@@ -854,6 +1015,8 @@ int main(void) + test_message_parser_continuing_mime_boundary_reverse, + test_message_parser_long_mime_boundary, + test_message_parser_no_eoh, ++ test_message_parser_mime_part_nested_limit, ++ test_message_parser_mime_part_nested_limit_rfc822, + NULL + }; + return test_run(test_functions); +-- +2.11.0 + +From 39e1ba38c2b3a5e3e567a35b7f6c414c7ed43769 Mon Sep 17 00:00:00 2001 +From: Timo Sirainen +Date: Thu, 23 Apr 2020 17:09:33 +0300 +Subject: [PATCH 15/15] lib-mail: message-parser - Support limiting max number + of MIME parts + +The default is to allow 10000 MIME parts. When it's reached, no more +MIME boundary lines will be recognized, so the rest of the mail belongs +to the last added MIME part. +--- + src/lib-mail/message-parser-private.h | 2 + + src/lib-mail/message-parser.c | 12 +++++ + src/lib-mail/message-parser.h | 4 ++ + src/lib-mail/test-message-parser.c | 86 +++++++++++++++++++++++++++++++++++ + 4 files changed, 104 insertions(+) + +diff --git a/src/lib-mail/message-parser-private.h b/src/lib-mail/message-parser-private.h +index 4bb0c3dbfd..1f9c66b827 100644 +--- a/src/lib-mail/message-parser-private.h ++++ b/src/lib-mail/message-parser-private.h +@@ -24,10 +24,12 @@ struct message_parser_ctx { + struct message_part *parts, *part; + const char *broken_reason; + unsigned int nested_parts_count; ++ unsigned int total_parts_count; + + enum message_header_parser_flags hdr_flags; + enum message_parser_flags flags; + unsigned int max_nested_mime_parts; ++ unsigned int max_total_mime_parts; + + char *last_boundary; + struct message_boundary *boundaries; +diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c +index ea0154d5ed..6370a1bff7 100644 +--- a/src/lib-mail/message-parser.c ++++ b/src/lib-mail/message-parser.c +@@ -158,7 +158,9 @@ message_part_append(struct message_parser_ctx *ctx) + + ctx->part = part; + ctx->nested_parts_count++; ++ ctx->total_parts_count++; + i_assert(ctx->nested_parts_count < ctx->max_nested_mime_parts); ++ i_assert(ctx->total_parts_count <= ctx->max_total_mime_parts); + } + + static void message_part_finish(struct message_parser_ctx *ctx) +@@ -241,6 +243,12 @@ boundary_line_find(struct message_parser_ctx *ctx, + return -1; + } + ++ if (ctx->total_parts_count >= ctx->max_total_mime_parts) { ++ /* can't add any more MIME parts. just stop trying to find ++ more boundaries. */ ++ return -1; ++ } ++ + /* need to find the end of line */ + data += 2; + size -= 2; +@@ -731,6 +739,9 @@ message_parser_init_int(struct istream *input, + ctx->max_nested_mime_parts = set->max_nested_mime_parts != 0 ? + set->max_nested_mime_parts : + MESSAGE_PARSER_DEFAULT_MAX_NESTED_MIME_PARTS; ++ ctx->max_total_mime_parts = set->max_total_mime_parts != 0 ? ++ set->max_total_mime_parts : ++ MESSAGE_PARSER_DEFAULT_MAX_TOTAL_MIME_PARTS; + ctx->input = input; + i_stream_ref(input); + return ctx; +@@ -747,6 +758,7 @@ message_parser_init(pool_t part_pool, struct istream *input, + ctx->parts = ctx->part = p_new(part_pool, struct message_part, 1); + ctx->next_part = &ctx->part->children; + ctx->parse_next_block = parse_next_header_init; ++ ctx->total_parts_count = 1; + i_array_init(&ctx->next_part_stack, 4); + return ctx; + } +diff --git a/src/lib-mail/message-parser.h b/src/lib-mail/message-parser.h +index 7f6ea04936..f19e526284 100644 +--- a/src/lib-mail/message-parser.h ++++ b/src/lib-mail/message-parser.h +@@ -18,6 +18,7 @@ enum message_parser_flags { + }; + + #define MESSAGE_PARSER_DEFAULT_MAX_NESTED_MIME_PARTS 100 ++#define MESSAGE_PARSER_DEFAULT_MAX_TOTAL_MIME_PARTS 10000 + + struct message_parser_settings { + enum message_header_parser_flags hdr_flags; +@@ -26,6 +27,9 @@ struct message_parser_settings { + /* Maximum nested MIME parts. + 0 = MESSAGE_PARSER_DEFAULT_MAX_NESTED_MIME_PARTS. */ + unsigned int max_nested_mime_parts; ++ /* Maximum MIME parts in total. ++ 0 = MESSAGE_PARSER_DEFAULT_MAX_TOTAL_MIME_PARTS. */ ++ unsigned int max_total_mime_parts; + }; + + struct message_parser_ctx; +diff --git a/src/lib-mail/test-message-parser.c b/src/lib-mail/test-message-parser.c +index df2586eddd..0422b42265 100644 +--- a/src/lib-mail/test-message-parser.c ++++ b/src/lib-mail/test-message-parser.c +@@ -999,6 +999,91 @@ static const char input_msg[] = + test_end(); + } + ++static void test_message_parser_mime_part_limit(void) ++{ ++static const char input_msg[] = ++"Content-Type: multipart/mixed; boundary=\"1\"\n" ++"\n" ++"--1\n" ++"Content-Type: multipart/mixed; boundary=\"2\"\n" ++"\n" ++"--2\n" ++"Content-Type: text/plain\n" ++"\n" ++"1\n" ++"--2\n" ++"Content-Type: text/plain\n" ++"\n" ++"22\n" ++"--1\n" ++"Content-Type: text/plain\n" ++"\n" ++"333\n"; ++ const struct message_parser_settings parser_set = { ++ .max_total_mime_parts = 4, ++ }; ++ struct message_parser_ctx *parser; ++ struct istream *input; ++ struct message_part *parts, *part; ++ struct message_block block; ++ pool_t pool; ++ int ret; ++ ++ test_begin("message parser mime part limit"); ++ pool = pool_alloconly_create("message parser", 10240); ++ input = test_istream_create(input_msg); ++ ++ parser = message_parser_init(pool, input, &parser_set); ++ while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; ++ test_assert(ret < 0); ++ message_parser_deinit(&parser, &parts); ++ ++ part = parts; ++ test_assert(part->children_count == 3); ++ test_assert(part->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); ++ test_assert(part->header_size.lines == 2); ++ test_assert(part->header_size.physical_size == 45); ++ test_assert(part->header_size.virtual_size == 45+2); ++ test_assert(part->body_size.lines == 15); ++ test_assert(part->body_size.physical_size == 148); ++ test_assert(part->body_size.virtual_size == 148+15); ++ ++ part = parts->children; ++ test_assert(part->children_count == 2); ++ test_assert(part->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); ++ test_assert(part->header_size.lines == 2); ++ test_assert(part->header_size.physical_size == 45); ++ test_assert(part->header_size.virtual_size == 45+2); ++ test_assert(part->body_size.lines == 12); ++ test_assert(part->body_size.physical_size == 99); ++ test_assert(part->body_size.virtual_size == 99+12); ++ ++ part = parts->children->children; ++ test_assert(part->children_count == 0); ++ test_assert(part->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); ++ test_assert(part->header_size.lines == 2); ++ test_assert(part->header_size.physical_size == 26); ++ test_assert(part->header_size.virtual_size == 26+2); ++ test_assert(part->body_size.lines == 0); ++ test_assert(part->body_size.physical_size == 1); ++ test_assert(part->body_size.virtual_size == 1); ++ ++ part = parts->children->children->next; ++ test_assert(part->children_count == 0); ++ test_assert(part->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); ++ test_assert(part->header_size.lines == 2); ++ test_assert(part->header_size.physical_size == 26); ++ test_assert(part->header_size.virtual_size == 26+2); ++ test_assert(part->body_size.lines == 5); ++ test_assert(part->body_size.physical_size == 37); ++ test_assert(part->body_size.virtual_size == 37+5); ++ ++ test_parsed_parts(input, parts); ++ i_stream_unref(&input); ++ pool_unref(&pool); ++ test_end(); ++} ++ + int main(void) + { + static void (*const test_functions[])(void) = { +@@ -1017,6 +1102,7 @@ int main(void) + test_message_parser_no_eoh, + test_message_parser_mime_part_nested_limit, + test_message_parser_mime_part_nested_limit_rfc822, ++ test_message_parser_mime_part_limit, + NULL + }; + return test_run(test_functions); +-- +2.11.0 + +From 6001e4b48c5a23735eb9c4ca9a187a175fd1a1da Mon Sep 17 00:00:00 2001 +From: Timo Sirainen +Date: Thu, 2 Jul 2020 17:31:19 +0300 +Subject: [PATCH] lib-mail: Fix handling trailing "--" in MIME boundaries + +Broken by 5b8ec27fae941d06516c30476dcf4820c6d200ab +--- + src/lib-mail/message-parser.c | 14 ++++++++---- + src/lib-mail/test-message-parser.c | 46 ++++++++++++++++++++++++++++++++++++++ + 2 files changed, 56 insertions(+), 4 deletions(-) + +diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c +index 6370a1bff7..011dea9050 100644 +--- a/src/lib-mail/message-parser.c ++++ b/src/lib-mail/message-parser.c +@@ -19,7 +19,7 @@ static int parse_next_body_to_eof(struct message_parser_ctx *ctx, + + static struct message_boundary * + boundary_find(struct message_boundary *boundaries, +- const unsigned char *data, size_t len) ++ const unsigned char *data, size_t len, bool trailing_dashes) + { + struct message_boundary *best = NULL; + +@@ -33,7 +33,11 @@ boundary_find(struct message_boundary *boundaries, + memcmp(boundaries->boundary, data, boundaries->len) == 0 && + (best == NULL || best->len < boundaries->len)) { + best = boundaries; +- if (best->len == len) { ++ /* If we see "foo--", it could either mean that there ++ is a boundary named "foo" that ends now or there's ++ a boundary "foo--" which continues. */ ++ if (best->len == len || ++ (best->len == len-2 && trailing_dashes)) { + /* This is exactly the wanted boundary. There + can't be a better one. */ + break; +@@ -261,6 +265,7 @@ boundary_line_find(struct message_parser_ctx *ctx, + return 0; + } + size_t find_size = size; ++ bool trailing_dashes = FALSE; + + if (lf_pos != NULL) { + find_size = lf_pos - data; +@@ -268,11 +273,12 @@ boundary_line_find(struct message_parser_ctx *ctx, + find_size--; + if (find_size > 2 && data[find_size-1] == '-' && + data[find_size-2] == '-') +- find_size -= 2; ++ trailing_dashes = TRUE; + } else if (find_size > BOUNDARY_END_MAX_LEN) + find_size = BOUNDARY_END_MAX_LEN; + +- *boundary_r = boundary_find(ctx->boundaries, data, find_size); ++ *boundary_r = boundary_find(ctx->boundaries, data, find_size, ++ trailing_dashes); + if (*boundary_r == NULL) + return -1; + +diff --git a/src/lib-mail/test-message-parser.c b/src/lib-mail/test-message-parser.c +index 1b782f5df2..641edec4be 100644 +--- a/src/lib-mail/test-message-parser.c ++++ b/src/lib-mail/test-message-parser.c +@@ -532,6 +532,51 @@ static const char input_msg[] = + test_end(); + } + ++static void test_message_parser_trailing_dashes(void) ++{ ++static const char input_msg[] = ++"Content-Type: multipart/mixed; boundary=\"a--\"\n" ++"\n" ++"--a--\n" ++"Content-Type: multipart/mixed; boundary=\"a----\"\n" ++"\n" ++"--a----\n" ++"Content-Type: text/plain\n" ++"\n" ++"body\n" ++"--a------\n" ++"Content-Type: text/html\n" ++"\n" ++"body2\n" ++"--a----"; ++ struct message_parser_ctx *parser; ++ struct istream *input; ++ struct message_part *parts; ++ struct message_block block; ++ pool_t pool; ++ int ret; ++ ++ test_begin("message parser trailing dashes"); ++ pool = pool_alloconly_create("message parser", 10240); ++ input = test_istream_create(input_msg); ++ ++ parser = message_parser_init(pool, input, &set_empty); ++ while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; ++ test_assert(ret < 0); ++ message_parser_deinit(&parser, &parts); ++ ++ test_assert(parts->children_count == 2); ++ test_assert(parts->children->next == NULL); ++ test_assert(parts->children->children_count == 1); ++ test_assert(parts->children->children->next == NULL); ++ test_assert(parts->children->children->children_count == 0); ++ ++ test_parsed_parts(input, parts); ++ i_stream_unref(&input); ++ pool_unref(&pool); ++ test_end(); ++} ++ + static void test_message_parser_continuing_mime_boundary(void) + { + static const char input_msg[] = +@@ -1095,6 +1140,7 @@ int main(void) + test_message_parser_empty_multipart, + test_message_parser_duplicate_mime_boundary, + test_message_parser_garbage_suffix_mime_boundary, ++ test_message_parser_trailing_dashes, + test_message_parser_continuing_mime_boundary, + test_message_parser_continuing_truncated_mime_boundary, + test_message_parser_continuing_mime_boundary_reverse, +-- +2.11.0 + diff --git a/CVE-2020-12100-2.patch b/CVE-2020-12100-2.patch new file mode 100644 index 0000000..4abca04 --- /dev/null +++ b/CVE-2020-12100-2.patch @@ -0,0 +1,70 @@ +From f691580f7450b41bb3de36d825fc46fa5071cabf Mon Sep 17 00:00:00 2001 +Date: Mon, 26 Oct 2020 18:52:57 +0800 +Subject: [PATCH] fix CVE-2020-12100 + +--- + .../src/lib-sieve/plugins/notify/ext-notify-common.c | 3 ++- + .../src/lib-sieve/sieve-message.c | 12 ++++++------ + 2 files changed, 8 insertions(+), 7 deletions(-) + +diff --git a/dovecot-2.3-pigeonhole-0.5.10/src/lib-sieve/plugins/notify/ext-notify-common.c b/dovecot-2.3-pigeonhole-0.5.10/src/lib-sieve/plugins/notify/ext-notify-common.c +index 700b79d..9d950f3 100644 +--- a/dovecot-2.3-pigeonhole-0.5.10/src/lib-sieve/plugins/notify/ext-notify-common.c ++++ b/dovecot-2.3-pigeonhole-0.5.10/src/lib-sieve/plugins/notify/ext-notify-common.c +@@ -156,6 +156,7 @@ cmd_notify_extract_body_text(const struct sieve_runtime_env *renv, + { + const struct sieve_execute_env *eenv = renv->exec_env; + const struct sieve_extension *this_ext = renv->oprtn->ext; ++ const struct message_parser_settings parser_set = { .flags = 0 }; + struct ext_notify_message_context *mctx; + struct mail *mail = eenv->msgdata->mail; + struct message_parser_ctx *parser; +@@ -189,7 +190,7 @@ cmd_notify_extract_body_text(const struct sieve_runtime_env *renv, + /* Initialize body decoder */ + decoder = message_decoder_init(NULL, 0); + +- parser = message_parser_init(mctx->pool, input, 0, 0); ++ parser = message_parser_init(mctx->pool, input, &parser_set); + is_text = TRUE; + save_body = FALSE; + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) { +diff --git a/dovecot-2.3-pigeonhole-0.5.10/src/lib-sieve/sieve-message.c b/dovecot-2.3-pigeonhole-0.5.10/src/lib-sieve/sieve-message.c +index afbf31f..c791758 100644 +--- a/dovecot-2.3-pigeonhole-0.5.10/src/lib-sieve/sieve-message.c ++++ b/dovecot-2.3-pigeonhole-0.5.10/src/lib-sieve/sieve-message.c +@@ -1081,10 +1081,10 @@ static int sieve_message_parts_add_missing + struct sieve_message_context *msgctx = renv->msgctx; + pool_t pool = msgctx->context_pool; + struct mail *mail = sieve_message_get_mail(renv->msgctx); +- enum message_parser_flags mparser_flags = +- MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS; +- enum message_header_parser_flags hparser_flags = +- MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP; ++ struct message_parser_settings parser_set = { ++ .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP, ++ .flags = MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS, ++ }; + ARRAY(struct sieve_message_header) headers; + struct sieve_message_part *body_part, *header_part, *last_part; + struct message_parser_ctx *parser; +@@ -1121,7 +1121,7 @@ static int sieve_message_parts_add_missing + if (iter_all) { + t_array_init(&headers, 64); + hdr_content = t_str_new(512); +- hparser_flags |= MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE; ++ parser_set.hdr_flags |= MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE; + } else { + i_zero(&headers); + } +@@ -1133,7 +1133,7 @@ static int sieve_message_parts_add_missing + //parser = message_parser_init_from_parts(parts, input, + // hparser_flags, mparser_flags); + parser = message_parser_init(pool_datastack_create(), +- input, hparser_flags, mparser_flags); ++ input, &parser_set); + while ( (ret=message_parser_parse_next_block + (parser, &block)) > 0 ) { + struct sieve_message_part **body_part_idx; +-- +2.23.0 + diff --git a/CVE-2020-12673.patch b/CVE-2020-12673.patch new file mode 100644 index 0000000..9dd26e0 --- /dev/null +++ b/CVE-2020-12673.patch @@ -0,0 +1,31 @@ +From fb246611e62ad8c5a95b0ca180a63f17aa34b0d8 Mon Sep 17 00:00:00 2001 +From: Aki Tuomi +Date: Mon, 18 May 2020 12:33:39 +0300 +Subject: [PATCH] lib-ntlm: Check buffer length on responses + +Add missing check for buffer length. + +If this is not checked, it is possible to send message which +causes read past buffer bug. + +Broken in c7480644202e5451fbed448508ea29a25cffc99c +--- + src/lib-ntlm/ntlm-message.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/src/lib-ntlm/ntlm-message.c b/src/lib-ntlm/ntlm-message.c +index 160b9f918c..a29413b47e 100644 +--- a/src/lib-ntlm/ntlm-message.c ++++ b/src/lib-ntlm/ntlm-message.c +@@ -184,6 +184,11 @@ static bool ntlmssp_check_buffer(const struct ntlmssp_buffer *buffer, + if (length == 0 && space == 0) + return TRUE; + ++ if (length > data_size) { ++ *error = "buffer length out of bounds"; ++ return FALSE; ++ } ++ + if (offset >= data_size) { + *error = "buffer offset out of bounds"; + return FALSE; diff --git a/CVE-2020-12674.patch b/CVE-2020-12674.patch new file mode 100644 index 0000000..a9dca2a --- /dev/null +++ b/CVE-2020-12674.patch @@ -0,0 +1,22 @@ +From 69ad3c902ea4bbf9f21ab1857d8923f975dc6145 Mon Sep 17 00:00:00 2001 +From: Aki Tuomi +Date: Wed, 6 May 2020 13:40:36 +0300 +Subject: [PATCH] auth: mech-rpa - Fail on zero len buffer + +--- + src/auth/mech-rpa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/auth/mech-rpa.c b/src/auth/mech-rpa.c +index 08298ebdd6..2de8705b4f 100644 +--- a/src/auth/mech-rpa.c ++++ b/src/auth/mech-rpa.c +@@ -224,7 +224,7 @@ rpa_read_buffer(pool_t pool, const unsigned char **data, + return 0; + + len = *p++; +- if (p + len > end) ++ if (p + len > end || len == 0) + return 0; + + *buffer = p_malloc(pool, len); diff --git a/dovecot.spec b/dovecot.spec index 3320c4a..125e3b8 100644 --- a/dovecot.spec +++ b/dovecot.spec @@ -6,7 +6,7 @@ Name: dovecot Version: 2.3.10.1 -Release: 1 +Release: 2 Summary: Dovecot Secure imap server License: MIT and LGPLv2 URL: http://www.dovecot.org/ @@ -30,6 +30,10 @@ Patch6005: dovecot-2.1.10-waitonline.patch Patch6006: dovecot-2.2.20-initbysystemd.patch Patch6007: dovecot-2.2.22-systemd_w_protectsystem.patch +Patch6008: CVE-2020-12673.patch +Patch6009: CVE-2020-12674.patch +Patch6010: CVE-2020-12100-1.patch +Patch6011: CVE-2020-12100-2.patch BuildRequires: gcc-c++ openssl-devel pam-devel zlib-devel bzip2-devel libcap-devel BuildRequires: libtool autoconf automake pkgconfig sqlite-devel libpq-devel @@ -280,6 +284,9 @@ make check %changelog +* Tue Oct 27 2020 wangyue - 2.3.10.1-2 +- Fix CVE-2020-12673 CVE-2020-12674 CVE-2020-12100 + * Wed Aug 5 2020 wangyue - 2.3.10.1-1 - Upgrade to 2.3.10.1 to fix CVE-2020-10967, CVE-2020-10958, CVE-2020-10957 -- Gitee