From ada202aa077a3d52f795abd27c4d9f7011532164 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Mon, 10 Jul 2017 14:25:05 -0400 Subject: [PATCH] Provide a more accurate size check for max_document_size limit max_document_size currently checks document sizes based on Erlang's external term size of the jiffy-decoded document body. This makes sense because that's what is used to store the data on disk and it's what is manipulated by the CouchDB internals. However, Erlang's external term size is not always a good approximation of the size of JSON-encoded data. Sometimes it can be way off (I've seen 30% off), and it's hard for users to estimate or check the external term size beforehand. So, for example, if max_document_size is 1MB, CouchDB might reject a user's 600KB JSON document because Erlang's external term size of that document is greater than 1MB. Re-encode the data using jiffy and check the size against that. That's a better check but will impact performance. Also, this is not an exact solution. Users' JSON encoders might insert more whitespace (say, as indentation, or whitespace after commas) or use a different algorithm for encoding floating point numbers (scientific notation, or representing exact floating point numbers without a decimal point, e.g. 5 instead of 5.0). So the size would still be off. 
Issue #659 --- src/chttpd/test/chttpd_db_doc_size_tests.erl | 2 +- src/couch/src/couch_doc.erl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/chttpd/test/chttpd_db_doc_size_tests.erl b/src/chttpd/test/chttpd_db_doc_size_tests.erl index c4706af4ad..f03de3155e 100644 --- a/src/chttpd/test/chttpd_db_doc_size_tests.erl +++ b/src/chttpd/test/chttpd_db_doc_size_tests.erl @@ -98,7 +98,7 @@ put_single_doc(Url) -> bulk_doc(Url) -> NewDoc = "{\"docs\": [{\"doc1\": 1}, {\"errordoc\": - \"this_should_be_the_error_document\"}]}", + \"this_should_be_the_too_large_error_document\"}]}", {ok, _, _, ResultBody} = test_request:post(Url ++ "/_bulk_docs/", [?CONTENT_JSON, ?AUTH], NewDoc), ResultJson = ?JSON_DECODE(ResultBody), diff --git a/src/couch/src/couch_doc.erl b/src/couch/src/couch_doc.erl index 381ad4b4f5..58da6bc7bf 100644 --- a/src/couch/src/couch_doc.erl +++ b/src/couch/src/couch_doc.erl @@ -127,7 +127,7 @@ doc_to_json_obj(#doc{id=Id,deleted=Del,body=Body,revs={Start, RevIds}, from_json_obj_validate(EJson) -> MaxSize = config:get_integer("couchdb", "max_document_size", 4294967296), Doc = from_json_obj(EJson), - case erlang:external_size(Doc#doc.body) =< MaxSize of + case byte_size(jiffy:encode(Doc#doc.body)) =< MaxSize of true -> Doc; false ->