diff --git a/config/galaxy.ini.sample b/config/galaxy.ini.sample index 8b956a924bf1..3dc96a3476fa 100644 --- a/config/galaxy.ini.sample +++ b/config/galaxy.ini.sample @@ -974,6 +974,12 @@ use_interactive = True # option to retry externally, or set metadata manually (when possible). #retry_metadata_internally = True +# Very large metadata values can cause Galaxy crashes. This option limits the +# maximum size of any single metadata value (in bytes used in memory, not the +# resulting database value size) that Galaxy will attempt to save with a dataset. +# Set to 0 to disable this feature. 5000000 seems to be a reasonable size. +#max_metadata_value_size = 0 + # If (for example) you run on a cluster and your datasets (by default, # database/files/) are mounted read-only, this option will override tool output # paths to write outputs to the working directory instead, and the job manager diff --git a/lib/galaxy/config.py b/lib/galaxy/config.py index e1b147cf13c0..52c65ea97417 100644 --- a/lib/galaxy/config.py +++ b/lib/galaxy/config.py @@ -146,6 +146,7 @@ def __init__( self, **kwargs ): self.tool_secret = kwargs.get( "tool_secret", "" ) self.id_secret = kwargs.get( "id_secret", "USING THE DEFAULT IS NOT SECURE!" 
def total_size(o, handlers=None, verbose=False):
    """Return the approximate memory footprint of an object and all of its contents.

    Automatically finds the contents of the following builtin containers and
    their subclasses: tuple, list, deque, dict, set and frozenset.
    To search other containers, add handlers to iterate over their contents:

        handlers = {SomeContainerClass: iter,
                    OtherContainerClass: OtherContainerClass.get_elements}

    :param o: object whose (deep) size should be estimated.
    :param handlers: optional mapping of container type -> callable returning
        an iterable of the container's contents. Entries given here take
        precedence over the builtin handlers for the same type.
    :param verbose: accepted for compatibility with the original recipe; it is
        not used by this implementation.
    :returns: the summed ``sys.getsizeof`` of ``o`` and of every object
        reachable through the known containers, each object counted once.

    Recipe from: https://code.activestate.com/recipes/577504-compute-memory-footprint-of-an-object-and-its-cont/
    """
    def dict_handler(d):
        # Flatten (key, value) pairs so keys and values are both measured.
        return chain.from_iterable(d.items())

    all_handlers = { tuple: iter,
                     list: iter,
                     deque: iter,
                     dict: dict_handler,
                     set: iter,
                     frozenset: iter }
    if handlers:
        all_handlers.update(handlers)  # user handlers take precedence
    seen = set()                       # ids of objects already counted
    default_size = getsizeof(0)        # estimate for objects without __sizeof__

    # Walk the object graph with an explicit stack rather than recursion so
    # that deeply nested values cannot exhaust the interpreter recursion limit.
    total = 0
    pending = [o]
    while pending:
        current = pending.pop()
        if id(current) in seen:        # do not double count the same object
            continue
        seen.add(id(current))
        total += getsizeof(current, default_size)

        for typ, handler in all_handlers.items():
            if isinstance(current, typ):
                pending.extend(handler(current))
                break
    return total


class MetadataType( JSONType ):
    """
    Backward compatible metadata type. Can read pickles or JSON, but always
    writes in JSON.
    """

    def process_bind_param(self, value, dialect):
        """Encode ``value`` with ``json_encoder`` before it is bound.

        When ``app.app`` is set and its ``max_metadata_value_size`` config
        value is non-zero, every key whose value has a ``total_size`` larger
        than that limit is removed from ``value`` (in place) and an error is
        logged; the remaining entries are then encoded.

        :param value: mapping of metadata key -> value, or None.
        :param dialect: dialect argument supplied by the caller; unused here.
        :returns: the encoded string, or None when ``value`` is None.
        """
        if value is not None:
            if app.app and app.app.config.max_metadata_value_size:
                max_size = app.app.config.max_metadata_value_size
                # Iterate over a snapshot: deleting from a dict while
                # iterating its live items() view raises RuntimeError.
                for k, v in list(value.items()):
                    sz = total_size(v)
                    if sz > max_size:
                        del value[k]
                        log.error('Refusing to bind metadata key %s due to size (%s)', k, sz)
            value = json_encoder.encode(value)
        return value

    def process_result_value( self, value, dialect ):
        if value is None:
            return None