diff --git a/lib/galaxy/datatypes/metadata.py b/lib/galaxy/datatypes/metadata.py index e30ac336c66b..85e88bcace9f 100644 --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -122,6 +122,12 @@ def __setattr__( self, name, value ): else: self.parent._metadata[name] = value + def remove_key( self, name ): + if name in self.parent._metadata: + del self.parent._metadata[name] + else: + log.info( "Attempted to delete invalid key '%s' from MetadataCollection" % name ) + def element_is_set( self, name ): return bool( self.parent._metadata.get( name, False ) ) @@ -720,7 +726,8 @@ def setup_external_metadata( self, datasets, sa_session, exec_dir=None, output_fnames=None, config_root=None, config_file=None, datatypes_config=None, job_metadata=None, compute_tmp_dir=None, - include_command=True, kwds=None ): + include_command=True, max_metadata_value_size=0, + kwds=None): kwds = kwds or {} if tmp_dir is None: tmp_dir = MetadataTempFile.tmp_dir @@ -819,9 +826,10 @@ def __get_filename_override(): sa_session.add( metadata_files ) sa_session.flush() metadata_files_list.append( metadata_files ) - args = "%s %s %s" % ( datatypes_config, + args = "%s %s %s %s" % ( datatypes_config, job_metadata, - " ".join( map( __metadata_files_list_to_cmd_line, metadata_files_list ) ) ) + " ".join( map( __metadata_files_list_to_cmd_line, metadata_files_list ) ), + max_metadata_value_size) if include_command: # return command required to build fd, fp = tempfile.mkstemp( suffix='.py', dir=tmp_dir, prefix="set_metadata_" ) diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py index a636e797c0ee..edd816a89cd1 100644 --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -1597,6 +1597,7 @@ def setup_external_metadata( self, exec_dir=None, tmp_dir=None, config_file=config_file, datatypes_config=datatypes_config, job_metadata=os.path.join( self.working_directory, TOOL_PROVIDED_JOB_METADATA_FILE ), + max_metadata_value_size=self.app.config.max_metadata_value_size, **kwds ) @property diff --git a/lib/galaxy/tools/actions/metadata.py b/lib/galaxy/tools/actions/metadata.py index 9bda45dbe95d..d51f9e2caca4 100644 --- a/lib/galaxy/tools/actions/metadata.py +++ b/lib/galaxy/tools/actions/metadata.py @@ -83,6 +83,7 @@ def execute_via_app( self, tool, app, session_id, history_id, user=None, datatypes_config=app.datatypes_registry.integrated_datatypes_configs, job_metadata=None, include_command=False, + max_metadata_value_size=app.config.max_metadata_value_size, kwds={ 'overwrite' : overwrite } ) incoming[ '__SET_EXTERNAL_METADATA_COMMAND_LINE__' ] = cmd_line for name, value in tool.params_to_strings( incoming, app ).iteritems(): diff --git a/lib/galaxy_ext/metadata/set_metadata.py b/lib/galaxy_ext/metadata/set_metadata.py index df85133d735b..8e1cf631eb74 100644 --- a/lib/galaxy_ext/metadata/set_metadata.py +++ b/lib/galaxy_ext/metadata/set_metadata.py @@ -3,7 +3,7 @@ This was formerly scripts/set_metadata.py and expects these arguments: - %prog datatypes_conf.xml job_metadata_file metadata_in,metadata_kwds,metadata_out,metadata_results_code,output_filename_override,metadata_override... + %prog datatypes_conf.xml job_metadata_file metadata_in,metadata_kwds,metadata_out,metadata_results_code,output_filename_override,metadata_override... max_metadata_value_size Galaxy should be importable on sys.path and output_filename_override should be set to the path of the dataset on which metadata is being set @@ -30,6 +30,8 @@ from galaxy import eggs import pkg_resources import galaxy.model.mapping # need to load this before we unpickle, in order to setup properties assigned by the mappers +from galaxy.model.custom_types import total_size + galaxy.model.Job() # this looks REAL stupid, but it is REQUIRED in order for SA to insert parameters into the classes defined by the mappers --> it appears that instantiating ANY mapper'ed class would suffice here from galaxy.util import stringify_dictionary_keys from sqlalchemy.orm import clear_mappers @@ -68,6 +70,16 @@ def set_metadata(): galaxy_root = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir)) galaxy.datatypes.metadata.MetadataTempFile.tmp_dir = tool_job_working_directory = os.path.abspath(os.getcwd()) + # This is ugly, but to transition from existing jobs without this parameter + # to ones with, smoothly, it has to be the last optional parameter and we + # have to sniff it. + try: + max_metadata_value_size = int(sys.argv[-1]) + sys.argv = sys.argv[:-1] + except ValueError: + max_metadata_value_size = 0 + # max_metadata_value_size is unspecified and should be 0 + # Set up datatypes registry datatypes_config = sys.argv.pop( 1 ) datatypes_registry = galaxy.datatypes.registry.Registry() @@ -119,6 +131,11 @@ def set_metadata(): setattr( dataset.metadata, metadata_name, metadata_file_override ) file_dict = existing_job_metadata_dict.get( dataset.dataset.id, {} ) set_meta_with_tool_provided( dataset, file_dict, set_meta_kwds, datatypes_registry ) + if max_metadata_value_size: + for k, v in dataset.metadata.items(): + if total_size(v) > max_metadata_value_size: + log.info("Key %s too large for metadata, discarding" % k) + dataset.metadata.remove_key(k) dataset.metadata.to_JSON_dict( filename_out ) # write out results of set_meta json.dump( ( True, 'Metadata has been set successfully' ), open( filename_results_code, 'wb+' ) ) # setting metadata has succeeded except Exception, e: