OSError: Unable to synchronously create attribute (object header message is too large) #118

Open
zhouzhendiao opened this issue Aug 4, 2023 · 1 comment

Describe the bug

mdata.write('mdata.h5mu')
---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
File /opt/conda/lib/python3.10/site-packages/anndata/_io/utils.py:246, in report_write_key_on_error.<locals>.func_wrapper(*args, **kwargs)
    245 try:
--> 246     return func(*args, **kwargs)
    247 except Exception as e:

File /opt/conda/lib/python3.10/site-packages/anndata/_io/specs/registry.py:311, in Writer.write_elem(self, store, k, elem, dataset_kwargs, modifiers)
    310 else:
--> 311     return write_func(store, k, elem, dataset_kwargs=dataset_kwargs)

File /opt/conda/lib/python3.10/site-packages/anndata/_io/specs/registry.py:52, in write_spec.<locals>.decorator.<locals>.wrapper(g, k, *args, **kwargs)
     50 @wraps(func)
     51 def wrapper(g, k, *args, **kwargs):
---> 52     result = func(g, k, *args, **kwargs)
     53     g[k].attrs.setdefault("encoding-type", spec.encoding_type)

File /opt/conda/lib/python3.10/site-packages/anndata/_io/specs/methods.py:563, in write_dataframe(f, key, df, _writer, dataset_kwargs)
    562 col_names = [check_key(c) for c in df.columns]
--> 563 group.attrs["column-order"] = col_names
    565 if df.index.name is not None:

File h5py/_objects.pyx:54, in h5py._objects.with_phil.wrapper()

File h5py/_objects.pyx:55, in h5py._objects.with_phil.wrapper()

File /opt/conda/lib/python3.10/site-packages/h5py/_hl/attrs.py:104, in AttributeManager.__setitem__(self, name, value)
     98 """ Set a new attribute, overwriting any existing attribute.
     99 
    100 The type and shape of the attribute are determined from the data.  To
    101 use a specific type or shape, or to preserve the type of an attribute,
    102 use the methods create() and modify().
    103 """
--> 104 self.create(name, data=value)

File /opt/conda/lib/python3.10/site-packages/h5py/_hl/attrs.py:206, in AttributeManager.create(self, name, data, shape, dtype)
    204     tempname = name
--> 206 attr = h5a.create(self._id, self._e(tempname), htype, space)
    207 try:

File h5py/_objects.pyx:54, in h5py._objects.with_phil.wrapper()

File h5py/_objects.pyx:55, in h5py._objects.with_phil.wrapper()

File h5py/h5a.pyx:50, in h5py.h5a.create()

OSError: Unable to synchronously create attribute (object header message is too large)

The above exception was the direct cause of the following exception:

OSError                                   Traceback (most recent call last)
Cell In[293], line 4
----> 4 mdata.write('./01.data/02.GEX_TCR_T_bbknn.h5mu')

File /opt/conda/lib/python3.10/site-packages/mudata/_core/mudata.py:1224, in MuData.write_h5mu(self, filename, **kwargs)
   1222     raise ValueError("Provide a filename!")
   1223 else:
-> 1224     write_h5mu(filename, self, **kwargs)
   1225     if self.isbacked:
   1226         self.file.filename = filename

File /opt/conda/lib/python3.10/site-packages/mudata/_core/io.py:207, in write_h5mu(filename, mdata, **kwargs)
    204 from .. import __version__, __mudataversion__, __anndataversion__
    206 with h5py.File(filename, "w", userblock_size=512) as f:
--> 207     _write_h5mu(f, mdata, **kwargs)
    208 with open(filename, "br+") as f:
    209     nbytes = f.write(
    210         f"MuData (format-version={__mudataversion__};creator=muon;creator-version={__version__})".encode(
    211             "utf-8"
    212         )
    213     )

File /opt/conda/lib/python3.10/site-packages/mudata/_core/io.py:80, in _write_h5mu(file, mdata, write_data, **kwargs)
     78 write_elem(group, "varp", dict(adata.varp), dataset_kwargs=kwargs)
     79 write_elem(group, "layers", dict(adata.layers), dataset_kwargs=kwargs)
---> 80 write_elem(group, "uns", dict(adata.uns), dataset_kwargs=kwargs)
     82 attrs = group.attrs
     83 attrs["encoding-type"] = "anndata"

File /opt/conda/lib/python3.10/site-packages/anndata/_io/specs/registry.py:353, in write_elem(store, k, elem, dataset_kwargs)
    329 def write_elem(
    330     store: GroupStorageType,
    331     k: str,
   (...)
    334     dataset_kwargs: Mapping = MappingProxyType({}),
    335 ) -> None:
    336     """
    337     Write an element to a storage group using anndata encoding.
    338 
   (...)
    351         E.g. for zarr this would be `chunks`, `compressor`.
    352     """
--> 353     Writer(_REGISTRY).write_elem(store, k, elem, dataset_kwargs=dataset_kwargs)

File /opt/conda/lib/python3.10/site-packages/anndata/_io/utils.py:248, in report_write_key_on_error.<locals>.func_wrapper(*args, **kwargs)
    246     return func(*args, **kwargs)
    247 except Exception as e:
--> 248     re_raise_error(e, elem, key)

File /opt/conda/lib/python3.10/site-packages/anndata/_io/utils.py:246, in report_write_key_on_error.<locals>.func_wrapper(*args, **kwargs)
    244         break
    245 try:
--> 246     return func(*args, **kwargs)
    247 except Exception as e:
    248     re_raise_error(e, elem, key)

File /opt/conda/lib/python3.10/site-packages/anndata/_io/specs/registry.py:311, in Writer.write_elem(self, store, k, elem, dataset_kwargs, modifiers)
    302     return self.callback(
    303         write_func,
    304         store,
   (...)
    308         iospec=self.registry.get_spec(elem),
    309     )
    310 else:
--> 311     return write_func(store, k, elem, dataset_kwargs=dataset_kwargs)

File /opt/conda/lib/python3.10/site-packages/anndata/_io/specs/registry.py:52, in write_spec.<locals>.decorator.<locals>.wrapper(g, k, *args, **kwargs)
     50 @wraps(func)
     51 def wrapper(g, k, *args, **kwargs):
---> 52     result = func(g, k, *args, **kwargs)
     53     g[k].attrs.setdefault("encoding-type", spec.encoding_type)
     54     g[k].attrs.setdefault("encoding-version", spec.encoding_version)

File /opt/conda/lib/python3.10/site-packages/anndata/_io/specs/methods.py:281, in write_mapping(f, k, v, _writer, dataset_kwargs)
    279 g = f.create_group(k)
    280 for sub_k, sub_v in v.items():
--> 281     _writer.write_elem(g, sub_k, sub_v, dataset_kwargs=dataset_kwargs)

File /opt/conda/lib/python3.10/site-packages/anndata/_io/utils.py:248, in report_write_key_on_error.<locals>.func_wrapper(*args, **kwargs)
    246     return func(*args, **kwargs)
    247 except Exception as e:
--> 248     re_raise_error(e, elem, key)

File /opt/conda/lib/python3.10/site-packages/anndata/_io/utils.py:246, in report_write_key_on_error.<locals>.func_wrapper(*args, **kwargs)
    244         break
    245 try:
--> 246     return func(*args, **kwargs)
    247 except Exception as e:
    248     re_raise_error(e, elem, key)

File /opt/conda/lib/python3.10/site-packages/anndata/_io/specs/registry.py:311, in Writer.write_elem(self, store, k, elem, dataset_kwargs, modifiers)
    302     return self.callback(
    303         write_func,
    304         store,
   (...)
    308         iospec=self.registry.get_spec(elem),
    309     )
    310 else:
--> 311     return write_func(store, k, elem, dataset_kwargs=dataset_kwargs)

File /opt/conda/lib/python3.10/site-packages/anndata/_io/specs/registry.py:52, in write_spec.<locals>.decorator.<locals>.wrapper(g, k, *args, **kwargs)
     50 @wraps(func)
     51 def wrapper(g, k, *args, **kwargs):
---> 52     result = func(g, k, *args, **kwargs)
     53     g[k].attrs.setdefault("encoding-type", spec.encoding_type)
     54     g[k].attrs.setdefault("encoding-version", spec.encoding_version)

File /opt/conda/lib/python3.10/site-packages/anndata/_io/specs/methods.py:281, in write_mapping(f, k, v, _writer, dataset_kwargs)
    279 g = f.create_group(k)
    280 for sub_k, sub_v in v.items():
--> 281     _writer.write_elem(g, sub_k, sub_v, dataset_kwargs=dataset_kwargs)

File /opt/conda/lib/python3.10/site-packages/anndata/_io/utils.py:248, in report_write_key_on_error.<locals>.func_wrapper(*args, **kwargs)
    246     return func(*args, **kwargs)
    247 except Exception as e:
--> 248     re_raise_error(e, elem, key)

File /opt/conda/lib/python3.10/site-packages/anndata/_io/utils.py:229, in report_write_key_on_error.<locals>.re_raise_error(e, elem, key)
    227 else:
    228     parent = _get_parent(elem)
--> 229     raise type(e)(
    230         f"{e}\n\n"
    231         f"Above error raised while writing key {key!r} of {type(elem)} "
    232         f"to {parent}"
    233     ) from e

OSError: Unable to synchronously create attribute (object header message is too large)

Above error raised while writing key 'weighted' of <class 'h5py._hl.group.Group'> to /

mdata

MuData object with n_obs × n_vars = 65648 × 36601
  2 modalities
    gex:	51145 x 36601
      obs:	'sample_id', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'leiden_res0.1', 'leiden_res0.25', 'leiden_res0.5', 'Immune_All_High_predicted_labels', 'Immune_All_High_majority_voting', 'Immune_All_Low_predicted_labels', 'Immune_All_Low_majority_voting', 'hpca', 'copykat.pred', 'scDblFinder.score', 'scDblFinder.class', 'S_score', 'G2M_score', 'phase', 'low_qc_cluster', 'leiden_res1', 'sampling_point', 'low_qc_cells', 'celltype_main_v1', 'celltype_main_v2', 'receptor_type', 'receptor_subtype', 'chain_pairing', 'leiden_res1.5', 'leiden_res2', 'cell_type_fine_20230502', 'celltype_fine_v1', 'celltype_fine_v2', 'predicted_labels', 'over_clustering', 'majority_voting', 'conf_score', 'leiden_res3', 'patient_id'
      var:	'highly_variable', 'means', 'dispersions', 'dispersions_norm'
      uns:	'Immune_All_High_predicted_labels_colors', 'Immune_All_Low_majority_voting_colors', 'Immune_All_Low_predicted_labels_colors', 'airr:chain_pairing_colors', 'airr:receptor_subtype_colors', 'bcr:chain_pairing_colors', 'bcr:receptor_subtype_colors', 'cell_type_fine_20230502_colors', 'celltype_fine_v1_colors', 'celltype_fine_v2_colors', 'celltype_main_v1_colors', 'celltype_main_v2_colors', 'copykat.pred_colors', 'hpca_colors', 'hvg', 'leiden', 'leiden_res0.1_colors', 'leiden_res0.25_colors', 'leiden_res0.5_colors', 'leiden_res1.5_colors', 'leiden_res1_colors', 'leiden_res2_colors', 'leiden_res3_colors', 'log1p', 'low_qc_cells_colors', 'low_qc_cluster_colors', 'neighbors', 'pca', 'phase_colors', 'rank_genes_groups', 'sample_id_colors', 'sampling_point_colors', 'scDblFinder.class_colors', 'tcr:chain_pairing_colors', 'tcr:receptor_subtype_colors', 'umap', 'airr:clonal_expansion_colors'
      obsm:	'X_pca', 'X_umap'
      varm:	'PCs'
      layers:	'counts', 'soupx'
      obsp:	'connectivities', 'distances'
    airr:	55165 x 0
      obs:	'sample_id', 'receptor_type', 'receptor_subtype', 'chain_pairing', 'cc_aa_identity', 'cc_aa_identity_size', 'clonal_expansion', 'patient_id'
      uns:	'cc_aa_identity', 'chain_indices', 'chain_pairing_colors', 'clonal_expansion_colors', 'clonotype_network', 'ir_dist_aa_identity', 'receptor_subtype_colors', 'repertoire_overlap'
      obsm:	'X_clonotype_network', 'airr', 'chain_indices'

System
scanpy==1.9.3 anndata==0.9.1 umap==0.5.3 numpy==1.23.5 scipy==1.10.1 pandas==2.0.1 scikit-learn==1.2.2 statsmodels==0.14.0 python-igraph==0.10.4 pynndescent==0.5.10
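
For now I'm planning to work around it by moving the oversized .uns entry out of the object before writing and saving it separately. This is only a sketch based on the keys listed above (I'm assuming the 'weighted' DataFrame from the error sits under uns['repertoire_overlap'] in the airr modality; adjust the keys if it lives elsewhere):

import pickle

# Assumption: the huge DataFrame reported as 'weighted' lives inside
# mdata.mod["airr"].uns["repertoire_overlap"]; adjust if that's not the case.
overlap = mdata.mod["airr"].uns.pop("repertoire_overlap", None)

# Keep the removed entry around so nothing is lost.
with open("repertoire_overlap.pkl", "wb") as fh:
    pickle.dump(overlap, fh)

# Should now write without hitting the HDF5 attribute size limit.
mdata.write("mdata.h5mu")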

@zhouzhendiao zhouzhendiao added the bug Something isn't working label Aug 4, 2023
@gtca gtca removed the bug Something isn't working label Sep 10, 2023
gtca (Collaborator) commented Sep 11, 2023

This looks like a limitation of HDF5 itself rather than a bug we can fix here. Moreover, serialisation is anndata's concern, so this should be propagated to a corresponding issue there in case it turns out something can be done about it.

Generally, problems like this shouldn't arise in practice. How big is weighted, and how can we reproduce this issue?
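
For reference, a minimal sketch of what I suspect is going on (sizes and names here are illustrative, not taken from the report): anndata stores all column names of a DataFrame in a single HDF5 attribute ("column-order"), and HDF5 caps a single object header message at roughly 64 KiB, so a DataFrame with tens of thousands of columns should overflow it:

import numpy as np
import pandas as pd
import anndata as ad

# Illustrative size: enough columns that the "column-order" attribute
# exceeds HDF5's ~64 KiB object header message limit.
n_cols = 20_000
df = pd.DataFrame(
    np.zeros((1, n_cols)),
    columns=[f"col_{i}" for i in range(n_cols)],
)

adata = ad.AnnData(X=np.zeros((1, 1)))
adata.uns["weighted"] = df  # mirrors the 'weighted' key from the traceback

# Expected to raise:
# OSError: Unable to synchronously create attribute (object header message is too large)
adata.write("repro.h5ad")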
