# Source code for bioservices.biocontainers

#  This file is part of bioservices software
#
#  Copyright (c) 2013-2014 - EBI-EMBL
#  Copyright (c) 2021 - Institut Pasteur
#
#  File author(s):
#      Thomas Cokelaer <cokelaer@ebi.ac.uk>
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      http://www.gnu.org/licenses/gpl-3.0.html
#
#  website: https://github.com/cokelaer/bioservices
#  documentation: http://packages.python.org/bioservices
#
##############################################################################
"""Interface to BioContainers.

.. topic:: What is BioContainers?

    :URL: https://biocontainers.pro/
    :REST: https://api.biocontainers.pro/ga4gh/trs/v2

    .. highlights::

        BioContainers is an open-source project that aims to create,
        store, and distribute bioinformatics software containers and
        packages.

        -- From BioContainers (about), Jan 2021

"""
import pandas as pd

from bioservices import logger
from bioservices.services import REST

# Rename the logger object imported from the ``bioservices`` package to this
# module's dotted name.
# NOTE(review): this assigns on the imported object itself, so if other
# modules import the same ``logger`` they would see the new name too - confirm
# that this is intended.
logger.name = __name__


# Public API of this module: only the service wrapper class is exported by
# ``from bioservices.biocontainers import *``.
__all__ = ["Biocontainers"]


class Biocontainers:
    """Interface to the `BioContainers <https://biocontainers.pro>`_ service.

    BioContainers exposes a GA4GH Tool Registry Service (TRS) v2 API for
    discovering bioinformatics containers (Docker, Singularity, Conda).

    Every public method issues a single GET request through
    ``self.services.http_get`` and returns whatever that call yields, except
    :meth:`get_tools` and :meth:`get_tool_versions`, which additionally try
    to wrap the response in a :class:`pandas.DataFrame`.

    Example::

        >>> from bioservices import Biocontainers
        >>> b = Biocontainers()
        >>> b.get_tools(limit=5)
        >>> b.get_tool("samtools")
        >>> b.get_tool_classes()

    """

    # Base URL of the TRS v2 endpoint; every query path is relative to it.
    _url = "https://api.biocontainers.pro/ga4gh/trs/v2"

    def __init__(self, verbose=True, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: set to False to suppress informative messages
        :param bool cache: use HTTP cache
        """
        # ``self._url`` (rather than ``Biocontainers._url``) lets a subclass
        # point the client at another TRS endpoint by overriding ``_url``.
        self.services = REST(
            name="biocontainers",
            url=self._url,
            verbose=verbose,
            cache=cache,
            url_defined_later=True,
        )

    @staticmethod
    def _to_dataframe(res):
        """Wrap *res* in a :class:`pandas.DataFrame` when possible.

        :param res: the value returned by ``http_get``.
        :return: a :class:`pandas.DataFrame` built from *res*, or *res*
            itself, unchanged, when pandas refuses to build a frame from it
            (for instance a bare scalar or a dict of scalars).
        """
        try:
            return pd.DataFrame(res)
        except (ValueError, TypeError):
            # Best effort only: hand the raw response back so the caller can
            # still inspect whatever the request produced.
            return res

    def get_tools(self, limit=1000, search=None, toolname=None, sort_field="id", sort_order="asc"):
        """Return a list of available tools.

        :param int limit: maximum number of tools to return (default: 1000).
        :param str search: free-text search filter applied across tool names,
            descriptions and tags (e.g., ``"alignment"``).
        :param str toolname: filter by exact tool name (e.g., ``"samtools"``).
        :param str sort_field: field to sort results by (default: ``"id"``).
        :param str sort_order: sort direction - ``"asc"`` or ``"desc"``
            (default: ``"asc"``).
        :return: :class:`pandas.DataFrame` with one tool per row, or the raw
            response if it cannot be converted.

        Example::

            >>> from bioservices import Biocontainers
            >>> b = Biocontainers()
            >>> df = b.get_tools(limit=10)
            >>> df.columns.tolist()  # doctest: +SKIP
            ['id', 'name', 'organization', 'toolclass', 'versions', ...]
            >>> b.get_tools(limit=5, search="alignment")  # doctest: +SKIP

        """
        params = {"limit": limit, "sort_field": sort_field, "sort_order": sort_order}
        # Optional filters are only sent when the caller supplied them.
        if search is not None:
            params["search"] = search
        if toolname is not None:
            params["toolname"] = toolname
        res = self.services.http_get("tools", params=params)
        return self._to_dataframe(res)

    def get_tool(self, tool_id):
        """Return metadata for a single tool.

        :param str tool_id: the BioContainers tool identifier
            (e.g., ``"samtools"``).
        :return: dict with keys ``id``, ``name``, ``description``,
            ``organization``, ``toolclass``, ``versions``, ``pulls``, etc.

        Example::

            >>> from bioservices import Biocontainers
            >>> b = Biocontainers()
            >>> tool = b.get_tool("samtools")
            >>> tool["name"]
            'samtools'
            >>> tool["pulls"]  # doctest: +SKIP
            381303353

        """
        return self.services.http_get(f"tools/{tool_id}")

    def get_tool_versions(self, tool_id):
        """Return all versions of a given tool.

        :param str tool_id: the tool identifier (e.g., ``"samtools"``).
        :return: :class:`pandas.DataFrame` with one version per row, or the
            raw response if it cannot be converted. Each row contains image
            information (Docker, Singularity, Conda) and metadata such as
            ``id``, ``name``, ``meta_version``.

        Example::

            >>> from bioservices import Biocontainers
            >>> b = Biocontainers()
            >>> df = b.get_tool_versions("samtools")
            >>> df["id"].tolist()[:3]  # doctest: +SKIP
            ['samtools-0.1.19', 'samtools-0.1.20', 'samtools-0.1.21']

        """
        res = self.services.http_get(f"tools/{tool_id}/versions", params={})
        return self._to_dataframe(res)

    def get_tool_version(self, tool_id, version_id):
        """Return metadata for a specific version of a tool.

        :param str tool_id: the tool identifier (e.g., ``"samtools"``).
        :param str version_id: the version identifier, typically in the form
            ``"<tool>-<version>"`` (e.g., ``"samtools-1.17"``).
        :return: dict with keys ``id``, ``name``, ``meta_version``, ``images``
            (list of container image records). Each image entry includes
            ``image_name``, ``image_type`` (Docker, Singularity, or Conda),
            ``registry_host``, ``size``, and ``updated``.

        Example::

            >>> from bioservices import Biocontainers
            >>> b = Biocontainers()
            >>> v = b.get_tool_version("samtools", "samtools-1.17")
            >>> v["meta_version"]
            '1.17'
            >>> [img["image_type"] for img in v["images"]]  # doctest: +SKIP
            ['Conda', 'Docker', 'Singularity', ...]

        """
        return self.services.http_get(f"tools/{tool_id}/versions/{version_id}")

    def get_tool_classes(self):
        """Return all tool classes defined in BioContainers.

        :return: list of dicts, each with keys ``id``, ``name``,
            ``description``. Current classes are ``CommandLineTool``,
            ``Workflow``, ``CommandLineMultiTool``, and ``Service``.

        Example::

            >>> from bioservices import Biocontainers
            >>> b = Biocontainers()
            >>> classes = b.get_tool_classes()
            >>> [c["name"] for c in classes]
            ['CommandLineTool', 'Workflow', 'CommandLineMultiTool', 'Service']

        """
        return self.services.http_get("toolClasses")

    # ------------------------------------------------------------------
    # Backward-compatible alias
    # ------------------------------------------------------------------

    def get_versions_one_tool(self, tool_id):
        """Return all versions of a given tool.

        This is an alias for :meth:`get_tool_versions`.

        :param str tool_id: the tool identifier (e.g., ``"samtools"``).
        :return: :class:`pandas.DataFrame` or the raw response.

        Example::

            >>> from bioservices import Biocontainers
            >>> b = Biocontainers()
            >>> b.get_versions_one_tool("samtools")  # doctest: +SKIP

        """
        return self.get_tool_versions(tool_id)