# Source code for bioservices.ena

#
#  This file is part of bioservices software
#
#  Copyright (c) 2013-2014 - EBI-EMBL
#
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      http://www.gnu.org/licenses/gpl-3.0.html
#
#  website: https://github.com/cokelaer/bioservices
#  documentation: http://packages.python.org/bioservices
#
##############################################################################
"""This module provides a class :class:`ENA`

.. topic:: What is ENA

    :URL:  https://www.ebi.ac.uk/ena


    .. highlights::

        The European Nucleotide Archive (ENA) provides a comprehensive
        record of the world's nucleotide sequencing information, covering
        raw sequencing data, sequence assembly information and functional
        annotation.

        -- From ENA web page Jan 2016

.. versionadded:: 1.4.4

"""
from bioservices import logger
from bioservices.services import REST

# Set the ``name`` attribute of the logger object imported from the
# ``bioservices`` package to this module's dotted name.
# NOTE(review): the logger is imported, not created here, so this presumably
# renames an object shared with other modules -- confirm that is intended.
logger.name = __name__


# Names exported by ``from bioservices.ena import *``: only the ENA class.
__all__ = ["ENA"]


class ENA:
    """Interface to the `ENA <http://www.ebi.ac.uk/ena>`_ (European Nucleotide Archive)

    .. doctest::

        >>> from bioservices import ENA
        >>> e = ENA(verbose=False)

    Retrieve read domain metadata in XML format::

        print(e.get_data('ERA000092', 'xml'))

    Retrieve assembled and annotated sequences in FASTA format::

        print(e.get_data('A00145', 'fasta'))

    Retrieve assembled and annotated sequences in HTML format::

        e.get_data('A00145', 'html')

    Retrieve a record in EMBL flat file format::

        e.get_data('AL513382', 'embl')

    Retrieve assembled and annotated sequence records using sequence versions::

        e.get_data('AM407889.1', 'fasta')
        e.get_data('AM407889.2', 'fasta')

    .. note:: :meth:`get_data` still accepts the ``fasta_range``, ``expanded``,
        ``header`` and ``download`` keywords, which were meant for subsequence
        retrieval, expanded CON records, header-only retrieval and file
        download respectively. This implementation does **not** send them to
        the server, so calls such as::

            e.get_data('A00145', 'fasta', fasta_range=[3, 63])
            e.get_data('AL513382', frmt='text', expanded=True)
            e.get_data('BN000065', 'text', header=True)

        issue exactly the same request as the equivalent call without the
        extra keyword.

    """

    url = "http://www.ebi.ac.uk/ena/browser/api"

    # Maps every ENA output format accepted by get_data() to the ``frmt``
    # value handed to ``REST.http_get`` for that request. ``embl`` is a flat
    # file, so it is requested like the other plain-text formats.
    _HTTP_FORMATS = {
        "text": "txt",
        "fasta": "txt",
        "fastq": "txt",
        "embl": "txt",
        "html": "default",
        "xml": "xml",
    }

    def __init__(self, verbose=False, cache=False):
        """**Constructor**

        :param verbose: set to False to prevent informative messages
        :param cache: forwarded unchanged to the underlying
            :class:`~bioservices.services.REST` service
        """
        # Use self.url (not ENA.url) so the REST service and get_data() agree
        # on the base URL even when a subclass overrides ``url``.
        self.services = REST(name="ENA", url=self.url, verbose=verbose, cache=cache)
        # Override the TIMEOUT attribute of the REST service
        # (presumably expressed in seconds -- TODO confirm against REST).
        self.services.TIMEOUT = 100

    def get_data(
        self,
        identifier,
        frmt,
        fasta_range=None,
        expanded=None,
        header=None,
        download=None,
    ):
        """Retrieve an ENA entry in the specified format.

        The request URL is ``<url>/<frmt>/<identifier>``.

        :param str identifier: ENA accession or identifier (e.g. ``'AL513382'``)
        :param str frmt: output format -- one of ``xml``, ``text``, ``fasta``,
            ``fastq``, ``html``, ``embl`` (availability depends on entry type)
        :param list fasta_range: accepted for backward compatibility;
            currently ignored (not sent to the server)
        :param bool expanded: accepted for backward compatibility;
            currently ignored (not sent to the server)
        :param bool header: accepted for backward compatibility;
            currently ignored (not sent to the server)
        :param bool download: accepted for backward compatibility;
            currently ignored (not sent to the server)
        :return: whatever ``self.services.http_get`` returns for the request
        :raises ValueError: if *frmt* is not one of the supported formats

        ::

            get_data("AL513382", "embl")

        .. note:: The ENA API changed in 2020; this method wraps the current REST API.
        """
        # Validate the format before doing any network I/O. Previously an
        # unlisted format (including the documented "embl") fell through every
        # branch and crashed with UnboundLocalError on ``return res``.
        try:
            http_frmt = self._HTTP_FORMATS[frmt]
        except (KeyError, TypeError):
            supported = ", ".join(sorted(self._HTTP_FORMATS))
            raise ValueError(
                f"Unsupported format {frmt!r}; expected one of: {supported}"
            ) from None

        url = f"{self.url}/{frmt}/{identifier}"
        return self.services.http_get(url, frmt=http_frmt)

    def data_warehouse(self):
        """Placeholder for an ENA data warehouse search.

        Not implemented: the method does nothing and returns ``None``.
        """
        # Example of a (legacy) warehouse query this method was meant to wrap:
        # http://www.ebi.ac.uk/ena/data/warehouse/search?query="geo_circ(-0.587,-90.5713,170)"&result=sequence_release&display=text&download=gzip
        pass

    def get_taxon(self, taxon):
        """Deprecated stub kept so that existing callers do not break.

        :param taxon: ignored
        :return: ``None``; only a deprecation message is printed

        .. deprecated:: 7.8 -- removed due to ENA API update.
        """
        print("deprecated since v.7.8 due to ENA update")