# Source code for bioservices.ena

#
#  This file is part of bioservices software
#
#  Copyright (c) 2013-2014 - EBI-EMBL
#
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      http://www.gnu.org/licenses/gpl-3.0.html
#
#  website: https://github.com/cokelaer/bioservices
#  documentation: http://packages.python.org/bioservices
#
##############################################################################
"""This module provides a class :class:`ENA`

.. topic:: What is ENA

    :URL:  https://www.ebi.ac.uk/ena


    .. highlights::

        The European Nucleotide Archive (ENA) provides a comprehensive
        record of the world's nucleotide sequencing information, covering
        raw sequencing data, sequence assembly information and functional
        annotation.

        -- From ENA web page Jan 2016

.. versionadded:: 1.4.4

"""
from bioservices import logger
from bioservices.services import REST

# Label the logger object imported from the bioservices package with this
# module's name.
# NOTE(review): this assigns on the imported object itself, so any other module
# holding the same object presumably sees the new name as well -- confirm.
logger.name = __name__


# Public API of this module: ``from bioservices.ena import *`` exports ENA only.
__all__ = ["ENA"]


class ENA:
    """Interface to the `ENA <http://www.ebi.ac.uk/ena>`_ (European Nucleotide Archive)

    .. doctest::

        >>> from bioservices import ENA
        >>> e = ENA(verbose=False)

    Retrieve read domain metadata in XML format::

        print(e.get_data('ERA000092', 'xml'))

    Retrieve assembled and annotated sequences in FASTA format::

        print(e.get_data('A00145', 'fasta'))

    The range parameter can be used to retrieve a subsequence from sequence
    entry A00145 from bases 3 to 63::

        e.get_data('A00145', 'fasta', fasta_range=[3, 63])

    Retrieve assembled and annotated subsequences in HTML format::

        e.view_data('A00145')

    Retrieve expanded CON records:

    To retrieve expanded CON records use the ``expanded=True`` parameter. For
    example, the expanded CON entry AL513382 in flat file format can be
    obtained as follows::

        e.get_data('AL513382', frmt='text', expanded=True)

    Expanded CON records differ from CON records in two ways: firstly, they
    contain the full sequence in addition to the contig assembly instructions;
    secondly, if a CON record contains only source or gap features, the
    expanded CON records will also display all features from the segment
    records.

    Retrieve assembled and annotated sequence header in flat file format
    using the ``header=True`` parameter::

        e.get_data('BN000065', 'text', header=True)

    .. note:: ``header`` is accepted by :meth:`get_data` for backward
        compatibility but is currently not sent to the service.

    Retrieve assembled and annotated sequence records using sequence
    versions::

        e.get_data('AM407889.1', 'fasta')
        e.get_data('AM407889.2', 'fasta')

    """

    #: Base URL of the ENA browser REST API; every request is built from it.
    url = "http://www.ebi.ac.uk/ena/browser/api"

    # Maps the ENA output format (a path component of the request URL) to the
    # ``frmt`` value handed to ``self.services.http_get``.
    _HTTP_FRMT = {
        "text": "txt",
        "fasta": "txt",
        "fastq": "txt",
        # EMBL flat files are plain text, so they are requested like the other
        # text formats. The original code left the result unbound for "embl".
        "embl": "txt",
        "html": "default",
        "xml": "xml",
    }

    def __init__(self, verbose=False, cache=False):
        """**Constructor**

        :param verbose: set to False to prevent informative messages
        :param cache: forwarded as-is to the underlying
            :class:`~bioservices.services.REST` service
        """
        self.services = REST(name="ENA", url=ENA.url, verbose=verbose, cache=cache)
        self.services.TIMEOUT = 100

    def get_data(
        self,
        identifier,
        frmt,
        fasta_range=None,
        expanded=None,
        header=None,
        download=None,
    ):
        """Retrieve an ENA entry in the specified format.

        :param str identifier: ENA accession or identifier (e.g. ``'AL513382'``)
        :param str frmt: output format -- one of ``xml``, ``text``, ``fasta``,
            ``fastq``, ``html``, ``embl`` (availability depends on entry type)
        :param list fasta_range: ``[start, end]`` base positions for
            subsequence retrieval; sent as the ``range=start-end`` query
            parameter
        :param bool expanded: if True, request expanded CON records
            (``expanded=true`` query parameter)
        :param bool header: accepted for backward compatibility; currently
            not forwarded to the service
        :param bool download: if True, request the data as a downloadable
            file (``download=true`` query parameter)
        :return: whatever ``self.services.http_get`` returns for the request
        :raises ValueError: if *frmt* is not one of the supported formats, or
            if *fasta_range* is not a ``[start, end]`` pair

        ::

            get_data("AL513382", "embl")

        .. note:: The ENA API changed in 2020; this method wraps the current
            REST API.
        """
        from urllib.parse import urlencode

        # Reject unknown formats up front instead of failing later with an
        # UnboundLocalError on the result variable.
        try:
            http_frmt = self._HTTP_FRMT[frmt]
        except (KeyError, TypeError) as err:
            supported = ", ".join(sorted(self._HTTP_FRMT))
            raise ValueError(
                f"Unsupported format {frmt!r}; expected one of: {supported}"
            ) from err

        url = f"{self.url}/{frmt}/{identifier}"

        # Only options that were explicitly set are sent, so a call without
        # options produces exactly the same URL as before.
        query = {}
        if fasta_range is not None:
            try:
                start, end = fasta_range
            except (TypeError, ValueError) as err:
                raise ValueError(
                    "fasta_range must be a [start, end] pair"
                ) from err
            query["range"] = f"{start}-{end}"
        if expanded:
            query["expanded"] = "true"
        if download:
            query["download"] = "true"
        # NOTE(review): ``header`` has no confirmed counterpart in the current
        # ENA browser API, so it is deliberately left out of the query.

        if query:
            url = f"{url}?{urlencode(query)}"

        return self.services.http_get(url, frmt=http_frmt)

    def data_warehouse(self):
        """Placeholder for the data-warehouse search; not implemented.

        :return: None
        """
        # http://www.ebi.ac.uk/ena/data/warehouse/search?query="geo_circ(-0.587,-90.5713,170)"&result=sequence_release&display=text&download=gzip
        pass

    def get_taxon(self, taxon):
        """.. deprecated:: 7.8 -- removed due to ENA API update.

        :param taxon: ignored; kept so existing calls do not fail
        :return: None (only prints a deprecation message)
        """
        print("deprecated since v.7.8 due to ENA update")