Source code for bioservices.ena

#
#  This file is part of bioservices software
#
#  Copyright (c) 2013-2014 - EBI-EMBL
#
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      http://www.gnu.org/licenses/gpl-3.0.html
#
#  website: https://github.com/cokelaer/bioservices
#  documentation: http://packages.python.org/bioservices
#
##############################################################################
"""This module provides a class :class:`ENA`

.. topic:: What is ENA

    :URL:  https://www.ebi.ac.uk/ena


    .. highlights::

        The European Nucleotide Archive (ENA) provides a comprehensive
        record of the world's nucleotide sequencing information, covering
        raw sequencing data, sequence assembly information and functional
        annotation.

        -- From ENA web page Jan 2016

.. versionadded:: 1.4.4

"""
import os
from bioservices.services import REST
import webbrowser
from bioservices import logger

logger.name = __name__


__all__ = ["ENA"]


[docs]class ENA: """Interface to `ChEMBL <http://www.ebi.ac.uk/ena/index.php>`_ Here is a quick example to retrieve a target given its ChEMBL Id .. doctest:: >>> from bioservices import ENQ >>> s = ENA(verbose=False) Retrieve read domain metadata in XML format:: print(e.get_data('ERA000092', 'xml')) Retrieve assemble and annotated sequences in fasta format:: print(e.get_data('A00145', 'fasta')) The range parameter can be used in combination to retrieve a subsequence from sequence entry A00145 from bases 3 to 63 using :: e.get_data('A00145', 'fasta', fasta_range=[3,63]) Retrieve assembled and annotated subsequences in HTML format (same as above but in HTML page). e.view_data('A00145') Retrieve expanded CON records: To retrieve expanded CON records use the expanded=true parameter. For example, the expanded CON entry AL513382 in flat file format can be i obtained as follows:: e.get_data('AL513382', frmt='text', expanded=True) Expanded CON records are different from CON records in two ways. Firstly, the expanded CON records contain the full sequence in addition to the contig assembly instructions. Secondly, if a CON record contains only source or gap features the expanded CON records will also display all features from the segment records. Retrieve assembled and annotated sequence header in flat file format To retrieve assembled and annotated sequence header in flat file format please use the header=true parameter, e.g.: e.get_data('BN000065', 'text', header=True) Retrieve assembled and annotated sequence records using sequence versions:: e.get_data('AM407889.1', 'fasta') e.get_data('AM407889.2', 'fasta') """ url = "http://www.ebi.ac.uk/ena/browser/api" def __init__(self, verbose=False, cache=False): """**Constructor** :param verbose: set to False to prevent informative messages """ self.services = REST(name="ENA", url=ENA.url, verbose=verbose, cache=cache) self.services.TIMEOUT = 100
[docs] def get_data( self, identifier, frmt, fasta_range=None, expanded=None, header=None, download=None, ): """ :param frmt : xml, text, fasta, fastq, html, embl but does depend on the entry Example: get_data("/AL513382", "embl") ENA API changed in 2020 but we tried to keep the same services in this method. """ url = f"{self.url}/{frmt}/{identifier}" if frmt in ["text", "fasta", "fastq"]: res = self.services.http_get(url, frmt="txt") elif frmt in ["html"]: res = self.services.http_get(url, frmt="default") elif frmt in ["xml"]: res = self.services.http_get(url, frmt="xml") return res
[docs] def data_warehouse(self): # http://www.ebi.ac.uk/ena/data/warehouse/search?query="geo_circ(-0.587,-90.5713,170)"&result=sequence_release&display=text&download=gzip pass
[docs] def get_taxon(self, taxon): print("deprecated since v.7.8 due to ENA update")