Source code for bioservices.muscle

#
#  This file is part of bioservices software
#
#  Copyright (c) 2013-2014 - EMBL-EBI
#
#  File author(s):
#      Sven-Maurice Althoff, Christian Knauth
#
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      http://www.gnu.org/licenses/gpl-3.0.html
#
#  website: https://github.com/cokelaer/bioservices
#  documentation: http://packages.python.org/bioservices
#
##############################################################################
"""Interface to the MUSCLE web service

.. topic:: What is MUSCLE ?

    :URL: http://www.drive5.com/muscle/
    :service: http://www.ebi.ac.uk/Tools/webservices/services/msa/muscle_rest

    .. highlights::

        "MUSCLE (MUltiple Sequence Comparison by Log-Expectation)
        is claimed to achieve both better average accuracy and better speed than
        ClustalW or T-Coffee, depending on the chosen options. Multiple alignments
        of protein sequences are important in many applications, including
        phylogenetic tree estimation, secondary structure prediction and critical
        residue identification."

        -- from EMBL-EBI web page

"""
import sys
import time
from bioservices.services import REST
from bioservices import logger

# Rename the logger object imported from bioservices after this module.
logger.name = __name__


# Public API of this module: only the MUSCLE class is exported.
__all__ = ["MUSCLE"]


class MUSCLE:
    """Interface to the `MUSCLE <http://www.ebi.ac.uk/Tools/webservices/services/msa/muscle_rest>`_ service.

    ::

        >>> from bioservices import MUSCLE
        >>> m = MUSCLE(verbose=False)
        >>> with open('filename', 'r') as fh:
        ...     sequences_fasta = fh.read()
        >>> jobid = m.run(frmt="fasta", sequence=sequences_fasta, email="name@provider")
        >>> m.get_result(jobid, "out")

    .. warning:: It is very important to provide a real e-mail address as
        your job otherwise very likely will be killed and your IP,
        Organisation or entire domain black-listed.

    Here is another similar example but we use
    :class:`~bioservices.uniprot.UniProt` class provided in bioservices to
    fetch the FASTA sequences::

        >>> from bioservices import UniProt, MUSCLE
        >>> u = UniProt(verbose=False)
        >>> f1 = u.get_fasta("P18413")
        >>> f2 = u.get_fasta("P18412")
        >>> m = MUSCLE(verbose=False)
        >>> jobid = m.run(frmt="fasta", sequence=f1+f2, email="name@provider")
        >>> m.get_result(jobid, "out")

    """

    def __init__(self, verbose=False):
        """Create the REST handler used by every request of this class.

        :param bool verbose: forwarded to the underlying ``REST`` service.
        """
        url = "http://www.ebi.ac.uk/Tools/services/rest/muscle"
        self.services = REST(name="MUSCLE", url=url, verbose=verbose)
        # Caches filled lazily by :attr:`parameters` and
        # :meth:`get_parameter_details`.
        self._parameters = None
        self._parametersDetails = {}
        # Default headers for the JSON endpoints.
        self._headers = {
            "User-Agent": self.services.getUserAgent(),
            "accept": "application/json",
        }

    def _make_headers(self, accept):
        """Return request headers with the given ``accept`` media type."""
        return {
            "User-Agent": self.services.getUserAgent(),
            "accept": accept,
        }

    def get_parameters(self):
        """List parameter names.

        :returns: the value stored under the ``"parameters"`` key of the
            JSON answer of the ``parameters`` endpoint.

        ::

            >>> from bioservices import MUSCLE
            >>> m = MUSCLE()
            >>> m.get_parameters()

        .. seealso:: :attr:`parameters` for a cached version of this call.
        """
        res = self.services.http_get("parameters", frmt="json", headers=self._headers)
        return res["parameters"]

    def _get_parameters(self):
        """Return the parameter names, querying the service only if needed."""
        if self._parameters:
            return self._parameters
        # On 2 lines: in case the request fails, self._parameters remains None.
        res = self.get_parameters()
        self._parameters = res
        return self._parameters

    parameters = property(_get_parameters)

    def get_parameter_details(self, parameterId):
        """Get detailed information about a parameter.

        The answer of the service is cached per parameter, so repeated calls
        with the same identifier trigger a single request.

        :param str parameterId: one of the names found in :attr:`parameters`.
        :returns: the decoded JSON answer of the ``parameterdetails``
            endpoint for that parameter.
        :raises ValueError: if ``parameterId`` is not in :attr:`parameters`.

        For example::

            >>> m.get_parameter_details("format")

        """
        if parameterId not in self.parameters:
            raise ValueError("Invalid parameterId provided(%s). See parameters attribute" % parameterId)

        if parameterId not in self._parametersDetails:
            request = "parameterdetails/" + parameterId
            res = self.services.http_get(request, frmt="json", headers=self._headers)
            self._parametersDetails[parameterId] = res
        # Always answer from the cache: the local ``res`` only exists on a
        # cache miss, so returning it made every second call fail.
        return self._parametersDetails[parameterId]

    def run(self, frmt=None, sequence=None, tree="none", email=None):
        """Submit a job with the specified parameters.

        .. rubric:: Compulsory arguments

        :param str frmt: output format; one of 'fasta', 'clw', 'clwstrict',
            'html', 'msf', 'phyi', 'phys'.
        :param str sequence: query sequences. The use of fasta formatted
            sequences is recommended.
        :param str tree: tree type ('none', 'tree1', 'tree2').
        :param str email: a valid email address. Will be checked by the
            service itself.
        :return: the answer of the ``run`` endpoint, i.e. a jobid that can be
            analysed with :meth:`get_result`, :meth:`get_status`, ...
        :raises ValueError: if ``frmt``, ``sequence`` or ``email`` is missing.

        The up to date values accepted for each of these parameters can be
        retrieved from :meth:`get_parameter_details`. For instance::

            from bioservices import MUSCLE
            m = MUSCLE()
            m.get_parameter_details("tree")

        Example::

            jobid = m.run(frmt="fasta", sequence=sequence_example, email="test@yahoo.fr")

        The returned object is a jobid, which status can be checked. The job
        must be finished before getting the results.

        .. seealso:: :meth:`get_result`

        """
        # There are compulsory arguments:
        if frmt is None or sequence is None or email is None:
            raise ValueError("frmt, sequence and email must be provided")

        # Here, we check the argument values (not the type). Arguments are
        # checked by the service itself but if we can catch some before, it
        # is better.
        # FIXME: parameters returned by the server are not valid, hence the
        # hard-coded lists.
        self.services.devtools.check_param_in_list(
            frmt, ["fasta", "clw", "clwstrict", "html", "msf", "phyi", "phys"]
        )
        self.services.devtools.check_param_in_list(tree, ["none", "tree1", "tree2"])

        # ``tree`` used to be validated and then dropped; it is now actually
        # sent to the service.
        params = {"format": frmt, "sequence": sequence, "email": email, "tree": tree}

        # The default headers from bioservices do not work for this endpoint,
        # so explicit ones are provided.
        # IMPORTANT: use data parameter, not params !!!
        res = self.services.http_post("run", data=params, headers=self._make_headers("text/plain"))
        return res

    def get_status(self, jobid):
        """Get status of a submitted job.

        :param str jobid: a job identifier returned by :meth:`run`.
        :return: A string giving the jobid status (e.g. FINISHED).

        The values for the status are:

        * RUNNING: the job is currently being processed.
        * FINISHED: job has finished, and the results can then be retrieved.
        * ERROR: an error occurred attempting to get the job status.
        * FAILURE: the job failed.
        * NOT_FOUND: the job cannot be found.

        """
        res = self.services.http_get(
            "status/{}".format(jobid),
            frmt="txt",
            headers=self._make_headers("text/plain"),
        )
        return res

    def get_result_types(self, jobid):
        """Get available result types for a job.

        If the job is not finished yet, a warning is logged and :meth:`wait`
        is called before the result types are requested.

        :param str jobid: a job identifier returned by :meth:`run`.
        :return: the list of the ``identifier`` values found in the
            ``types`` entry of the service answer.
        """
        if self.get_status(jobid) != "FINISHED":
            # The logger lives on the REST handler; this class has no
            # ``logging`` attribute of its own.
            self.services.logging.warning("waiting for the job to be finished. May take a while")
            self.wait(jobid, verbose=False)
        url = "resulttypes/" + jobid
        res = self.services.http_get(url, frmt="json", headers=self._make_headers("application/json"))
        return [x["identifier"] for x in res["types"]]

    def get_result(self, jobid, result_type):
        """Get the job result of the specified type.

        :param str jobid: a job identifier returned by :meth:`run`.
        :param str result_type: type of result to retrieve. See
            :meth:`get_result_types`.
        :return: the answer of the ``result`` endpoint, requested with
            ``frmt="txt"``.
        :raises ValueError: if the job is still not finished after waiting,
            or if ``result_type`` is not offered for this job.
        """
        status = self.get_status(jobid)
        if status != "FINISHED":  # pragma: no cover
            self.services.logging.warning("waiting for the job to be finished. May take a while")
            # wait() returns the last status it saw, no need to ask again.
            status = self.wait(jobid, verbose=False)
        if status != "FINISHED":  # pragma: no cover
            raise ValueError("job is not finished")

        # Explicit check instead of ``assert``, which is removed under -O.
        available = self.get_result_types(jobid)
        if result_type not in available:
            raise ValueError(
                "Invalid result_type provided(%s). Must be one of %s" % (result_type, available)
            )

        # No leading slash, like every other endpoint of this class.
        url = "result/" + jobid + "/" + result_type

        # ``frmt`` used to be bound only for 'out', 'sequence', 'aln-fasta',
        # 'pim' and 'phylotree'; any other valid type crashed with an
        # UnboundLocalError.
        # NOTE(review): other types are now also fetched as text -- confirm
        # that none of them needs a different decoding.
        frmt = "txt"
        res = self.services.http_get(url, frmt=frmt, headers=self._make_headers("application/json"))
        return res

    def wait(self, jobId, checkInterval=5, verbose=True):
        """Poll the status of a job until it is neither RUNNING nor PENDING.

        :param str jobId: a job identifier returned by :meth:`run`.
        :param int checkInterval: interval between requests in seconds.
        :param bool verbose: if True, print each status on stderr.
        :return: the last status returned by :meth:`get_status`.
        :raises ValueError: if ``checkInterval`` is smaller than 1.
        """
        if checkInterval < 1:  # pragma: no cover
            raise ValueError("checkInterval must be positive and less than minute")
        unfinished = ("RUNNING", "PENDING")
        result = "PENDING"
        while result in unfinished:
            result = self.get_status(jobId)
            if verbose:
                print("WARNING: ", jobId, " is ", result, file=sys.stderr)
            # Sleep only when another round is needed.
            if result in unfinished:
                time.sleep(checkInterval)
        return result