# Source code for bioservices.muscle

#
#  This file is part of bioservices software
#
#  Copyright (c) 2013-2014 - EMBL-EBI
#
#  File author(s):
#      Sven-Maurice Althoff, Christian Knauth
#
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      http://www.gnu.org/licenses/gpl-3.0.html
#
#  website: https://github.com/cokelaer/bioservices
#  documentation: http://packages.python.org/bioservices
#
##############################################################################
"""Interface to the MUSCLE web service

.. topic:: What is MUSCLE ?

    :URL: http://www.drive5.com/muscle/
    :service: http://www.ebi.ac.uk/Tools/webservices/services/msa/muscle_rest

    .. highlights::

        "MUSCLE - (MUltiple Sequence Comparison by Log-Expectation)
        is claimed to achieve both better average accuracy and better speed than
        ClustalW or T-Coffee, depending on the chosen options. Multiple alignments
        of protein sequences are important in many applications, including
        phylogenetic tree estimation, secondary structure prediction and critical
        residue identification."

        -- from EMBL-EBI web page

"""
import sys
import time

from bioservices import logger
from bioservices.services import REST

# Set the ``name`` attribute of the logger object imported from bioservices
# to this module's dotted name.
# NOTE(review): ``logger`` is imported, not created here, so this assignment
# affects the same object for every other module that uses it -- confirm
# that is intended.
logger.name = __name__


# Public API of this module: ``from bioservices.muscle import *`` exports
# only the MUSCLE class.
__all__ = ["MUSCLE"]


class MUSCLE:
    """Interface to the `MUSCLE <http://www.ebi.ac.uk/Tools/webservices/services/msa/muscle_rest>`_ service.

    ::

        >>> from bioservices import MUSCLE
        >>> m = MUSCLE(verbose=False)
        >>> with open('filename', 'r') as fh:
        ...     sequences = fh.read()
        >>> jobid = m.run(frmt="fasta", sequence=sequences,
        ...               email="name@provider")
        >>> m.get_result(jobid, "out")

    .. warning:: It is very important to provide a real e-mail address as your
        job otherwise very likely will be killed and your IP, Organisation or
        entire domain black-listed.


    Here is another similar example but we use :class:`~bioservices.uniprot.UniProt`
    class provided in bioservices to fetch the FASTA sequences::


        >>> from bioservices import UniProt, MUSCLE
        >>> u = UniProt(verbose=False)
        >>> f1 = u.get_fasta("P18413")
        >>> f2 = u.get_fasta("P18412")
        >>> m = MUSCLE(verbose=False)
        >>> jobid = m.run(frmt="fasta", sequence=f1+f2, email="name@provider")
        >>> m.get_result(jobid, "out")

    """

    def __init__(self, verbose=False):
        """Create the underlying REST client and the per-instance caches.

        :param bool verbose: forwarded to the ``REST`` service constructor.
        """
        url = "http://www.ebi.ac.uk/Tools/services/rest/muscle"
        self.services = REST(name="MUSCLE", url=url, verbose=verbose)
        # Cache for the parameter names (filled lazily by :attr:`parameters`).
        self._parameters = None
        # Cache for parameter details, keyed by parameter name.
        self._parametersDetails = {}
        # Default headers used by the JSON endpoints.
        self._headers = self._build_headers("application/json")

    def _build_headers(self, accept):
        """Return the HTTP headers for a request expecting the *accept* MIME type."""
        return {
            "User-Agent": self.services.getUserAgent(),
            "accept": accept,
        }

    def get_parameters(self):
        """List parameter names.

        :returns: the content of the ``parameters`` entry of the JSON
            document returned by the service.

        ::

            >>> from bioservices import MUSCLE
            >>> m = MUSCLE()
            >>> res = m.get_parameters()
            >>> print(res)

        .. seealso:: :attr:`parameters` for a cached version of this call.
        """
        res = self.services.http_get("parameters", frmt="json", headers=self._build_headers("application/json"))
        return res["parameters"]

    def _get_parameters(self):
        """Return the parameter names, querying the service only when the cache is empty."""
        if not self._parameters:
            # Assign in a separate step so that, if the request fails,
            # self._parameters keeps its previous value.
            res = self.get_parameters()
            self._parameters = res
        return self._parameters

    parameters = property(_get_parameters)

    def get_parameter_details(self, parameterId):
        """Get detailed information about a parameter.

        The answer of the service is cached per parameter name, so repeated
        calls with the same *parameterId* trigger a single request.

        :param str parameterId: a valid parameter name (see :attr:`parameters`)
        :return: the decoded JSON answer of the service for that parameter
        :raises ValueError: if *parameterId* is not listed in :attr:`parameters`

        For example::

            >>> m.get_parameter_details("format")

        """
        if parameterId not in self.parameters:
            raise ValueError("Invalid parameterId provided(%s). See parameters attribute" % parameterId)

        if parameterId not in self._parametersDetails:
            request = "parameterdetails/" + parameterId
            self._parametersDetails[parameterId] = self.services.http_get(
                request, frmt="json", headers=self._build_headers("application/json")
            )
        # Always answer from the cache: returning a local variable here would
        # be unbound whenever the entry was already cached.
        return self._parametersDetails[parameterId]

    def run(self, frmt=None, sequence=None, tree="none", email=None):
        """Submit a job with the specified parameters.

        .. rubric:: Compulsory arguments

        :param str frmt: output format; one of ``fasta``, ``clw``,
            ``clwstrict``, ``html``, ``msf``, ``phyi``, ``phys``.
        :param str sequence: query sequence. The use of fasta formatted sequence is recommended.
        :param str email: a valid email address. Will be checked by the service itself.

        .. rubric:: Optional arguments

        :param str tree: tree type ('none','tree1','tree2'). It is sent to
            the service only when it differs from the default ``'none'``.

        :return: A jobid that can be analysed with :meth:`get_result`,
            :meth:`get_status`, ...
        :raises ValueError: if one of *frmt*, *sequence* or *email* is missing.

        The up-to-date values accepted for each of these parameters can be
        retrieved from :meth:`get_parameter_details`.

        For instance,::

            from bioservices import MUSCLE
            m = MUSCLE()
            m.get_parameter_details("tree")

        Example::

            jobid = m.run(frmt="fasta",
                 sequence=sequence_example,
                 email="test@yahoo.fr")

        The returned object is a jobid, which status can be checked. It must be
        finished before analysing/getting the results.

        .. seealso:: :meth:`get_result`

        """
        # There are compulsory arguments:
        if frmt is None or sequence is None or email is None:
            raise ValueError("frmt, sequence and email must be provided")

        # Here, we will check the arguments values (not the type)
        # Arguments will be checked by the service itself but if we can
        # catch some before, it is better

        # FIXME: return parameters from server are not valid
        self.services.devtools.check_param_in_list(frmt, ["fasta", "clw", "clwstrict", "html", "msf", "phyi", "phys"])
        self.services.devtools.check_param_in_list(tree, ["none", "tree1", "tree2"])

        # parameter structure
        params = {"format": frmt, "sequence": sequence, "email": email}
        # Forward the tree option only when explicitly requested, so that the
        # payload of default calls is unchanged.
        if tree != "none":
            params["tree"] = tree

        # IMPORTANT: use data parameter, not params !!!
        res = self.services.http_post(
            "run",
            data=params,
            headers=self._build_headers("text/plain"),
        )
        return res

    def get_status(self, jobid):
        """Get status of a submitted job

        :param str jobid: a job identifier returned by :meth:`run`.
        :return: A string giving the jobid status (e.g. FINISHED).

        The values for the status are:

        *   RUNNING: the job is currently being processed.
        *   FINISHED: job has finished, and the results can then be retrieved.
        *   ERROR: an error occurred attempting to get the job status.
        *   FAILURE: the job failed.
        *   NOT_FOUND: the job cannot be found.

        """
        res = self.services.http_get(
            "status/{}".format(jobid),
            frmt="txt",
            headers=self._build_headers("text/plain"),
        )
        return res

    def get_result_types(self, jobid):
        """Get available result types for a job.

        If the job is not finished yet, this call blocks (see :meth:`wait`)
        before asking the service for the result types.

        :param str jobid: a job identifier returned by :meth:`run`.
        :return: a list of result type identifier strings (e.g., ``["out", "sequence", "aln-fasta"]``)
        """
        if self.get_status(jobid) != "FINISHED":
            # The logger lives on the REST service object, not on this class.
            self.services.logging.warning("waiting for the job to be finished. May take a while")
            self.wait(jobid, verbose=False)
        url = "resulttypes/" + jobid
        res = self.services.http_get(
            url,
            frmt="json",
            headers=self._build_headers("application/json"),
        )
        return [x["identifier"] for x in res["types"]]

    def get_result(self, jobid, result_type):
        """Get the job result of the specified type.

        If the job is not finished yet, this call blocks (see :meth:`wait`)
        until it reaches a final status.

        :param str jobid: a job identifier returned by :meth:`run`.
        :param str result_type: type of result to retrieve. See :meth:`get_result_types`.
        :return: the answer of the service, requested as text.
        :raises ValueError: if the job did not end with status FINISHED or if
            *result_type* is not one of the available result types.

        """
        status = self.get_status(jobid)
        if status != "FINISHED":  # pragma: no cover
            self.services.logging.warning("waiting for the job to be finished. May take a while")
            # wait() returns the final status, so no extra request is needed.
            status = self.wait(jobid, verbose=False)

        if status != "FINISHED":  # pragma: no cover
            raise ValueError("job is not finished")

        valid = self.get_result_types(jobid)
        if result_type not in valid:
            raise ValueError(f"result_type must be one of {valid}")
        # No leading slash, consistently with the other endpoints.
        url = "result/" + jobid + "/" + result_type

        # Every result type is fetched as text; binding the format
        # unconditionally avoids an unbound local for types that were not in
        # the historical hard-coded list.
        frmt = "txt"
        res = self.services.http_get(
            url,
            frmt=frmt,
            headers=self._build_headers("application/json"),
        )

        return res

    def wait(self, jobId, checkInterval=5, verbose=True):
        """Poll the status of a job until it is neither RUNNING nor PENDING.

        :param str jobId: a job identifier returned by :meth:`run`.
        :param int checkInterval: interval between status checks in seconds (default 5).
        :param bool verbose: if True, print each polled status on stderr.
        :return: the last status returned by :meth:`get_status`.
        :raises ValueError: if *checkInterval* is smaller than 1.

        """
        # Only the lower bound is enforced, whatever the message suggests.
        if checkInterval < 1:  # pragma: no cover
            raise ValueError("checkInterval must be positive and less than minute")

        while True:
            result = self.get_status(jobId)
            if verbose:
                print("WARNING: ", jobId, " is ", result, file=sys.stderr)

            if result not in ("RUNNING", "PENDING"):
                return result
            time.sleep(checkInterval)