Source code for bioservices.pride

#
#  This file is part of bioservices software
#
#  Copyright (c) 2013-2014 - EBI-EMBL
#
#  File author(s):
#      https://github.com/cokelaer/bioservices
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      http://www.gnu.org/licenses/gpl-3.0.html
#
#  source: http://github.com/cokelaer/bioservices
#  documentation: http://packages.python.org/bioservices
#
##############################################################################

"""Interface to PRIDE web service

.. topic:: What is PRIDE ?

    :URL: http://www.ebi.ac.uk/pride/archive/
    :URL: http://www.ebi.ac.uk/pride/ws/archive

    .. highlights::

         The PRIDE PRoteomics IDEntifications database is a centralized,
         standards compliant, public data repository for proteomics data,
         including protein and peptide identifications, post-translational
         modifications and supporting spectral evidence.

        -- From PRIDE web site, Jan 2015


"""
import wrapt

from bioservices.services import REST
from bioservices import logger

logger.name = __name__


__all__ = ["PRIDE"]


@wrapt.decorator
def params_to_update(wrapped, instance, args, kwargs):
    vars(wrapped)["actual_kwargs"] = kwargs
    return wrapped(*args, **kwargs)


[docs]class PRIDE(REST):
    """Interface to the `PRIDE <http://rest.ensembl.org>`_ service"""

    _url = "https://www.ebi.ac.uk/pride/ws/archive"

    def __init__(self, verbose=False, cache=False):
        """**Constructor**

        :param verbose: set to False to prevent informative messages
        """
        super(PRIDE, self).__init__(name="PRIDE", url=PRIDE._url, verbose=verbose, cache=cache)

[docs]    def get_project(self, identifier):
        """Retrieve project information by accession

        :param str identifier: a valid PRIDE identifier e.g., PRD000001
        :return: a dictionary with the project details. See
            http://www.ebi.ac.uk/pride/ws/archive/#!/project for details

        .. doctest::

            >>> from bioservices import PRIDE
            >>> p = PRIDE()
            >>> res = p.get_project("PRD000001")
            >>> res['numPeptides']
            6758

        """
        res = self.http_get("project/%s" % identifier)
        return res

[docs]    @params_to_update
    def get_project_list(
        self,
        query="",
        show=10,
        page=0,
        sort=None,
        order="desc",
        speciesFilter=None,
        ptmsFilter=None,
        tissueFilter=None,
        diseaseFilter=None,
        titleFilter=None,
        instrumentFilter=None,
        experimentTypeFilter=None,
        quantificationfilter=None,
        projectTagFilter=None,
    ):
        """list projects or given criteria

        :param str query: search term to query for
        :param int show: how many results to return per page
        :param int page: which page (starting from 0) of the result to return
        :param str sort: the field to sort on
        :param str order: the sorting order (asc or desc)
        :param str speciesFilter: filter by species (NCBI taxon ID or name)
        :param str ptmsFilter: filter by PTM annotation	query
        :param str tissueFilter: filter by tissue annotation
        :param str diseaseFilter: filter by disease annotation
        :param str titleFilter:	filter the title for keywords
        :param str instrumentFilter: filter for instrument names or keywords
        :param str experimentTypeFilter: filter by experiment type
        :param str quantificationFilter: filter by quantification annotation
        :param str projectTagFilter: filter by project tags

        ::

            >>> p = PRIDE()
            >>> projects = p.get_project_list(show=100)

        """
        params = self.get_project_list.actual_kwargs

        res = self.http_get("project/list", params=params)
        try:
            res = res["list"]
        except:
            pass
        return res

[docs]    @params_to_update
    def get_project_count(
        self,
        query="",
        speciesFilter=None,
        ptmsFilter=None,
        tissueFilter=None,
        diseaseFilter=None,
        titleFilter=None,
        instrumentFilter=None,
        experimentTypeFilter=None,
        quantificationfilter=None,
        projectTagFilter=None,
    ):

        """Count projects for given criteria

        Takes same query parameters as the /list operation; typically used to
        retrieve number of results before querying with /list

        :param str query: search term to query for
        :param str speciesFilter: filter by species (NCBI taxon ID or name)
        :param str ptmsFilter: filter by PTM annotation	query
        :param str tissueFilter: filter by tissue annotation
        :param str diseaseFilter: filter by disease annotation
        :param str titleFilter:	filter the title for keywords
        :param str instrumentFilter: filter for instrument names or keywords
        :param str experimentTypeFilter: filter by experiment type
        :param str quantificationFilter: filter by quantification annotation
        :param str projectTagFilter: filter by project tags
        :return: number of projects  (integer)


        """
        params = self.get_project_count.actual_kwargs
        res = self.http_get("project/count", params=params)
        return res

[docs]    def get_assays(self, identifier):
        """Retrieve assay information by assay accession

        :param int identifier: assay accession number

        ::

            >>> p = PRIDE()
            >>> res = p.get_assays(1643)
            >>> res['proteinCount']
            276

        """
        res = self.http_get("assay/%s" % identifier)
        return res

[docs]    def get_assay_list(self, identifier):
        """Return list of assays for a project accession nuber

        :param str identifier: project accession number. See :meth:`get_project_list`
        :return: list of dictionaries. Each dictionary represents an assay.

        ::

            >>> p = PRIDE()
            >>> assays = p.get_assay_list('PRD000001')
            >>> len(assays)  # could be found with get_assay_count_project_accession
            5
            >>> assays[1]['assayAccession']
            1643

        """
        res = self.http_get("assay/list/project/%s" % identifier)
        try:
            res = res["list"]
        except:
            pass
        return res

[docs]    def get_assay_count(self, identifier):
        """Count assays for a project accession number

        :param str identifier: a project accession number
        :return: integer

        ::

            >>> p = PRIDE()
            >>> assays = p.get_assay_count('PRD000001')
            5


        """
        res = self.http_get("assay/count/project/%s" % identifier)
        return res

[docs]    def get_file_list(self, identifier):
        """return list of files for a project

        :param str identifier: a project accession number

        ::

            >>> files = p.get_file_count('PRD000001')
            >>> len(files)
            5

        """
        res = self.http_get("file/list/project/%s" % identifier)
        try:
            res = res["list"]
        except:
            pass
        return res

[docs]    def get_file_count(self, identifier):
        """return count of files in a project

        :param str identifier: a project accession number
        :return: int

        ::

            >>> p.get_file_count('PRD000001')
            5

        """
        res = self.http_get("file/count/project/%s" % identifier)
        return res

[docs]    def get_file_list_assay(self, identifier):
        """list files for an assay

        :param int identifier: assay accession number
        :return: list of dictionary, Each dictionary represents a file data structure


        ::

            res = p.get_file_assay(1643)

        """
        res = self.http_get("file/list/assay/%s" % identifier)
        try:
            res = res["list"]
        except:
            pass
        return res

[docs]    def get_file_count_assay(self, identifier):
        """list files for an assay

        :param int identifier: assay accession number
        :return: int

        ::

            p.get_file_assay(1643)
        """
        res = self.http_get("file/count/assay/%s" % identifier)
        return res

[docs]    @params_to_update
    def get_protein_list(self, identifier, show=10, page=0):
        """Retrieve protein identifications by project accession

        :param str identifier: a project accession number
        :param int show:		how many results to return per page
        :param int page:		which page (starting from 0) of the result to return

        """
        params = self.get_protein_list.actual_kwargs
        res = self.http_get("protein/list/project/%s" % identifier, params=params)
        try:
            res = res["list"]
        except:
            pass
        return res

[docs]    def get_protein_count(self, identifier):
        """Count protein identifications by project accession

        :param str identifier: a project accession number
        :return: int

        """
        res = self.http_get("protein/count/project/%s" % identifier)
        return res

[docs]    @params_to_update
    def get_protein_list_assay(self, identifier, show=10, page=0):
        """Retrieve protein identifications by assay accession

        :param str identifier: a project accession number
        :param int show:		how many results to return per page
        :param int page:		which page (starting from 0) of the result to return

        """
        params = self.get_protein_list_assay.actual_kwargs
        res = self.http_get("protein/list/assay/%s" % identifier, params=params)
        try:
            res = res["list"]
        except:
            pass
        return res

[docs]    def get_protein_count_assay(self, identifier):
        """Count protein identifications by assay accession

        :param str identifier: a project accession number
        :return: int

        """
        res = self.http_get("protein/count/assay/%s" % identifier)
        return res

[docs]    @params_to_update
    def get_peptide_list(self, identifier, sequence=None, show=10, page=0):
        """Retrieve peptide identifications by project accession (and sequence)

        :param str identifier: a project accession number
        :param str sequence: the peptide sequence to limit the query on (optional).
            If provided, show and page are not used
        :param int show:		how many results to return per page
        :param int page:		which page (starting from 0) of the result to return

        ::


            >>> peptides = p.get_peptide_list('PRD000001',  sequence='PLIPIVVEQTGR')
            >>> len(peptides)
            4
            >>> peptides = p.get_peptide_list('PRD000001')
            >>> len(peptides)
            10
            >>> peptides = p.get_peptide_list('PRD000001', show=100)

        .. note:: the function merge two functions from the PRIDE API (get_peptide_list and
            get_peptide_list_sequence)
        """
        params = self.get_peptide_list.actual_kwargs
        if sequence is None:
            res = self.http_get("peptide/list/project/%s" % identifier, params=params)
        else:
            res = self.http_get("peptide/list/project/%s/sequence/%s" % (identifier, sequence))

        try:
            res = res["list"]
        except:
            pass
        return res

[docs]    def get_peptide_count(self, identifier, sequence=None):
        """Count peptide identifications by project accession

        :param str identifier: a project accession number
        :return: int


            >>> p.get_peptide_count('PRD000001', sequence='PLIPIVVEQTGR')
            4
            >>> p.get_peptide_count('PRD000001')
            6758

        """
        if sequence is None:
            res = self.http_get("peptide/count/project/%s" % identifier)
        else:
            res = self.http_get("peptide/count/project/%s/sequence/%s" % (identifier, sequence))
        return res

[docs]    @params_to_update
    def get_peptide_list_assay(self, identifier, sequence=None, show=10, page=0):
        """Retrieve peptide identifications by assay accession (and sequence)

        :param str identifier: an assay accession number
        :param str sequence: the peptide sequence to limit the query on (optional).
            If provided, show and page are not used
        :param int show:		how many results to return per page
        :param int page:		which page (starting from 0) of the result to return

        ::

            >>> peptides = p.get_peptide_list_assay(1643,  sequence='AAATQKKVER')
            >>> len(peptides)
            5
            >>> peptides = p.get_peptide_list_assay(1643)
            >>> len(peptides)
            10
            >>> peptides = p.get_peptide_list_assay(1643, show=100)

        .. note:: the function merge two functions from the PRIDE API (get_peptide_list and
            get_peptide_list_sequence)
        """
        params = self.get_peptide_list_assay.actual_kwargs
        if sequence is None:
            res = self.http_get("peptide/list/assay/%s" % identifier, params=params)
        else:
            res = self.http_get("peptide/list/assay/%s/sequence/%s" % (identifier, sequence))
        try:
            res = res["list"]
        except:
            pass
        return res

[docs]    def get_peptide_count_assay(self, identifier, sequence=None):
        """Count peptide identifications by assay accession

        :param str identifier: an assay accession number
        :return: int

        ::

            >>> p.get_peptide_count_assay(1643, sequence='AAATQKKVER')
            5
            >>> p.get_peptide_count_assay(1643)
            1696

        """
        if sequence is None:
            res = self.http_get("peptide/count/assay/%s" % identifier)
        else:
            res = self.http_get("peptide/count/assay/%s/sequence/%s" % (identifier, sequence))
        return res