#
# This file is part of bioservices software
#
# Copyright (c) 2013-2014 - EBI-EMBL
#
# File author(s):
# https://github.com/cokelaer/bioservices
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# http://www.gnu.org/licenses/gpl-3.0.html
#
# source: http://github.com/cokelaer/bioservices
# documentation: http://packages.python.org/bioservices
#
##############################################################################
"""Interface to PRIDE web service

.. topic:: What is PRIDE ?

    :URL: http://www.ebi.ac.uk/pride/ws/archive/v2

    .. highlights::

        The PRIDE PRoteomics IDEntifications database is a centralized,
        standards compliant, public data repository for proteomics data,
        including protein and peptide identifications, post-translational
        modifications and supporting spectral evidence.

        -- From PRIDE web site, Jan 2015

"""
import tqdm
from bioservices.services import REST
from bioservices import logger
# Label the logger imported from bioservices with this module's name.
logger.name = __name__
# Only the PRIDE class is exported by a star-import of this module.
__all__ = ["PRIDE"]
class PRIDE:
    """Interface to the `PRIDE <https://www.ebi.ac.uk/pride/>`_ service

    ::

        from bioservices import PRIDE
        p = PRIDE()
        p.get_peptide_evidence(project_accession="PRD000001")

    .. versionchanged:: 1.10.1
        Due to new API:

        - the method project_count was dropped.
        - get_project_list was renamed in get_project_files
        - get_assays, get_assay_count, get_assay_count_project_accession,
          get_assay_list were dropped in v2
        - get_protein_list, get_protein_count, get_protein_count_assay,
          get_protein_list_assay replaced by get_protein_evidences method
        - get_peptide_list_assay, get_peptide_count, get_peptide_list,
          get_peptide_list_sequence, get_peptide_count_assay replaced by
          get_peptide_evidence.

    """

    _url = "https://www.ebi.ac.uk/pride/ws/archive/v2"

    def __init__(self, verbose=False, cache=False):
        """**Constructor**

        :param verbose: set to False to prevent informative messages
        :param cache: set to True to use caching. Not recommended for
            this service that evolves a lot
        """
        self.services = REST(name="PRIDE", url=PRIDE._url, verbose=verbose, cache=cache)

    @staticmethod
    def _count_pages(total, page_size, max_pages):
        """Return the number of pages needed to fetch *total* items.

        :param int total: total number of items available
        :param int page_size: number of items per page
        :param max_pages: upper bound on the number of pages (int or float)
        :return: a non-negative int usable with :func:`range`
        """
        # Ceiling division: a trailing, partially filled page still has to be
        # requested, otherwise its items are silently lost.
        needed = -(-total // page_size)
        # max_pages may be a float (the public default is 1e9); range() needs
        # an int, so convert after taking the minimum.
        return max(0, int(min(needed, max_pages)))

    def get_project(self, identifier):
        """Retrieve project information by accession

        :param str identifier: a valid PRIDE identifier e.g., PRD000001
        :return: the reply of the ``projects/{identifier}`` resource. If the
            service answers with a 400 status code (invalid identifier), a
            warning is logged and an empty dictionary {} is returned.

        .. doctest::

            >>> from bioservices import PRIDE
            >>> p = PRIDE()
            >>> res = p.get_project("PRD000001")
            >>> res['title']
            'COFRADIC proteome of unstimulated human blood platelets'

        """
        res = self.services.http_get(f"projects/{identifier}")
        if res == 400:
            logger.warning(f"Nothing found for {identifier}. may be this is not a valid identifier. Use get_projects")
            return {}
        return res

    def get_projects(self, pageSize=100, max_pages=1e9):
        """Get list of all projects

        The ``projects`` resource is paginated; pages are requested one after
        the other (with a progress bar) and concatenated.

        :param int pageSize: how many projects to request per page
        :param max_pages: maximum number of pages to download
        :return: list of the project entries found in
            ``['_embedded']['projects']`` of each page
        """
        total = self.get_projects_count()
        n_pages = self._count_pages(total, pageSize, max_pages)

        results = []
        # Pages are numbered from 0.
        for page in tqdm.tqdm(range(n_pages)):
            res = self.services.http_get("projects", params={"pageSize": pageSize, "page": page})
            results.extend(res["_embedded"]["projects"])
        return results

    def get_projects_count(self):
        """Return the total number of projects

        :return: the ``totalElements`` field of the ``page`` section returned
            by the ``projects`` resource
        """
        res = self.services.http_get("projects")
        return res["page"]["totalElements"]

    def get_project_files(self, accession, pageSize=100, page=0, sortConditions=None, sortDirection="DESC", filters=""):
        """List the files of a project

        :param str accession: the accession number to look for
        :param int pageSize: how many results to return per page
        :param int page: which page (starting from 0) of the result to return
        :param str sortConditions: field(s) used for sorting. Several fields
            can be separated by comma. Example: submission_date,project_title
        :param str sortDirection: the sorting order (ASC or DESC)
        :param str filters: Parameters to filter the search results. The structure of
            the filter is: field1==value1, field2==value2. Example accession==PRD000001
        :return: the ``list`` entry of the reply when there is one, otherwise
            the reply itself

        ::

            >>> p = PRIDE()
            >>> results = p.get_project_files(accession="PRD000001", pageSize=10, page=1)

        In v1.10.1 due to new PRIDE API, the method **get_file_count** was dropped. You can use::

            len(results['_embedded']['files'])

        Similarly the **get_file_list** method was dropped since all results are
        stored in the output of this method

        """
        params = {
            "pageSize": pageSize,
            "page": page,
            "sortDirection": sortDirection,
            "sortConditions": sortConditions,
            "filter": filters,
        }
        res = self.services.http_get(f"projects/{accession}/files", params=params)
        # Unwrap the "list" entry when present. Replies without it (including
        # non-dict replies) are returned untouched, without resorting to a
        # bare except that would also hide unrelated errors.
        if isinstance(res, dict) and "list" in res:
            return res["list"]
        return res

    def get_protein_evidences(
        self,
        project_accession=None,
        assay_accession=None,
        reported_accession=None,
        pageSize=100,
        page=0,
        sortDirection="DESC",
        sortConditions="projectAccession",
    ):
        """Get all proteins evidence

        :param project_accession: optional, sent as ``projectAccession``
        :param assay_accession: optional, sent as ``assayAccession``
        :param reported_accession: optional, sent as ``reportedAccession``
        :param int pageSize: how many results to return per page
        :param int page: which page (starting from 0) of the result to return
        :param str sortConditions: field(s) used for sorting (default is
            projectAccession). Several fields can be separated by comma.
        :param str sortDirection: the sorting order (ASC or DESC)
        :return: the reply of the ``proteinevidences`` resource

        ::

            p.get_protein_evidences()['_embedded']['proteinevidences']

        """
        params = {}
        # Accession filters are only sent when provided.
        if project_accession:
            params["projectAccession"] = project_accession
        if assay_accession:  # pragma: no cover
            params["assayAccession"] = assay_accession
        if reported_accession:  # pragma: no cover
            params["reportedAccession"] = reported_accession

        params["pageSize"] = pageSize
        params["page"] = page
        params["sortConditions"] = sortConditions
        params["sortDirection"] = sortDirection

        return self.services.http_get("proteinevidences", params=params)

    def get_peptide_evidence(
        self,
        project_accession=None,
        assay_accession=None,
        protein_accession=None,
        peptide_evidence_accession=None,
        peptide_sequence=None,
        pageSize=100,
        page=0,
        sortDirection="DESC",
        sortConditions="projectAccession",
    ):
        """Get all the peptide evidences for an specific protein evidence

        :param project_accession: optional, sent as ``projectAccession``
        :param assay_accession: optional, sent as ``assayAccession``
        :param protein_accession: optional, sent as ``proteinAccession``
        :param peptide_evidence_accession: optional, sent as
            ``peptideEvidenceAccession``
        :param peptide_sequence: optional, sent as ``peptideSequence``
        :param int pageSize: how many results to return per page
        :param int page: which page (starting from 0) of the result to return
        :param str sortConditions: field(s) used for sorting (default is
            projectAccession). Several fields can be separated by comma.
        :param str sortDirection: the sorting order (ASC or DESC)
        :return: the reply of the ``peptideevidences`` resource

        Retrieving data from an accession should be fast::

            p.get_peptide_evidence(protein_accession="Q8IX30")

        but other methods may be slow::

            p.get_peptide_evidence(peptide_sequence="CQGSPGASKAMLSCNR")

        """
        params = {}
        # Filters are only sent when provided.
        if project_accession:
            params["projectAccession"] = project_accession
        if assay_accession:  # pragma: no cover
            params["assayAccession"] = assay_accession
        if protein_accession:  # pragma: no cover
            params["proteinAccession"] = protein_accession
        if peptide_evidence_accession:  # pragma: no cover
            params["peptideEvidenceAccession"] = peptide_evidence_accession
        if peptide_sequence:  # pragma: no cover
            params["peptideSequence"] = peptide_sequence

        params["pageSize"] = pageSize
        params["page"] = page
        params["sortConditions"] = sortConditions
        # Forward the requested order, as get_protein_evidences does; it used
        # to be accepted by the signature but silently ignored.
        params["sortDirection"] = sortDirection

        return self.services.http_get("peptideevidences", params=params)

    def get_stats(self, name=None):
        """Retrieve statistics by Name

        :param str name: name of the statistics. If not provided, the
            ``stats/`` resource itself is queried.
        :return: the reply of the queried resource

        If you do not have the name, just type::

            p.get_stats()

        and then, e.g., ::

            p.get_stats("SUBMISSIONS_PER_YEAR")

        """
        query = "stats/" if name is None else f"stats/{name}"
        return self.services.http_get(query)