Source code for bioservices.quickgo

# -*- python -*-
#
#  This file is part of bioservices software
#
#  Copyright (c) 2013-2014 - EBI-EMBL
#
#  File author(s):
#      Thomas Cokelaer <cokelaer@ebi.ac.uk>
#      
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      http://www.gnu.org/licenses/gpl-3.0.html
#
#  website: https://github.com/cokelaer/bioservices
#  documentation: http://packages.python.org/bioservices
#
##############################################################################
# $Id$
"""Interface to the quickGO interface

.. topic:: What is quickGO

    :URL: http://www.ebi.ac.uk/QuickGO/
    :Service: http://www.ebi.ac.uk/QuickGO/WebServices.html

    .. highlights::

        "QuickGO is a fast web-based browser for Gene Ontology terms and
        annotations, which is provided by the UniProt-GOA project at the EBI. "

        -- from QuickGO home page, Dec 2012

"""
import json

from bioservices.services import REST

__all__ = ["QuickGO"]


[docs]class QuickGO(REST): """Interface to the `QuickGO <http://www.ebi.ac.uk/QuickGO/WebServices.html>`_ service Retrieve information given a GO identifier: .. doctest:: >>> from bioservices import QuickGO >>> go = QuickGO() >>> res = go.get_go_terms("GO:0003824") .. versionchanged:: we use the new QuickGO API since version 1.5.0 To use the old API, please use version of bioservices below 1.5 """ def __init__(self, verbose=False, cache=False): """.. rubric:: Constructor :param bool verbose: print informative messages. """ #super(QuickGO, self).__init__(url="http://www.ebi.ac.uk/QuickGO-Old", super(QuickGO, self).__init__(url="https://www.ebi.ac.uk/QuickGO", name="quickGO", verbose=verbose, cache=cache)
[docs] def get_go_terms(self, query, max_number_of_pages=None): """Get information on all terms and page through the result""" query = query.replace(':', '%3A') query = query.replace(',', '%2C') url = "services/ontology/go/terms/{}".format(query) results = self.http_get(url, frmt="json", params={}, headers=self.get_headers("json")) try: return results['results'] except: #pragma: no cover return results
[docs] def get_go_ancestors(self, query, relations="is_a,part_of,occurs_in,regulates"): query = query.replace(':', '%3A') query = query.replace(',', '%2C') url = "services/ontology/go/terms/{}/ancestors".format(query) results = self.http_get(url, frmt="json", params={}, headers=self.get_headers("json")) try: return results['results'] except: #pragma: no cover return results
[docs] def get_go_children(self, query): query = query.replace(':', '%3A') query = query.replace(',', '%2C') url = "services/ontology/go/terms/{}/children".format(query) results = self.http_get(url, frmt="json", params={}, headers=self.get_headers("json")) try: return results['results'] except: #pragma: no cover return results
[docs] def get_go_chart(self, query): """ :: res = go.get_chart("GO:0022804") with open("temp.png", "wb") as fout: fout.write(res) """ query = query.replace(':', '%3A') query = query.replace(',', '%2C') url = "services/ontology/go/terms/{}/chart".format(query) res = self.http_get(url, frmt="json", params={}, headers={"Accept": "image/png"}) #import base64 #res = base64.b64decode(res).decode() return res
[docs] def get_go_paths(self, _from, _to, relations="is_a,part_of,occurs_in,regulates"): """Retrieves the paths between two specified sets of ontology terms. Each path is formed from a list of (term, relationship, term) triples. paths = go.go_terms_path("GO:0005215", "GO:0003674") # First path is found as the first item in the "results" paths["results"][0] """ _from = _from.replace(':', '%3A') _from = _from.replace(',', '%2C') _to = _to.replace(':', '%3A') _to = _to.replace(',', '%2C') url = "services/ontology/go/terms/{}/paths/{}".format(_from, _to) results = self.http_get(url, frmt="json", params={}, headers=self.get_headers("json")) return results
[docs] def Annotation(self, assignedBy=None, includeFields=None, limit=100, page=1, aspect=None, reference=None, geneProductId=None, evidenceCode=None, goId=None, qualifier=None, withFrom=None, taxonId=None, taxonUsage=None, goUsage=None, goUsageRelationships=None, evidenceCodeUsage=None, evidenceCodeUsageRelationships=None, geneProductType=None, targetSet=None, geneProductSubset=None, extension=None ): """Calling the Annotation service .. versionchanged:: 1.4.18 due to service API changes, we refactored this method completely :param str assignedBy: The database from which this annotation originates. Accepts comma separated values.E.g., BHF-UCL,Ensembl. :param str includeFields: Optional fields retrieved from external services. Accepts comma separated values. accepted values: goName, taxonName, name, synonyms. :param int limit: download limit (number of lines) (default 10,000 rows, which may not be sufficient for the data set that you are downloading. To bypass this default, and return the entire data set, specify a limit of -1). :param int page: results may be stored on several pages. You must provide this number. There is no way to retrieve more than 100 results without calling this function several times chanding this parameter (default to 1). :param char aspect: use this to limit the annotations returned to a specific ontology or ontologies (Molecular Function, Biological Process or Cellular Component). The valid character can be F,P,C. :param str reference: PubMed or GO reference supporting annotation. Can refer to a specific reference identifier or category (for category level, use `*` after ref type). Can be 'PUBMED:`*`', 'GO_REF:0000002'. :param str geneProductId: The id of the gene product annotated with the GO term. Accepts comma separated values.E.g., URS00000064B1_559292. :param str evidenceCode: Evidence code indicating how the annotation is supported. Accepts comma separated values. E.g., ECO:0000255. :param str goId: The GO id of an annotation. Accepts comma separated values. E.g., GO:0070125. :param str qualifier: Aids the interpretation of an annotation. Accepts comma separated values. E.g., enables,involved_in. :param str withFrom: Additional ids for an annotation. Accepts comma separated values. E.g., P63328. :param str taxonId: The taxonomic id of the species encoding the gene product associated to an annotation. Accepts comma separated values. E.g., 1310605. :param str taxonUsage: Indicates how the taxonomic ids within the annotations should be used. E.g., exact. :param str goUsage: Indicates how the GO terms within the annotations should be used. Used in conjunction with 'goUsageRelationships' filter. E.g., descendants. :param str goUsageRelationships: The relationship between the 'goId' values found within the annotations. Allows comma separated values. E.g., is_a,part_of. :param str evidenceCodeUsage: Indicates how the evidence code terms within the annotations should be used. Is used in conjunction with 'evidenceCodeUsageRelationships' filter. E.g., descendants, exact<F12> :param str evidenceCodeUsageRelationships: The relationship between the provided 'evidenceCode' identifiers. Allows comma separated values. E.g., is_a,part_of. :param str geneProductType: The type of gene product. Accepts comma separated values. E.g., protein,RNA. can be protein, RNA and/or complex :param str targetSet: Gene product set. Accepts comma separated values. E.g., KRUK,BHF-UCL,Exosome. :param str geneProductSubset: A database that provides a set of gene products. Accepts comma separated values. E.g., TrEMBL. :param str extension: Extensions to annotations, where each extension can be: EXTENSION(DB:ID) / EXTENSION(DB) / EXTENSION. :return: a dictionary :: >>> print(s.Annotation(protein='P12345', frmt='tsv', col="ref,evidence", ... reference='PMID:*')) >>> print(s.Annotation(protein='P12345,Q4VCS5', frmt='tsv', ... col="ref,evidence",reference='PMID:,Reactome:')) """ #_valid_formats = ["gaf", "gene2go", "proteinList", "fasta", "tsv", "dict"] _valid_db = ['UniProtKB', 'UniGene', 'Ensembl'] _valid_aspect = ['P', 'F', 'C'] validity = { "includeFields": ['goName', 'taxonName', 'name', 'synonyms'] } if isinstance(limit, int) is False or limit >100 or limit<0: raise TypeError("limit parameter must be an integer greater than zero and less than 100") if isinstance(page, int) is False or limit<0: raise TypeError("page parameter must be an integer greater than zero") # fill params with parameters that have default values. params = {'limit': limit, "page":page} # beginning of the URL url = "services/annotation/search?" # what is the ID being provided. We can have only one of: # taxonId, goid if goId is not None: params['goId'] = goId if taxonId is not None: params["taxonId"] = taxonId if assignedBy: params['assignedBy'] = assignedBy if includeFields: for this in includeFields.split(","): assert this in validity['includeFields'] params['includeFields'] = includeFields if geneProductType: for this in geneProductType.split(","): assert this in ["protein", "miRNA", "complex"] params['geneProductType'] = geneProductType if evidenceCode: params['evidenceCode'] = evidenceCode if evidenceCodeUsage: assert evidenceCodeUsage in ['descendants', 'exact'] params['evidenceCodeUsage'] = evidenceCodeUsage if taxonUsage: assert taxonUsage in ['descendants', 'exact'] params['taxonUsage'] = taxonUsage if goUsage: assert goUsage in ['descendants', 'exact', 'slim'] params['goUsage'] = goUsage if evidenceCodeUsageRelationships: for this in evidenceCodeUsageRelationships.split(","): assert this in ['part_of', 'is_a', 'regulates', 'occurs_in'] params['evidenceCodeUsageRelationships'] = evidenceCodeUsageRelationships if goUsageRelationships: for this in goUsageRelationships.split(","): assert this in ['part_of', 'is_a', 'regulates', 'occurs_in'] params['goUsageRelationships'] = goUsageRelationships if geneProductId: params['geneProductId'] = geneProductId if qualifier: params['qualifier'] = qualifier if withFrom: params['withFrom'] = withFrom if targetSet: params['targetSet'] = targetSet if geneProductSubset: params['geneProductSubset'] = geneProductSubset if extension: params['extension'] = extension if aspect is not None: aspects = { "P": "biological_process", "F": "molecular_function", "C": "cellular_component" } self.devtools.check_param_in_list(aspect, _valid_aspect) params['aspect'] = aspects[aspect] if reference: if isinstance(reference, list): reference = ",".join([x.strip() for x in reference]) elif isinstance(reference, str): pass else: raise ValueError(""" Invalid parameter: source parameters must be a list of strings ['PUBMED'] or a string (e.g., 'PUBMED:') """) params['reference'] = reference res = self.http_get(url, frmt="txt", params=params, headers=self.get_headers("json")) try: import json res = json.loads(res) except: pass return res
[docs] def Annotation_from_goid(self, goId, max_number_of_pages=25, **kargs): """Returns a DataFrame containing annotation on a given GO identifier :param str protein: a GO identifier :return: all outputs are stored into a Pandas.DataFrame data structure. All parameters from :math:`Annotation` are also valid except **format** that is set to **tsv** and cols that is made of all possible column names. """ data = self.Annotation(goId=goId, **kargs) number_of_pages = data['pageInfo']['total'] if number_of_pages > max_number_of_pages: print("As of 23d Oct 2017, the QuickGO API limits the number of pages to 25") number_of_pages = max_number_of_pages # unfortunately, the new API requires to call the service for each page. results = [] for i in range(1, number_of_pages + 1): print("fetching page %s / %s " % (i+1, number_of_pages)) data = self.Annotation(goId=goId, page=i+1, **kargs) if data not in [400, '400']: results.extend(data['results']) try: import pandas as pd return pd.DataFrame(results) except: self.logging.warning( "Cannot return a DataFrame. Returns the list. If you want the dataframe, install pandas library") return results