Source code for bioservices.quickgo

#
#  This file is part of bioservices software
#
#  Copyright (c) 2013-2014 - EBI-EMBL
#
#  File author(s):
#      Thomas Cokelaer <cokelaer@ebi.ac.uk>
#
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      http://www.gnu.org/licenses/gpl-3.0.html
#
#  website: https://github.com/cokelaer/bioservices
#  documentation: http://packages.python.org/bioservices
#
##############################################################################
"""Interface to the QuickGO interface

.. topic:: What is QuickGO

    :URL: http://www.ebi.ac.uk/QuickGO/
    :Service: http://www.ebi.ac.uk/QuickGO/WebServices.html

    .. highlights::

        "QuickGO is a fast web-based browser for Gene Ontology terms and
        annotations, which is provided by the UniProt-GOA project at the EBI. "

        -- from QuickGO home page, Dec 2012

"""
from bioservices.services import REST

__all__ = ["QuickGO"]


class QuickGO:
    """Interface to the `QuickGO <http://www.ebi.ac.uk/QuickGO/WebServices.html>`_ service

    Retrieve information given a GO identifier:

    .. doctest::

        >>> from bioservices import QuickGO
        >>> go = QuickGO()
        >>> res = go.get_go_terms("GO:0003824")

    .. versionchanged:: 1.5.0
        we use the new QuickGO API since version 1.5.0. To use the old API,
        please use a version of bioservices below 1.5

    """

    # Accepted values for the validated parameters of :meth:`Annotation`.
    # Kept as lists so that they render in error messages as ``['a', 'b']``.
    _VALID_ASPECTS = ["P", "F", "C"]
    _ASPECT_NAMES = {
        "P": "biological_process",
        "F": "molecular_function",
        "C": "cellular_component",
    }
    _VALID_INCLUDE_FIELDS = ["goName", "taxonName", "name", "synonyms"]
    _VALID_GENE_PRODUCT_TYPES = ["protein", "miRNA", "complex"]
    _VALID_RELATIONSHIPS = ["part_of", "is_a", "regulates", "occurs_in"]

    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: print informative messages.
        :param bool cache: set to True to enable HTTP caching
        """
        self.services = REST(url="https://www.ebi.ac.uk/QuickGO", name="quickGO", verbose=verbose, cache=cache)

    @staticmethod
    def _encode_ids(identifiers):
        """Percent-encode ``:`` and ``,`` so an identifier list can be used in a URL path."""
        return identifiers.replace(":", "%3A").replace(",", "%2C")

    @staticmethod
    def _extract_results(response):
        """Return ``response["results"]``, or *response* itself when that key cannot be read.

        The fallback covers anything that is not a mapping holding a
        ``"results"`` entry, so that the caller still receives whatever the
        HTTP layer returned.
        """
        try:
            return response["results"]
        except (KeyError, TypeError):  # pragma: no cover
            return response

    @staticmethod
    def _check_csv_values(name, value, allowed):
        """Raise ``ValueError`` if an item of the comma-separated *value* is not in *allowed*."""
        for this in value.split(","):
            if this not in allowed:
                raise ValueError(f"{name} value '{this}' must be one of {allowed}")

    def _get_json(self, url, params=None):
        """Issue a GET request on *url* asking for a JSON answer."""
        return self.services.http_get(
            url,
            frmt="json",
            params=params if params else {},
            headers=self.services.get_headers("json"),
        )

    def get_go_terms(self, query, max_number_of_pages=None):
        """Get information on the given GO term(s)

        :param str query: GO term ID(s) as a comma-separated string
            (e.g., ``"GO:0003824"``)
        :param max_number_of_pages: currently unused; kept for backward
            compatibility of the signature.
        :return: the ``"results"`` entry of the answer when available,
            otherwise the raw answer of the HTTP request.
        """
        url = "services/ontology/go/terms/{}".format(self._encode_ids(query))
        return self._extract_results(self._get_json(url))

    def get_go_ancestors(self, query, relations="is_a,part_of,occurs_in,regulates"):
        """Retrieve ancestors of given GO term(s).

        :param str query: GO term ID(s) as a comma-separated string
        :param str relations: comma-separated relationship types to traverse
            (default ``"is_a,part_of,occurs_in,regulates"``). Sent to the
            service as the ``relations`` query parameter.
        :return: the ``"results"`` entry of the answer when available,
            otherwise the raw answer of the HTTP request.
        """
        url = "services/ontology/go/terms/{}/ancestors".format(self._encode_ids(query))
        # An empty value is not forwarded: the service then applies its own default.
        params = {"relations": relations} if relations else {}
        return self._extract_results(self._get_json(url, params))

    def get_go_children(self, query):
        """Retrieve direct children of given GO term(s).

        :param str query: GO term ID(s) as a comma-separated string
        :return: the ``"results"`` entry of the answer when available,
            otherwise the raw answer of the HTTP request.
        """
        url = "services/ontology/go/terms/{}/children".format(self._encode_ids(query))
        return self._extract_results(self._get_json(url))

    def get_go_chart(self, query):
        """Request a PNG chart image for the given GO term(s).

        :param str query: GO term ID(s) as a comma-separated string
        :return: whatever the HTTP layer returns for a request sent with an
            ``Accept: image/png`` header (expected to be the image data)

        ::

            res = go.get_go_chart("GO:0022804")
            with open("temp.png", "wb") as fout:
                fout.write(res)

        """
        url = "services/ontology/go/terms/{}/chart".format(self._encode_ids(query))
        # NOTE(review): frmt="json" is kept from the original implementation
        # although a PNG is requested; confirm how REST.http_get handles a
        # non-JSON body before changing it.
        return self.services.http_get(url, frmt="json", params={}, headers={"Accept": "image/png"})

    def get_go_paths(self, _from, _to, relations="is_a,part_of,occurs_in,regulates"):
        """Retrieve paths between two specified sets of ontology terms.

        Each path is formed from a list of (term, relationship, term) triples.

        :param str _from: source GO term ID (e.g., ``"GO:0005215"``)
        :param str _to: target GO term ID (e.g., ``"GO:0003674"``)
        :param str relations: comma-separated relationship types to traverse.
            Sent to the service as the ``relations`` query parameter.
        :return: the complete answer of the service (not only its
            ``"results"`` entry)

        ::

            paths = go.get_go_paths("GO:0005215", "GO:0003674")
            # First path is found as the first item in the "results"
            paths["results"][0]

        """
        url = "services/ontology/go/terms/{}/paths/{}".format(self._encode_ids(_from), self._encode_ids(_to))
        # An empty value is not forwarded: the service then applies its own default.
        params = {"relations": relations} if relations else {}
        return self._get_json(url, params)

    def Annotation(
        self,
        assignedBy=None,
        includeFields=None,
        limit=100,
        page=1,
        aspect=None,
        reference=None,
        geneProductId=None,
        evidenceCode=None,
        goId=None,
        qualifier=None,
        withFrom=None,
        taxonId=None,
        taxonUsage=None,
        goUsage=None,
        goUsageRelationships=None,
        evidenceCodeUsage=None,
        evidenceCodeUsageRelationships=None,
        geneProductType=None,
        targetSet=None,
        geneProductSubset=None,
        extension=None,
    ):
        """Calling the Annotation service

        .. versionchanged:: 1.4.18 due to service API changes, we refactored
            this method completely

        :param str assignedBy: The database from which this annotation
            originates. Accepts comma separated values. E.g., BHF-UCL,Ensembl.
        :param str includeFields: Optional fields retrieved from external
            services. Accepts comma separated values. Accepted values:
            goName, taxonName, name, synonyms.
        :param int limit: number of results per page. Must be an integer
            between 0 and 100 (default 100).
        :param int page: results may be stored on several pages. There is no
            way to retrieve more than 100 results without calling this
            function several times changing this parameter. Must be an
            integer greater than zero (default to 1).
        :param str aspect: use this to limit the annotations returned to a
            specific ontology (Molecular Function, Biological Process or
            Cellular Component). The valid character can be F, P, C.
        :param reference: PubMed or GO reference supporting annotation. Can
            refer to a specific reference identifier or category (for category
            level, use `*` after ref type). Can be 'PUBMED:`*`',
            'GO_REF:0000002'. A list of strings is joined with commas.
        :param str geneProductId: The id of the gene product annotated with
            the GO term. Accepts comma separated values.
            E.g., URS00000064B1_559292.
        :param str evidenceCode: Evidence code indicating how the annotation
            is supported. Accepts comma separated values. E.g., ECO:0000255.
        :param str goId: The GO id of an annotation. Accepts comma separated
            values. E.g., GO:0070125.
        :param str qualifier: Aids the interpretation of an annotation.
            Accepts comma separated values. E.g., enables,involved_in.
        :param str withFrom: Additional ids for an annotation. Accepts comma
            separated values. E.g., P63328.
        :param str taxonId: The taxonomic id of the species encoding the gene
            product associated to an annotation. Accepts comma separated
            values. E.g., 1310605.
        :param str taxonUsage: Indicates how the taxonomic ids within the
            annotations should be used. Can be descendants or exact.
        :param str goUsage: Indicates how the GO terms within the annotations
            should be used. Used in conjunction with 'goUsageRelationships'
            filter. Can be descendants, exact or slim.
        :param str goUsageRelationships: The relationship between the 'goId'
            values found within the annotations. Allows comma separated
            values among is_a, part_of, regulates, occurs_in.
        :param str evidenceCodeUsage: Indicates how the evidence code terms
            within the annotations should be used. Is used in conjunction with
            'evidenceCodeUsageRelationships' filter. Can be descendants or
            exact.
        :param str evidenceCodeUsageRelationships: The relationship between
            the provided 'evidenceCode' identifiers. Allows comma separated
            values among is_a, part_of, regulates, occurs_in.
        :param str geneProductType: The type of gene product. Accepts comma
            separated values. Can be protein, miRNA and/or complex.
        :param str targetSet: Gene product set. Accepts comma separated
            values. E.g., KRUK,BHF-UCL,Exosome.
        :param str geneProductSubset: A database that provides a set of gene
            products. Accepts comma separated values. E.g., TrEMBL.
        :param str extension: Extensions to annotations, where each extension
            can be: EXTENSION(DB:ID) / EXTENSION(DB) / EXTENSION.
        :return: the decoded JSON answer (a dictionary) when the answer can
            be decoded, otherwise the raw answer of the HTTP request
        :raises TypeError: if *limit* or *page* is not an integer in the
            accepted range
        :raises ValueError: if one of the validated parameters holds an
            unsupported value

        ::

            >>> print(go.Annotation(geneProductId='UniProtKB:P12345', reference='PMID:*'))
            >>> print(go.Annotation(geneProductId='UniProtKB:P12345,UniProtKB:Q4VCS5',
            ...      reference='PMID:,Reactome:'))

        """
        import json

        if not isinstance(limit, int) or limit > 100 or limit < 0:
            raise TypeError("limit parameter must be an integer greater than zero and less than 100")
        # The original test looked at ``limit`` here, so ``page`` was never range-checked.
        if not isinstance(page, int) or page < 1:
            raise TypeError("page parameter must be an integer greater than zero")

        # Parameters that always have a value.
        params = {"limit": limit, "page": page}

        # Identifiers are forwarded as soon as they are provided.
        if goId is not None:
            params["goId"] = goId
        if taxonId is not None:
            params["taxonId"] = taxonId

        if assignedBy:
            params["assignedBy"] = assignedBy

        if includeFields:
            self._check_csv_values("includeFields", includeFields, self._VALID_INCLUDE_FIELDS)
            params["includeFields"] = includeFields

        if geneProductType:
            self._check_csv_values("geneProductType", geneProductType, self._VALID_GENE_PRODUCT_TYPES)
            params["geneProductType"] = geneProductType

        if evidenceCode:
            params["evidenceCode"] = evidenceCode

        if evidenceCodeUsage:
            if evidenceCodeUsage not in ["descendants", "exact"]:
                raise ValueError("evidenceCodeUsage must be 'descendants' or 'exact'")
            params["evidenceCodeUsage"] = evidenceCodeUsage

        if taxonUsage:
            if taxonUsage not in ["descendants", "exact"]:
                raise ValueError("taxonUsage must be 'descendants' or 'exact'")
            params["taxonUsage"] = taxonUsage

        if goUsage:
            if goUsage not in ["descendants", "exact", "slim"]:
                raise ValueError("goUsage must be 'descendants', 'exact', or 'slim'")
            params["goUsage"] = goUsage

        if evidenceCodeUsageRelationships:
            self._check_csv_values(
                "evidenceCodeUsageRelationships", evidenceCodeUsageRelationships, self._VALID_RELATIONSHIPS
            )
            params["evidenceCodeUsageRelationships"] = evidenceCodeUsageRelationships

        if goUsageRelationships:
            self._check_csv_values("goUsageRelationships", goUsageRelationships, self._VALID_RELATIONSHIPS)
            params["goUsageRelationships"] = goUsageRelationships

        # Free-form filters: forwarded untouched when provided.
        optional_filters = (
            ("geneProductId", geneProductId),
            ("qualifier", qualifier),
            ("withFrom", withFrom),
            ("targetSet", targetSet),
            ("geneProductSubset", geneProductSubset),
            ("extension", extension),
        )
        for key, value in optional_filters:
            if value:
                params[key] = value

        if aspect is not None:
            self.services.devtools.check_param_in_list(aspect, self._VALID_ASPECTS)
            # The service expects the full ontology name, not the one-letter code.
            params["aspect"] = self._ASPECT_NAMES[aspect]

        if reference:
            if isinstance(reference, list):
                reference = ",".join(x.strip() for x in reference)
            elif not isinstance(reference, str):
                raise ValueError(
                    "Invalid parameter: reference parameter must be a list of strings "
                    "['PUBMED'] or a string (e.g., 'PUBMED:')"
                )
            params["reference"] = reference

        res = self.services.http_get(
            "services/annotation/search?",
            frmt="txt",
            params=params,
            headers=self.services.get_headers("json"),
        )

        # The answer is requested as text and decoded here; anything that is
        # not JSON text (e.g. an error code) is handed back untouched.
        try:
            return json.loads(res)
        except (TypeError, ValueError):
            return res

    def Annotation_from_goid(self, goId, max_number_of_pages=25, **kargs):
        """Returns a DataFrame containing annotation on a given GO identifier

        :param str goId: a GO identifier (e.g., ``"GO:0003824"``)
        :param int max_number_of_pages: maximum number of result pages to fetch
        :param kargs: any other parameter of :meth:`Annotation` except
            **goId** and **page**, which are set by this method.
        :return: a ``pandas.DataFrame`` containing the annotation data, or a
            list if pandas is not installed

        """
        # The first call is only used to find out how many pages exist.
        data = self.Annotation(goId=goId, **kargs)
        number_of_pages = data["pageInfo"]["total"]
        if number_of_pages > max_number_of_pages:
            print("As of 23d Oct 2017, the QuickGO API limits the number of pages to 25")
            number_of_pages = max_number_of_pages

        # unfortunately, the new API requires to call the service for each page.
        results = []
        for page_number in range(1, number_of_pages + 1):
            print("fetching page %s / %s " % (page_number, number_of_pages))
            data = self.Annotation(goId=goId, page=page_number, **kargs)
            # A failed request does not yield a dictionary; such pages are skipped.
            if isinstance(data, dict):
                results.extend(data["results"])

        try:
            import pandas as pd
        except ImportError:
            # The logger lives on the REST helper: this class does not inherit from REST.
            self.services.logging.warning(
                "Cannot return a DataFrame. Returns the list. If you want the dataframe, install pandas library"
            )
            return results
        return pd.DataFrame(results)