#
# This file is part of bioservices software
#
# Copyright (c) 2013-2014 - EBI-EMBL
#
# File author(s):
# Thomas Cokelaer <cokelaer@ebi.ac.uk>
#
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# http://www.gnu.org/licenses/gpl-3.0.html
#
# website: https://github.com/cokelaer/bioservices
# documentation: http://packages.python.org/bioservices
#
##############################################################################
"""Interface to the Rhea web services
.. topic:: What is Rhea ?
:URL: http://www.ebi.ac.uk/rhea/
:Citations: See http://www.ebi.ac.uk/rhea/about.xhtml
.. highlights::
Rhea is a reaction database, where all reaction participants (reactants
and products) are linked to the ChEBI database (Chemical Entities of
Biological Interest) which provides detailed information about structure,
formula and charge. Rhea provides built-in validations that ensure both
elemental and charge balance of the reactions... While the main focus of
Rhea is enzyme-catalysed reactions, other biochemical reactions are also
are included.
The database is extensively cross-referenced. Reactions are currently linked
to the EC list, KEGG and MetaCyc, and the reactions will be used in the
IntEnz database and in all relevant UniProtKB entries. Furthermore, the
reactions will also be used in the UniPathway database to generate
pathways and metabolic networks.
-- from Rhea Home page, Dec 2012 (http://www.ebi.ac.uk/rhea/about.xhtml)
"""
from collections import defaultdict
from bioservices.services import REST
from bioservices import logger
logger.name = __name__
__all__ = ["Rhea"]
[docs]class Rhea:
"""Interface to the `Rhea <http://www.ebi.ac.uk/rhea/rest/1.0/>`_ service
You can search by compound name, ChEBI ID, reaction ID, cross reference
(e.g., EC number) or citation (author name, title, abstract text, publication ID).
You can use double quotes - to match an exact phrase - and the following
wildcards:
* ? (question mark = one character),
* `*` (asterisk = several characters).
Searching for caffe* will find reactions with participants such as caffeine,
trans-caffeic acid or caffeoyl-CoA::
from bioservices import Rhea
r = Rhea()
response = r.search("caffe*")
Searching for a?e?o* will find reactions with participants such as acetoin,
acetone or adenosine.::
from bioservices import Rhea
r = Rhea()
response = r.search("a?e?o*")
The :meth:`search` :meth:`entry` methods require a list of valid columns.
By default all columns are used but you can restrict to only a few. Here is
the description of the columns::
rhea-id : reaction identifier (with prefix RHEA)
equation : textual description of the reaction equation
chebi : comma-separated list of ChEBI names used as reaction participants
chebi-id : comma-separated list of ChEBI identifiers used as reaction participants
ec : comma-separated list of EC numbers (with prefix EC)
uniprot : number of proteins (UniProtKB entries) annotated with the Rhea reaction
pubmed : comma-separated list of PubMed identifiers (without prefix)
and 5 cross-references::
reaction-xref(EcoCyc)
reaction-xref(MetaCyc)
reaction-xref(KEGG)
reaction-xref(Reactome)
reaction-xref(M-CSA)
"""
_url = "https://www.rhea-db.org"
_valid_columns = [
"rhea-id",
"equation",
"chebi",
"chebi-id",
"ec",
"uniprot",
"pubmed",
"reaction-xref(EcoCyc)",
"reaction-xref(MetaCyc)",
"reaction-xref(KEGG)",
"reaction-xref(Reactome)",
"reaction-ref(M-CSA)",
]
def __init__(self, verbose=True, cache=False):
""".. rubric:: Rhea constructor
:param bool verbose: True by default
::
>>> from bioservices import Rhea
>>> r = Rhea()
"""
self.services = REST(name="Rhea", url=Rhea._url, verbose=verbose, cache=cache)
[docs] def search(self, query, columns=None, limit=None, frmt="tsv"):
"""Search for Rhea (mimics https://www.rhea-db.org/)
:param str query: the search term using format parameter
:param str format: the biopax2 or cmlreact format (default)
:Returns: A pandas DataFrame.
::
>>> r = Rhea()
>>> df = r.search("caffeine")
>>> df = r.search("caffeine", columns='rhea-id,equation')
"""
params = {}
if limit:
params["limit"] = limit
if columns:
params["columns"] = columns
params["format"] = frmt
if columns is None:
params["columns"] = ",".join(self._valid_columns)
response = self.services.http_get("rhea/?query={}".format(query), frmt="txt", params=params)
try:
import pandas as pd
import io
df = pd.read_csv(io.StringIO(response), sep="\t")
return df
except Exception as err:
return response
[docs] def query(self, query, columns=None, frmt="tsv", limit=None):
"""Retrieve a concrete reaction for the given id in a given format
:param str query: the entry to retrieve
:param str frmt: the result format (tsv); only tsv accepted for now (Nov
2020).
:param int limit: maximum number of results to retrieve
:Returns: dataframe
Retrieve Rhea reaction identifiers and equation text::
r.query("", columns="rhea-id,equation", limit=10)
Retrieve Rhea reactions with enzymes curated in UniProtKB (only first 10
entries)::
r.query("uniprot:*", columns="rhea-id,equation", limit=10)
To retrieve a specific entry::
df = r.get_entry("rhea:10661")
.. versionchanged:: 1.8.0 (entry() method renamed in query() and no
more format required. Must be given in the entry name e.g.
query("10281.rxn") instead of entry(10281, format="rxn")
the option *frmt* is now related to the result format
"""
params = {"query": query}
if limit:
params["limit"] = limit
if columns:
params["columns"] = columns
params["format"] = frmt
if columns is None:
params["columns"] = ",".join(self._valid_columns)
response = self.services.http_get("rhea?".format(query), frmt="txt", params=params)
try:
import pandas as pd
import io
df = pd.read_csv(io.StringIO(response), sep="\t")
return df
except Exception as err:
return response