Source code for bioservices.unichem

#
#  This file is part of bioservices software
#
#  Copyright (c) 2013-2014 - EBI-EMBL
#  Copyright (c) 2022 - Institut Pasteur
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      http://www.gnu.org/licenses/gpl-3.0.html
#
#  website: https://github.com/cokelaer/bioservices
#  documentation: http://packages.python.org/bioservices
#
##############################################################################
"""This module provides a class :class:`UniChem`

.. topic:: What is UniChem

    :URL:  https://www.ebi.ac.uk/unichem/info/webservices
    :REST:  https://www.ebi.ac.uk/unichem/rest


    .. highlights::

        "UniChem is a 'Unified Chemical Identifier' system, designed to assist
        in the rapid cross-referencing of chemical structures, and their identifiers,
        between databases (read more). "

        -- From UniChem web page June 2013


"""
import json

from bioservices import REST
from bioservices import logger

import colorlog

# Module-level logger named after this module (``__name__``).
# NOTE: this assignment rebinds ``logger`` and therefore shadows the object
# of the same name imported from ``bioservices`` just above.
logger = colorlog.getLogger(__name__)


class UniChem:
    """Interface to the `UniChem <https://www.ebi.ac.uk/unichem/>`_ service

    .. doctest::

        >>> from bioservices import UniChem
        >>> u = UniChem()

    There are lots of sources such as Chembl, Chebi, etc. You will probably
    need the identifiers of those sources. You can get all information
    about a source using these methods::

        # Get information about a source
        u.get_source_info_by_name('chembl')
        u.get_source_info_by_id(10)
        u.get_id_from_name('chembl')
        u.get_all_src_ids()

    but for developers, everything is contained in the :attr:`source_ids`
    dictionary.

    The first important method provided by the UniChem API is
    :meth:`get_compounds`. For example, you can request all compounds related
    to the CHEMBL12 identifier from ChEMBL using::

        res = u.get_compounds('CHEMBL12', 'chembl')
        compounds = res['compounds'][0]

    Note that the second argument is 'chembl' and lower/upper cases is
    important. All names are stored in :attr:`source_ids` together with their
    identifiers. You can also use :meth:`get_id_from_name` and
    :meth:`get_source_info_by_id` if needed.

    Legacy methods and their status::

        get_compound_ids_from_src_id --> use get_compounds()
        get_src_compound_ids_from_inchikey --> replaced by get_compounds()
        get_all_src_ids() --> uses new API
        get_src_compound_ids_all_from_inchikey --> get_sources_by_inchikey()
        get_verbose_src_compound_ids_from_inchikey --> get_sources_by_inchikey_verbose()
        get_structure --> uses new API get_compounds() and bioservices code
        get_structure_all --> dropped
        get_src_compound_id_url --> dropped. One can use get_compounds()
        get_src_compound_ids_all_from_obsolete --> removed
        get_src_compound_ids_from_src_compound_id --> removed; was obsolete
        get_src_compound_ids_all_from_src_compound_id --> removed; was already obsolete
        get_all_compound_ids_from_all_src_id --> removed. no more API
        get_mapping --> removed. no more API
        get_auxiliary_mappings --> removed. no more API

    Most old functions can be replaced by a syntax such as::

        res = u.get_compounds('CHEMBL12', 'chembl')
        res['compounds'][0]

    .. versionchanged:: 1.9
        drop xml parser.

    """

    _url = "https://www.ebi.ac.uk/unichem"

    # Values of *source_type* that are sent to the service unchanged; anything
    # else must resolve to a source name or a source identifier.
    _NATIVE_TYPES = ("uci", "inchi", "inchikey")

    def __init__(self, verbose=False, cache=False):
        """.. rubric:: **Constructor**

        :param verbose: set to False to prevent informative messages
        :param cache: forwarded as is to the underlying :class:`REST` service

        The list of sources is downloaded once here ("api/v1/sources") and
        kept in memory; :attr:`source_ids` maps each source name to its
        identifier.
        """
        self.services = REST(name="UniChem", url=UniChem._url, verbose=verbose, cache=cache)

        # let us define the source and names
        _data = self.services.http_get("api/v1/sources")
        self._data_source = _data["sources"]
        self.source_ids = {x["name"]: x["sourceID"] for x in self._data_source}

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _get_legacy(self, endpoint, inchikey):
        """Query the legacy ``rest/<endpoint>/<inchikey>`` service.

        :param str endpoint: legacy end point name (e.g. ``inchi``)
        :param inchikey: a single key, or a list/tuple of keys
        :return: the service answer for a single key; a dictionary keyed by
            the input keys for a list/tuple. An answer equal to 500 is
            replaced by an empty dictionary.
        """

        def fetch(key):
            res = self.services.http_get(f"rest/{endpoint}/{key}")
            # if inchikey is not found, return empty {}
            return {} if res == 500 else res

        if isinstance(inchikey, (list, tuple)):
            return {key: fetch(key) for key in inchikey}
        return fetch(inchikey)

    def _post_compound_query(self, endpoint, compound, source_type):
        """POST a compound query to ``api/v1/<endpoint>`` and decode the answer.

        :param str endpoint: ``compounds`` or ``connectivity``
        :param str compound: the compound to search for
        :param source_type: uci, inchi, inchikey, a source name or a source
            identifier found in :attr:`source_ids`
        :return: the decoded answer, or an empty dictionary if *source_type*
            is invalid or the answer cannot be decoded.
        """
        # we need a default value set to empty string
        sourceID = ""

        # source type can be either one of the native types:
        if source_type in self._NATIVE_TYPES:
            pass
        # or a valid source identifier from a valid source name.
        elif source_type in self.source_ids:
            sourceID = int(self.source_ids[source_type])
            source_type = "sourceID"
        # or simply the valid source identifier
        elif source_type in self.source_ids.values():
            sourceID = source_type
            source_type = "sourceID"
        else:
            logger.error(
                f"source_type must be one of uci, inchi, inchikey or a valid source from {sorted(self.source_ids.keys())}"
            )
            return {}

        # The service expects a JSON string as input, so the payload is
        # serialised here rather than left to the HTTP layer.
        body = json.dumps({"compound": compound, "sourceID": sourceID, "type": source_type})
        res = self.services.http_post(
            f"api/v1/{endpoint}", data=body, headers=self.services.get_headers("json")
        )

        # Already decoded by the HTTP layer: nothing left to do.
        if isinstance(res, dict):
            return res
        try:
            return json.loads(res)
        except (TypeError, ValueError):
            # TypeError: the answer is not text (e.g. a numeric status code);
            # ValueError: the answer is text but not valid JSON.
            return {}

    # ------------------------------------------------------------------
    # sources
    # ------------------------------------------------------------------
    def get_id_from_name(self, name):
        """Return the ID of a source given its name.

        :param str name: a valid database name (e.g., chembl)
        :return: the source identifier, or None (an error is logged) if the
            name is unknown.

        ::

            u.get_id_from_name("chembl")

        """
        try:
            return self.source_ids[name]
        except KeyError:
            logger.error(f"You provided {name} but only those sources are available: {sorted(self.source_ids.keys())}")
            return None

    def get_sources(self):
        """Returns all information about all sources used in Unichem

        :return: the list of source descriptions (one dictionary per source)
            downloaded when the instance was created.

        ::

            from bioservices import UniChem
            u = UniChem()
            sources = u.get_sources()
            sources[0]

        """
        return self._data_source

    # NEWS
    def get_inchi_from_inchikey(self, inchikey):
        """Get a list of inchis given a valid inchikey.

        :param inchikey: InChI Key to search. Unlike the rest API, you can also
            provide a list.
        :return: a list of inchis matching the InChI Key provided. If input
            is a list, a dictionary is returned where keys are the inchikey
            input lists. An empty dictionary replaces the answer of a key for
            which the service answers 500.

        ::

            from bioservices import UniChem
            u = UniChem()
            res = u.get_inchi_from_inchikey("AAOVKJBEBIDNHE-UHFFFAOYSA-N")

        .. note:: this is a legacy function. introduced in v1.9 after unichem
            API update
        """
        return self._get_legacy("inchi", inchikey)

    def get_sources_by_inchikey(self, inchikey):
        """Get sources by inchikey

        :param inchikey: InChI Key to search. Unlike the rest API, you can also
            provide a list.
        :return: A list of sources for the provided InChIKey if input is a
            single string. a dictionary with keys as inchikey if input is a
            list. An empty dictionary replaces the answer of a key for which
            the service answers 500.

        .. note:: this is a legacy function. introduced in v1.9 after unichem
            API update
        """
        return self._get_legacy("inchikey", inchikey)

    def get_sources_by_inchikey_verbose(self, inchikey):
        """Get sources by inchikey (verbose end point)

        :param inchikey: InChI Key to search. Unlike the rest API, you can also
            provide a list.
        :return: A list of sources for the provided InChIKey if input is a
            single string. a dictionary with keys as inchikey if input is a
            list. An empty dictionary replaces the answer of a key for which
            the service answers 500.

        .. note:: this is a legacy function. introduced in v1.9 after unichem
            API update
        """
        return self._get_legacy("verbose_inchikey", inchikey)

    def get_all_src_ids(self):
        """Obtain all src_ids of sources available in UniChem

        :return: sorted list of 'src_id's.

        ::

            uni.get_all_src_ids()

        """
        return sorted(x["sourceID"] for x in self._data_source)

    def get_source_info_by_name(self, src_name):
        """Obtain all information on a source by querying with a source name

        :param str src_name: valid names can be found in :attr:`source_ids`
            (e.g. chebi, chembl)
        :return: dictionary with following keys (None, and a warning is
            logged, if the name is unknown):

            * UCICount: number of entries
            * baseIdUrl: URL of the source
            * created: date of creation
            * description: a description of the content of the source
            * lastUpdated: last date of the update
            * name: the unique name for the source in UniChem, always lower case
            * nameLabel: A name for the source suitable for use as a 'label'
              for the source
            * nameLong: the full name of the source, as defined by the source
            * private: is it private or not ?
            * sourceID: the src_id for this source
            * srcDetails: details about the source
            * srcReleaseDate: release date of the source database
            * srcReleaseNumber: release number of the source
            * srcUrl: src_url (the main home page of the source)
            * updateComments: possible updates from this source

        ::

            res = u.get_source_info_by_name("chebi")

        """
        for entry in self._data_source:
            if entry["name"] == src_name:
                return entry
        keys = sorted(x["name"] for x in self._data_source)
        logger.warning(f"incorrect {src_name} source name. Use one of {keys}")
        return None

    def get_source_info_by_id(self, ID):
        """Obtain all information on a source by querying with a source id

        :param ID: a valid source identifier (see :meth:`get_all_src_ids`)
        :return: the dictionary describing the source (same keys as in
            :meth:`get_source_info_by_name`), or None (a warning is logged)
            if the identifier is unknown.

        ::

            res = u.get_source_info_by_id(1)

        """
        for entry in self._data_source:
            if entry["sourceID"] == ID:
                return entry
        ids = sorted(x["sourceID"] for x in self._data_source)
        logger.warning(f"incorrect {ID} source id. Use one of {ids}")
        return None

    # ------------------------------------------------------------------
    # compounds
    # ------------------------------------------------------------------
    def get_compounds(self, compound, source_type):
        """Get matched compounds information

        :param str compound: InChI, InChIKey, Name, UCI or Compound Source ID
        :param source_type: uci, inchi, inchikey, or a source given by its
            name (e.g. chembl) or by its identifier; in the two latter cases
            the request is sent with the type "sourceID" together with the
            identifier of the source.
        :return: a list of matched compounds and their assigned sources. An
            empty dictionary is returned if *source_type* is invalid or if
            the answer of the service cannot be decoded.

        A legacy function allows you to retrieve a compound from its inchikey::

            u.get_sources_by_inchikey('GZUITABIAKMVPG-UHFFFAOYSA-N')

        However, this new function is faster presumably and allows you to do
        the same::

            res = u.get_compounds('GZUITABIAKMVPG-UHFFFAOYSA-N', 'inchikey')
            res['compounds']

        You can get the first element, from which inchi, sources,
        standardInchikey, uci can be extracted. The **sources** key contains
        all compound identifiers for each source::

            res['compounds'][0]['uci']
            res['compounds'][0]['sources']

        Looks like there is always a single element in res['compounds'] but
        since it is a list, you must access to first element (unique) using
        [0] syntax.
        """
        return self._post_compound_query("compounds", compound, source_type)

    def get_connectivity(self, compound, source_type):
        """Fetch multiple source data sets for a given compound with common
        connectivity to a given id on the database source, InChI, InChIkey or
        UCI

        :param str compound: InChI, InChIKey, Name, UCI or Compound Source ID
        :param source_type: uci, inchi, inchikey, or a source given by its
            name (e.g. chembl) or by its identifier
        :return: the decoded answer, or an empty dictionary if *source_type*
            is invalid or if the answer of the service cannot be decoded.

        The returned dictionary contains 5 keys:

        * response: service response ('Success' if everything is right)
        * searchedCompound: the summary in terms of inchi, standardInchikey
          and uci
        * sources: a dictionary with e.g. compoundID and name of the source.
          A 'comparison' dictionary is also provided.
        * totalCompounds: number of searchedCompound entries
        * totalSources: number of sources entries
        """
        return self._post_compound_query("connectivity", compound, source_type)

    def get_images(self, uci, filename=None):
        """Return / create compound image

        :param uci: the UCI of the compound
        :param filename: optional file name to save the SVG+XML output
        :return: the SVG+XML content, or None (a warning is logged) if the
            answer of the service has no ``content`` attribute.

        .. plot::

            res = u.get_images('304698', filename='test.svg')

        """
        res = self.services.http_get(f"api/v1/images/{uci}", headers=self.services.get_headers("svg+xml"))
        try:
            content = res.content
        except AttributeError:
            # a failed request does not come back as a response object
            logger.warning("Invalid UCI request")
            return None

        if filename:
            # Write the raw bytes so that the file is exactly what was
            # received, whatever the locale encoding or platform newlines.
            payload = content.encode() if isinstance(content, str) else content
            with open(filename, "wb") as fout:
                fout.write(payload)
        return content

    # OLD ------------------------------
    def get_structure(self, compound_id, src_id):
        """Obtain structure(s) CURRENTLY assigned to a query src_compound_id.

        :param str compound_id: a valid compound identifier
        :param src_id: corresponding database (source name or identifier).
        :return: dictionary with 'inchi' and 'standardInchiKey' keys taken
            from the first compound returned by :meth:`get_compounds`; an
            empty dictionary if no compound is returned.

        ::

            uni.get_structure("CHEMBL12", "chembl")

        """
        res = self.get_compounds(compound_id, src_id)
        # get_compounds returns {} on failure, and the list may be empty
        compounds = res.get("compounds") if isinstance(res, dict) else None
        if not compounds:
            logger.warning(f"no compound found for {compound_id}")
            return {}
        first = compounds[0]
        return {"inchi": first["inchi"]["inchi"], "standardInchiKey": first["standardInchiKey"]}