#
# This file is part of bioservices software
#
# Copyright (c) 2013-2014 - EBI-EMBL
#
# File author(s):
# Thomas Cokelaer <cokelaer@ebi.ac.uk>
#
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# http://www.gnu.org/licenses/gpl-3.0.html
#
# website: https://github.com/cokelaer/bioservices
# documentation: http://packages.python.org/bioservices
#
##############################################################################
"""Interface to the EUtils web Service.
.. topic:: What is EUtils ?
:URL: http://www.ncbi.nlm.nih.gov/books/NBK25499/
:URL: http://www.ncbi.nlm.nih.gov/books/NBK25500/#chapter1.Demonstration_Programs
.. highlights::
The Entrez Programming Utilities (E-utilities) are a set of eight server-side programs that provide a stable interface into the Entrez query and database system at the National Center for Biotechnology Information (NCBI). The E-utilities use a fixed URL syntax that translates a standard set of input parameters into the values necessary for various NCBI software components to search for and retrieve the requested data. The E-utilities are therefore the structured interface to the Entrez system, which currently includes 38 databases covering a variety of biomedical data, including nucleotide and protein sequences, gene records, three-dimensional molecular structures, and the biomedical literature.
-- from http://www.ncbi.nlm.nih.gov/books/NBK25497/, March 2013
"""
from bioservices import REST
from bioservices import logger, version
# Re-label the logger object imported from bioservices with this module's name.
logger.name = __name__
# Names exported by ``from <this module> import *``.
__all__ = ["EUtils", "EUtilsParser"]
# source:
# http://www.dalkescientific.com/writings/diary/archive/2005/09/30/using_eutils.html
class EUtils:
    """Interface to `NCBI Entrez Utilities <http://www.ncbi.nlm.nih.gov/entrez>`_ service

    .. note:: Technical note: the WSDL interface was dropped in July 2015
        so we now use the REST service.

    .. warning:: Read the `guidelines
        <http://www.ncbi.nlm.nih.gov/books/NBK25497/>`_ before sending requests.
        No more than 3 requests per second otherwise your IP may be banned.
        You should provide your email by filling the :attr:`email` so that
        before being banned, you may be contacted.

    There are a few methods such as :meth:`ELink`, :meth:`EFetch`.
    Here is an example on how to use :meth:`EFetch` method to retrieve the
    FASTA sequence of a given identifier (34577063)::

        >>> from bioservices import EUtils
        >>> s = EUtils()
        >>> print(s.EFetch("protein", "34577063", rettype="fasta"))
        >gi|34577063|ref|NP_001117.2| adenylosuccinate synthetase isozyme 2 [Homo sapiens]
        MAFAETYPAASSLPNGDCGRPRARPGGNRVTVVLGAQWGDEGKGKVVDLLAQDADIVCRCQGGNNAGHTV
        VVDSVEYDFHLLPSGIINPNVTAFIGNGVVIHLPGLFEEAEKNVQKGKGLEGWEKRLIISDRAHIVFDFH
        QAADGIQEQQRQEQAGKNLGTTKKGIGPVYSSKAARSGLRMCDLVSDFDGFSERFKVLANQYKSIYPTLE
        IDIEGELQKLKGYMEKIKPMVRDGVYFLYEALHGPPKKILVEGANAALLDIDFGTYPFVTSSNCTVGGVC
        TGLGMPPQNVGEVYGVVKAYTTRVGIGAFPTEQDNEIGELLQTRGREFGVTTGRKRRCGWLDLVLLKYAH
        MINGFTALALTKLDILDMFTEIKVGVAYKLDGEIIPHIPANQEVLNKVEVQYKTLPGWNTDISNARAFKE
        LPVNAQNYVRFIEDELQIPVKWIGVGKSRESMIQLF

    Most of the methods take a database name as input. You can obtain the
    valid list by checking the :attr:`databases` attribute.

    A few functions take identifier(s) as input. It could be a list of
    strings, a list of numbers, or a string where identifiers are separated
    either by commas or spaces.

    A few functions take an argument called **term**. You can use the **AND**
    keyword with spaces or + signs as separators::

        Correct: term=biomol mrna[properties] AND mouse[organism]
        Correct: term=biomol+mrna[properties]+AND+mouse[organism]

    Other special characters, such as quotation marks (") or the # symbol used
    in referring to a query key on the History server, could be represented by
    their URL encodings (%22 for "; %23 for #) or verbatim::

        Correct: term=#2+AND+"gene in genomic"[properties]
        Correct: term=%232+AND+%22gene+in+genomic%22[properties]

    For information about retmode and rettype, please see:
    http://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.T._valid_values_of__retmode_and/?report=objectonly
    """

    def __init__(self, verbose=False, email="unknown", cache=False, xmlparser="EUtilsParser"):
        """Create the underlying REST service and resolve the contact email.

        :param bool verbose: forwarded to the REST service.
        :param str email: contact address sent with every request. When left
            to "unknown", the ``user.email`` entry of the bioservices settings
            is used instead; if that is "unknown" too, a warning is logged.
        :param bool cache: forwarded to the REST service.
        :param str xmlparser: default *method* used by :meth:`parse_xml`.
        """
        self.services = REST(
            name="EUtils",
            url="https://eutils.ncbi.nlm.nih.gov/entrez/eutils",
            cache=cache,
            verbose=verbose,
            requests_per_sec=3,
            url_defined_later=True,
        )

        warning = """
        NCBI recommends that users post no more than three URL requests per
        second. Failure to comply with this policy may result in an IP address
        being blocked from accessing NCBI. If NCBI blocks an IP address,
        service will not be restored unless the developers of the software
        accessing the E-utilities register values of the tool and email
        parameters with NCBI. The value of email will be used only to contact
        developers if NCBI observes requests that violate our policies, and we
        will attempt such contact prior to blocking access. For more details
        see http://www.ncbi.nlm.nih.gov/books/NBK25497/#chapter2.chapter2_table1
        BioServices limits requests to 3 per seconds for this services.
        If you choose to set to a higher rate, this will be the user
        responsability. Within
        BioServices, we fill the parameter **tool** and **email**, however,
        to fill the latter you should provide your email either globablly
        when instanciating EUtils, or locally when calling a method.
        This message will not appear if you set the email as a parameter::
        e = EUtils(email="name@adress")
        or in you bioservices configuration file
        (.config/bioservices/bioservices.cfg) under linux with a user section::
        [user]
        email = yourname@somewhere
        """

        self._xmlparser = xmlparser
        self._databases = None
        self.tool = "BioServices, " + version

        #: fill this with your email address
        self.email = email
        if self.email == "unknown":
            # Fall back on the bioservices configuration file.
            configured = self.services.settings.params["user.email"][0]
            if configured != "unknown":
                # Fixed: the original read ``self.servicse`` (typo) here and
                # therefore crashed exactly when a configured email existed.
                self.email = configured
            else:
                self.services.logging.warning(warning)

    def help(self):
        """Open EUtils help page"""
        # This class has no ``on_web`` method of its own, so the page is
        # opened with the standard library directly.
        import webbrowser

        webbrowser.open("http://www.ncbi.nlm.nih.gov/books/NBK25497")

    def _get_databases(self):
        """Return the sorted list of database names, fetched once via einfo."""
        if self._databases is None:
            res = self.services.http_get("einfo.fcgi", params={"retmode": "json"})
            databases = res["einforesult"]["dblist"]
            self._databases = sorted(databases)
        return self._databases

    databases = property(_get_databases, doc="Returns list of valid databases")

    def _check_db(self, db=None):
        """Raise ValueError unless *db* is one of :attr:`databases`."""
        msg = "You must provide a valid databases from : "
        if db is None or db not in self.databases:
            raise ValueError(msg, self.databases)

    def _check_retmode(self, retmode, valids=None):
        """Raise ValueError unless *retmode* is in *valids* (xml/text by default)."""
        if valids is None:
            valids = ["xml", "text"]
        if retmode not in valids:
            # Wrap in a 1-tuple so that a tuple of valid values formats too.
            raise ValueError("You must provide a retmode in %s" % (valids,))

    @staticmethod
    def _check_param_in_list(name, value, valids):
        """Raise ValueError when *value* is not one of *valids*."""
        if value not in valids:
            raise ValueError("%s must be one of %s; got %r" % (name, valids, value))

    def _get_params(self, keys=None, **kargs):
        """Build the parameter dictionary sent along with a request.

        :param keys: names of the parameters known to the target utility;
            each one is pre-filled with None.
        :param kargs: user values. Names outside *keys* are kept but a
            warning is logged, except for ``tool`` and ``email`` which are
            always part of the structure.
        :return: dict holding ``tool``, ``email``, every key and every
            user-provided item.
        """
        keys = [] if keys is None else keys
        params = {"tool": self.tool, "email": self.email}
        # fill the structure with None
        params.update(dict.fromkeys(keys))

        known = set(keys) | {"tool", "email"}
        for k, v in kargs.items():
            params[k] = v
            if k not in known:
                # unknown so let us use it but raise a warning
                self.services.logging.warning(
                    "%s does not seem to be a known parameter. " % k + "Use it anyway but may be ignored"
                )
        return params

    def _get_einfo_params(self, **kargs):
        """Parameters known to EInfo."""
        return self._get_params(["db", "version", "retmode"], **kargs)

    def _get_esummary_params(self, **kargs):
        """Parameters known to ESummary."""
        keys = ["WebEnv", "query_key", "retstart", "retmax", "retmode", "version"]
        return self._get_params(keys, **kargs)

    def _get_esearch_params(self, **kargs):
        """Parameters known to ESearch."""
        keys = [
            "retmax",
            "retstart",
            "WebEnv",
            "query_key",
            "datetype",
            "retmode",
            "field",
            "maxdate",
            "mindate",
            "reldate",
            "rettype",
            "sort",
            "usehistory",
        ]
        return self._get_params(keys, **kargs)

    def _get_egquery_params(self, **kargs):
        """Parameters known to EGQuery (none beyond tool/email)."""
        return self._get_params([], **kargs)

    def _get_espell_params(self, **kargs):
        """Parameters known to ESpell (none beyond tool/email)."""
        return self._get_params([], **kargs)

    def _get_efetch_params(self, **kargs):
        """Parameters known to EFetch."""
        keys = [
            "WebEnv",
            "query_key",
            "retmode",
            "rettype",
            "retstart",
            "retmax",
            "strand",
            "seq_start",
            "seq_stop",
            "complexity",
        ]
        return self._get_params(keys, **kargs)

    def _get_elink_params(self, **kargs):
        """Parameters known to ELink."""
        # Note that id could be id[] ?
        keys = [
            "reldate",
            "mindate",
            "maxdate",
            "datetype",
            "term",
            "holding",
            "linkname",
            "WebEnv",
            "query_key",
            "cmd",
        ]
        return self._get_params(keys, **kargs)

    def _get_epost_params(self, **kargs):
        """Parameters known to EPost."""
        return self._get_params(["WebEnv"], **kargs)

    def _check_ids(self, sid):
        """Normalise identifiers into a comma-separated string.

        :param sid: None, a single identifier (string or number), a list or
            tuple of identifiers, or a string of identifiers separated by
            commas and/or spaces.
        :return: None when *sid* is None, otherwise the identifiers joined
            by commas without spaces or empty items.
        :raises ValueError: if more than 200 identifiers are provided.
        """
        if sid is None:
            return sid
        if isinstance(sid, (list, tuple)):
            sid = ",".join(str(x) for x in sid)
        elif not isinstance(sid, str):
            sid = str(sid)

        # Commas and whitespace are both accepted as separators; str.split()
        # without argument also discards the empty items.
        ids = sid.replace(",", " ").split()
        if len(ids) > 200:
            raise ValueError("Number of identifiers must not exceed 200")
        return ",".join(ids)

    def taxonomy_summary(self, id):
        """Alias to :meth:`ESummary` for the taxonomy database

        ::

            >>> s = EUtils()
            >>> ret = s.taxonomy_summary("9606")
            >>> ret['9606']['species']
            'sapiens'
            >>> ret = s.taxonomy_summary("9606,9605,111111111,9604")
            >>> ret['9604']['taxid']
            9604

        """
        sid = self._check_ids(id)
        return self.ESummary("taxonomy", sid)

    def snp_summary(self, id):
        """Alias to :meth:`ESummary` for the SNP database

        :Return: a json data structure.

        ::

            >>> ret = s.snp_summary("123")

        """
        return self.ESummary("snp", id)

    def EFetch(self, db, id, retmode="text", **kargs):
        """Access to the EFetch E-Utilities

        :param str db: database from which to retrieve UIDs.
        :param str id: list of identifiers. May be None when the records are
            designated with *WebEnv* and *query_key* instead.
        :param retmode: default to text (could be xml but not recommended).
        :param rettype: could be fasta, summary, docsum
        :param strand: if provided, must be 1 or 2.
        :param complexity: if provided, must be in 0, 1, 2, 3, 4.
        :return: depends on retmode parameter.
        :raises ValueError: for an invalid database, too many identifiers or
            an invalid *strand* / *complexity* value.

        .. note:: addition to NCBI: setting retmode to "dict" returns a dictionary

        ::

            >>> ret = s.EFetch("omim", "269840")  --> ZAP70
            >>> ret = s.EFetch("taxonomy", "9606", retmode="xml")
            >>> [x.text for x in ret.getchildren()[0].getchildren() if x.tag=="ScientificName"]
            ['Homo sapiens']

            >>> s = eutils.EUtils()
            >>> s.EFetch("protein", "34577063", retmode="text", rettype="fasta")
            >gi|34577063|ref|NP_001117.2| adenylosuccinate synthetase isozyme 2 [Homo sapiens]
            MAFAETYPAASSLPNGDCGRPRARPGGNRVTVVLGAQWGDEGKGKVVDLLAQDADIVCRCQGGNNAGHTV
            ...
            LPVNAQNYVRFIEDELQIPVKWIGVGKSRESMIQLF

        Identifiers could be provided as a single string with comma-separated
        values, or a list of strings, a list of integers, or just one
        string or one integer but no mixing of types in the list::

            >>> e.EFetch("protein", "352, 234", retmode="text", rettype="fasta")
            >>> e.EFetch("protein", 352, retmode="text", rettype="fasta")
            >>> e.EFetch("protein", [352], retmode="text", rettype="fasta")
            >>> e.EFetch("protein", [352, 234], retmode="text", rettype="fasta")

        **retmode** should be xml or text depending on the database.
        For instance, xml for pubmed::

            >>> e.EFetch("pubmed", "20210808", retmode="xml")
            >>> e.EFetch('nucleotide', id=15, retmode='xml')
            >>> e.EFetch('nucleotide', id=15, retmode='text', rettype='fasta')
            >>> e.EFetch('nucleotide', 'NT_019265', rettype='gb')

        Other special characters, such as quotation marks (") or the # symbol
        used in referring to a query key on the History server, should be
        represented by their URL encodings (%22 for "; %23 for #).

        A useful command is the following one that allows to get back a GI
        identifier from its accession, which is common to NCBI/EMBL::

            e.EFetch(db="nuccore",id="AP013055", rettype="seqid", retmode="text")

        .. versionchanged:: 1.5.0
            instead of "xml", retmode can now be set to dict, in which case an
            XML is retrieved and converted to a dictionary if possible.
        """
        # "dict" is a local convenience: XML is requested, then converted.
        as_dict = retmode == "dict"
        if as_dict:
            retmode = "xml"

        self._check_db(db)
        sid = self._check_ids(id)
        params = self._get_efetch_params(**kargs)

        if params.get("strand") is not None:
            self._check_param_in_list("strand", params["strand"], [1, 2])
        if params.get("complexity") is not None:
            self._check_param_in_list("complexity", params["complexity"], [0, 1, 2, 3, 4])

        query = "efetch.fcgi?db=%s" % db
        if sid is not None:
            # Avoid sending a literal "id=None" for WebEnv/query_key requests.
            query += "&id=%s" % sid
        query += "&retmode=%s" % retmode

        ret = self.services.http_get(query, params=params)
        # Unwrap response-like objects; anything else is returned as is.
        ret = getattr(ret, "content", ret)

        if as_dict and isinstance(ret, (bytes, str)):
            ret = self.parse_xml(ret, "dict")
        return ret

    def EInfo(self, db=None, **kargs):
        """Provides information about a database (e.g., number of records)

        :param str db: target database about which to gather statistics.
            Value must be a valid Entrez database name. See :attr:`databases`
            or don't provide any value to obtain the entire list
        :return: the list of database names when *db* is None; otherwise the
            ``dbinfo`` part of the JSON answer, or the raw answer if that
            part cannot be extracted.

        ::

            >>> all_database_names = s.EInfo()
            >>> # specific info about one database:
            >>> ret = s.EInfo("taxonomy")
            >>> ret[0]['count']
            u'1445358'
            >>> ret = s.EInfo('pubmed')
            >>> ret[0]['fieldlist'][2]['fullname']
            'Filter'

        .. note:: *retmode* is always set to json by this method, whatever
            value is provided.

        .. note:: Note that the name in the XML or json outputs
            differ (some have lower cases, some have upper cases). This
            is inherent to the output of EUtils.
        """
        if db is None:
            return self.databases
        self._check_db(db)

        kargs["retmode"] = "json"
        query = "einfo.fcgi?db=%s" % db
        params = self._get_einfo_params(**kargs)

        # the real call using GET method
        ret = self.services.http_get(query, frmt="json", params=params)
        ret = getattr(ret, "content", ret)
        try:
            return ret["einforesult"]["dbinfo"]
        except (KeyError, IndexError, TypeError):
            # Not the expected JSON structure: hand back whatever we got.
            return ret

    def parse_xml(self, ret, method=None):
        """Convert an XML document with the requested parser.

        :param ret: XML content.
        :param str method: "EUtilsParser", "objectify" or "dict". Defaults to
            the *xmlparser* value given to the constructor.
        :return: the parsed structure, or None for any other *method*.
        """
        if method is None:
            method = self._xmlparser

        if method == "EUtilsParser":
            ret = self.services.easyXML(ret)
            return EUtilsParser(ret)
        if method == "objectify":  # used in docstrings
            from bioservices.xmltools import XMLObjectify

            return XMLObjectify(ret)
        if method == "dict":
            import xmltodict

            return xmltodict.parse(ret)
        return None

    def ESummary(self, db, id=None, **kargs):
        """Returns document summaries for a list of input UIDs

        :param db: a valid database
        :param str id: list of identifiers (or string comma separated).
            all of the UIDs must be from the database specified by db. Limited
            to 200 identifiers. May be omitted when *WebEnv* and *query_key*
            are provided.
        :return: the ``result`` part of the JSON answer, or the raw answer if
            that part cannot be extracted.

        ::

            >>> from bioservices import *
            >>> s = EUtils()
            >>> ret = s.ESummary("snp","7535")
            >>> ret = s.ESummary("snp","7535,7530")
            >>> ret = s.ESummary("taxonomy", "9606,9913")

        ::

            >>> proteins = e.ESearch("protein", "bacteriorhodopsin",
                    retmax=20)
            >>> ret = e.ESummary("protein", 449301857)
            >>> ret['449301857']['extra']
            'gi|449301857|gb|EMC97866.1||gnl|WGS:AEIF|BAUCODRAFT_31870'

        """
        sid = self._check_ids(id)
        self._check_db(db)

        kargs["retmode"] = "json"
        params = self._get_esummary_params(**kargs)

        query = "esummary.fcgi?db=%s" % db
        if sid is not None:
            # Avoid sending a literal "id=None" when no identifier is given.
            query += "&id=%s" % sid

        # the real call using GET method
        ret = self.services.http_get(query, frmt="json", params=params)
        try:
            return ret["result"]
        except (KeyError, IndexError, TypeError):
            return ret

    def EGQuery(self, term, **kargs):
        """Provides the number of records retrieved in all Entrez databases by a text query.

        :param str term: Entrez text query.
            Spaces may be replaced by '+' signs. For very long queries
            (more than several hundred characters long), consider using
            an HTTP POST call. See the
            PubMed or Entrez help for information about search field
            descriptions and tags.
            Search fields and tags are database specific.
        :return: the ``Result`` entry of the parsed XML answer, or the raw
            answer if it cannot be parsed.

        ::

            >>> ret = s.EGQuery("asthma")
            >>> [(x.DbName, x.Count) for x in ret.eGQueryResult.ResultItem if x.Count!='0']

            >>> ret = s.EGQuery("asthma")
            >>> ret.eGQueryResult.ResultItem[0]
            {'Count': '115241',
             'DbName': 'pmc',
             'MenuName': 'PubMed Central',
             'Status': 'Ok'}

        """
        params = self._get_egquery_params(**kargs)
        query = "egquery.fcgi?term=%s" % (term)
        ret = self.services.http_get(query, frmt="xml", params=params)
        try:
            return self.parse_xml(ret)["Result"]
        except Exception:
            # Best effort: parsing is optional, the raw answer is still useful.
            return ret

    def ESearch(self, db, term, **kargs):
        """Responds to a query in a given database

        The response can be used later in ESummary, EFetch or ELink,
        along with the term translations of the query.

        :param db: a valid database
        :param term: an Entrez text query
        :return: the ``esearchresult`` part of the JSON answer, or the raw
            answer if that part cannot be extracted.

        .. note:: see :meth:`_get_esearch_params` for the list of valid parameters.

        ::

            >>> ret = e.ESearch('protein', 'human', retmax=5)
            >>> ret = e.ESearch('taxonomy', 'Staphylococcus aureus[all names]')
            >>> ret = e.ESearch('pubmed', "cokelaer AND BioServices")
            >>> ret = e.ESearch('protein', '15718680')

        More complex requests can be used. We will not cover all the
        possibilities (see the NCBI website). Here is an example to tune
        the search term to look into PubMed for the journal PNAS
        Volume 16, and retrieve.::

            >>> ret = e.ESearch("pubmed", "PNAS[ta] AND 16[vi]")

        You can then look more closely at the identifiers using EFetch::

            >>> e.EFetch("pubmed", ret['idlist'], retmode="xml")

        """
        self._check_db(db)
        kargs["retmode"] = "json"
        params = self._get_esearch_params(**kargs)
        query = "esearch.fcgi?db=%s&term=%s" % (db, term)
        ret = self.services.http_get(query, frmt="json", params=params)
        try:
            return ret["esearchresult"]
        except (KeyError, IndexError, TypeError):
            return ret

    def ESpell(self, db, term, **kargs):
        """Retrieve spelling suggestions for a text query in a given database.

        :param str db: database to search. Value must be a valid Entrez
            database name (default = pubmed).
        :param str term: Entrez text query. All special characters must be
            URL encoded.
        :return: the answer parsed with :class:`EUtilsParser`, or the raw
            answer if it cannot be parsed.

        ::

            >>> ret = e.ESpell(db="pubmed", term="aasthma+OR+alergy")
            >>> ret = ret['eSpellResult']
            >>> ret['Query']
            'asthmaa OR alergies'
            >>> ret['CorrectedQuery']
            'asthma or allergy'

            >>> ret = e.ESpell(db="pubmed", term="biosservices")
            >>> ret = ret['eSpellResult']
            >>> ret['CorrectedQuery']
            bioservices

        """
        self._check_db(db)
        # Use the ESpell parameter set rather than the ESearch one.
        params = self._get_espell_params(**kargs)
        query = "espell.fcgi?db=%s&term=%s" % (db, term)
        ret = self.services.http_get(query, frmt="json", params=params)
        try:
            return self.parse_xml(ret.content, "EUtilsParser")
        except Exception:
            # Best effort: no content or unparsable XML, return what we got.
            return ret

    def ECitMatch(self, bdata, **kargs):
        r"""Retrieve PubMed identifiers matching citation strings.

        :param bdata: Citation strings. Each input citation must
            be represented by a citation string in the following format::

                journal_title|year|volume|first_page|author_name|your_key|

            Multiple citation strings may be provided by separating the
            strings with a carriage return character (%0D) or simply \\r or \\n.

            The your_key value is an arbitrary label provided by the user
            that may serve as a local identifier for the citation,
            and it will be included in the output.

            all spaces must be replaced by + symbols and that citation
            strings should end with a final vertical bar |.

        Only xml supported at the time of this implementation.

        ::

            from bioservices import EUtils
            s = EUtils()
            print(s.ECitMatch("proc+natl+acad+sci+u+s+a|1991|88|3248|mann+bj|Art1|%0Dscience|1987|235|182|palmenberg+ac|Art2|"))

        """
        # Fixes https://github.com/cokelaer/bioservices/issues/169
        from urllib.parse import unquote

        params = {"bdata": unquote(bdata), "retmode": "xml"}

        # note here, we use .cgi not .fcgi
        query = "ecitmatch.cgi?db=pubmed&retmode=xml"
        ret = self.services.http_get(query, None, params=params)
        return getattr(ret, "content", ret)

    def ELink(self, db=None, dbfrom=None, id=None, **kargs):
        """The Entrez links utility

        Responds to a list of UIDs in a given database with either a list of
        related UIDs (and relevancy scores) in the same database or a list
        of linked UIDs in another Entrez database;

        :param str db: valid database from which to retrieve UIDs.
        :param str dbfrom: Database containing the input UIDs. The
            value must be a valid database name (default = pubmed).
            This is the origin database of
            the link operation. If db and dbfrom are set to the same database
            value, then ELink will return computational neighbors within
            that database. Computational neighbors have linknames that begin
            with dbname_dbname (examples: protein_protein,
            pcassay_pcassay_activityneighbor).
        :param str id: UID list. Either a single UID or a comma-delimited list
            Limited to 200 Ids
        :param str cmd: ELink command mode. The command mode specified which
            function ELink will perform. Some optional parameters only
            function for certain values of cmd (see
            http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ELink).
            Examples are neighbor, prlinks.
        :raises ValueError: if neither *db* nor *dbfrom* is provided, or if a
            provided database is not valid.

        ::

            >>> # Example: Find related articles to PMID 20210808
            >>> ret = s.ELink("pubmed", id="20210808", cmd="neighbor_score")
            >>> ret = s.parse_xml(ret, 'EUtilsParser')
            >>> ret.eLinkResult.LinkSet.LinkSetDb[0].Link[1]
            {'Id': '16539535'}

            >>> s.ELink(dbfrom="nucleotide", db="protein",
                              id="48819,7140345")
            >>> s.ELink(dbfrom='nuccore', id='21614549,219152114',
                              cmd='ncheck')

        Convert GI number to Taxon identifiers::

            >>> s.ELink(dbfrom='nuccore', db="taxonomy", id='21614549,219152114')

        """
        # unlike other EUtils, db and dbfrom are here optional
        sid = self._check_ids(id)
        if db is not None:
            self._check_db(db)
        if dbfrom is not None:
            self._check_db(dbfrom)
        if db is None and dbfrom is None:
            raise ValueError("One of db or dbfrom parameter must be provided")

        valid_cmds = [
            "neighbor",
            "neighbor_score",
            "neighbor_history",
            "acheck",
            "llinks",
            "lcheck",
            "ncheck",
            "llinkslib",
            "prlinks",
        ]
        cmd = kargs.get("cmd") if "cmd" in kargs else None
        if "cmd" in kargs:
            # NOTE(review): kept as an assert so the exception type seen by
            # callers is unchanged; it is skipped when Python runs with -O.
            assert cmd in valid_cmds, "cmd must be one of %s" % valid_cmds

        # Both databases are validated above, at least one of them is set.
        if db is not None and dbfrom is not None:
            query = "elink.fcgi?db=%s&dbfrom=%s" % (db, dbfrom)
        elif dbfrom is not None:
            query = "elink.fcgi?dbfrom=%s" % dbfrom
        else:
            query = "elink.fcgi?db=%s" % db

        if sid is not None:
            query += "&id=%s" % sid
        if cmd is not None:
            query += "&cmd=%s" % cmd

        params = self._get_elink_params(**kargs)
        return self.services.http_get(query, frmt="txt", params=params)

    def EPost(self, db, id, **kargs):
        """Accepts a list of UIDs from a given database,
        stores the set on the History Server, and responds with a query
        key and web environment for the uploaded dataset.

        :param str db: a valid database
        :param id: list of strings of strings
        :return: a dictionary with a Web Environment string
            and a QueryKey to be re-used in another EUtils. A value is None
            when the corresponding tag is absent from the answer.

        """
        self._check_db(db)
        sid = self._check_ids(id)
        params = self._get_epost_params(**kargs)
        query = "epost.fcgi/?db=%s&id=%s" % (db, sid)

        ret = self.services.http_get(query, "xml", params=params)
        ret = getattr(ret, "content", ret)
        ret = self.services.easyXML(ret)

        # Default to None so that a missing tag does not end up in an
        # UnboundLocalError when building the result.
        found = {"WebEnv": None, "QueryKey": None}
        for item in ret.getchildren():
            if item.tag in found:
                found[item.tag] = item.text
        return found
class AttrDict(dict):
    """Dictionary whose items can also be read and written as attributes."""

    def __init__(self, *args, **kwargs):
        """Accept the same arguments as :class:`dict`."""
        dict.__init__(self, *args, **kwargs)
        # The instance namespace *is* the dictionary itself, so ``d.key``
        # and ``d["key"]`` always refer to the same storage.
        self.__dict__ = self
class EUtilsParser(AttrDict):
    """Convert xml returned by EUtils into a structure easier to manipulate

    Used by :meth:`EUtils.EGQuery`, :meth:`EUtils.ELink`.

    The input is either an object exposing the document root as ``.root``
    or an XML element (an iterable of children with ``tag`` and ``text``).
    Each child tag becomes a key: a leaf maps to its text, an element with
    children maps to a nested :class:`EUtilsParser`, and a tag that occurs
    several times maps to the list of its values.
    """

    def __init__(self, xml):
        super(EUtilsParser, self).__init__()

        # A document wrapper exposes ``.root``; a plain element does not.
        try:
            root = xml.root
            name = root.tag
        except AttributeError:
            root = None

        if root is not None:
            self[name] = EUtilsParser(root)
            return

        children = list(xml)
        if not children:
            # Leaf element: keep its own text under its own tag.
            self[xml.tag] = xml.text
            return

        for child in children:
            if len(list(child)) == 0:
                value = child.text
            else:
                # This is probably a list then
                value = EUtilsParser(child)

            tag = child.tag
            if tag not in self:
                self[tag] = value
            elif isinstance(self[tag], list):
                self[tag].append(value)
            else:
                # Second occurrence of the tag: switch to a list of values.
                self[tag] = [self[tag], value]

    def __str__(self):
        # The private name is not assigned by __init__, so look it up
        # without failing (the instance namespace is the dictionary itself).
        name = self.get("_EUtilsParser__name")
        if name == "DbInfo":
            return "".join("{0:10}:{1}\n".format(this.Name, this.Description) for this in self.FieldList)
        # __str__ must return a string: fall back on the dict representation.
        return dict.__repr__(self)
class XMLEUtils(object):
    """Thin holder keeping a reference to an XML object."""

    def __init__(self, xml):
        """Store *xml* unchanged in the :attr:`xml` attribute."""
        setattr(self, "xml", xml)