import json
import os
import warnings
from abc import ABC
from html.parser import HTMLParser
from typing import Any, Dict, Optional, Set, Tuple

import networkx as nx
import requests

from pymantra.statics import Edge
def _json_safe_dict(py_dict: Dict[str, any]):
return {
k: list(v) if isinstance(v, set) else v for k, v in py_dict.items()}
class _HTMLBodyTextReader(HTMLParser, ABC):
"""Helper class to read html to text
adapted from https://stackoverflow.com/a/55825140 and
https://stackoverflow.com/questions/16773583
"""
def __init__(self):
super().__init__()
self.text = ""
self.inbody = False
def handle_starttag(self, tag, attrs):
self.inbody = False
if tag == "body":
self.inbody = True
def handle_endtag(self, tag):
if tag == "body":
self.inbody = False
def handle_data(self, data):
# get data but only after <body> and before </body>
if self.inbody and data.strip():
self.text += data
def _get_max_lines(to_print: str, n: int):
lines = to_print.split(os.linesep)
return os.linesep.join(lines[:min(len(lines) - 1, n)])
def _parse_html_response(response: requests.Response, n: int = 10):
    """Extract a shortened plain-text version of a html response

    The response text is fed to a :class:`_HTMLBodyTextReader` and the text
    it collected is shortened with :func:`_get_max_lines`.

    Parameters
    ----------
    response: requests.Response
        Response whose `text` attribute is parsed as html
    n: int, 10
        Line limit passed on to :func:`_get_max_lines`

    Returns
    -------
    str
        Shortened text collected from the response
    """
    reader = _HTMLBodyTextReader()
    reader.feed(response.text)
    body_text = reader.text
    return _get_max_lines(body_text, n)
class APINetworkGenerator:
    """API mirror for :class:`~NetworkGenerator`

    Querying the mantra online neo4j database containing the reference network
    generated with the Neo4jGenerator class.

    Most query functions depend on the requirements tested with the `Verifier`
    class. To ensure that all functions work as expected, only databases
    tested for their correctness should be used.

    Attributes
    ----------
    url: str
        Base URL to where requests go
    """
    __slots__ = ["url", "_using_local_api"]

    url: str

    def __init__(self, base_url: str = "https://exbio.wzw.tum.de/pymantradb"):
        """Initialize a new APINetworkGenerator instance

        Initialize a new instance to run queries to the neo4j mantra-db API.

        Parameters
        ----------
        base_url: str, https://exbio.wzw.tum.de/pymantradb
            Set the root URL where the server is located. A trailing slash
            is removed, since all sub-paths are appended with a leading one.

        Raises
        ------
        ValueError
            If `base_url` does not start with 'http://' or 'https://' and
            does not point to a local API
        ConnectionError
            If the connection to `base_url` cannot be verified
        """
        # local API requires different setting to allow for connections
        self._using_local_api: bool = False
        if "127.0.0.1" in base_url:
            self._local_api("127.0.0.1")
        elif "localhost" in base_url:
            self._local_api("localhost")

        # only http/https URIs are allowed
        if not base_url.startswith(("http://", "https://")):
            if not self._using_local_api:
                raise ValueError(
                    "'base_url' needs to be a full web-address it no local "
                    "API is used. Did not find the required starting "
                    "'https://' or 'http://'."
                )
            # NOTE: local django API cannot handle https
            base_url = f"http://{base_url}"

        # avoid '//' in the request URLs built as f"{self.url}/{subpath}"
        self.url = base_url.rstrip("/")

        # checking whether given base is working
        if not self.verify_connection():
            raise ConnectionError(
                f"Base URL '{base_url}' seems to be invalid. Connection could "
                "not be verified!"
            )

    def _local_api(self, local_option: str):
        """Make local APIs available

        Sets the environment variable 'NO_PROXY' to `local_option` (with a
        warning) and marks this instance as using a local API.
        """
        warnings.warn(
            f"Setting 'NO_PROXY' to '{local_option}' to ensure that requests "
            "is able to reach API server"
        )
        os.environ['NO_PROXY'] = local_option
        self._using_local_api = True

    def verify_connection(self) -> bool:
        """Check whether the given base URL is correct

        Returns
        -------
        bool
            True if status code is 200 (connection verified)

        Raises
        ------
        ConnectionError
            If the request itself fails. The causing `requests` exception is
            attached as ``__cause__``.
        """
        try:
            test_req = requests.get(f"{self.url}/verify-connection")
        except requests.exceptions.RequestException as err:
            raise ConnectionError(
                f"Connection to {self.url} could not be established! Please "
                "select a valid URL. Aborting verification process..."
            ) from err
        return test_req.status_code == 200

    def _query(self, data: Dict[str, Any], subpath: str):
        """Run a query and return the data from json

        Parameters
        ----------
        data: Dict[str, Any]
            Form data to send. Values that are dictionaries are sent as
            json strings.
        subpath: str
            Path of the endpoint relative to the base URL

        Returns
        -------
        The decoded json content of the response

        Raises
        ------
        ConnectionError
            For the status codes 404, 414 and 500
        ValueError
            For status code 422 and every other status code except 200
        """
        # dicts are not send properly if passed as python objects
        _prepped_data = {
            k: json.dumps(_json_safe_dict(v)) if isinstance(v, dict) else v
            for k, v in data.items()
        }
        endpoint = f"{self.url}/{subpath}"
        response = requests.post(endpoint, data=_prepped_data)

        status = response.status_code
        if status == 200:
            return json.loads(response.text)

        if status == 404:
            raise ConnectionError(f"URL '{endpoint}' was not found!")
        if status == 414:
            if self._using_local_api:
                raise ConnectionError(
                    "Request-URI Too Large. Please change the "
                    "configuration in the APIs nginx container."
                )
            raise ConnectionError(
                "Request-URI Too Large. Please contact the "
                "developers to increase the allowed size or find an "
                "alternative for processing your data."
            )
        if status == 422:
            raise ValueError(
                f"Invalid data reached the server: {response.text}")
        if status == 500:
            raise ConnectionError(
                "An internal server error occurred wile processing your "
                f"request: {_parse_html_response(response)}"
            )
        raise ValueError(
            "Something went wrong while processing your request, failed "
            f"with exit code {response.status_code} ("
            f"{_parse_html_response(response)})."
        )

    def _query_edges(
        self, data: Dict[str, Any], subpath: str
    ) -> Dict[str, Set[Edge]]:
        """Run a query and convert every returned edge to an :obj:`Edge`"""
        return {
            edge_type: {Edge(*edge) for edge in edges}
            for edge_type, edges in self._query(data, subpath).items()
        }

    def get_reaction_subgraph(
        self, organisms: Set[str], genes: Set[str], metabolites: Set[str],
        reaction_organism: Optional[Tuple[str, str]] = None
    ) -> Dict[str, Set[Edge]]:
        """Extract the edges for a given set of entities

        Query a subgraph with all genes, organisms and metabolites given
        and retain the original graph structure with reaction nodes.

        **Important**: gene - organism, organism - reaction and gene - reaction
        edges are of opposite direction outside the database. The database
        structure is made to allow efficient queries, which do not reflect the
        'passing' directions required for quantitative metabolic-network style
        analyses.

        Parameters
        ----------
        organisms: Set[str]
            A set of all organisms to be included in the subgraph.
            The names must correspond to the nodeLabel property in
            the database.
        genes: Set[str]
            A set of all genes to be included in the subgraph.
            The names must correspond to the nodeLabel property in
            the database.
        metabolites: Set[str]
            A set of all metabolites to be included in the subgraph.
            The names must correspond to the nodeLabel property in
            the database.
        reaction_organism: Tuple[str, str], optional
            Specify an organism for which the metabolic reactions should be
            extracted as a 2-tuple of [ID type, ID], where ID type must be
            'Abbreviation_KEGG' or 'KeggID' and ID the KEGG organism code or
            T number, respectively. For human this would thus either be
            ['Abbreviation_KEGG', 'hsa'] or ['KeggID', 'T01001'].
            If `organisms` is not empty the specified organism will be added
            on top.

        Returns
        -------
        Dict[str, Set[Edge]]
            A dictionary, where keys represent edge types as specified in
            `utils.EDGE_TYPES` pointing to a set of :obj:`Edge` representing
            all edges of the respective type contained in the subgraph.

        Examples
        --------
        >>> generator = APINetworkGenerator()
        >>> metabos = {'FDMO3', 'h2o', 'FDMO2', 'fald', 'FDMO6', 'so3',
        ...            'FMNRx', 'nad', 'FMNRx2', 'fmn', 'nadp'}
        >>> gs = {'1576', '1557', '1559'}
        >>> orgs = {"Streptomyces tsukubensis", "Bacillus smithii",
        ...         "Streptomyces fulvissimus"}
        >>> edges_ = generator.get_reaction_subgraph(orgs, gs, metabos)
        """
        data = {
            "organisms": organisms, "genes": genes, "metabolites": metabolites,
            "reaction_organism": reaction_organism
        }
        return self._query_edges(data, "database/get-reaction-subgraph")

    def get_subgraph(
        self, organisms: Set[str], genes: Set[str], metabolites: Set[str]
    ) -> Dict[str, Set[Edge]]:
        """Returns a subgraph with all nodes given plus the reaction nodes
        required to connect them

        Parameters
        ----------
        organisms: Set[str]
            Set of all organisms to query
            The names must correspond to the nodeLabel property in
            the database.
        genes: Set[str]
            Set of all genes to query
            The names must correspond to the nodeLabel property in
            the database.
        metabolites: Set[str]
            Set of all metabolites to query
            The names must correspond to the nodeLabel property in
            the database.

        Returns
        -------
        Dict[str, Set[Edge]]
            All connections between organisms, genes and metabolites contained
            in the database. organism - metabolite are third order connections
            (via gene and reaction nodes), all other connections are second
            order (via reaction nodes)

        Examples
        --------
        >>> generator = APINetworkGenerator("http://127.0.0.1:8084")
        >>> metabos = {'FDMO3', 'h2o', 'FDMO2', 'fald', 'FDMO6', 'so3',
        ...            'FMNRx', 'nad', 'FMNRx2', 'fmn', 'nadp'}
        >>> gs = {'1576', '1557', '1559'}
        >>> orgs = {"Streptomyces tsukubensis", "Bacillus smithii",
        ...         "Streptomyces fulvissimus"}
        >>> edges_ = generator.get_subgraph(orgs, gs, metabos)
        """
        data = {
            "organisms": organisms, "genes": genes, "metabolites": metabolites}
        return self._query_edges(data, "database/get-subgraph")

    def as_networkx(
        self, nodes: Optional[Dict[str, Set[str]]] = None,
        edges: Optional[Dict[str, Set[Edge]]] = None,
        include_attributes: bool = True,
        reaction_subgraph: bool = False, reduce: bool = True
    ) -> nx.DiGraph:
        """Convert a set of nodes or edges to a networkx Graph

        Parameters
        ----------
        nodes: Dict[str, Set[str]], Optional
            Nodes to include by node type. Generally optional, but either
            `nodes` or `edges` need to be given.
            Please note: if `edges` is not specified, and `reaction_subgraph`
            is True reaction nodes given in `nodes` will NOT be considered.
        edges: Dict[str, Set[Edge]], Optional
            Edges to include by edge type. Generally optional, but either
            `nodes` or `edges` need to be given. If not specified, edges will
            be queried from the database using the specified nodes, where
            the kind of subgraph depends on `reaction_subgraph`.
            The only edge attribute currently included is `edge_type`.
        include_attributes: bool, default True
            If True, the nx.Graph.nodes contain the attributes specified in the
            database. Else `node_type` will be the only node attribute in the
            output graph. Please be aware that if True and `edges` are None,
            this might make the function much less efficient.
        reaction_subgraph: bool, default False
            Only relevant if edges is None. If True subgraph edges queried
            result in a reaction subgraph (see
            :py:meth:`~APINetworkGenerator.get_reaction_subgraph`) else
            the subgraph will not contain reaction nodes
            (:py:meth:`~APINetworkGenerator.get_subgraph`)
        reduce: bool, default True
            Sent to the API as the 'reduce' option. Presumably controls
            whether reaction nodes are reduced in the returned graph --
            TODO: confirm the exact semantics against the API implementation.

        Returns
        -------
        nx.DiGraph
            Subgraph as a :obj:`nx.DiGraph`

        Raises
        ------
        ValueError
            If both `nodes` and `edges` are None

        # TODO: add sample data
        Examples
        --------
        >>> edges_ = {
        ...     EDGE_TYPE_NAMES['substrate']: {
        ...         # TODO: example edges
        ...     },
        ...     EDGE_TYPE_NAMES['product']: {
        ...         # TODO: example edges
        ...     }
        ... }
        >>> generator = APINetworkGenerator()
        >>> generator.as_networkx(edges=edges_)
        """
        if nodes is None and edges is None:
            raise ValueError(
                "Either 'nodes' or 'edges' must be given to run 'as_networkx")
        data = {
            "nodes": nodes, "edges": edges,
            "include_attributes": include_attributes,
            "reaction_subgraph": reaction_subgraph,
            "reduce": reduce
        }
        graph_data = self._query(data, "database/as-networkx")

        graph = nx.DiGraph()
        for node, node_data in graph_data["nodes"].items():
            graph.add_node(node, **node_data)
        # edge keys are '<source><split_str><target>' strings, the separator
        # is part of the response
        split_str = graph_data["split_str"]
        for edge, edge_data in graph_data["edges"].items():
            src, tgt = edge.split(split_str)
            graph.add_edge(src, tgt, **edge_data)
        return graph