Source code for pymantra.statics

from typing import NamedTuple


class Edge(NamedTuple):
    """Edge data type: a 2-tuple holding the source and target node names.

    Attributes
    ----------
    source: str
        Name of the source node
    target: str
        Name of the target node
    """

    source: str
    target: str


# Conventions
# -----------
# * Edge type names are spelled like the node type names, but in capital
#   letters.
# * The order of the node types inside an EDGE_TYPE_LIST entry indicates the
#   edge direction, except for metabolite - reaction connections, which can
#   go both ways.
# * Node type names must not contain any underscores.
#
# To change the name of a relation in the database, 'EDGE_TYPE_LIST' is the
# only object that should be edited.
#
# The order of EDGE_TYPE_LIST and NODE_TYPE_LIST should always stay the same.
EDGE_TYPE_LIST = [
    "GENE_ORGANISM",
    "REACTION_GENE",
    "REACTION_ORGANISM",
    "SUBSTRATE",
    "PRODUCT",
]
EDGE_TYPES = set(EDGE_TYPE_LIST)

NODE_TYPE_LIST = ["reaction", "organism", "gene", "metabolite"]
NODE_TYPES = set(NODE_TYPE_LIST)

# Positional aliases: every mapping below is keyed through these, so renaming
# an entry in one of the lists above propagates to all derived tables.
(
    _GENE_ORGANISM,
    _REACTION_GENE,
    _REACTION_ORGANISM,
    _SUBSTRATE,
    _PRODUCT,
) = EDGE_TYPE_LIST
_REACTION, _ORGANISM, _GENE, _METABOLITE = NODE_TYPE_LIST

# Per node type: mapping of attribute name => source field name.
# TODO: adapt 'ID' attributes to match edge table requirements
NODE_ATTRIBUTES = {
    _REACTION: {
        "nodeLabel": "abbreviation",
        "Description": "description",
        "Formula": "formula",
        "KeggID": "kegg_id",
        "RheaID": "rhea",
        "ReconMap3ID": "reconMap3",
        "ReactomeID": "reactome_id",
        "isHuman": "isHuman",
        "isMicrobe": "isMicrobe",
    },
    _ORGANISM: {
        "nodeLabel": "species",
        "Abbreviation_KEGG": "abbreviations_kegg",
        "Family": "family",
        "Genus": "genus",
        "Species": "species",
        "KeggID": "ids_kegg",
        "VMH": "organisms_vmh",
    },
    _GENE: {
        "nodeLabel": "gene_number",
    },
    _METABOLITE: {
        "nodeLabel": "abbreviation",
        "Name": "fullName",
        "IUPAC": "iupac",
        "KeggID": "KEGG",
        "PubChemID": "pubchem",
        "ChEBI_ID": "CHEBI",
        "ReactomeID": "reactome_id",
        "HMDBID": "hmdb_id",
        "FooDBID": "foodb_id",
        "BiGGID": "bigg_id",
        "NCBIID": "ncbi",
        "PDBID": "pdb_id",
        "UniprotID": "uniprot",
        "EnsemblID": "ensembl",
        "miRBaseID": "mirbase",
    },
}

# Per edge type: attribute mapping (empty for substrate/product edges).
EDGE_ATTRIBUTES = {
    # organism_gene
    _GENE_ORGANISM: {
        "enrichment": "gene_number",
        "tgt": "species",
    },
    # gene_reaction
    _REACTION_GENE: {
        "enrichment": "reaction_abbreviation",
        "tgt": "gene_number",
    },
    # reaction_organism
    _REACTION_ORGANISM: {
        "enrichment": "reaction_abbreviation",
        "tgt": "species",
    },
    # substrate
    _SUBSTRATE: {},
    # product
    _PRODUCT: {},
}

# NOTE: do NOT change the keys below! They ensure that Neo4jGenerator and the
#       network Generator do not need to be adapted, even if the global type
#       names are changed.
EDGE_TYPE_NAMES = {
    "organism_gene": _GENE_ORGANISM,
    "gene_reaction": _REACTION_GENE,
    "organism_reaction": _REACTION_ORGANISM,
    "substrate": _SUBSTRATE,
    "product": _PRODUCT,
}
NODE_TYPE_NAMES = {
    "reaction": _REACTION,
    "organism": _ORGANISM,
    "gene": _GENE,
    "metabolite": _METABOLITE,
}

# (first node type, second node type) => edge type
EDGE_BY_NODE_TYPE = {
    # gene => organism
    (_GENE, _ORGANISM): _GENE_ORGANISM,
    # reaction => gene
    (_REACTION, _GENE): _REACTION_GENE,
    # reaction => organism
    (_REACTION, _ORGANISM): _REACTION_ORGANISM,
    # substrate
    (_METABOLITE, _REACTION): _SUBSTRATE,
    # product
    (_REACTION, _METABOLITE): _PRODUCT,
}

# edge type => (first node type, second node type)
NODE_TYPES_BY_EDGE = {
    # gene => organism
    _GENE_ORGANISM: (_GENE, _ORGANISM),
    # reaction => gene
    _REACTION_GENE: (_REACTION, _GENE),
    # NOTE(review): this pair is (organism, reaction), i.e. the reverse of the
    # (reaction, organism) key used in EDGE_BY_NODE_TYPE and of the
    # "reaction => organism" label this entry originally carried. Kept as-is;
    # confirm whether the reversal is intentional.
    _REACTION_ORGANISM: (_ORGANISM, _REACTION),
    # substrate
    _SUBSTRATE: (_METABOLITE, _REACTION),
    # product
    _PRODUCT: (_REACTION, _METABOLITE),
}

# File name stems per node / edge type.
NODE_FILES = {
    _REACTION: "reactions",
    _ORGANISM: "organisms",
    _GENE: "human_catalysts",
    _METABOLITE: "metabolites",
}
EDGE_FILES = {
    _GENE_ORGANISM: "catalyst_organism",
    _REACTION_GENE: "reaction_catalyst",
    _REACTION_ORGANISM: "reaction_organism",
    _SUBSTRATE: "substrate_relations",
    _PRODUCT: "product_relations",
}

# ================================================= #
# Edge types for subgraphs EXCLUDING reaction nodes #
# ================================================= #
DIRECT_EDGE_TYPE_LIST = [
    "metabolite_metabolite",
    "gene_metabolite",
    "organism_gene",
    "organism_metabolite",
]
DIRECT_EDGE_TYPES = set(DIRECT_EDGE_TYPE_LIST)

(
    _METABOLITE_METABOLITE,
    _GENE_METABOLITE,
    _ORGANISM_GENE,
    _ORGANISM_METABOLITE,
) = DIRECT_EDGE_TYPE_LIST

DIRECT_EDGE_TYPE_NAMES = {
    "metabolite_metabolite": _METABOLITE_METABOLITE,
    "gene_metabolite": _GENE_METABOLITE,
    "organism_gene": _ORGANISM_GENE,
    "organism_metabolite": _ORGANISM_METABOLITE,
}

# direct edge type => (first node type, second node type)
DIRECT_NODE_TYPES_BY_EDGE = {
    _METABOLITE_METABOLITE: (_METABOLITE, _METABOLITE),
    _GENE_METABOLITE: (_GENE, _METABOLITE),
    _ORGANISM_GENE: (_ORGANISM, _GENE),
    _ORGANISM_METABOLITE: (_ORGANISM, _METABOLITE),
}

# characters that raise an exception in neo4j if part of a node label
forbidden_node_characters = ["'", '"', "{", "}", "[", "]"]