Commit aa231d8d authored by Mark Wijzenbroek
Browse files

Add the network builder

parent 9db5b6b1
#!/usr/bin/python
# A simple class to write time-dependent, directed networks in GEXF format for use in Gephi. Written
# because the official libgexf for python is out of date and does not support all features needed.
# Plus, this way we avoid an external dependency. Yay!
#
# Compatible with Python 2.7 and 3.x.
#
# Written by Mark Wijzenbroek in 2018
from __future__ import print_function, unicode_literals, absolute_import, division
import xml.etree.ElementTree as ET
class GEXFNetwork( object ):
    """A time-dependent, directed network that can be written to disk in GEXF 1.2 format.

    Nodes, edges and their attribute declarations are collected in memory with the Add*/Del*
    methods; Write() serializes everything in one go.

    A "times" list, as accepted by AddNode() and AddEdge(), contains two-element lists/tuples
    and plain values: a two-element list means "exists between t=l[0] and t=l[1]", a plain
    value means "exists at t=l".
    """

    # Class-level placeholders; __init__ gives every instance its own containers.
    _nodes = None
    _edges = None
    _nodeattrs = None
    _edgeattrs = None
    _creator = None
    _description = None

    def __init__( self, creator="minigexf", description="" ):
        """Create an empty network.

        creator and description are written to the <meta> element when they are non-empty.
        """
        self._nodes = {}
        self._edges = {}
        self._nodeattrs = {}
        self._edgeattrs = {}
        self._creator = creator
        self._description = description

    @staticmethod
    def _text( value ):
        """Return value as text suitable for an XML attribute.

        Text is passed through untouched; anything else (numbers, ...) is converted with str(),
        because ElementTree refuses to serialize non-string attribute values.
        """
        # type( "" ) is the text type of the running interpreter (unicode under Python 2 with
        # unicode_literals, str under Python 3); leaving text alone avoids str() choking on
        # non-ASCII text in Python 2.
        return value if isinstance( value, type( "" ) ) else str( value )

    @staticmethod
    def _span( timespan ):
        """Return the ( start, end ) strings for a single entry of a "times" list."""
        if isinstance( timespan, ( list, tuple ) ):
            return str( timespan[0] ), str( timespan[1] )
        return str( timespan ), str( timespan )

    @staticmethod
    def _check_attrs( attrs, tdattrs, declared, kind, id ):
        """Raise an Exception when attrs or tdattrs use a key that has not been declared.

        kind is "node" or "edge" and only used for the error message.
        """
        for key in attrs:
            if key not in declared:
                raise Exception( "Unknown attribute " + str( key ) + " in " + kind + " " + str( id ) )
        for key in tdattrs:
            if key not in declared:
                raise Exception( "Unknown time-dependent attribute " + str( key ) + " in " + kind + " " + str( id ) )

    def AddNode( self, id, times, attrs=None, tdattrs=None ):
        """Add or update a node.

        times is a "times" list (see the class description).
        attrs is a dictionary with any additional attributes that should be written to the graph.
        tdattrs are any additional time-dependent attributes, in the form of a dictionary of
        lists; these lists are coupled to the "times" list (same length, same order).

        Raises an Exception when an attribute has not been declared with AddNodeAttr().
        """
        # fresh dictionaries per call: a shared default would be stored in (and shared by) every node
        attrs = {} if attrs is None else attrs
        tdattrs = {} if tdattrs is None else tdattrs
        self._check_attrs( attrs, tdattrs, self._nodeattrs, "node", id )
        self._nodes[id] = { "times": times, "attr": attrs, "tdattr": tdattrs }

    def DelNode( self, id ):
        """Remove a node by ID; unknown IDs are ignored."""
        self._nodes.pop( id, None )

    def AddNodeAttr( self, id, title, attrtype, default ):
        """Declare (or redeclare) a node attribute with its title, GEXF type and default value."""
        self._nodeattrs[id] = { "title": title, "type": attrtype, "default": default }

    def DelNodeAttr( self, id ):
        """Remove a node attribute declaration; unknown IDs are ignored."""
        self._nodeattrs.pop( id, None )

    def AddEdge( self, id, sourceid, targetid, times, attrs=None, tdattrs=None ):
        """Add or update an edge.

        sourceid and targetid refer to the ids of the source and target node, respectively.
        times is a "times" list (see the class description), indicating when the edge exists.
        attrs is a dictionary with any additional attributes that should be written to the graph.
        tdattrs are any additional time-dependent attributes, in the form of a dictionary of
        lists; these lists are coupled to the "times" list (same length, same order).

        Raises an Exception when an attribute has not been declared with AddEdgeAttr() or when
        the source or target node has not been added yet.
        """
        attrs = {} if attrs is None else attrs
        tdattrs = {} if tdattrs is None else tdattrs
        self._check_attrs( attrs, tdattrs, self._edgeattrs, "edge", id )
        if sourceid not in self._nodes:
            raise Exception( "Source ID " + str( sourceid ) + " does not exist, please first add the proper node" )
        if targetid not in self._nodes:
            raise Exception( "Target ID " + str( targetid ) + " does not exist, please first add the proper node" )
        self._edges[id] = { "source": sourceid, "target": targetid, "times": times, "attr": attrs, "tdattr": tdattrs }

    def DelEdge( self, id ):
        """Remove an edge by ID; unknown IDs are ignored."""
        self._edges.pop( id, None )

    def AddEdgeAttr( self, id, title, attrtype, default ):
        """Declare (or redeclare) an edge attribute with its title, GEXF type and default value."""
        self._edgeattrs[id] = { "title": title, "type": attrtype, "default": default }

    def DelEdgeAttr( self, id ):
        """Remove an edge attribute declaration; unknown IDs are ignored."""
        self._edgeattrs.pop( id, None )

    @staticmethod
    def _write_attr_decls( graph, attrclass, declarations ):
        """Append the <attributes> element for attrclass ("node" or "edge") to graph."""
        attributes = ET.SubElement( graph, "attributes" )
        attributes.set( "class", attrclass )
        attributes.set( "mode", "dynamic" )
        for key, value in declarations.items():
            attribute = ET.SubElement( attributes, "attribute" )
            attribute.set( "id", str( key ) )
            attribute.set( "title", str( value["title"] ) )
            attribute.set( "type", str( value["type"] ) )
            default = ET.SubElement( attribute, "default" )
            default.text = str( value["default"] )

    def _write_times_and_attrs( self, element, kind, key, data ):
        """Write the timespans and attribute values of one node or edge onto its XML element.

        kind ("node" or "edge") and key are only used for the error message.
        """
        times = data["times"]
        if len( times ) == 1:
            # A single timespan has been provided, it goes directly on the element
            start, end = self._span( times[0] )
            element.set( "start", start )
            element.set( "end", end )
        elif len( times ) > 1:
            # Multiple timespans are provided, use "spells"
            spells = ET.SubElement( element, "spells" )
            for timespan in times:
                start, end = self._span( timespan )
                spell = ET.SubElement( spells, "spell" )
                spell.set( "start", start )
                spell.set( "end", end )
        # Now do the attributes...
        attvalues = ET.SubElement( element, "attvalues" )
        # First the regular ones, valid for all times...
        for attr, attrdata in data["attr"].items():
            attvalue = ET.SubElement( attvalues, "attvalue" )
            attvalue.set( "for", self._text( attr ) )
            attvalue.set( "value", self._text( attrdata ) )
        # Then the time-dependent ones: one value per entry of the "times" list
        for tdattr, tdattrdata in data["tdattr"].items():
            if len( tdattrdata ) != len( times ):
                raise Exception( "Time-dependent attribute " + str( tdattr ) + " for " + kind + " " + str( key ) + " has a mismatching number of elements" )
            for timespan, value in zip( times, tdattrdata ):
                start, end = self._span( timespan )
                attvalue = ET.SubElement( attvalues, "attvalue" )
                attvalue.set( "for", str( tdattr ) )
                attvalue.set( "value", str( value ) )
                attvalue.set( "start", start )
                attvalue.set( "end", end )

    def Write( self, filename ):
        """Write the network to disk as a GEXF 1.2 document (UTF-8, with XML declaration).

        Raises an Exception when a time-dependent attribute list does not have the same length
        as the "times" list of its node or edge. The complete document is built before the file
        is opened, so no file is created or overwritten in that case.
        """
        # Make the XML root element
        root = ET.Element( "gexf" )
        root.set( "xmlns", "http://www.gexf.net/1.2draft" )
        root.set( "version", "1.2" )
        # Add the metadata...
        meta = ET.SubElement( root, "meta" )
        if self._creator:
            creator = ET.SubElement( meta, "creator" )
            creator.text = self._creator
        if self._description:
            description = ET.SubElement( meta, "description" )
            description.text = self._description
        # A graph...
        graph = ET.SubElement( root, "graph" )
        graph.set( "mode", "dynamic" )
        graph.set( "defaultedgetype", "directed" )
        # The attribute declarations...
        self._write_attr_decls( graph, "node", self._nodeattrs )
        self._write_attr_decls( graph, "edge", self._edgeattrs )
        # The list of nodes...
        nodes = ET.SubElement( graph, "nodes" )
        for key, data in self._nodes.items():
            node = ET.SubElement( nodes, "node" )
            node.set( "id", str( key ) )
            self._write_times_and_attrs( node, "node", key, data )
        # The list of edges...
        edges = ET.SubElement( graph, "edges" )
        for key, data in self._edges.items():
            edge = ET.SubElement( edges, "edge" )
            edge.set( "id", str( key ) )
            edge.set( "source", str( data["source"] ) )
            edge.set( "target", str( data["target"] ) )
            self._write_times_and_attrs( edge, "edge", key, data )
        # And finally, write the XML document to disk; the with-block closes the file even
        # when serializing fails halfway.
        with open( filename, "wb" ) as f:
            ET.ElementTree( root ).write( f, encoding="utf-8", xml_declaration=True )
# Small self-test: build a tiny network and write it to test.xml.
if __name__ == "__main__":
    network = GEXFNetwork()
    # nodes and edges get the same two attribute declarations
    for declare in ( network.AddNodeAttr, network.AddEdgeAttr ):
        declare( "name", "Name", "string", "" )
        declare( "weight", "Weight", "integer", 0 )
    # node 1 is first added without any timespan and then replaced further down
    network.AddNode( 1, [] )
    network.AddNode(
        2,
        [ [3,4], [5,6], 8, [9,10] ],
        attrs={ "name": "Sensor" },
        tdattrs={ "weight": [ 2, 3, 2, 3 ] }
    )
    network.AddNode( 1, [ 5 ], attrs={ "name": "Mark" } )
    network.AddEdge( 1, 1, 2, [ [3,4], 8, [10,12] ], attrs={ "name": "Edge" } )
    network.Write( "test.xml" )
#!/usr/bin/python
# A simple script to convert the output of the proximity sensors into a .gexf file.
#
# Should be compatible with Python 2.7 and 3.x.
#
# Written by Mark Wijzenbroek in 2018
from __future__ import print_function, unicode_literals, absolute_import, division
import sys
import argparse
# Command line: an optional base name for the output plus the two log files to read.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--basename",
    type=str,
    default="graph",
    help="The base filename of our output"
)
for positional, helptext in (
    ( "systemlog", "The location of the system log json file" ),
    ( "detectionlog", "The location of the detection log json file" ),
):
    parser.add_argument( positional, type=str, help=helptext )
args = parser.parse_args( sys.argv[1:] )
systemlog, detectionlog = args.systemlog, args.detectionlog
#systemlog = "/home/mark/Desktop/kumbhserial/data/system/system-dump-ttyUSB0-20181113-121109-20181113-121321.json"
#detectionlog = "/home/mark/Desktop/kumbhserial/data/detection/detection-dump-ttyUSB0-20181113-121109-20181113-121321.json"
#systemlog = "/home/mark/Desktop/kumbhserial/data/system/system-dump-ttyUSB0-20181115-090819-20181115-091004.json"
#detectionlog = "/home/mark/Desktop/kumbhserial/data/detection/detection-dump-ttyUSB0-20181115-090819-20181115-091004.json"
#systemlog = "/home/mark/Desktop/kumbhserial/data/system/system-dump-ttyUSB0-20181120-112300-20181120-112536.json"
#detectionlog = "/home/mark/Desktop/kumbhserial/data/detection/detection-dump-ttyUSB0-20181120-112300-20181120-112536.json"
#systemlog = "/home/mark/Desktop/kumbhserial/data/system/system-dump-ttyUSB0-20181203-110000-20181203-111844.json"
#detectionlog = "/home/mark/Desktop/kumbhserial/data/detection/detection-dump-ttyUSB0-20181203-110000-20181203-111844.json"
# number of ticks in one cycle; log times are divided by this to obtain cycle numbers
interval = 32768
# cycle numbers are truncated to a multiple of this value to fill the "cycle" field of a system
# log entry (presumably the number of cycles between two regular log events -- TODO confirm)
detection_cycles = 6
import json
import minigexf
# read all the data from disk...
with open( systemlog, "r" ) as f:
    # only system log entries with state 1 are used
    syslog = [ x for x in json.load( f ) if x["state"] == 1 ]
with open( detectionlog, "r" ) as f:
    detlog = list( json.load( f ) )
system_data = {}
detection_data = {}
# without any usable system log entry there is no network to build; stop with a clear message
# instead of failing further down with a cryptic error
if not syslog:
    sys.exit( "No system log entries with state 1 found in " + systemlog + ", nothing to do" )
# find information about the network...
from collections import Counter
auths = [ x["auth"] for x in syslog ]
# the network ID is the one we find most of the time (counted in a single pass; which ID wins
# when several are exactly equally common is arbitrary)
network_id = Counter( auths ).most_common( 1 )[0][0]
print( "The network ID is", network_id )
# toss out all the data that was logged on the wrong network: the timing will likely be way off and confuse everything...
syslog = [ x for x in syslog if x["auth"] == network_id ]
# the proximity sensors log events based on "cycles". by default, a cycle consists of 32768 ticks, which is exactly one second.
# each sensor will log every x cycles. although the cycles themselves are synchronized throughout the whole network, the
# cycle in which logging occurs is not. this has some consequences for the timings, for which we need to correct. two sensors
# may log at two different times, if the sensors log every x seconds, but the time difference is always an integer number of
# seconds. checkin and checkout events are logged immediately.
#
# to put this graphically:
# time ---> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
# sensor1: X X X X
# sensor2: | X | X | X |
# sensor3: | X | X | X |
#
# first of all, we need to determine the time offset, which is the time difference between the network time and when sensors
# log data. per entry this is the last whole multiple of the interval (as obtained by truncating time / interval) minus the
# logged time; the offset is the average over all entries. (it seems this is always 0.5 * interval??)
time_shifts = []
for entry in syslog:
    logged_at = float( entry["time"] )
    whole_intervals = int( logged_at / interval ) * interval
    time_shifts.append( whole_intervals - logged_at )
offset = sum( time_shifts ) / len( syslog )
#offset = 0.5 * interval
# split the system log by device...
cycles = []
for entry in syslog:
    # the corrected log time in units of cycles; this should be (very close to) a whole number
    cycle = ( float( entry["time"] ) + offset ) / interval
    if abs( cycle - round( cycle ) ) > 0.05:
        print( "Something went wrong, cycle is", cycle, "but is supposed to be whole number. Something is screwy, skipping this..." )
        continue
    entry["realcycle"] = int( round( cycle ) )
    # truncate to a multiple of the detection cycle length
    entry["cycle"] = int( float( entry["realcycle"] ) / detection_cycles ) * detection_cycles
    cycles.append( entry["realcycle"] )
    # a device is only registered once it has a usable entry, so no device ends up with an
    # empty list (which would break taking the min/max of its cycles later on)
    system_data.setdefault( entry["deviceId"], [] ).append( entry )
# and do the same for the detection log...
for record in detlog:
    detection_data.setdefault( record["deviceId"], [] ).append( record )
# summarize what we found: the devices we have data for and the range of cycles covered...
nodes = sorted( system_data )
node_names = ", ".join( map( str, nodes ) )
print( "We have data for", len( nodes ), "nodes:", node_names )
mincycle, maxcycle = min( cycles ), max( cycles )
print( "Data is available from cycle", mincycle, "to", maxcycle )
print()
# finally we print the network
current_detection = {}
cycles = []
all_edges = []
# index the system log once: for every ( device, realcycle ) keep the first entry that was
# logged, instead of re-scanning the full log of every device for every single cycle
first_entry = {}
for j in nodes:
    for x in system_data[j]:
        first_entry.setdefault( ( j, x["realcycle"] ), x )
for i in range( mincycle, maxcycle + 1 ):
    devs = []
    edges = []
    # now check for each device whether it has logged something...
    for j in nodes:
        entry = first_entry.get( ( j, i ) )
        if entry is None:
            continue
        # the "detection" field of a system log entry is used as an index into the detection
        # records of the device: everything between the previous index and this one is new
        previous = current_detection.get( j, 0 )
        current_detection[j] = entry["detection"]
        devs.append( j )
        # a device may have system log entries but no detection records at all
        new_records = detection_data.get( j, [] )[previous:current_detection[j]]
        for x in new_records:
            # records with id 0 are ignored, and only detections with an rssi above -60 become
            # edges; the weight computation further down relies on every edge carrying its rssi
            if x["id"] != 0 and x["rssi"] > -60:
                edges.append( ( j, x["id"], x["rssi"] ) )
    if devs:
        cycles.append( i )
        print( i, devs, list( set( edges ) ) )
        all_edges.append( ( i, edges ) )
# Build up a tree for our HTML visualizer...
#tree = {}
#tree["nodes"] = []
#for i in nodes:
# fractions = {}
# for x in all_edges:
# fractions[x[0]] = 0.1
# tree["nodes"].append( { "id": i, "fraction": fractions } )
#tree["links"] = []
#for i in all_edges:
# for j in i[1]:
# tree["links"].append( { "source": j[0], "target": j[1], "value": 80+j[2], "year": i[0] } )
# Set some important things for our gexf file...
gexf_graph = minigexf.GEXFNetwork( creator="Mark's Network Builder", description="Proximity sensors and stuff" )
gexf_graph.AddNodeAttr( "title", "Title", "string", "" )
gexf_graph.AddEdgeAttr( "weight", "Weight", "integer", 0 )
gexf_start = str( mincycle )
gexf_end = str( maxcycle )
# Add our nodes: every node exists from the first to the last cycle in which it logged...
for i in nodes:
    node_cycles = [ x["realcycle"] for x in system_data[i] ]
    # a device without any usable entry gets no timespan at all instead of crashing min()/max()
    lifetime = [ [ min( node_cycles ), max( node_cycles ) ] ] if node_cycles else []
    gexf_graph.AddNode( i, lifetime, attrs={ "title": "Sensor" } )
# reshape all_edges into a nested lookup that we can access like:
# all_edges_gexf[source][target] = { t1: w1, t2: w2, ... }
all_edges_gexf = {}
for t, cycle_edges in all_edges:
    for edge in cycle_edges:
        source, target = edge[0], edge[1]
        # the third element of an edge is its rssi; shift it by 80 to get the weight
        weight = 80 + edge[2]
        all_edges_gexf.setdefault( source, {} ).setdefault( target, {} )[t] = weight
# loop through all edges to add them to our gexf file...
k = 0
for source, sdata in all_edges_gexf.items():
    for target, data in sdata.items():
        # collect the times at which this edge exists, in increasing order, together with the
        # weight at each of those times. walking the recorded times directly avoids testing
        # every cycle of the whole measurement for every edge.
        times = []
        weights = []
        for t in sorted( data ):
            if data[t] > 20:
                times.append( t )
                weights.append( data[t] )
        # NOTE(review): AddEdge raises when source or target has not been added as a node; a
        # detected id that never shows up in the system log would trigger that -- confirm
        # whether such ids can occur in practice.
        gexf_graph.AddEdge( k, source, target, times, tdattrs={ "weight": weights } )
        k += 1
# Write it to a file
gexf_graph.Write( args.basename + ".gexf" )
# And dump our json file for the html D3 visualizer...
#with open( args.basename + ".json", "w" ) as f:
# json.dump( tree, f )
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment