#!/usr/bin/env python3
"""Decode gateway database log lines and emit them as CSV (example script).

Originally added as ``log-to-csv.py`` (patch by Dan White, 2017-05-05).
"""

# by convention, built-in imports are listed first
# see: https://www.python.org/dev/peps/pep-0008/
import base64
import csv  # NOTE(review): currently unused; kept for students extending the example
import fileinput
import json
import sys


# Requires installing protobuf from source and installing the python3 language
# bindings. Instructions to do this on a Pi are in the works.
from google.protobuf.json_format import MessageToJson
from google.protobuf.message import DecodeError

# NAME_pb2.py files are generated from the NAME.proto files. There is no need
# to generate the files unless the .proto file changes or a new one is added,
# which shouldn't be the case for ECE 490. Just copy the provided files to the
# same folder as this script.
from NodeData_pb2 import NodeData    # nodeIDs 42, 43
from RAPID_pb2 import SensorData     # all other nodeIDs (via Shaffer)


DEBUG = False


def debug(*args):
    """Print *args to stderr, but only when the module-level DEBUG flag is set.

    Diagnostics go to stderr so they never pollute the CSV stream on stdout.
    """
    if DEBUG:
        print(*args, file=sys.stderr)


# fileinput nicely merges files named as arguments to the script and files
# piped to the script on stdin.
#
# Example (FIX: the original comment referenced "decode-to-json.py", but this
# script is log-to-csv.py):
#   python3 log-to-csv.py *.log < another-log-file.log


# first print a CSV header
# THIS ONLY WORKS FOR NODES 42 and 43
# --> change for your needs!
# Column names for the CSV header row.
# THIS ONLY WORKS FOR NODES 42 and 43 --> change for your needs!
columns = ['gatewayIP',
           'databaseTimestamp',
           'gatewayTimestamp',
           'radioFrequency',
           'RSSI',
           'nodeIDsender',
           'radioAddress',
           'nodeID',
           'vbatt',
           'timestamp',
           'temperature']

print(','.join(columns))


for line in fileinput.input():
    # Official database line format
    # -----------------------------
    # 0 - gatewayIP
    # 1 - databaseTimestamp
    # 2 - gatewayTimestamp
    # 3 - radioFrequency
    # 4 - RSSI
    # 5 - nodeID of sender's radio (may be a relay's nodeID)
    # 6 - radioAddress
    # 7 - data
    line = line.rstrip()
    fields = line.split()

    # only accept lines that have exactly 8 (space-separated) fields
    if len(fields) != 8:
        debug(line)
        debug('Wrong number of fields in database line.')
        continue

    (gatewayIP,
     databaseTimestamp,
     gatewayTimestamp,
     radioFrequency,
     RSSI,
     nodeIDsender,
     radioAddress,
     hexdata) = fields

    # packet data is sometimes not in the correct format
    # FIX: was a bare `except:` which also swallowed KeyboardInterrupt /
    # SystemExit; b16decode signals bad input with ValueError (binascii.Error).
    # casefold=True additionally accepts lowercase hex digits, which the
    # strict default rejected.
    try:
        bindata = base64.b16decode(hexdata, casefold=True)
    except ValueError:
        debug(line)
        debug('Not a valid hex string:', hexdata)
        continue

    # Not all data packets are well-formed; packets may carry trailing junk,
    # so try progressively shorter prefixes until one decodes cleanly.
    for n in range(128, 0, -1):
        b = bindata[:n]
        protodata = SensorData()
        try:
            protodata.ParseFromString(b)
        except DecodeError:
            continue
        else:
            # debug('decoded SensorData using', n)
            break
    else:
        # FIX: the original loop fell through here without a break, then used
        # `protodata`/`b` that were either undefined (first record) or stale
        # from the previous record. Skip undecodable packets instead.
        debug(line)
        debug('Could not decode packet at any prefix length.')
        continue

    # Now that we have a decode, check the first field to fix up the correct
    # decoder given the first field (which is nodeID) if this was really
    # node 42 or 43.
    if protodata.sensorId in (42, 43):
        protodata = NodeData()
        protodata.ParseFromString(b)  # re-use the known-good prefix length
    else:
        # for now, only process 42 and 43 and ignore the others
        continue

    # Translate protobuf to a JSON string (easier to human-read), then load
    # the JSON string into a python dictionary.
    jsondata = MessageToJson(protodata)
    data = json.loads(jsondata)

    # Extract the nodeID of the node that generated the data packet.
    # FIX: the original assigned nodeID twice in each branch (the second
    # assignment silently overwrote the first); keep the canonical
    # direct-from-protobuf form only.
    if 'nodeID' in data:
        nodeID = protodata.nodeID
    elif 'sensorId' in data:
        nodeID = protodata.sensorId
    else:
        debug('Cannot find nodeID in the data')
        continue

    # This is the point in the code where you either collect all the data into
    # some large array, or filter again, looking for a specific nodeID or
    # other thing you are looking for.

    # Dump all of the data to CSV format for this example. Replace the hexdata
    # with extra columns for each decoded value.
    #
    # THIS ONLY WORKS FOR NODES 42 and 43
    row = [gatewayIP,
           databaseTimestamp,
           gatewayTimestamp,
           radioFrequency,
           RSSI,
           nodeIDsender,
           radioAddress,
           str(protodata.nodeID),
           str(protodata.vbatt),
           str(protodata.timestamp),
           str(protodata.temperature)]

    print(','.join(row))