You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

53 lines
2.4 KiB

import csv, re
from datetime import datetime, date
# this script is for adding two columns to a wifi log CSV for tableau to read
# open the file "input.csv". currently there is no handling for combining files, so that can be done manually beforehand. but this can be problematic as each file we get from ITS is 30 days, so if we want to analyze 12 months we have to manually combine 12 files which is obnoxious
# Explicit spelling of the layout that "%c" produces in the C / en_US locale.
# Spelling it out keeps parsing independent of the locale of the machine that
# runs the script.
_TIME_FORMAT = "%a %b %d %H:%M:%S %Y"

# A time zone abbreviation (CDT, CST, ...) sitting directly before the
# four-digit year at the end of the timestamp. strptime has no reliable
# handling for these abbreviations, so the token is dropped before parsing.
_ZONE_BEFORE_YEAR = re.compile(r"\s+[A-Z]{2,5}(?=\s+\d{4}\s*$)")


def _duration_to_hours(text):
    """Convert a session duration string to a number of hours.

    The string is a sequence of "<integer> <unit>" pairs, e.g.
    "1 days 2 hrs 30 min 5 sec" or just "45 sec". Units are recognised by
    their first letter (d, h, m, s), so any unit may be missing.
    NOTE(review): the unit spellings "days", "hrs" and "min" come from the
    original padding lists; the seconds spelling is assumed to start with
    "s" - confirm against a real export.

    Args:
        text: raw value of the "Session Duration" column.

    Returns:
        The duration in hours as a float. A blank string yields 0.0.

    Raises:
        ValueError: if the tokens do not pair up, a unit is unknown, or an
            amount is not an integer.
    """
    tokens = text.split()
    if len(tokens) % 2:
        raise ValueError("unrecognized session duration: {!r}".format(text))

    amounts = {"d": 0, "h": 0, "m": 0, "s": 0}
    for amount, unit in zip(tokens[::2], tokens[1::2]):
        key = unit[0].lower()
        if key not in amounts:
            raise ValueError("unrecognized session duration: {!r}".format(text))
        amounts[key] += int(amount)

    # Same arithmetic as before: everything expressed in hours.
    return (
        (amounts["d"] * 24)
        + amounts["h"]
        + (amounts["m"] / 60)
        + (amounts["s"] / 3600)
    )


def _parse_association_time(text):
    """Parse an association timestamp into a naive datetime.

    Accepts values such as "Mon Jun 03 12:34:56 CDT 2019". The zone
    abbreviation is removed (whichever one it is, not only CDT/CST) and the
    wall-clock time is returned without any offset applied. A timestamp
    that carries no zone at all is parsed as-is.

    Args:
        text: raw value of the "Association Time" column.

    Returns:
        A naive datetime holding the wall-clock time from the string.

    Raises:
        ValueError: if the text does not match the expected layout.
    """
    without_zone = _ZONE_BEFORE_YEAR.sub("", text.strip())
    return datetime.strptime(without_zone, _TIME_FORMAT)


def main(input_path="./input.csv", output_path="./maybe4.csv"):
    """Add 'duration' and 'datetime' columns to a wifi log CSV for Tableau.

    Reads the whole input file, appends two calculated columns to every data
    row (session duration in hours, and the association time as a datetime),
    and writes the result to the output file.

    Args:
        input_path: CSV to read. Its header row must contain the columns
            "Session Duration" and "Association Time".
        output_path: CSV to write. The default is the file name the
            original script always wrote.

    Raises:
        ValueError: if the input has no header row, a required column is
            missing, or a duration / timestamp cannot be parsed.
    """
    with open(input_path, encoding="utf8", newline="") as input_csv:
        data = list(csv.reader(input_csv, delimiter=","))
    if not data:
        raise ValueError("{} has no header row".format(input_path))

    # Grab indices of the needed columns before the header is extended.
    header = data[0]
    duration_index = header.index("Session Duration")
    time_index = header.index("Association Time")

    # Column headings for the two new calculated columns.
    header.append("duration")
    header.append("datetime")

    for row in data[1:]:
        # Blank lines (common after hand-concatenating monthly exports) come
        # back from csv.reader as empty lists; pass them through untouched.
        if not row:
            continue
        # Compute both values before appending, so a parsing error never
        # leaves a row half-extended and a row can never pick up the
        # previous row's timestamp.
        duration_hours = _duration_to_hours(row[duration_index])
        association_time = _parse_association_time(row[time_index])
        row.append(duration_hours)
        row.append(association_time)

    with open(output_path, "w", encoding="utf8", newline="") as output_file:
        csv.writer(output_file).writerows(data)


if __name__ == "__main__":
    main()