# 2021-04-15 02:52:45 +02:00
import numpy as np
import pandas as pd
import argparse , glob
# Calendar dates substituted for each day-of-week label in the test data.
# Position 0 holds the March 2020 date, position 1 the Oct/Nov 2020 date.
DayOfWeek2Date = {
    "Fri": ["2020-03-06", "2020-10-30"],
    "Sat": ["2020-03-07", "2020-10-31"],
    "Sun": ["2020-03-08", "2020-11-01"],
    "Mon": ["2020-03-09", "2020-11-02"],
}


def assign_test_timestamps(file_path):
    """Expand the ``test_time`` column of a CSV into concrete timestamps.

    Each ``test_time`` value is split on its first space into a day-of-week
    label and a time of day. Every row is emitted twice: first with the March
    date and then with the Oct/Nov date that ``DayOfWeek2Date`` maps the label
    to (all March rows come before all Oct/Nov rows).

    Args:
        file_path: Path of the CSV file to read. It must contain a
            ``test_time`` column. If the path contains ``"fitbit"`` the
            fitbit output layout is produced.

    Returns:
        A DataFrame whose first column is ``timestamp`` and from which the
        ``test_time``, ``day_of_week`` and ``time`` columns were removed.
        For fitbit files ``timestamp`` is 0 and ``local_date_time`` is kept as
        a ``YYYY-MM-DD HH:MM:SS`` string; for all other files ``timestamp`` is
        the epoch time in milliseconds of the local date-time interpreted in
        the America/New_York timezone and ``local_date_time`` is removed.

    Raises:
        KeyError: If a day-of-week label is not a key of ``DayOfWeek2Date``.
    """
    columns_to_delete = ["test_time", "day_of_week", "time"]

    data = pd.read_csv(file_path)
    data[["day_of_week", "time"]] = data["test_time"].str.split(pat=" ", n=1, expand=True)

    # 0 is for March and 1 is for Nov. The frames are collected and
    # concatenated once instead of growing an initially empty DataFrame.
    frames = []
    for i in [0, 1]:
        local_date_times = pd.to_datetime(
            [
                DayOfWeek2Date[day][i] + " " + time
                for day, time in zip(data["day_of_week"], data["time"])
            ]
        )
        frames.append(data.assign(local_date_time=local_date_times))
    data_with_timestamps = pd.concat(frames, axis=0)

    if "fitbit" in file_path:
        data_with_timestamps.insert(0, "timestamp", 0)
        data_with_timestamps["local_date_time"] = data_with_timestamps[
            "local_date_time"
        ].dt.strftime("%Y-%m-%d %H:%M:%S")
    else:
        # Convert local_date_time with timezone to a millisecond timestamp.
        # NOTE(review): 2020-03-08 and 2020-11-01 are DST transition days in
        # America/New_York; tz_localize with its default settings raises for
        # times inside the skipped/repeated hour - confirm the test data
        # avoids them.
        utc_date_times = (
            data_with_timestamps["local_date_time"]
            .dt.tz_localize(tz="America/New_York")
            .dt.tz_convert(None)
        )
        # Subtracting the epoch and floor-dividing by one millisecond gives
        # the same value whatever resolution the datetime column is stored in.
        milliseconds = (utc_date_times - pd.Timestamp("1970-01-01")) // pd.Timedelta(milliseconds=1)
        data_with_timestamps.insert(0, "timestamp", milliseconds)
        columns_to_delete.append("local_date_time")

    # Discard useless columns
    return data_with_timestamps.drop(columns=columns_to_delete)
# Command-line entry point: pick the input CSV files, assign timestamps to
# each one and write the result to the matching path under "external".
parser = argparse.ArgumentParser()
parser.add_argument(
    "-f",
    "--files",
    nargs="+",
    help="Assign timestamps to the selected files, it could be a single file name or multiple file names separated by whitespace(s) (e.g. phone_battery_raw.csv)",
)
parser.add_argument(
    "-a",
    "--all",
    action="store_true",
    help="Assign timestamps to all files under the tests/data/manual/aware_csv folder",
)
args = parser.parse_args()

# Files matched by --all are processed first, then the ones named by --files.
file_paths = []
if args.all:
    file_paths.extend(glob.glob("tests/data/manual/aware_csv/*"))
if args.files:
    file_paths.extend("tests/data/manual/aware_csv/" + file_name for file_name in args.files)

for file_path in file_paths:
    data_with_timestamps = assign_test_timestamps(file_path)
    # Replace only the first "manual" (the directory component) so that a
    # file whose own name contains "manual" keeps its name in the output.
    data_with_timestamps.to_csv(file_path.replace("manual", "external", 1), index=False)
    print(file_path + " was processed.")