rapids/tests/scripts/assign_test_timestamps.py

57 lines
2.4 KiB
Python

import numpy as np
import pandas as pd
import argparse, glob
DayOfWeek2Date = {"Fri": ["2020-03-06", "2020-10-30"],
"Sat": ["2020-03-07", "2020-10-31"],
"Sun": ["2020-03-08", "2020-11-01"],
"Mon": ["2020-03-09", "2020-11-02"]}
def assign_test_timestamps(file_path):
columns_to_delete = ["test_time", "day_of_week", "time"]
data = pd.read_csv(file_path)
data[["day_of_week", "time"]] = data["test_time"].str.split(pat=" ", n=1, expand=True)
data_with_timestamps = pd.DataFrame()
# 0 is for March and 1 is for Nov
for i in [0, 1]:
data["local_date_time"] = pd.to_datetime(data.apply(lambda row: DayOfWeek2Date[row["day_of_week"]][i] + " " + row["time"], axis=1))
data_with_timestamps = pd.concat([data_with_timestamps, data], axis=0)
if "fitbit" in file_path:
data_with_timestamps.insert(0, "timestamp", 0)
data_with_timestamps["local_date_time"] = data_with_timestamps["local_date_time"].dt.strftime('%Y-%m-%d %H:%M:%S')
else:
# Convert local_date_time with timezone to timestamp
data_with_timestamps.insert(0, "timestamp", data_with_timestamps["local_date_time"].dt.tz_localize(tz="America/New_York").astype(np.int64) // 10**6)
columns_to_delete.append("local_date_time")
# Discard useless columns
for col in columns_to_delete:
del data_with_timestamps[col]
return data_with_timestamps
parser = argparse.ArgumentParser()
parser.add_argument("-f", "--files", nargs="+", help="Assign timestamps to the selected files, it could be a single file name or multiple file names separated by whitespace(s) (e.g. phone_battery_raw.csv)")
parser.add_argument("-a", "--all", action="store_true", help="Assign timestamps to all files under the tests/data/manual/aware_csv folder")
args = parser.parse_args()
if args.all:
for file_path in glob.glob("tests/data/manual/aware_csv/*"):
data_with_timestamps = assign_test_timestamps(file_path)
data_with_timestamps.to_csv(file_path.replace("manual", "external"), index=False)
print(file_path + " was processed.")
if args.files:
for file_name in args.files:
file_path = "tests/data/manual/aware_csv/" + file_name
data_with_timestamps = assign_test_timestamps(file_path)
data_with_timestamps.to_csv(file_path.replace("manual", "external"), index=False)
print(file_path + " was processed.")