# rapids/tests/scripts/assign_test_timestamps.py
import numpy as np
import pandas as pd
import argparse, glob
# Calendar dates substituted for each day-of-week label found in the
# "test_time" column of a manual test file. Every label maps to two dates:
# index 0 is the March 2020 date and index 1 is the Oct/Nov 2020 date.
# NOTE(review): both weekends appear to straddle the 2020 US daylight-saving
# changes (2020-03-08 and 2020-11-01) -- confirm that this is the intent.
DayOfWeek2Date = {
    "Fri": ["2020-03-06", "2020-10-30"],
    "Sat": ["2020-03-07", "2020-10-31"],
    "Sun": ["2020-03-08", "2020-11-01"],
    "Mon": ["2020-03-09", "2020-11-02"],
}


def assign_test_timestamps(file_path):
    """Expand a manually written test CSV into rows with concrete timestamps.

    The CSV must contain a ``test_time`` column whose values have the form
    ``"<day_of_week> <time>"`` (split on the first space), where the day is a
    key of ``DayOfWeek2Date``. Every input row is emitted twice: first dated
    with the day's first date, then (appended after all first-date rows) with
    its second date. The original row index is kept, so index labels repeat.

    Args:
        file_path: Path of the CSV file to read. If the path contains the
            substring ``"fitbit"`` (anywhere, including directory names) the
            ``timestamp`` column is filled with 0 and the naive
            ``local_date_time`` column is kept. Otherwise ``timestamp`` holds
            milliseconds since the Unix epoch, computed by interpreting
            ``local_date_time`` as America/New_York wall-clock time, and
            ``local_date_time`` is dropped.

    Returns:
        A new DataFrame with ``timestamp`` as its first column and the helper
        columns ``test_time``, ``day_of_week`` and ``time`` removed.

    Raises:
        KeyError: If ``test_time`` is missing or a day label is not a key of
            ``DayOfWeek2Date``.
        TypeError: If a row has no time part after the day label.
        Exception: ``tz_localize`` is called with its default settings, so a
            wall-clock time that is ambiguous or nonexistent in
            America/New_York makes pandas raise.
    """
    columns_to_delete = ["test_time", "day_of_week", "time"]

    data = pd.read_csv(file_path)
    data[["day_of_week", "time"]] = data["test_time"].str.split(pat=" ", n=1, expand=True)

    frames = []
    # 0 is for March and 1 is for Nov
    for i in [0, 1]:
        # A plain comprehension (instead of a row-wise DataFrame.apply) still
        # raises on an unknown day label or a missing time part.
        local_date_times = pd.to_datetime([
            DayOfWeek2Date[day][i] + " " + time
            for day, time in zip(data["day_of_week"], data["time"])
        ])
        frames.append(data.assign(local_date_time=local_date_times))
    data_with_timestamps = pd.concat(frames, axis=0)

    if "fitbit" in file_path:
        data_with_timestamps.insert(0, "timestamp", 0)
    else:
        # Convert local_date_time with timezone to an epoch timestamp in
        # milliseconds. Subtracting the epoch and floor-dividing by one
        # millisecond does not depend on the resolution (ns/us/ms/s) in which
        # pandas happens to store the datetime column, unlike casting the
        # column to int64 and dividing by 10**6.
        utc_date_times = (
            data_with_timestamps["local_date_time"]
            .dt.tz_localize(tz="America/New_York")
            .dt.tz_convert("UTC")
            .dt.tz_localize(None)
        )
        epoch_ms = (utc_date_times - pd.Timestamp("1970-01-01")) // pd.Timedelta(milliseconds=1)
        data_with_timestamps.insert(0, "timestamp", epoch_ms)
        columns_to_delete.append("local_date_time")

    # Discard useless columns
    return data_with_timestamps.drop(columns=columns_to_delete)
# Command-line entry point: read the selected manual test files, assign
# timestamps to them and write the results to the parallel "external" folder.
parser = argparse.ArgumentParser()
parser.add_argument("-f", "--files", nargs="+", help="Assign timestamps to the selected files, it could be a single file name or multiple file names separated by whitespace(s) (e.g. phone_battery_raw.csv)")
parser.add_argument("-a", "--all", action="store_true", help="Assign timestamps to all files under the tests/data/manual/aware_csv folder")
args = parser.parse_args()

# Collect every input path first: the --all matches, then the --files
# selections, in that order. A file chosen by both options is processed twice.
file_paths = []
if args.all:
    file_paths.extend(glob.glob("tests/data/manual/aware_csv/*"))
if args.files:
    file_paths.extend("tests/data/manual/aware_csv/" + file_name for file_name in args.files)

if not (args.all or args.files):
    # Nothing was requested; show the usage instead of exiting silently.
    parser.print_help()

for file_path in file_paths:
    data_with_timestamps = assign_test_timestamps(file_path)
    # Only the first "manual" (the directory component of the path) is
    # swapped, so a file whose own name contains "manual" keeps its name.
    data_with_timestamps.to_csv(file_path.replace("manual", "external", 1), index=False)
    print(file_path + " was processed.")