Add google activity recognition features

2019-11-06 13:34:47 -05:00 · 2019-11-06 13:34:47 -05:00 · bdbb1904c2
parent 911e183c26
commit bdbb1904c2
3 changed files with 50 additions and 1 deletions
--- a/1
+++ b/1
@ -28,6 +28,7 @@ rule all:
        expand("data/processed/{pid}/bluetooth_{segment}.csv",
                            pid=config["PIDS"], 
                            segment = config["BLUETOOTH"]["DAY_SEGMENTS"]),
        expand("data/processed/{pid}/activity_extracted.csv",pid=config["PIDS"]),
        # Reports
        expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]),
        expand("reports/figures/{pid}/compliance_heatmap.html", pid=config["PIDS"], sensor=config["SENSORS"]),
--- a/rules/features.snakefile
+++ b/rules/features.snakefile
@ -50,4 +50,12 @@ rule bluetooth_metrics:
    output:
        "data/processed/{pid}/bluetooth_{day_segment}.csv"
    script:
-        "../src/features/bluetooth_metrics.R"
+        "../src/features/bluetooth_metrics.R"
 rule activity_metrics:
    input:
        "data/raw/{pid}/plugin_google_activity_recognition_with_datetime.csv"
    output:
        "data/processed/{pid}/activity_extracted.csv"
    script:
        "../src/features/activity_recognition.py"
--- a/src/features/activity_recognition.py
+++ b/src/features/activity_recognition.py
@ -0,0 +1,40 @@
 import pandas as pd
 import numpy as np
 import scipy.stats as stats
 #Read csv into a pandas dataframe
 data = pd.read_csv(snakemake.input[0])
 #Resampling each of the required features as a pandas series
 data.local_date_time = pd.to_datetime(data.local_date_time)
 resampledData = data.set_index(data.local_date_time)
 resampledData = resampledData[~resampledData.index.duplicated()]
 resampledData.rename_axis('time',axis='columns',inplace=True)
 resampledData.drop(columns=['local_date_time'],inplace=True)
 #Finding count grouped by day
 count = pd.DataFrame()
 count = resampledData['activity_type'].resample('D').count()
 count = count.rename(columns={"activity_type":"count"})
 #Finding most common activity of the day
 mostCommonActivity = pd.DataFrame()
 mostCommonActivity = resampledData['activity_type'].resample('D').apply(lambda x:stats.mode(x)[0])
 mostCommonActivity = mostCommonActivity.rename(columns={'activity_type':'most_common_activity'})
 #finding different number of activities during a day
 countChanges = pd.DataFrame()
 # countChanges = resampledData.to_period('D').groupby(resampledData.index)['activity_type'].value_counts()
 countChanges = resampledData['activity_type'].resample('D').nunique()
 #Concatenating all the processed data only, no other sensor data is added here for simplicity
 finalDataset = pd.DataFrame()
 finalDataset = pd.concat([count,mostCommonActivity,countChanges],axis=1)
 finalDataset.rename(columns={0:"count",1:'most_common_activity','activity_type':'activity_changes_count'},inplace = True)
 #Export final dataframe with extracted features to respective PID
 finalDataset.to_csv(snakemake.output[0])