Refactor battery feature: replace "metrics" with "features"

Co-authored-by: Meng Li <AnnieLM1996@gmail.com>
pull/95/head
Mingze Cao 2020-04-09 15:06:25 -05:00
parent 2d7d3bfccf
commit 8cc93c8791
4 changed files with 22 additions and 22 deletions

View File

@ -68,7 +68,7 @@ RESAMPLE_FUSED_LOCATION:
TIMEZONE: *timezone
BARNETT_LOCATION:
DAY_SEGMENTS: [daily] # These metrics are only available on a daily basis
DAY_SEGMENTS: [daily] # These features are only available on a daily basis
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
LOCATIONS_TO_USE: ALL # ALL, ALL_EXCEPT_FUSED OR RESAMPLE_FUSED
ACCURACY_LIMIT: 51 # meters, drops location coordinates whose reported accuracy value is higher (i.e., less precise) than this. This number means there's a 68% probability the true location is within this radius
@ -84,7 +84,7 @@ GOOGLE_ACTIVITY_RECOGNITION:
BATTERY:
DAY_SEGMENTS: *day_segments
METRICS: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"]
FEATURES: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"]
SCREEN:
DAY_SEGMENTS: *day_segments

View File

@ -542,7 +542,7 @@ See `Battery Config Code`_
.. - Extract the deltas in Battery charge : ``expand("data/processed/{pid}/battery_deltas.csv", pid=config["PIDS"]),``
- Extract Battery Metrics:
- Extract Battery Features:
| ``expand("data/processed/{pid}/battery_{day_segment}.csv",``
| ``pid=config["PIDS"],``
@ -562,9 +562,9 @@ See `Battery Config Code`_
- **Script:** ``src/features/battery_deltas.R`` - See the battery_deltas.R_ script.
- **Rule:** ``rules/features.snakefile/battery_metrics`` - See the battery_metrics_ rule
- **Rule:** ``rules/features.snakefile/battery_features`` - See the battery_features_ rule.
- **Script:** ``src/features/battery_metrics.py`` - See the battery_metrics.py_ script.
- **Script:** ``src/features/battery_features.py`` - See the battery_features.py_ script.
.. _battery-parameters:
@ -574,14 +574,14 @@ See `Battery Config Code`_
Name Description
============ ===================
day_segment The particular ``day_segments`` that will be analyzed. The available options are ``daily``, ``morning``, ``afternoon``, ``evening``, ``night``
metrics The different measures that can be retrieved from the Battery dataset. See :ref:`Available Battery Metrics <battery-available-metrics>` Table below
features The different measures that can be retrieved from the Battery dataset. See :ref:`Available Battery Features <battery-available-features>` Table below
============ ===================
.. _battery-available-metrics:
.. _battery-available-features:
**Available Battery Metrics**
**Available Battery Features**
The following table shows a list of the available metrics for Battery data.
The following table shows a list of the available features for Battery data.
===================== =============== =============
Name Units Description
@ -1167,8 +1167,8 @@ stddurationactivebout minutes Std duration active bout: The standard
.. _`Battery Config Code`: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L84
.. _battery_deltas: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L25
.. _battery_deltas.R: https://github.com/carissalow/rapids/blob/master/src/features/battery_deltas.R
.. _battery_metrics: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L86
.. _battery_metrics.py : https://github.com/carissalow/rapids/blob/master/src/features/battery_metrics.py
.. _battery_features: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L86
.. _battery_features.py : https://github.com/carissalow/rapids/blob/master/src/features/battery_features.py
.. _`Google Activity Recognition Config Code`: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L80
.. _google_activity_recognition_deltas: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L41
.. _google_activity_recognition_deltas.R: https://github.com/carissalow/rapids/blob/master/src/features/google_activity_recognition_deltas.R

View File

@ -85,16 +85,16 @@ rule activity_features:
script:
"../src/features/google_activity_recognition.py"
rule battery_metrics:
rule battery_features:
input:
"data/processed/{pid}/battery_deltas.csv"
params:
day_segment = "{day_segment}",
metrics = config["BATTERY"]["METRICS"]
features = config["BATTERY"]["FEATURES"]
output:
"data/processed/{pid}/battery_{day_segment}.csv"
script:
"../src/features/battery_metrics.py"
"../src/features/battery_features.py"
rule screen_features:
input:

View File

@ -4,10 +4,10 @@ from features_utils import splitOvernightEpisodes, splitMultiSegmentEpisodes
battery_data = pd.read_csv(snakemake.input[0], parse_dates=["local_start_date_time", "local_end_date_time", "local_start_date", "local_end_date"])
day_segment = snakemake.params["day_segment"]
metrics = snakemake.params["metrics"]
features = snakemake.params["features"]
if battery_data.empty:
battery_features = pd.DataFrame(columns=["local_date"] + ["battery_" + day_segment + "_" + x for x in metrics])
battery_features = pd.DataFrame(columns=["local_date"] + ["battery_" + day_segment + "_" + x for x in features])
else:
battery_data = splitOvernightEpisodes(battery_data, ["battery_diff"], [])
@ -19,21 +19,21 @@ else:
# for battery_data_discharge:
battery_data_discharge = battery_data[battery_data["battery_diff"] > 0]
battery_discharge_features = pd.DataFrame()
if "countdischarge" in metrics:
if "countdischarge" in features:
battery_discharge_features["battery_"+day_segment+"_countdischarge"] = battery_data_discharge.groupby(["local_start_date"])["local_start_date"].count()
if "sumdurationdischarge" in metrics:
if "sumdurationdischarge" in features:
battery_discharge_features["battery_"+day_segment+"_sumdurationdischarge"] = battery_data_discharge.groupby(["local_start_date"])["time_diff"].sum()
if "avgconsumptionrate" in metrics:
if "avgconsumptionrate" in features:
battery_discharge_features["battery_"+day_segment+"_avgconsumptionrate"] = battery_data_discharge.groupby(["local_start_date"])["battery_consumption_rate"].mean()
if "maxconsumptionrate" in metrics:
if "maxconsumptionrate" in features:
battery_discharge_features["battery_"+day_segment+"_maxconsumptionrate"] = battery_data_discharge.groupby(["local_start_date"])["battery_consumption_rate"].max()
# for battery_data_charge:
battery_data_charge = battery_data[battery_data["battery_diff"] <= 0]
battery_charge_features = pd.DataFrame()
if "countcharge" in metrics:
if "countcharge" in features:
battery_charge_features["battery_"+day_segment+"_countcharge"] = battery_data_charge.groupby(["local_start_date"])["local_start_date"].count()
if "sumdurationcharge" in metrics:
if "sumdurationcharge" in features:
battery_charge_features["battery_"+day_segment+"_sumdurationcharge"] = battery_data_charge.groupby(["local_start_date"])["time_diff"].sum()
# combine discharge features and charge features; fill the missing values with ZERO