implemented time_screen_sequence
parent
cf7e692927
commit
53df652d02
|
@ -1,8 +1,11 @@
|
|||
from collections.abc import Collection
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import re
|
||||
|
||||
from datetime import *
|
||||
|
||||
from config.models import Participant, Screen
|
||||
from setup import db_engine, session
|
||||
|
||||
|
@ -82,7 +85,12 @@ def identify_screen_sequence(df_screen: pd.DataFrame) -> pd.DataFrame:
|
|||
sequence.
|
||||
ii) (2|0)130102
|
||||
This sequence is interpreted as a nested screen-check sequence (010) inside
|
||||
a unlock sequence ((2|0)1302).
|
||||
an unlock sequence ((2|0)1302). Since the time interval between 0 and 1 is very
|
||||
short (the device hasn't even had the time to lock), we say that 010 does not constitute a
|
||||
proper check sequence and we therefore interpret the whole sequence as an unlock sequence.
|
||||
|
||||
TODO: the function time_screen_sequence returns some weird values. For example, the average check time of
|
||||
participant nr. 74 is several minutes and the real usage time percentage of participant nr. 78 is about 50%.
|
||||
"""
|
||||
|
||||
df_screen.sort_values(["device_id", "timestamp"], inplace=True)
|
||||
|
@ -131,8 +139,8 @@ def identify_screen_sequence(df_screen: pd.DataFrame) -> pd.DataFrame:
|
|||
# event of the previous sequence, so that the time interval between the first
|
||||
# and the second event in a sequence is actually the time the device is not in use.
|
||||
|
||||
unlock_pat = re.compile(r"(?=[0,2]((13|31)0+2))")
|
||||
check_pat = re.compile(r"(?=[0,2](10+2?))")
|
||||
unlock_pat = re.compile(r"(?=[0,2]((13|31)(0+|010)2))")
|
||||
check_pat = re.compile(r"(?=[0,2](10+))")
|
||||
|
||||
# Iterate over rows of the merged df and then for each row iterate over
|
||||
# regex matches. For each match, create a dictionary containing information
|
||||
|
@ -170,8 +178,103 @@ def identify_screen_sequence(df_screen: pd.DataFrame) -> pd.DataFrame:
|
|||
|
||||
|
||||
def time_screen_sequence(df_screen: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate time statistics related to device usage.

    The statistics are derived from the sequences returned by
    identify_screen_sequence, so they inherit whatever that function
    classifies as an "unlock" or a "check" sequence.

    Parameters
    ----------
    df_screen: pd.DataFrame
        A dataframe containing screen data

    Returns
    -------
    A new dataframe indexed by device_id and participant_id containing the following columns:
        * real_usage_time: duration of time during which the device was actually in use,
            i.e. the total duration of sequences identified by the function identify_screen_sequence
        * total_usage_time: sum of daily timespans between the first and
            the last event reported by the screen sensor measured in milliseconds
        * real_usage_time_percentage: real_usage_time / total_usage_time
        * average_time_between_unlocks
        * average_time_between_checks
        * average_check_duration
        * average_unlock_duration

    Every device/participant pair with at least one identified sequence gets a row.
    Statistics that cannot be computed for a pair (e.g. it has no check sequences)
    are reported as NaN instead of the pair being dropped.
    """
    keys = ["device_id", "participant_id"]

    sequences_df = identify_screen_sequence(df_screen)

    # Fit timestamps to dates (timestamps are interpreted as milliseconds).
    sequences_df = sequences_df.assign(
        date=pd.to_datetime(sequences_df["beginning"], unit="ms").dt.date
    )

    # Calculate total_usage_time, real_usage_time and real_usage_time_percentage.
    # The per-day span is computed first so that idle time between days
    # is not counted towards total_usage_time.
    usage_time_df = (
        sequences_df.groupby(keys + ["date"])[["beginning", "end", "duration"]]
        .agg({"beginning": "min", "end": "max", "duration": "sum"})
        .assign(total_usage_time=lambda x: x["end"] - x["beginning"])
        .drop(["beginning", "end"], axis=1)
        .rename(columns={"duration": "real_usage_time"})
        .groupby(keys)
        .agg({"real_usage_time": "sum", "total_usage_time": "sum"})
        .assign(
            real_usage_time_percentage=lambda x: x["real_usage_time"] / x["total_usage_time"]
        )
    )

    time_between_unlocks_df = _average_time_between_sequences(
        sequences_df, "unlock", "average_time_between_unlocks"
    )
    time_between_checks_df = _average_time_between_sequences(
        sequences_df, "check", "average_time_between_checks"
    )

    # Calculate average_check_duration and average_unlock_duration.
    # Working on the "duration" Series keeps the unstacked columns single-level;
    # two-level columns cannot be merged with the single-level frames above.
    average_duration_df = (
        sequences_df.groupby(keys + ["sequence_type"])["duration"]
        .agg(lambda x: int(np.mean(x)))
        .unstack("sequence_type")
        # Guarantee both columns exist even if one sequence type never occurs.
        .reindex(columns=["check", "unlock"])
        .rename(
            columns={
                "check": "average_check_duration",
                "unlock": "average_unlock_duration",
            }
        )
        .rename_axis(columns=None)
    )

    # Join the four newly created dataframes on their shared
    # (device_id, participant_id) index. Left joins keep every pair present in
    # usage_time_df, which covers all pairs with at least one sequence.
    merged = (
        usage_time_df.join(time_between_unlocks_df, how="left")
        .join(time_between_checks_df, how="left")
        .join(average_duration_df, how="left")
    )

    return merged


def _average_time_between_sequences(
    sequences_df: pd.DataFrame, sequence_type: str, column_name: str
) -> pd.DataFrame:
    """Return the mean gap between consecutive sequences of one type per device and participant."""
    selected = sequences_df[sequences_df["sequence_type"] == sequence_type].sort_values(
        ["participant_id", "device_id", "beginning"]
    )
    # End of the previous sequence of the same type on the same device;
    # NaN for the first sequence of each device, which mean() then skips.
    previous_end = selected.groupby("device_id")["end"].shift(1)
    return (
        selected.assign(**{column_name: selected["beginning"] - previous_end})
        .groupby(["device_id", "participant_id"])[[column_name]]
        .mean()
    )
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue