310 lines
11 KiB
Python
310 lines
11 KiB
Python
import numpy as np
|
|
import pandas as pd
|
|
import scipy.signal as scisig
|
|
import os
|
|
import matplotlib.pyplot as plt
|
|
|
|
from load_files import getInputLoadFile, getOutputPath, get_user_input
|
|
|
|
# When true, progress messages are printed while features are computed.
DEBUG = True

# Number of samples per second in the accelerometer signals handled here.
SAMPLING_RATE = 8

# Durations, expressed in seconds.
ONE_MINUTE_S = 60
THIRTY_MIN_S = 30 * ONE_MINUTE_S
SECONDS_IN_DAY = 60 * 60 * 24

# A sample is momentarily still when the sample-to-sample change in motion is
# below the first value; a minute is still when more than the second value
# (a fraction) of its samples are momentarily still.
STILLNESS_MOTION_THRESHOLD = 0.1
PERCENT_STILLNESS_THRESHOLD = 0.95

# Minimum absolute first difference near a local minimum of the filtered
# motion signal for that minimum to be counted as a step.
STEP_DIFFERENCE_THRESHOLD = 0.3
|
|
|
|
|
|
|
|
def computeAllAccelerometerFeatures(data, time_frames):
    '''Computes the accelerometer features for every requested time frame.

    The motion, step and stillness signals are computed once for the whole
    recording and then sliced per time frame.

    Args:
        data:           table-like object whose 'AccelX', 'AccelY' and 'AccelZ' entries hold
                        the three accelerometer axes
        time_frames:    list of [start, end] pairs of sample indices into the accelerometer
                        signal; an end of -1 means "until the end of the signal"
    Returns:
        features:       one feature list per time frame, in the order produced by
                        computeAccelerometerFeaturesOverOneTimeFrame
        steps:          the step signal for the whole recording
        motion:         the aggregated motion signal for the whole recording
    '''
    if DEBUG: print("\t\tcomputing motion...")
    motion = computeMotion(data['AccelX'], data['AccelY'], data['AccelZ'])

    if DEBUG: print("\t\tcomputing steps...")
    steps = computeSteps(motion)

    if DEBUG: print("\t\tcomputing stillness...")
    stillness = computeStillness(motion)

    features = []

    for time_frame in time_frames:
        start = time_frame[0]
        end = time_frame[1]
        if DEBUG: print("\t\tcomputing features for time frame. Start index: "+ str(start)+ " end index: "+ str(end))

        # -1 is the "end of file" sentinel. Used directly as a slice bound it
        # means "all but the last element" and silently drops the final sample,
        # so it is translated into an open-ended slice instead.
        if end == -1:
            stop = None
            stop1Hz = None
        else:
            stop = end
            stop1Hz = int(end / SAMPLING_RATE)

        # the stillness signal is sampled once per second rather than SAMPLING_RATE times
        start1Hz = int(start / SAMPLING_RATE)

        # NOTE(review): computeStillness returns a fixed-length (one day) signal, so an
        # open-ended frame takes stillness up to the end of that day regardless of how
        # long the recording is -- confirm this is intended.
        time_frame_feats = computeAccelerometerFeaturesOverOneTimeFrame(motion[start:stop],
                                                                        steps[start:stop],
                                                                        stillness[start1Hz:stop1Hz])
        features.append(time_frame_feats)

    return features, steps, motion
|
|
|
|
def computeMotion(acc1, acc2, acc3):
    '''Combines the three accelerometer axes into one motion signal.

    Each axis is squared element-wise, the squares are summed and the square
    root of that sum is returned (the magnitude of the acceleration vector).

    Args:
        acc1, acc2, acc3:   equally long sequences holding one axis each
    Returns:
        numpy array with one magnitude value per sample
    '''
    x_axis = np.array(acc1)
    y_axis = np.array(acc2)
    z_axis = np.array(acc3)

    squared_magnitude = x_axis**2 + y_axis**2 + z_axis**2
    return np.sqrt(squared_magnitude)
|
|
|
|
def computeSteps(motion):
    '''Marks the samples of the aggregated motion signal that look like steps.

    The signal is low-pass filtered with filterSignalFIR (cutoff 2, 256 taps) and
    the strict local minima of the filtered signal are located. A minimum is
    counted as a step when the largest absolute first difference in the window
    of 8 differences around it exceeds STEP_DIFFERENCE_THRESHOLD. Minima within
    the first or last few samples are never counted.

    Args:
        motion: aggregated 3 axis acceleration (e.g. the output of computeMotion)
    Returns:
        steps:  list as long as the signal, holding 1.0 at every step and 0 elsewhere'''

    smoothed = filterSignalFIR(motion, 2, 256)
    first_diff = smoothed[1:] - smoothed[:-1]

    candidate_minima = scisig.argrelextrema(smoothed, np.less)[0]
    last_allowed = len(first_diff) - 4

    steps = [0] * len(smoothed)
    for idx in candidate_minima:
        # only minima with a full window of differences on both sides are considered
        if 4 < idx < last_allowed and max(abs(first_diff[idx-4:idx+4])) > STEP_DIFFERENCE_THRESHOLD:
            steps[idx] = 1.0

    return steps
|
|
|
|
def filterSignalFIR(eda, cutoff=0.4, numtaps=64, sampling_rate=None):
    '''Low-pass filters a signal with a windowed FIR filter.

    Args:
        eda:            the signal to filter (despite the name any 1-D signal may be
                        passed; computeSteps passes the motion signal)
        cutoff:         cutoff frequency, in the same units as the sampling rate
        numtaps:        number of FIR coefficients
        sampling_rate:  sampling rate of the signal; when None the module-level
                        SAMPLING_RATE is used
    Returns:
        The filtered signal, as long as the input. The filter is applied causally,
        so the output lags the input (by roughly numtaps/2 samples).
    '''
    if sampling_rate is None:
        sampling_rate = SAMPLING_RATE

    # firwin expects the cutoff as a fraction of the Nyquist frequency
    nyquist = sampling_rate / 2.0
    FIR_coeff = scisig.firwin(numtaps, cutoff / nyquist)

    return scisig.lfilter(FIR_coeff, 1, eda)
|
|
|
|
def computeStillness(motion):
    '''Locates periods in which the person is still or motionless.

    A sample is momentarily still when the absolute change in motion from that
    sample to the next is below STILLNESS_MOTION_THRESHOLD. A minute counts as
    still when more than PERCENT_STILLNESS_THRESHOLD of its samples are
    momentarily still. Minutes for which there is no data, and any data beyond
    the first day, are reported as not still.

    Args:
        motion: an array containing the aggregated acceleration, sampled at SAMPLING_RATE
    Returns:
        A 1Hz list covering one full day (SECONDS_IN_DAY entries) that is 1.0 for each
        second belonging to a still minute, 0.0 otherwise
    '''
    motion = np.asarray(motion, dtype=float)

    # The magnitude of the change is what matters: a large drop in motion is just
    # as much movement as a large rise.
    momentary_stillness = np.abs(np.diff(motion)) < STILLNESS_MOTION_THRESHOLD
    # np.append returns a new array, so the result has to be kept; the padding makes
    # the signal as long as the motion signal
    momentary_stillness = np.append(momentary_stillness, False)

    samples_per_minute = ONE_MINUTE_S * SAMPLING_RATE
    required_still_samples = PERCENT_STILLNESS_THRESHOLD * samples_per_minute

    # one entry per second of the day; every second inherits the verdict of the
    # minute that contains it
    second_stillness = [0.0] * SECONDS_IN_DAY

    for minute in range(SECONDS_IN_DAY // ONE_MINUTE_S):
        start_idx = minute * samples_per_minute
        this_minute = momentary_stillness[start_idx:start_idx + samples_per_minute]

        if np.count_nonzero(this_minute) > required_still_samples:
            first_second = minute * ONE_MINUTE_S
            second_stillness[first_second:first_second + ONE_MINUTE_S] = [1.0] * ONE_MINUTE_S

    return second_stillness
|
|
|
|
def computeAccelerometerFeaturesOverOneTimeFrame(motion, steps, stillness):
    ''' Gathers every available feature for a single time period. All incoming
        signals must already be restricted to that period.

    Args:
        motion:     motion signal for the period (accepted but not used by the current features)
        steps:      binary signal that is 1 wherever there is a step
        stillness:  1Hz signal that is 1 if the person was still during that second, 0 otherwise
    Returns:
        A list of features containing (in order):
            -Step count                         as returned by computeStepFeatures
            -mean step time during movement     as returned by computeStepFeatures
            -percent stillness                  as returned by countStillness
    '''

    step_features = computeStepFeatures(steps, stillness)
    stillness_feature = countStillness(stillness)

    return list(step_features) + [stillness_feature]
|
|
|
|
def computeStepFeatures(steps,stillness):
    '''Counts the total number of steps over a given period,
        as well as the average time between steps (meant to approximate walking speed)

    The gaps between consecutive steps are averaged per minute, and the mean is then
    taken over the minutes in which no second was marked as still.

    Args:
        steps:      a binary array that is 1 every time there is a step
        stillness:  a 1Hz binary array that is 1 for every second that is part of a still period
    Returns:
        A two element list:
            sum:        the number of steps in the period
            mean time:  average number of samples between two steps during movement, or NaN
                        when it cannot be computed (period shorter than one minute, fewer
                        step samples than minutes, or no step gaps during movement)'''

    steps = np.asarray(steps)
    sum_steps = float(steps.sum())

    signal_length_1min = len(stillness) // 60

    # Aggregating to one value per minute needs at least one full minute of stillness
    # data and at least one step sample per minute; otherwise there is nothing to average.
    if signal_length_1min == 0 or len(steps) < signal_length_1min:
        return [sum_steps, np.nan]

    step_indices = np.nonzero(steps)[0]

    # store every gap at the position of the step that starts it, so the gaps stay
    # aligned in time with the stillness signal; positions without a gap are NaN
    timed_step_diff = np.full(len(steps), np.nan)
    timed_step_diff[step_indices[:-1]] = np.diff(step_indices)

    agg_stillness = aggregateSignal(stillness, signal_length_1min, 'max')
    agg_step_diff = aggregateSignal(timed_step_diff, signal_length_1min, 'mean')

    # a minute counts as movement when none of its seconds was still
    step_diff_during_movement = agg_step_diff[agg_stillness == 0.0]

    # np.all is True for an empty array, so this also covers "no movement at all"
    if np.all(np.isnan(step_diff_during_movement)):
        return [sum_steps, np.nan]

    return [sum_steps,round(np.nanmean(step_diff_during_movement),10)]
|
|
|
|
def countStillness(stillness):
    '''Computes the share of a period that was spent still.

    Args:
        stillness: a binary array at 1Hz that is 1 if that second is part of a still period
    Returns:
        the fraction (between 0 and 1) of the period spent still, or NaN when the
        period contains no samples'''

    num_seconds = len(stillness)
    if num_seconds == 0:
        # an empty period has no defined share; NaN avoids a division by zero
        return np.nan

    return float(sum(stillness)) / float(num_seconds)
|
|
|
|
def aggregateSignal(signal, new_signal_length, agg_method='sum'):
    '''Shortens a signal by splitting it into equally sized buckets and reducing each one.

    Every bucket holds len(signal) // new_signal_length consecutive samples; samples left
    over at the end of the signal are ignored. NaN values inside a bucket are skipped.

    Args:
        signal:             the signal to aggregate
        new_signal_length:  number of buckets, i.e. the length of the returned signal
        agg_method:         'sum', 'mean' or 'max' of each bucket, or 'percent' for the
                            sum divided by the number of samples per bucket
    Returns:
        numpy array of length new_signal_length
    Raises:
        ValueError: if agg_method is unknown, new_signal_length is not positive, or the
                    signal has fewer samples than new_signal_length
    '''
    reducers = {
        'sum': np.nansum,
        'percent': np.nansum,
        'mean': np.nanmean,
        'max': np.nanmax,
    }
    if agg_method not in reducers:
        raise ValueError("unknown agg_method: " + str(agg_method))
    if new_signal_length <= 0:
        raise ValueError("new_signal_length must be positive")

    samples_per_bucket = len(signal) // new_signal_length

    #the new signal length must be small enough that there is at least 1 sample per bucket
    if samples_per_bucket == 0:
        raise ValueError("signal is too short for the requested new_signal_length")

    reduce_bucket = reducers[agg_method]
    new_signal = np.zeros(new_signal_length)
    for i in range(new_signal_length):
        bucket = signal[i*samples_per_bucket:(i+1)*samples_per_bucket]
        new_signal[i] = reduce_bucket(bucket)

    if agg_method == 'percent':
        new_signal /= samples_per_bucket

    return new_signal
|
|
|
|
def getIndexFromTimestamp(hours, mins=0):
    '''Converts an offset of hours and minutes from the start of the signal into
    the matching sample index at SAMPLING_RATE samples per second.'''
    elapsed_minutes = (hours * 60) + mins
    elapsed_seconds = elapsed_minutes * 60
    return elapsed_seconds * SAMPLING_RATE
|
|
|
|
def inputTimeFrames():
    '''Allows user to choose the time frames over which they compute accelerometer features.

    The user enters periods as whole hours counted from the start of the file. An entry
    that is not a whole number, or that does not describe a valid period, is rejected
    and asked for again instead of stopping the script.

    Returns:
        A list of [start, end] pairs of sample indices (see getIndexFromTimestamp); an
        end of -1 stands for the end of the file. If the user enters no period the list
        is [[0, -1]], i.e. the whole file.
    '''

    time_frames = []
    print("Accelerometer features can be extracted over different time periods.")
    cont = get_user_input("If you would like to enter a time period over which to compute features, enter 'y', or press enter to compute features over the entire file.")
    while cont == 'y' or cont == 'Y':
        try:
            start_hour = int(get_user_input("Enter the starting hour of the time period (hour 0 is when the file starts):"))
            end_hour = int(get_user_input("Enter the ending hour of the time period (hour 0 is when the file starts; use -1 for the end of the file):"))
        except ValueError:
            print("Hours must be entered as whole numbers. Please try again.")
            continue

        # a negative start, or an end that is not after the start, would select an
        # empty or unintended part of the signal
        if start_hour < 0 or end_hour < -1 or (end_hour != -1 and end_hour <= start_hour):
            print("The starting hour must be 0 or greater and the ending hour must be after it (or -1). Please try again.")
            continue

        start = getIndexFromTimestamp(start_hour)
        end = -1 if end_hour == -1 else getIndexFromTimestamp(end_hour)
        time_frames.append([start,end])
        print("Great! Now computing features for the following time periods:"+ str(time_frames))
        cont = get_user_input("To add another time period, enter 'y'. To finish, press enter.")

    if len(time_frames) == 0:
        time_frames = [[0,-1]] # the whole file

    return time_frames
|
|
|
|
def saveFeaturesToFile(features, time_frames, output_file):
    '''Writes the features of every time frame to a text file, one line per time frame.

    Each line holds the two entries of the time frame followed by its features,
    separated by ' , '. The file is overwritten if it already exists.

    Args:
        features:       one list of feature values per time frame, in the same order as time_frames
        time_frames:    list of [start, end] pairs
        output_file:    path of the file to write
    '''
    # NOTE(review): the first two columns are written exactly as stored in time_frames.
    # inputTimeFrames stores sample indices there, while the header calls them hours --
    # confirm which one consumers of this file expect.
    header = "Time period start hour, Time period end hour, Step count, Mean step time during movement, Percent stillness\n"

    # the with-block closes the file even if writing a row fails
    with open(output_file, 'w') as of:
        of.write(header)
        for tf_i, tf in enumerate(time_frames):
            fields = [str(tf[0]), str(tf[1])]
            fields.extend(str(feat) for feat in features[tf_i])
            of.write(' , '.join(fields) + '\n')

    print("Saved features to file " + output_file)
|
|
|
|
# draws a graph of the motion signal with the detected steps marked on it
# assumes that 'data' dataframe already contains the 'motion' and 'steps' columns
def plotSteps(data, x_seconds, sampleRate = SAMPLING_RATE):
    '''Plots the motion signal and draws a vertical line at every detected step.

    Args:
        data:       dataframe with a 'motion' column and a 'steps' column that is 1 at every step
        x_seconds:  if true the x-axis is in seconds, otherwise in minutes
        sampleRate: number of samples per second in data
    '''
    # number of samples that make up one unit (second or minute) of the x-axis
    if x_seconds:
        samples_per_unit = float(sampleRate)
    else:
        samples_per_unit = sampleRate*60.

    time_m = np.arange(0,len(data))/samples_per_unit
    # step markers are drawn 128 samples earlier than where they were detected
    # NOTE(review): presumably this compensates for the delay of the 256-tap FIR filter
    # used in computeSteps -- confirm
    realign = 128/samples_per_unit

    motion = data['motion']
    data_min = motion.min()
    data_max = motion.max()

    #Plot the data with the steps marked
    plt.figure(1,figsize=(20, 5))

    plt.plot(time_m,motion)

    # select the step samples in one pass over the column instead of inspecting
    # the dataframe row by row
    is_step = np.asarray(data['steps']) == 1
    for x_loc in time_m[is_step] - realign:
        plt.plot([x_loc,x_loc],[data_min,data_max],"k")

    plt.xlim([0,time_m[-1]])
    plt.ylim([data_min-.1,data_max+.1])
    plt.title('Motion with Detected "Steps" marked')
    plt.ylabel('g')
    if x_seconds:
        plt.xlabel('Time (s)')
    else:
        plt.xlabel('Time (min)')

    plt.show()
|
|
|
|
if __name__ == "__main__":
    print("This script will extract features related to accelerometer data.")

    # gather everything that is needed from the user before computing anything
    data, filepath_confirm = getInputLoadFile()
    output_path = getOutputPath()
    time_frames = inputTimeFrames()

    features, steps, motion = computeAllAccelerometerFeatures(data, time_frames)

    # keep the full-length signals next to the raw data so that they can be plotted
    data["steps"] = steps
    data["motion"] = motion

    saveFeaturesToFile(features, time_frames, output_path)

    print("")
    plot_ans = get_user_input("Do you want to plot the detected steps? (y/n): ")
    if 'y' not in plot_ans:
        print("\tOkay, script will not produce a plot")
    else:
        secs_ans = get_user_input("Would you like the x-axis to be in seconds or minutes? (sec/min): ")
        # any answer that does not contain 'sec' selects minutes
        x_seconds = 'sec' in secs_ans
        plotSteps(data, x_seconds)
|
|
|