rapids/src/features/phone_locations/barnett/barnett_library.py

1534 lines
56 KiB
Python

import numpy as np
import math
import pandas as pd
from scipy.stats import *
from tqdm import tqdm
from sklearn.cluster import KMeans
from scipy.spatial.distance import pdist, squareform
import datetime
import glob
from dateutil import tz
np.set_printoptions(suppress=True, formatter={'float_kind':'{:f}'.format})
np.random.seed(22)
def run_test(output_file):
print("Output DATA: ")
print(output_file)
def run_barnett_features_for_rapids(input_dataframe, accuracy_limit=51.0, timezone="", wtype="GLR", spread_pars=[10,1], center_rad=200, interval=10, n_reps=1, min_pause_dur=300, min_pause_dist=60, r=None, w=None, tint_m=None, tint_k=None):
data_frame = input_dataframe
lonlat, r, w = preprocessing(data_frame, interval=interval, acc_threshold=accuracy_limit, r=r, w=w, tint_m=tint_m, tint_k=tint_k)
mobmatmiss = convert_to_flights_pauses(lonlat, r, w)
mobmat = guess_pause(mobmatmiss, min_pause_dur, min_pause_dist)
obj = initialize_params(mobmat)
data_for_pandas = []
if obj:
lsmf = []
lssigloc = []
for i in range(n_reps):
print("Sim #: ", i+1)
out3 = simulate_mobility_gaps(mobmat, obj, wtype, spread_pars)
out3 = np.array(out3)
IDundef=np.where(out3[:,0]==3)[0]
if len(IDundef) > 0:
out3 = np.delete(out3, IDundef,axis=0)
obj3 = initialize_params(out3)
output_features, slout, row_names = get_mobility_features(out3, obj3, mobmatmiss, timezone, center_rad, interval)
lsmf.append(output_features)
lssigloc.append(slout)
#get average
np_lsmf = np.array((lsmf))
avg_output_features = [] #convert to pandas dataframe and then return this
for j in range(len(row_names)):
temp_avg = []
for i in range(n_reps):
temp_avg.append(np_lsmf[i][j])
avg_output_features.append(np.mean(temp_avg, axis=0))
for idx, row in enumerate(avg_output_features):
date = row_names[idx]
date_str = str(date[0]) + "-" + str(date[1]) + "-" + str(date[2])
features = [date_str]
for feature in row:
features.append(feature)
data_for_pandas.append(features)
column_names = ["local_date", "hometime", "disttravelled", "rog", "maxdiam", "maxhomedist", "siglocsvisited", "avgflightlen", "stdflightlen", "avgflightdur", "stdflightdur", "probpause", "siglocentropy", "minsmissing", "circdnrtn", "wkenddayrtn"]
df = pd.DataFrame(data_for_pandas, columns = column_names)
df.set_index('local_date', inplace=True)
return df
def run_barnett_features(input_dir, output_file, wtype="GLR", spread_pars=[10,1], timezone="", center_rad=200, interval=10, acc_threshold=51.0, n_reps=1, min_pause_dur=300, min_pause_dist=60, r=None, w=None, tint_m=None, tint_k=None):
data_frame = load_beiwe(input_dir)
lonlat, r, w = preprocessing(data_frame, interval=interval, acc_threshold=acc_threshold, r=r, w=w, tint_m=tint_m, tint_k=tint_k)
mobmatmiss = convert_to_flights_pauses(lonlat, r, w)
mobmat = guess_pause(mobmatmiss, min_pause_dur, min_pause_dist)
obj = initialize_params(mobmat)
lsmf = []
lssigloc = []
for i in range(n_reps):
print("Sim #: ", i+1)
out3 = simulate_mobility_gaps(mobmat, obj, wtype, spread_pars)
out3 = np.array(out3)
IDundef=np.where(out3[:,0]==3)[0]
if len(IDundef) > 0:
out3 = np.delete(out3, IDundef,axis=0)
obj3 = initialize_params(out3)
output_features, slout, row_names = get_mobility_features(out3, obj3, mobmatmiss, timezone, center_rad, interval)
lsmf.append(output_features)
lssigloc.append(slout)
#get average
np_lsmf = np.array((lsmf))
avg_output_features = []
for j in range(len(row_names)):
temp_avg = []
for i in range(n_reps):
temp_avg.append(np_lsmf[i][j])
avg_output_features.append(np.mean(temp_avg, axis=0))
print("Writing features to ", output_file)
with open(output_file, "w") as writer:
writer.write("Date\tHometime\tDistTravelled\tRoG\tMaxDiam\tMaxHomeDist\tSigLogsVisited\tAvgFlightLen\tStdFlightLen\tAvgFlightDur\tStdFlightDur\tProbPause\tSigLocEntropy\tMinsMissing\tCircdnRtn\tWkEndDayRtn\n")
for idx, row in enumerate(avg_output_features):
date = row_names[idx]
date_str = str(date[0]) + "-" + str(date[1]) + "-" + str(date[2])
feature_str = date_str
for feature in row:
feature_str += "\t"+str(feature)
writer.write(feature_str + "\n")
print("Finish writing to ", output_file)
def load_beiwe(input_dir, debug_mode=True):
# load all files ending with .csv in input_dir
print("Reading csv files from {}".format(input_dir))
all_files = glob.glob(input_dir + "/*.csv")
the_list = []
counter = 1
for filename in all_files:
df = pd.read_csv(filename, index_col=None, header=0)
the_list.append(df)
counter += 1
new_frame = pd.concat(the_list, axis=0, ignore_index=True)
return new_frame
#Convert GPS raw data to mobile matrix (GPS2MobMat.R)
def preprocessing(df, interval=10.0, acc_threshold=51.0, r=None,w=None,tint_m=None,tint_k=None):
df.columns = ['timestamp','latitude','longitude','altitude','accuracy']
# ===============================
# 1. order by the timestamp (ascending)
# ===============================
df = df.sort_values(by=['timestamp'])
# print(df)
# ===============================
# 2. select row if accuracy < threshold #pass
# ===============================
df_acc = df[df['accuracy'] < acc_threshold]
df = df_acc
# ===============================
#3. collapse data to interval #pass
# ===============================
if(tint_k != None and tint_m != None):
t0 = np.ceil(df['timestamp'].values[0]/1000)
df = df[df['timestamp']/1000 - t0 % (tint_k + tint_m) < tint_k]
if not r:
r = np.sqrt(interval)
if not w:
w = np.mean(df['accuracy'].values) + interval
start_time = (df['timestamp'].iloc[0]/1000)
end_time = (df['timestamp'].iloc[-1]/1000)
avg_matrix_lst = []
IDam = 0
count = -1
next_line = [1, start_time + interval/2.0, df['latitude'].iloc[0], df['longitude'].iloc[0], 0.0, 0.0]
interval_counter = 1
print("Collapse data within", interval, "second intervals...\n")
counter = 0
for idx, row in tqdm(df.iterrows(), total=df.shape[0]):
if counter > 0:
timestamp = row['timestamp']
latitude = row['latitude']
longitude = row['longitude']
if timestamp/1000.00 < start_time + interval:
next_line[2] += latitude
next_line[3] += longitude
interval_counter += 1
else:
next_line[2] = next_line[2]/interval_counter
next_line[3] = next_line[3]/interval_counter
temp = next_line
avg_matrix_lst.append(temp)
num_miss = np.floor((timestamp/1000 - (start_time + interval))/interval)
if num_miss > 0:
has_miss = [4, start_time + interval/2.0, start_time + interval * (num_miss + 1) +interval/2.0, float("NaN"), float("NaN"), float("NaN")]
avg_matrix_lst.append(has_miss)
start_time = start_time + interval * (num_miss+1)
next_line = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
next_line[0] = 1
next_line[1] = start_time + interval/2.0
next_line[2] = latitude
next_line[3] = longitude
interval_counter = 1
counter += 1
avg_matrix_df = pd.DataFrame(data=avg_matrix_lst)
# ================================================================
# avg_matrix_df is a matrix of average scores per interval
# avg_matrix_df[0] = sign of the case, 1 = normal, 4 = has missing value
# avg_matrix_df[1] = time
# avg_matrix_df[2] = latitude
# avg_matrix_df[3] = longitude
# avg_matrix_df[4] = x
# avg_matrix_df[5] = y
# ================================================================
# ===============================
#4. convert from Lat/Lon to X/Y
# ===============================
print("Convert latitude/longitude to X/Y\n")
new_dataframe = lat_long_to_xy(avg_matrix_df)
return new_dataframe, r, w
def convert_to_flights_pauses(new_dataframe, r, w, output_file=None):
print("\nConvert from X/Y to flights/pauses...\n")
mobmatmiss = extract_flights_from_dataframe(new_dataframe, r, w)
if output_file: #output_file = mobmatmiss
print("Writing {} rows to mobmatmiss to tsv...".format(mobmatmiss.shape[0]))
mobmatmiss.to_csv(output_file, sep="\t")
print("Finish writing mobmatmiss to tsv...")
return mobmatmiss
def extract_flights_from_dataframe(dataframe, r, w):
np_arr = dataframe.to_numpy()
cur_idx = 0
result_matrix = []
temp_counter = 0
prev_rows_counter = 0
for i in tqdm(range(len(np_arr)), total=len(np_arr)):
if np_arr[i][0] == 4:
row_with_missing_data = []
prev_rows = extract_flights(np_arr[cur_idx:i], r, w)
last_timestamp = np_arr[i][1]
row_with_missing_data.append([4, float("NaN"), float("NaN"), last_timestamp, float("NaN"), float("NaN"), np_arr[i][2]])
result_matrix += prev_rows
result_matrix += row_with_missing_data
prev_rows_counter += len(prev_rows)
temp_counter += 1
cur_idx = i+1
if cur_idx < len(np_arr):
rows = extract_flights(np_arr[cur_idx:len(np_arr)], r, w)
last_timestamp = rows[len(rows)-1][6]
result_matrix += rows
if last_timestamp < np_arr[len(np_arr)-1][1]:
if result_matrix[len(result_matrix)-1][0] != 2:
pause = [2, rows[len(rows)-1][4], rows[len(rows)-1][5], last_timestamp, float("NaN"), float("NaN"), np_arr[len(np_arr)-1][1]]
result_matrix.append(pause)
else:
result_matrix[len(result_matrix)-1][6] = np_arr[len(np_arr)-1][1]
result_df = pd.DataFrame(data=result_matrix, index=None, columns=["code", "lon1", "lat1", "t1", "lon2", "lat2", "t2"])
return result_df
# input1 = x_list --> np array [x0, x1, x2, ...xn]
# input2 = y_list
def is_flight(x_listt, y_listt, r, w):
x_list = np.array(x_listt)
y_list = np.array(y_listt)
num_of_rows = len(x_list)
prev_lat_lon = np.array((x_list[0], y_list[0])) #x0, y0
next_lat_lon = np.array((x_list[num_of_rows-1], y_list[num_of_rows-1]))
# euclidean_dist = euclidean distance of [x1,y1] and [x2, y2]
single_euclidean_dist = np.linalg.norm(prev_lat_lon - next_lat_lon)
if single_euclidean_dist < r: #DONE
return False
next_x = x_list[1:num_of_rows]
prev_x = x_list[0:num_of_rows-1]
subs_x = next_x - prev_x
pow_x = np.power(subs_x, 2)
next_y = y_list[1:num_of_rows]
prev_y = y_list[0:num_of_rows-1]
subs_y = next_y - prev_y
pow_y = np.power(subs_y, 2)
sum_sqrt_x_y = np.sqrt(pow_x + pow_y)
if np.min(sum_sqrt_x_y) < r:
return False
if num_of_rows == 2:
return True
if x_list[0] == x_list[num_of_rows-1]:
if np.max(np.abs(x_list[1: num_of_rows])) > w:
return False
else:
return True
if x_list[0] > x_list[num_of_rows-1]:
x_list = x_list[::-1]
y_list = y_list[::-1]
x_new = x_list - x_list[0]
y_new = y_list - y_list[0]
theta = -1*np.arctan(y_new[num_of_rows-1]/x_new[num_of_rows-1])
divvv = y_new[num_of_rows-1]/x_new[num_of_rows-1]
costheta = np.cos(theta)
sintheta = np.sin(theta)
a_matrix = np.array(([[costheta, -1*sintheta], [sintheta, costheta]]))
temp_matrix = np.array((x_new[1:num_of_rows-1], y_new[1:num_of_rows-1]))
rotation_pts = np.matmul(a_matrix, temp_matrix)
if np.max(np.abs(rotation_pts[1])) > w:
return False
else:
return True
def lat_long_to_xy(df,R=6371000.0): #1000.000
df1 = df[df[0] == 1.0].copy()
lat_v,lon_v = df1[2].values, df1[3].values
th0 = np.min(lon_v)
th1 = np.max(lon_v)
ph0 = np.min(lat_v)
ph1 = np.max(lat_v)
pi = math.pi
d1 = 2*pi*R*((ph1-ph0)*2*pi/360)/(2*pi)
d1 = 2*pi*R*((ph1-ph0)*2*pi/360)/(2*pi)
d2 = 2*pi*(R*np.sin(pi/2-ph1*2*pi/360))*((th1-th0)*2*pi/360)/(2*pi)
d3 = 2*pi*(R*np.sin(pi/2-ph0*2*pi/360))*((th1-th0)*2*pi/360)/(2*pi)
for idx, row in df.iterrows():
case = row[0]
latitude = row[2]
longitude = row[3]
if case == 1:
w1=(latitude-ph0)/(ph1-ph0)
w2=(longitude-th0)/(th1-th0)
# df[4][idx]= w1 * np.abs(d3-d2)/2+w2*(d3*(1-w1)+d2*w1)
df.loc[idx, 4] = w1 * np.abs(d3-d2)/2+w2*(d3*(1-w1)+d2*w1)
# df[5][idx]= w1 * d1 * np.sin(np.arccos(np.abs((d3-d2)/(2*d1))))
df.loc[idx, 5] = w1 * d1 * np.sin(np.arccos(np.abs((d3-d2)/(2*d1))))
return df
def extract_flights(matrix, r, w):
np_matrix = np.array(matrix)
if len(np_matrix) == 1:
row = [[3, np_matrix[0][4], np_matrix[0][5], np_matrix[0][1], float("NaN"), float("NaN"), float("NaN")]]
return row
cur_idx = 0
output = []
timestamp = np_matrix[cur_idx][1]
x = np_matrix[cur_idx][4]
y = np_matrix[cur_idx][5]
distance_line = [x, y, timestamp, float("NaN"), float("NaN"), float("NaN")]
flight_stat = False
last_pause_time = 0
last_pause_idx = -1
while True:
next_idx = cur_idx + 1
if next_idx == len(np_matrix)-1:
distance_line[3] = np_matrix[next_idx][4]
distance_line[4] = np_matrix[next_idx][5]
distance_line[5] = np_matrix[next_idx][1]
output.append(distance_line)
break
while True:
part_of_matrix = np_matrix[cur_idx:next_idx+1]
x_s = part_of_matrix[:,4]
y_s = part_of_matrix[:,5]
flight_stat = is_flight(x_s, y_s, r, w)
if not flight_stat:
last_pause_time = part_of_matrix[len(part_of_matrix)-1][1]
last_pause_idx = next_idx
break
next_idx += 1
if next_idx >= len(np_matrix):
break
if next_idx == cur_idx + 1 and next_idx < len(np_matrix):
distance_line[2] = np_matrix[next_idx][1]
np_matrix = np.delete(np_matrix, (next_idx), axis=0)
else:
distance_line[3] = np_matrix[next_idx-1][4]
distance_line[4] = np_matrix[next_idx-1][5]
distance_line[5] = np_matrix[next_idx-1][1]
output.append(distance_line)
cur_idx = next_idx - 1
distance_line = [np_matrix[cur_idx][4], np_matrix[cur_idx][5], np_matrix[cur_idx][1], float("NaN"), float("NaN"), float("NaN")]
if next_idx >= len(np_matrix):
break
if len(output) == 0:
last_timestamp_in_matrix = np_matrix[len(np_matrix)-1][1]
row = [2, x, y, np_matrix[0][1], float("NaN"), float("NaN"), last_timestamp_in_matrix]
return [row]
result = []
if timestamp < output[0][2]:
row = [2, x, y, timestamp, float("NaN"), float("NaN"), output[0][2]]
result.append(row)
if len(output) == 1:
result[0][6] = output[0][5]
return result
if len(output) == 1:
result = [[1] + output[0]]
return result
for idx, row in enumerate(output):
if result != []:
cur_time = row[2]
prev_row = result[len(result)-1]
if prev_row[0] == 2: #is a pause
if cur_time == prev_row[6]:
row[0] = prev_row[1]
row[1] = prev_row[2]
result.append([1] + row)
else:
print("error sanity check") #sanity check
else:
if prev_row[6] != cur_time:
#add pause
prev_x = prev_row[4]
prev_y = prev_row[5]
prev_t = prev_row[6]
pause_row = [2, prev_x, prev_y, prev_t, float("NaN"), float("NaN"), cur_time]
result.append(pause_row)
flight_row = [1, prev_x, prev_y, cur_time, row[3], row[4], row[5]]
result.append(flight_row)
else:
result.append([1] + row)
else:
result.append([1] + row)
last_row = result[len(result)-1]
last_timestamp = last_row[6]
result = np.array(result)
ID_flight = np.where(result[:,0] == 1)[0]
ID_pause = np.where(((result[ID_flight,1] - result[ID_flight,4]) ** 2 + (result[ID_flight,2]-result[ID_flight,5])**2) == 0)[0]
if len(ID_pause) > 0:
for i in len(ID_pause):
result[ID_flight[ID_pause], 0] = 2
result[ID_flight[ID_pause], 4] = float("nan")
result[ID_flight[ID_pause], 5] = float("nan")
result = list(result)
if last_timestamp < last_pause_time:
status = last_row[0]
if status == 1:
x_0 = last_row[4]
y_0 = last_row[5]
else:
x_0 = last_row[1]
y_0 = last_row[2]
x_1 = np_matrix[last_pause_idx][4]
y_1 = np_matrix[last_pause_idx][5]
flight_stat = is_flight(np.array([x_0, x_1]), np.array([y_0, y_1]), r, w)
if flight_stat:
end_row = [1, x_0, y_0, last_timestamp, x_1, y_1, last_pause_time]
else:
end_row = [2, x_0, y_0, last_timestamp, float("NaN"), float("NaN"), last_pause_time]
result.append(end_row)
last_row_in_matrix = np_matrix[len(np_matrix)-1]
last_timestamp_in_matrix = last_row_in_matrix[1]
last_row = result[len(result)-1]
last_timestamp = last_row[6]
if last_timestamp < last_timestamp_in_matrix:
status = last_row[0]
if status == 1:
x_0 = last_row[4]
y_0 = last_row[5]
else:
x_0 = last_row[1]
y_0 = last_row[2]
x_1 = last_row_in_matrix[4]
y_1 = last_row_in_matrix[5]
flight_stat = is_flight(np.array([x_0, x_1]), np.array([y_0, y_1]), r, w)
if flight_stat:
end_row = [1, x_0, y_0, last_timestamp, x_1, y_1, last_timestamp_in_matrix]
result.append(end_row)
else:
if result[len(result)-1][0] == 2:
result[len(result)-1][6] = last_timestamp_in_matrix
else:
end_row = [2, x_0, y_0, last_timestamp, float("NaN"), float("NaN"), last_timestamp_in_matrix]
result.append(end_row)
return result
def guess_pause(matrix, min_duration=300, pause_rad=75):
print("Inferring pauses...")
np_matrix = np.array(matrix)
flatmat = get_flatmat(np_matrix, min_duration, pause_rad)
result = collapse_pause_in_matrix(np_matrix, flatmat)
return result
def collapse_pause_in_matrix(matrix, flatmat):
if len(flatmat) == 0:
return matrix
else:
output_mat = []
if flatmat[0][0] > 1:
output_mat = list(matrix[0:flatmat[0][0]-1,])
for i in range(len(flatmat)):
start_idx = flatmat[i][0]
end_idx = flatmat[i][1]
result = collapse_to_pause(matrix[start_idx:end_idx+1])
output_mat.append(result)
if i+1 < len(flatmat) and flatmat[i][1] < flatmat[i+1][1] - 1:
another_start = flatmat[i][1]+1
another_end = flatmat[i+1][0]
rows = matrix[another_start:another_end]
for row in rows:
output_mat.append(row.tolist())
if flatmat[-1][1] < len(matrix)-1:
the_idx = flatmat[-1][1]+1
rows = matrix[the_idx:len(matrix),]
for row in rows:
output_mat.append(row.tolist())
if len(output_mat) == 1:
return output_mat
else:
# Group together adjacent pauses, averaging their location
flatmat2 = []
output_mat2 = []
collapse = False
cstart = 0
for i in range(1, len(output_mat)):
if output_mat[i][0] != 2 and not collapse:
continue
elif output_mat[i][0] != 2 and collapse:
collapse = False
flatmat2.append([cstart, i-1])
elif output_mat[i][0] == 2 and output_mat[i-1][0]==2 and not collapse:
cstart = i-1
collapse = True
elif output_mat[i][0] == 2 and collapse:
continue
if collapse and output_mat[-1][0] == 2:
flatmat2.append([cstart, len(output_mat)-1])
if flatmat2 == []:
output_mat2 = output_mat
else:
output_mat2 = []
if flatmat2[0][0] > 1:
end_idx = flatmat2[0][0]
output_mat2 = output_mat[0: end_idx]
for i in range(len(flatmat2)):
start_idx = flatmat2[i][0]
end_idx = flatmat2[i][1]+1
temp = output_mat[start_idx:end_idx]
result = collapse_to_pause(temp)
output_mat2.append(result)
if i < len(flatmat2)-1 and flatmat2[i][1] < flatmat2[i+1][0]-1:
init_idx = flatmat2[i][1]+1
end_idx = flatmat2[i+1][0]
result2 = output_mat[init_idx:end_idx]
temp_result2 = output_mat[init_idx:end_idx+1]
output_mat2 += result2
if flatmat2[-1][1] < len(output_mat):
begin = (flatmat2[-1][1]+1)
end = len(output_mat)
temp = output_mat[begin:len(output_mat)]
for t in temp:
output_mat2.append(t)
# Set flight endpoints equal to pause endpoints
if output_mat2[0][0]== 1 and output_mat2[1][0] == 2:
output_mat2[0][4] = output_mat2[1][1]
output_mat2[0][5] = output_mat2[1][2]
for i in range(1, len(output_mat2)-1):
if output_mat2[i][0] == 1 and output_mat2[i-1][0] == 2:
output_mat2[i][1] = output_mat2[i-1][1]
output_mat2[i][2] = output_mat2[i-1][2]
if output_mat2[i][0] == 1 and output_mat2[i+1][0] == 2:
output_mat2[i][4] = output_mat2[i+1][1]
output_mat2[i][5] = output_mat2[i+1][2]
if output_mat2[-2][0] == 2 and output_mat2[-1][0] == 1:
output_mat2[-1][1] = output_mat2[-2][1]
output_mat2[-1][2] = output_mat2[-2][2]
return output_mat2
def get_flatmat(np_matrix, min_duration=300, pause_rad=60):
index = 0
incr = 1
t_current = np_matrix[index][3]
collapse = False
flatmat = []
counter_here = 0
while True:
sec_time = np_matrix[index+incr][6]
if sec_time:
t_next = sec_time
else:
t_next = np_matrix[index+incr][3]
tmp_again = t_next - t_current
if str(tmp_again) == "nan":
tmp_again = min_duration
if tmp_again >= min_duration:
tmp = index+incr
if np_matrix[index+incr][0] == 1:
#combine
counter_here += 1
left_cols = np_matrix[index:(index+incr+1),1:3]
right_cols = np_matrix[index:(index+incr+1),4:6]
combined = np.concatenate((left_cols, right_cols))
max_rad = get_max_radius(combined)
else:
left_cols = np_matrix[index:(index+incr+1),1:3]
max_rad = get_max_radius(left_cols)
if max_rad > pause_rad and not collapse:
index += 1
sec_time = np_matrix[index][6]
t_current = np_matrix[index][0]
incr = 0
if max_rad > pause_rad and collapse:
is_else = False
if np_matrix[index+incr-1, 0] == 4:
if index < index+incr - 2:
flatmat.append([index, index+incr-2])
else:
end_idx = index + incr - 1
if index != end_idx:
flatmat.append([index, end_idx])
is_else = True
index += incr
sec_time = np_matrix[index][6]
t_current = np_matrix[index][0]
incr = 0
collapse = False
if max_rad <= pause_rad:
collapse = True
incr += 1
if index + incr >= len(np_matrix):
if max_rad <= pause_rad and t_next - t_current >= min_duration:
flatmat.append([index, index+incr-1])
break
return flatmat
def collapse_to_pause(matrix):
if len(matrix) == 1:
return matrix[0]
#check if there is None
for i, row in enumerate(matrix):
for j, cell in enumerate(row):
if not cell:
matrix[i][j] = float("NaN")
matrix = np.array(matrix)
center = np.nanmean(matrix[:,1:3], axis=0)
if str(matrix[-1][6]) != "nan":
row = [2, center[0], center[1], matrix[0][3], float("NaN"), float("NaN"), matrix[-1][6]]
else:
row = [2, center[0], center[1], matrix[0][3], float("NaN"), float("NaN"), matrix[-1][3]]
return row
def get_max_radius(matrix):
center_of_points = np.nanmean(matrix, axis=0)
x_list = matrix[:,0]
y_list = matrix[:,1]
dists = np.sqrt((x_list - center_of_points[0])**2 + (y_list - center_of_points[1])**2)
max_dist = np.nanmax(dists)
return max_dist
def initialize_params(matrix):
np_matrix = np.array(matrix)
one = np.where(np_matrix[:,0] == 1)
two = np.where(np_matrix[:,0] == 2)
three = np.where(np_matrix[:,0] == 3)
four = np.where(np_matrix[:,0] == 4)
if len(one) > 1:
ID1p1 = one[0]+1
condition1 = one[0][len(one[0])-1]
if len(one[0]) > 0 and condition1 == len(np_matrix)-1:
ID1p1 = ID1p1[:len(ID1p1)-1]
all_timestamp = np.apply_along_axis(np.mean, 1, np_matrix[:,[3,6]])
all_x = np_matrix[:, 1]
all_y = np_matrix[:, 2]
#which code 1 is followed by 1
ind11 = ID1p1[np.where(np_matrix[ID1p1,0] == 1)]
#which flight(1) followed by pause (2)
ind12 = ID1p1[np.where(np_matrix[ID1p1,0] == 2)]
l1 = len(ind11)
l2 = len(ind12)
if (l1 + l2) > 0:
phatall = l2/(l1+l2)
if (l1+l2) == 0:
phatall = len(two)/(len(one) + len(two))
# ------ flight distances & times & pauses -------
flight_distances = []
flight_times = []
pause_times = []
for row in np_matrix:
if row[0] == 1:
distance = np.sqrt((row[1]-row[4])**2 + (row[2] - row[5])**2)
flight_distances.append(distance)
times = row[6] - row[3]
flight_times.append(times)
if row[0] == 2:
pause = row[6] - row[3]
pause_times.append(pause)
fxs = np_matrix[one][:,1]
fys = np_matrix[one][:,2]
fa = np.zeros(len(one[0]))
yvals = np_matrix[one][:,5] - np_matrix[one][:,2]
xvals = np_matrix[one][:,4] - np_matrix[one][:,1]
IDyg0 = np.where(yvals[:] >=0)[0]
IDxg0 = np.where(xvals[:] >=0)[0]
IDyl0 = np.where(yvals[:] < 0)[0]
IDxl0 = np.where(xvals[:] < 0)[0]
IDgg = list(set(IDyg0) & set(IDxg0))
IDlg = list(set(IDyg0) & set(IDxl0))
IDgl = list(set(IDyl0) & set(IDxg0))
IDll = list(set(IDyl0) & set(IDxl0))
IDgg.sort()
IDlg.sort()
IDgl.sort()
IDll.sort()
fa[IDgg] = np.arctan(list(yvals[IDgg]/xvals[IDgg]))
fa[IDgl] = np.arctan(list(yvals[IDgl]/xvals[IDgl]))+2*math.pi
fa[IDlg] = np.arctan(list(yvals[IDlg]/xvals[IDlg])) +math.pi
fa[IDll] = np.arctan(list(yvals[IDll]/xvals[IDll])) +math.pi
#flight timestamps
flight_timestamps = np_matrix[one][:,3]
pxs = np_matrix[two][:,1] #x
pys = np_matrix[two][:,2] #y
pts = np_matrix[two][:,3] #pause timestamps
the_dict = {'ID1': one, "ID2": two, "ID3": three, "ID4":four, "ID1p1": ID1p1, "allts": all_timestamp, "ind11": ind11, "ind12": ind12, "phatall": phatall, "fd": flight_distances, "ft": flight_times, "fa": fa, "fts": flight_timestamps, "pt": pause_times, "pts": pts, "fxs": fxs, "fys": fys, "pxs": pxs, "pys": pys, "allxs": all_x, "allys": all_y}
return the_dict
else:
print("No flight")
return None
#simulate_mobility_gaps
#impute the missing gaps hot-tech computation
def simulate_mobility_gaps(matrix, obj, wtype, spread_pars):
ind11 = obj['ind11']
ind12 = obj['ind12']
fd = obj['fd']
ft = obj['ft']
fts = obj['fts']
fa = obj['fa']
pts = obj['pts']
allts = obj['allts']
phatall = obj['phatall']
fxs = obj['fxs']
fys = obj['fys']
pxs = obj['pxs']
pys = obj['pys']
allxs = obj['allxs']
allys = obj['allys']
pt = obj['pt']
if len(matrix) == 0:
return matrix
f_outmat = []
for i in tqdm(range(len(matrix)), total=len(matrix)):
if matrix[i][0] == 1:
cur_x = matrix[i][4]
cur_y = matrix[i][5]
f_outmat.append(matrix[i])
elif matrix[i][0] <= 3:
cur_x = matrix[i][1]
cur_y = matrix[i][2]
f_outmat.append(matrix[i])
else: #check here
if i > 0 and i < len(matrix) - 1:
varmult = 1
while True:
#----- fw ------
denominator = varmult*(matrix[i][6]-matrix[i][3])
the_mean = np.mean([matrix[i][3],matrix[i][6]])
temp_fw = (fts-the_mean)/denominator
temp_pw = (pts-the_mean)/denominator
temp_allw = (allts-the_mean)/denominator
fw = norm.pdf(list(temp_fw))
pw = norm.pdf(list(temp_pw))
allw = norm.pdf(list(temp_allw))
if len(pts)>0 and len(fts)>0 and np.sum(fw)>0 and np.sum(pw)>0:
break
if len(pts) == 0 and len(fts)>0 and np.sum(fw)>0:
break
if len(fts) == 0 and len(pts)>0 and np.sum(pw)>0:
break
varmult = varmult*2
s11 = np.nansum(allw[ind11])
s12 = np.nansum(allw[ind12])
if (s11 + s12) == 0:
phatcur = phatall
else:
phatcur = s12/float(s11+s12)
if wtype == "LI":
new_row = [1, cur_x, cur_y, matrix[i][3], matrix[i+1][1], matrix[i+1][2], matrix[i][6]]
f_outmat.append(new_row)
else:
rb_out = random_bridge(x0=cur_x,y0=cur_y,x1=matrix[i+1][1],y1=matrix[i+1][2],t0=matrix[i][3],t1=matrix[i][6],fd=fd,ft=ft,fts=fts,fa=fa,fw=fw,probp=phatcur,pt=pt,pts=pts,pw=pw,allts=allts,allw=allw,ind11=ind11,ind12=ind12,i_ind=i,pxs=pxs,pys=pys,fxs=fxs,fys=fys,allxs=allxs,allys=allys,wtype=wtype,canpause=matrix[i-1][0]==1,spread_pars=spread_pars,niter=100)
old_foutmat = f_outmat
f_outmat = f_outmat + rb_out
return f_outmat
def get_weights(fxs, cur_x, fys, cur_y, varmult, spread_pars, fts, cur_t, t1, t0, weight_type):
if weight_type == "TL":
calc = spread_pars[0]*(fts-cur_t)/(varmult*(t1-t0))
t_score = t.pdf(calc, df=spread_pars[1])
return t_score
elif weight_type == "GL":
pow_xs = (fxs-cur_x)**2
pow_ys = (fys-cur_y)**2
sum_x_y = np.array(pow_xs + pow_ys, dtype=np.float64)
distance = np.sqrt(sum_x_y)
calc = spread_pars[0]*distance/(50*varmult)
t_score = t.pdf(calc,df=spread_pars[1])
return t_score
elif weight_type == "GLR":
pow_xs = (fxs-cur_x)**2
pow_ys = (fys-cur_y)**2
sum_x_y = np.array(pow_xs + pow_ys, dtype=np.float64)
distance = np.sqrt(sum_x_y)
calc = spread_pars[0]*distance/(50*varmult)
left = t.pdf(calc,df=spread_pars[1])
temp1 = np.abs((fts-cur_t))%(60*60*24)
temp2 = (60*60*24)-np.abs((fts-cur_t))%(60*60*24)
min_num = np.minimum(temp1, temp2)
top = np.array(spread_pars[0]* min_num)
bottom = np.array(varmult*(t1-t0))
top_div_bottom = list(top/bottom)
right = t.pdf(top_div_bottom,df=spread_pars[1])
fw = left * right
return fw
def random_bridge(x0,y0,x1,y1,t0,t1,fd,ft,fts,fa,fw,probp,pt,pts,pw,allts,allw,ind11,ind12,i_ind,pxs,pys,fxs,fys,allxs,allys,wtype,canpause,spread_pars,niter=100):
success = False
for i in range(niter):
outmat = []
cur_x = x0
cur_y = y0
cur_t = t0
t_arrive = t0
while True:
varmult = 1
while True:
fw = get_weights(fxs, cur_x, fys, cur_y, varmult, spread_pars, fts, cur_t, t1, t0, weight_type=wtype)
pw = get_weights(pxs, cur_x, pys, cur_y, varmult, spread_pars, pts, cur_t, t1, t0, weight_type=wtype)
allw = get_weights(allxs, cur_x, allys, cur_y, varmult, spread_pars, allts, cur_t, t1, t0, weight_type=wtype)
if len(pts)>0 and len(fts)>0 and np.sum(fw)>0 and np.sum(pw)>0:
break
if len(pts) == 0 and len(fts)>0 and np.sum(fw)>0:
break
if len(fts) == 0 and len(pts)>0 and np.sum(pw)>0:
break
varmult = varmult*2
s11 = np.nansum(allw[ind11])
s12 = np.nansum(allw[ind12])
if (s11 + s12) == 0:
phatcur = probp
else:
phatcur = s12/float(s11+s12)
probp = phatcur
uniform_sample = np.random.uniform(0, 1, 1)
if canpause and uniform_sample < probp:
canpause = False
normalized_pw = pw/np.sum(pw)
p_samp = np.random.choice(pt, 1, replace=False, p=normalized_pw)
if (cur_t + p_samp) < 1:
next_line = [2, cur_x, cur_y, cur_t, float("Nan"), float("Nan"), cur_t + p_samp]
cur_t = cur_t + p_samp
outmat.append(next_line)
else:
break
else:
canpause = True
the_list = list(range(0,len(fa)))
normalized_fw = fw/np.sum(fw)
IDsamp = np.random.choice(the_list, 1, replace=False, p=normalized_fw)[0]
a_samp = fa[IDsamp]
d_samp = fd[IDsamp]
t_samp = ft[IDsamp]
if cur_t + t_samp < t1:
next_x = cur_x + np.cos(a_samp)*d_samp
next_y = cur_y + np.sin(a_samp) * d_samp
if str(next_x) == "nan":
break
next_line = [1, cur_x, cur_y, cur_t, next_x, next_y, cur_t + t_samp]
cur_t = cur_t + t_samp
cur_x = next_x
cur_y = next_y
outmat.append(next_line)
t_arrive = cur_t
else:
break
if t_arrive > t0:
success = True
break
if not success:
return [[1, x0, y0, t0, x1, y1, t1]]
else:
cur_x = x0
cur_y = y0
for i in range(len(outmat)):
if outmat[i][0] == 1:
outmat[i][1] = cur_x
outmat[i][2] = cur_y
cur_t = outmat[i][6]
w = (t_arrive-cur_t)/(t_arrive - t0)
outmat[i][4]=outmat[i][4]*w+x1*(1-w)
outmat[i][5]=outmat[i][5]*w+y1*(1-w)
cur_x=outmat[i][4]
cur_y=outmat[i][5]
else:
cur_t=outmat[i][3]
outmat[i][1]=cur_x
outmat[i][2]=cur_y
outmat[-1][6] = t1
return outmat
#get significant locations
def sig_locs(mobmat, obj, center_rad=125, timezone="", min_pause_time=600):
np_matrix = np.array(mobmat, dtype=np.float64)
obj = initialize_params(np_matrix)
ID2_from_matrix = obj['ID2'][0]
if len(ID2_from_matrix) == 0:
print("No pauses in mobmat within function sig locs")
return None
elif len(ID2_from_matrix) == 1:
x_for_outmat = [np_matrix[ID2_from_matrix[1]][1]]
y_for_outmat = [np_matrix[ID2_from_matrix[1]][2]]
timepresent_for_outmat = np.array([0])
home_for_outmat = np.array([1])
outmat=[x_for_outmat, y_for_outmat, timepresent_for_outmat, home_for_outmat]
else:
pt_div_min = np.array(obj["pt"])/min_pause_time
ptred = np.floor(pt_div_min)
pause_ids = np.where(ptred > 0)[0]
if len(pause_ids) < 2:
print("No pauses long enough in mobmat within function SigLocs!")
return None
pmat = []
for i in range(len(ID2_from_matrix)):
if ptred[i] > 0:
row_i = np_matrix[ID2_from_matrix[i]][1:3]
duplicate_row_i = [row_i] * int(ptred[i])
pmat += duplicate_row_i
kmeansk_v = list(range(2, len(pause_ids)+1))
previous_fit = []
for i in range(len(kmeansk_v)):
kmeansk = kmeansk_v[i]
kmeans_fit = KMeans(n_clusters=kmeansk, random_state=23).fit(pmat)
the_distances = pdist(kmeans_fit.cluster_centers_, 'euclidean')
min_dist = np.min(the_distances)
if min_dist < center_rad*2 or i == len(kmeansk_v):
if i > 0:
kmeansk = kmeansk_v[i-1]
kmeans_fit = previous_fit
break
previous_fit = kmeans_fit
cluster_centers = kmeans_fit.cluster_centers_
num_row_centers = len(cluster_centers)
outmat = [cluster_centers[:,0], cluster_centers[:,1], np.zeros(num_row_centers), np.zeros(num_row_centers)]
#Determine time spent at these significant locations
for i in range(len(ID2_from_matrix)):
for j in range(num_row_centers):
euc_dist = np.sqrt((np_matrix[ID2_from_matrix[i]][1] - outmat[0][j])**2 + (np_matrix[ID2_from_matrix[i]][2] - outmat[1][j])**2)
if euc_dist < center_rad:
outmat[2][j] = outmat[2][j] + obj['pt'][i]
#Determine which is home (where is the night spent)
for i in range(len(ID2_from_matrix)):
avg_time = (np_matrix[ID2_from_matrix[i]][6]+np_matrix[ID2_from_matrix[i]][3])/2
if timezone == "":
hour_of_day = datetime.datetime.fromtimestamp(avg_time).hour
else:
hour_of_day = datetime.datetime.fromtimestamp(avg_time, tz=tz.gettz(timezone)).hour
if hour_of_day >= 21 or hour_of_day < 6:
for j in range(num_row_centers):
euc_dist = np.sqrt((np_matrix[ID2_from_matrix[i]][1] - outmat[0][j])**2 + (np_matrix[ID2_from_matrix[i]][2] - outmat[1][j])**2)
if euc_dist < center_rad:
outmat[3][j] = outmat[3][j] + obj['pt'][i]
IDmax = np.argmax(outmat[3])
outmat[3][IDmax] = 1
transposed_outmat = np.array(outmat).transpose()
zero_ids = np.where(transposed_outmat[:,2] ==0)[0]
new_outmat = np.delete(transposed_outmat, zero_ids, axis=0)
sorted_outmat = new_outmat[new_outmat[:,2].argsort()[::-1]]
return sorted_outmat
def home_time(matrix, slout, center_rad):
IDhome = np.where(slout[:, 3] == 1)[0]
x_center = slout[IDhome, 0]
y_center = slout[IDhome, 1]
tot_time = 0
for i in range(len(matrix)):
if matrix[i][0]==2 and np.sqrt((matrix[i][1]-x_center)**2+(matrix[i][2]-y_center)**2)<center_rad:
tot_time=tot_time+matrix[i][6]-matrix[i][3]
result = tot_time/60
return result
def distance_traveled(matrix):
dt = 0
for i in range(len(matrix)):
if matrix[i][0]==1:
temp = np.sqrt((matrix[i][4]-matrix[i][1])**2+(matrix[i][5]-matrix[i][2])**2)
dt += temp
return dt
def radius_of_gyration(matrix, interval):
IDskip = np.where(matrix[:,0]==4)[0]
if len(IDskip) > 0:
matrix = np.delete(matrix, IDskip, axis=0)
N = len(matrix)
w_v = np.zeros(N)
x_v = np.zeros(N)
y_v = np.zeros(N)
for i in range(N):
if matrix[i][0] == 4:
continue
if matrix[i][0] == 3:
x_v[i] = matrix[i][1]
y_v[i] = matrix[i][2]
w_v[i] = interval
if matrix[i][0] == 1:
x_v[i] = np.mean([matrix[i][1], matrix[i][4]])
y_v[i] = np.mean([matrix[i][2], matrix[i][5]])
w_v[i] = matrix[i][6]-matrix[i][3]
if matrix[i][0] == 2:
x_v[i] = matrix[i][1]
y_v[i] = matrix[i][2]
w_v[i] = matrix[i][6]-matrix[i][3]
sum_w_v = np.sum(w_v)
x_avg = np.sum(w_v*x_v)/sum_w_v
y_avg = np.sum(w_v*y_v)/sum_w_v
result = np.sqrt(np.sum(((x_v-x_avg)**2+(y_v-y_avg)**2)*w_v)/sum_w_v)
return result
def max_diameter(matrix):
IDmv = np.where(matrix[:,0] <=2)[0]
if len(IDmv) < 2:
return 0
else:
the_distances = pdist(matrix[IDmv,1:3], 'euclidean')
max_dist = np.max(the_distances)
return max_dist
def max_home_distance(matrix, homex, homey):
IDmv = np.where(matrix[:,0] <=2)[0]
if len(IDmv) == 0:
return None
else:
dfhome = np.zeros(len(IDmv))
for i in range(len(IDmv)):
temp = np.sqrt((matrix[IDmv[i]][1]-homex)**2+(matrix[IDmv[i]][2]-homey)**2)
dfhome[i] = temp
return np.max(dfhome)
def sig_locs_visited(matrix, slout, center_rad):
places_visited = np.zeros(len(slout))
for i in range(len(matrix)):
if matrix[i][0] <=3:
for j in range(len(slout)):
temp = np.sqrt((slout[j][0]-matrix[i][1])**2 + (slout[j][1]-matrix[i][2])**2)
if temp < center_rad:
places_visited[j] = 1
result = np.sum(places_visited)
return result
def avg_flight(matrix, avg_type):
#avg_type = "length" or "duration"
num = 0.0
total = 0.0
for i in range(len(matrix)):
if matrix[i][0] == 1:
added_number = 0
if avg_type == "length":
added_number = np.sqrt((matrix[i][4]-matrix[i][1])**2 + (matrix[i][5]-matrix[i][2])**2)
else:
added_number = matrix[i][6] - matrix[i][3]
total += added_number
num += 1
if num == 0:
return 0
else:
return total/num
def std_flight(matrix, std_type):
#std_type = "length" or "duration"
ID1 = np.where(matrix[:,0]==1)[0]
if len(ID1) <= 1:
return 0
else:
if std_type == "length":
temp = ((matrix[ID1,5]-matrix[ID1,2])**2 + (matrix[ID1, 4] - matrix[ID1, 1])**2)
dist = np.sqrt(list((matrix[ID1,5]-matrix[ID1,2])**2 + (matrix[ID1, 4] - matrix[ID1, 1])**2))
std_result = np.std(dist)
else:
std_result = np.std(matrix[ID1, 6] - matrix[ID1,3])
return std_result
def prob_pause(matrix):
t_pause = 0.0
t_flight = 0.0
for i in range(len(matrix)):
diff = matrix[i][6] - matrix[i][3]
if matrix[i][0] == 1:
t_flight += diff
elif matrix[i][0] == 2:
t_pause += diff
result = t_pause/(t_pause+t_flight)
return result
def sig_loc_entropy(matrix, slout, center_rad):
tp = np.zeros(len(slout))
for i in range(len(matrix)):
if matrix[i][0] ==2:
for j in range(len(slout)):
temp = np.sqrt((slout[j][0]-matrix[i][1])**2 + (slout[j][1]-matrix[i][2])**2)
if temp < center_rad:
tp[j] += matrix[i][6]-matrix[i][3]
total = 0
sum_tp = np.sum(tp)
if sum_tp == 0:
return 0
else:
for i in range(len(slout)):
p = tp[i]/sum_tp
if p > 0:
total = total - p*np.log(p)
return total
def mins_missing(matrix):
total = 0.0
for i in range(len(matrix)):
if matrix[i][0] == 4:
total += matrix[i][6] - matrix[i][3]
return total/60.0
def location_at(matrix, the_time):
for i in range(len(matrix)):
if matrix[i][0] <= 2:
if matrix[i][3] <= the_time and matrix[i][6] >= the_time:
return matrix[i][1], matrix[i][2]
elif matrix[i][2] == 3:
if matrix[i][3] == the_time:
return matrix[i][1], matrix[i][2]
return None
def day_dist(i1, i2, mobmat, subset_inds_v,subset_start_time_v,center_rad):
multiplier = np.arange(30*60, 60*60*24, 60*60)
checkpoints = subset_start_time_v[i1] + multiplier
checkpoints2 = subset_start_time_v[i2] + multiplier
matrix1 = mobmat[subset_inds_v[i1]]
matrix2 = mobmat[subset_inds_v[i2]]
same_place = np.empty(24)
same_place[:] = np.nan
for i in range(24):
loc1 = location_at(matrix1, checkpoints[i])
if loc1 == None:
continue
abs_diff = np.abs(matrix2[:,3]-checkpoints[i])%(60*60*24)
smaller = list(np.where(abs_diff < 30*60)[0])
greater = list(np.where(abs_diff > (60*60*24)-30*60)[0])
IDs = smaller + greater
if len(IDs) > 0:
can_be_zero = False
for j in range(len(IDs)):
if matrix2[IDs[j]][0] <= 3:
can_be_zero = True
temp = np.sqrt((matrix2[IDs[j]][1]-loc1[0])**2 + (matrix2[IDs[j]][2]-loc1[1])**2)
if temp < center_rad:
same_place[i] = 1
if can_be_zero and np.isnan(same_place[i]):
same_place[i] = 0
else:
for j in range(len(matrix2)):
if not np.isnan(matrix2[j][3]) and not np.isnan(matrix2[j][6]) and matrix2[j][3] < checkpoints2[i] and matrix2[j][6] > checkpoints2[i]:
if matrix2[j][0] !=4 and np.sqrt((matrix2[j][1]-loc1[0])**2 + (matrix2[j][2]-loc1[1])**2) < center_rad:
same_place[i] = 1
else:
same_place[i] = 0
break
if np.isnan(same_place).all():
return None
else:
return np.nanmean(same_place)
def daily_routine_index(idx, mobmat, subset_inds_v, subset_day_of_week_v, subset_start_time_v, center_rad):
submat = mobmat[subset_inds_v[idx]]
daydist_v = np.ones(len(subset_inds_v))
daydist_v[:] = np.nan
for i in range(len(subset_inds_v)):
if i == idx:
continue
daydist_v[i] = day_dist(idx, i, mobmat, subset_inds_v, subset_start_time_v, center_rad)
if len(np.where(daydist_v != -1)[0]) == 0:
circ_score = None
else:
circ_score = np.nanmean(daydist_v)
if subset_day_of_week_v[idx] == "Saturday" or subset_day_of_week_v[idx] == "Sunday":
IDcompare = np.concatenate([np.where(subset_day_of_week_v == "Saturday")[0], np.where(subset_day_of_week_v == "Sunday")[0]])
else:
IDcompare = np.concatenate([np.where(subset_day_of_week_v == "Monday")[0], np.where(subset_day_of_week_v == "Tuesday")[0], np.where(subset_day_of_week_v == "Wednesday")[0], np.where(subset_day_of_week_v == "Thursday")[0], np.where(subset_day_of_week_v == "Friday")[0]])
day_d_compare = daydist_v[IDcompare]
not_nan = day_d_compare[~np.isnan(day_d_compare)]
if len(IDcompare) == 0 or len(not_nan) == 0:
week_score = None
else:
week_score = np.nanmean(daydist_v[IDcompare])
return circ_score, week_score
def get_mobility_features(mobmat, obj, mobmatmiss, timezone, center_rad, interval):
print("Get mobility features...")
mobmat = np.array(mobmat)
mobmatmiss = np.array(mobmatmiss)
slout = sig_locs(mobmat, obj, center_rad, timezone=timezone) #x, y, timepresent, home
IDhome = np.where(slout[:,3] ==1)[0]
if len(IDhome) == 0:
IDhome = 0
homex = slout[IDhome,0]
homey = slout[IDhome,1]
if timezone != "":
date = datetime.datetime.fromtimestamp(mobmat[0][3], tz=tz.gettz(timezone))
else:
date = datetime.datetime.fromtimestamp(mobmat[0][3])
current_year = date.year
current_month = date.month
current_day = date.day
current_hour = date.hour
current_minute = date.minute
current_second = date.second
weekday_mapping = {0: "Monday", 1: "Tuesday", 2: "Wednesday", 3: "Thursday", 4: "Friday", 5: "Saturday", 6: "Sunday"}
subset_inds_v = {}
subset_day_of_week_v = []
subset_start_time_v = []
cur_date = (current_year, current_month, current_day)
daystr_v = [cur_date]
day_ind=0
subset_inds = [0]
if(len(mobmat)<2):
outmat = None
return outmat, slout
else:
for i in range(1, len(mobmat)):
next_date_raw = datetime.datetime.fromtimestamp(mobmat[i][3])
next_date = (next_date_raw.year, next_date_raw.month, next_date_raw.day)
if next_date == cur_date and i < len(mobmat)-1:
if str(mobmat[i][6]) != "nan":
end_date_raw = datetime.datetime.fromtimestamp(mobmat[i][6])
end_date = (end_date_raw.year, end_date_raw.month, end_date_raw.day)
subset_inds.append(i)
else:
subset_inds_v[day_ind] = subset_inds
cur_weekday = weekday_mapping[date.weekday()]
subset_day_of_week_v.append(cur_weekday)
to_timestamp = datetime.datetime.strptime("{}-{}-{}".format(cur_date[0], cur_date[1], cur_date[2]), "%Y-%m-%d").timestamp()
subset_start_time_v.append(to_timestamp)
day_ind += 1
if mobmat[i-1][0] == 2 and (mobmat[i-1][6]-mobmat[i-1][3])/(60*60*24)>1:
ii = 1
temp_mid = mobmat[i-1][3]+(60*60*24)*ii
mid_date_raw = datetime.datetime.fromtimestamp(temp_mid)
mid_date = (mid_date_raw.year, mid_date_raw.month, mid_date_raw.day)
while mid_date != next_date:
subset_day_of_week_v.append(weekday_mapping[mid_date_raw.weekday()])
middate_numeric = datetime.datetime.strptime("{}-{}-{}".format(mid_date[0], mid_date[1], mid_date[2]), "%Y-%m-%d").timestamp()
subset_start_time_v.append(middate_numeric)
subset_inds_v[day_ind] = [i-1]
day_ind += 1
if mid_date != daystr_v[-1]:
daystr_v.append(mid_date)
ii += 1
temp_mid = mobmat[i-1][3]+(60*60*24)*ii
mid_date_raw = datetime.datetime.fromtimestamp(temp_mid)
mid_date = (mid_date_raw.year, mid_date_raw.month, mid_date_raw.day)
cur_date = next_date
date = next_date_raw
subset_inds = [i-1, i]
if cur_date != daystr_v[-1] and not (len(daystr_v) == len(subset_inds_v) and len(mobmat) == i+1):
daystr_v.append(cur_date)
#### Partition mobmatmiss data into daily subsets: create subsetinds_v and daystr_v.
cur_date_raw = datetime.datetime.fromtimestamp(mobmatmiss[0][3])
cur_date_miss = (cur_date_raw.year, cur_date_raw.month, cur_date_raw.day)
cur_time_miss = (cur_date_raw.hour, cur_date_raw.month, cur_date_raw.second)
miss_subset_inds_v = {}
miss_daystr_v = [cur_date_miss]
miss_dayind = 0
subset_inds = [0]
for i in range(1, len(mobmatmiss)):
next_date_raw = datetime.datetime.fromtimestamp(mobmatmiss[i][3])
next_date_miss = (next_date_raw.year, next_date_raw.month, next_date_raw.day)
if next_date_miss == cur_date_miss and i < len(mobmatmiss)-1:
subset_inds.append(i)
else:
cur_date_miss = next_date_miss
miss_subset_inds_v[miss_dayind] = subset_inds
if cur_date_miss != miss_daystr_v[-1] and not (len(miss_daystr_v) == len(miss_subset_inds_v) and i == len(mobmatmiss)-1):
miss_daystr_v.append(cur_date_miss)
subset_inds = [i]
miss_dayind += 1
if mobmatmiss[i-1][0] == 2 and (mobmatmiss[i-1][6]-mobmatmiss[i-1][3])/(60*60*24)>1:
ii = 1
temp_mid = mobmatmiss[i-1][3]+(60*60*24)*ii
mid_date_raw = datetime.datetime.fromtimestamp(temp_mid)
mid_date = (mid_date_raw.year, mid_date_raw.month, mid_date_raw.day)
while mid_date != next_date_miss:
miss_subset_inds_v[miss_dayind] = i-1
miss_dayind += 1
if mid_date != miss_daystr_v[-1]:
miss_daystr_v.append(mid_date)
ii += 1
temp_mid = mobmatmiss[i-1][3]+(60*60*24)*ii
mid_date_raw = datetime.datetime.fromtimestamp(temp_mid)
mid_date = (mid_date_raw.year, mid_date_raw.month, mid_date_raw.day)
##### intersect mobmat and mobmatmiss to ignore missing data
IDkeep = []
IDkeepmiss = []
for i in range(len(daystr_v)):
for j in range(len(miss_daystr_v)):
if daystr_v[i] == miss_daystr_v[j]:
IDkeep.append(i)
IDkeepmiss.append(j)
break
if len(IDkeep) == 0:
outmat = None
return outmat, slout
daystr_v = np.array(daystr_v)[IDkeep]
subset_day_of_week_v = np.array(subset_day_of_week_v)[IDkeep]
subset_start_time_v = np.array(subset_start_time_v)[IDkeep]
miss_daystr_v = np.array(miss_daystr_v)[IDkeep]
num_of_features = 15
outmat = np.zeros((len(daystr_v), num_of_features))
submat = []
temp_last = mobmat[subset_inds_v[IDkeep[len(daystr_v)-1]]]
for i in range(len(daystr_v)):
submat = mobmat[subset_inds_v[IDkeep[i]]]
if submat[0][0] == 2 and submat[0][3] < subset_start_time_v[i]:
submat[0][3] = subset_start_time_v[i]
if submat[-1][0] == 2 and submat[-1][6] > subset_start_time_v[i]+60*60*24:
submat[-1][6] = subset_start_time_v[i]+60*60*24
submat_miss = mobmatmiss[miss_subset_inds_v[IDkeep[i]]]
if submat_miss[0][0] == 4 and submat_miss[0][3] < subset_start_time_v[i]:
submat_miss[0][3] = subset_start_time_v[i]
if submat_miss[-1][0] == 4 and submat_miss[-1][6] > subset_start_time_v[i]+60*60*24:
submat_miss[-1][6] = subset_start_time_v[i]+60*60*24
if len(submat) == 0 or len(np.where(slout[:,3] ==1)[0]) == 0:
outmat[i] = [float("NaN")]*15
outmat[i][12] = 1440
else:
outmat[i][0] = home_time(submat, slout, center_rad=200)
outmat[i][1] = distance_traveled(submat)
outmat[i][2] = radius_of_gyration(submat, interval)
outmat[i][3] = max_diameter(submat)
outmat[i][4] = max_home_distance(submat, homex, homey)
outmat[i][5] = sig_locs_visited(submat, slout, center_rad)
outmat[i][6] = avg_flight(submat, "length")
outmat[i][7] = std_flight(submat, "length")
outmat[i][8] = avg_flight(submat, "duration")
outmat[i][9] = std_flight(submat, "duration")
outmat[i][10] = prob_pause(submat)
outmat[i][11] = sig_loc_entropy(submat, slout, center_rad)
outmat[i][12] = mins_missing(submat_miss)
dri_output = daily_routine_index(i, mobmat, subset_inds_v, subset_day_of_week_v, subset_start_time_v, center_rad)
outmat[i][13] = dri_output[0] #seven days equally circadian routine
outmat[i][14] = dri_output[1] #separately
return outmat, slout, daystr_v #daystr_v = row names