import numpy as np import math import pandas as pd from scipy.stats import * from tqdm import tqdm from sklearn.cluster import KMeans from scipy.spatial.distance import pdist, squareform import datetime import glob from dateutil import tz np.set_printoptions(suppress=True, formatter={'float_kind':'{:f}'.format}) np.random.seed(22) def run_test(output_file): print("Output DATA: ") print(output_file) def run_barnett_features_for_rapids(input_dataframe, accuracy_limit=51.0, timezone="", wtype="GLR", spread_pars=[10,1], center_rad=200, interval=10, n_reps=1, min_pause_dur=300, min_pause_dist=60, r=None, w=None, tint_m=None, tint_k=None): data_frame = input_dataframe lonlat, r, w = preprocessing(data_frame, interval=interval, acc_threshold=accuracy_limit, r=r, w=w, tint_m=tint_m, tint_k=tint_k) mobmatmiss = convert_to_flights_pauses(lonlat, r, w) mobmat = guess_pause(mobmatmiss, min_pause_dur, min_pause_dist) obj = initialize_params(mobmat) data_for_pandas = [] if obj: lsmf = [] lssigloc = [] for i in range(n_reps): print("Sim #: ", i+1) out3 = simulate_mobility_gaps(mobmat, obj, wtype, spread_pars) out3 = np.array(out3) IDundef=np.where(out3[:,0]==3)[0] if len(IDundef) > 0: out3 = np.delete(out3, IDundef,axis=0) obj3 = initialize_params(out3) output_features, slout, row_names = get_mobility_features(out3, obj3, mobmatmiss, timezone, center_rad, interval) lsmf.append(output_features) lssigloc.append(slout) #get average np_lsmf = np.array((lsmf)) avg_output_features = [] #convert to pandas dataframe and then return this for j in range(len(row_names)): temp_avg = [] for i in range(n_reps): temp_avg.append(np_lsmf[i][j]) avg_output_features.append(np.mean(temp_avg, axis=0)) for idx, row in enumerate(avg_output_features): date = row_names[idx] date_str = str(date[0]) + "-" + str(date[1]) + "-" + str(date[2]) features = [date_str] for feature in row: features.append(feature) data_for_pandas.append(features) column_names = ["local_date", "hometime", "disttravelled", "rog", "maxdiam", "maxhomedist", "siglocsvisited", "avgflightlen", "stdflightlen", "avgflightdur", "stdflightdur", "probpause", "siglocentropy", "minsmissing", "circdnrtn", "wkenddayrtn"] df = pd.DataFrame(data_for_pandas, columns = column_names) df.set_index('local_date', inplace=True) return df def run_barnett_features(input_dir, output_file, wtype="GLR", spread_pars=[10,1], timezone="", center_rad=200, interval=10, acc_threshold=51.0, n_reps=1, min_pause_dur=300, min_pause_dist=60, r=None, w=None, tint_m=None, tint_k=None): data_frame = load_beiwe(input_dir) lonlat, r, w = preprocessing(data_frame, interval=interval, acc_threshold=acc_threshold, r=r, w=w, tint_m=tint_m, tint_k=tint_k) mobmatmiss = convert_to_flights_pauses(lonlat, r, w) mobmat = guess_pause(mobmatmiss, min_pause_dur, min_pause_dist) obj = initialize_params(mobmat) lsmf = [] lssigloc = [] for i in range(n_reps): print("Sim #: ", i+1) out3 = simulate_mobility_gaps(mobmat, obj, wtype, spread_pars) out3 = np.array(out3) IDundef=np.where(out3[:,0]==3)[0] if len(IDundef) > 0: out3 = np.delete(out3, IDundef,axis=0) obj3 = initialize_params(out3) output_features, slout, row_names = get_mobility_features(out3, obj3, mobmatmiss, timezone, center_rad, interval) lsmf.append(output_features) lssigloc.append(slout) #get average np_lsmf = np.array((lsmf)) avg_output_features = [] for j in range(len(row_names)): temp_avg = [] for i in range(n_reps): temp_avg.append(np_lsmf[i][j]) avg_output_features.append(np.mean(temp_avg, axis=0)) print("Writing features to ", output_file) with open(output_file, "w") as writer: writer.write("Date\tHometime\tDistTravelled\tRoG\tMaxDiam\tMaxHomeDist\tSigLogsVisited\tAvgFlightLen\tStdFlightLen\tAvgFlightDur\tStdFlightDur\tProbPause\tSigLocEntropy\tMinsMissing\tCircdnRtn\tWkEndDayRtn\n") for idx, row in enumerate(avg_output_features): date = row_names[idx] date_str = str(date[0]) + "-" + str(date[1]) + "-" + str(date[2]) feature_str = date_str for feature in row: feature_str += "\t"+str(feature) writer.write(feature_str + "\n") print("Finish writing to ", output_file) def load_beiwe(input_dir, debug_mode=True): # load all files ending with .csv in input_dir print("Reading csv files from {}".format(input_dir)) all_files = glob.glob(input_dir + "/*.csv") the_list = [] counter = 1 for filename in all_files: df = pd.read_csv(filename, index_col=None, header=0) the_list.append(df) counter += 1 new_frame = pd.concat(the_list, axis=0, ignore_index=True) return new_frame #Convert GPS raw data to mobile matrix (GPS2MobMat.R) def preprocessing(df, interval=10.0, acc_threshold=51.0, r=None,w=None,tint_m=None,tint_k=None): df.columns = ['timestamp','latitude','longitude','altitude','accuracy'] # =============================== # 1. order by the timestamp (ascending) # =============================== df = df.sort_values(by=['timestamp']) # print(df) # =============================== # 2. select row if accuracy < threshold #pass # =============================== df_acc = df[df['accuracy'] < acc_threshold] df = df_acc # =============================== #3. collapse data to interval #pass # =============================== if(tint_k != None and tint_m != None): t0 = np.ceil(df['timestamp'].values[0]/1000) df = df[df['timestamp']/1000 - t0 % (tint_k + tint_m) < tint_k] if not r: r = np.sqrt(interval) if not w: w = np.mean(df['accuracy'].values) + interval start_time = (df['timestamp'].iloc[0]/1000) end_time = (df['timestamp'].iloc[-1]/1000) avg_matrix_lst = [] IDam = 0 count = -1 next_line = [1, start_time + interval/2.0, df['latitude'].iloc[0], df['longitude'].iloc[0], 0.0, 0.0] interval_counter = 1 print("Collapse data within", interval, "second intervals...\n") counter = 0 for idx, row in tqdm(df.iterrows(), total=df.shape[0]): if counter > 0: timestamp = row['timestamp'] latitude = row['latitude'] longitude = row['longitude'] if timestamp/1000.00 < start_time + interval: next_line[2] += latitude next_line[3] += longitude interval_counter += 1 else: next_line[2] = next_line[2]/interval_counter next_line[3] = next_line[3]/interval_counter temp = next_line avg_matrix_lst.append(temp) num_miss = np.floor((timestamp/1000 - (start_time + interval))/interval) if num_miss > 0: has_miss = [4, start_time + interval/2.0, start_time + interval * (num_miss + 1) +interval/2.0, float("NaN"), float("NaN"), float("NaN")] avg_matrix_lst.append(has_miss) start_time = start_time + interval * (num_miss+1) next_line = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] next_line[0] = 1 next_line[1] = start_time + interval/2.0 next_line[2] = latitude next_line[3] = longitude interval_counter = 1 counter += 1 avg_matrix_df = pd.DataFrame(data=avg_matrix_lst) # ================================================================ # avg_matrix_df is a matrix of average scores per interval # avg_matrix_df[0] = sign of the case, 1 = normal, 4 = has missing value # avg_matrix_df[1] = time # avg_matrix_df[2] = latitude # avg_matrix_df[3] = longitude # avg_matrix_df[4] = x # avg_matrix_df[5] = y # ================================================================ # =============================== #4. convert from Lat/Lon to X/Y # =============================== print("Convert latitude/longitude to X/Y\n") new_dataframe = lat_long_to_xy(avg_matrix_df) return new_dataframe, r, w def convert_to_flights_pauses(new_dataframe, r, w, output_file=None): print("\nConvert from X/Y to flights/pauses...\n") mobmatmiss = extract_flights_from_dataframe(new_dataframe, r, w) if output_file: #output_file = mobmatmiss print("Writing {} rows to mobmatmiss to tsv...".format(mobmatmiss.shape[0])) mobmatmiss.to_csv(output_file, sep="\t") print("Finish writing mobmatmiss to tsv...") return mobmatmiss def extract_flights_from_dataframe(dataframe, r, w): np_arr = dataframe.to_numpy() cur_idx = 0 result_matrix = [] temp_counter = 0 prev_rows_counter = 0 for i in tqdm(range(len(np_arr)), total=len(np_arr)): if np_arr[i][0] == 4: row_with_missing_data = [] prev_rows = extract_flights(np_arr[cur_idx:i], r, w) last_timestamp = np_arr[i][1] row_with_missing_data.append([4, float("NaN"), float("NaN"), last_timestamp, float("NaN"), float("NaN"), np_arr[i][2]]) result_matrix += prev_rows result_matrix += row_with_missing_data prev_rows_counter += len(prev_rows) temp_counter += 1 cur_idx = i+1 if cur_idx < len(np_arr): rows = extract_flights(np_arr[cur_idx:len(np_arr)], r, w) last_timestamp = rows[len(rows)-1][6] result_matrix += rows if last_timestamp < np_arr[len(np_arr)-1][1]: if result_matrix[len(result_matrix)-1][0] != 2: pause = [2, rows[len(rows)-1][4], rows[len(rows)-1][5], last_timestamp, float("NaN"), float("NaN"), np_arr[len(np_arr)-1][1]] result_matrix.append(pause) else: result_matrix[len(result_matrix)-1][6] = np_arr[len(np_arr)-1][1] result_df = pd.DataFrame(data=result_matrix, index=None, columns=["code", "lon1", "lat1", "t1", "lon2", "lat2", "t2"]) return result_df # input1 = x_list --> np array [x0, x1, x2, ...xn] # input2 = y_list def is_flight(x_listt, y_listt, r, w): x_list = np.array(x_listt) y_list = np.array(y_listt) num_of_rows = len(x_list) prev_lat_lon = np.array((x_list[0], y_list[0])) #x0, y0 next_lat_lon = np.array((x_list[num_of_rows-1], y_list[num_of_rows-1])) # euclidean_dist = euclidean distance of [x1,y1] and [x2, y2] single_euclidean_dist = np.linalg.norm(prev_lat_lon - next_lat_lon) if single_euclidean_dist < r: #DONE return False next_x = x_list[1:num_of_rows] prev_x = x_list[0:num_of_rows-1] subs_x = next_x - prev_x pow_x = np.power(subs_x, 2) next_y = y_list[1:num_of_rows] prev_y = y_list[0:num_of_rows-1] subs_y = next_y - prev_y pow_y = np.power(subs_y, 2) sum_sqrt_x_y = np.sqrt(pow_x + pow_y) if np.min(sum_sqrt_x_y) < r: return False if num_of_rows == 2: return True if x_list[0] == x_list[num_of_rows-1]: if np.max(np.abs(x_list[1: num_of_rows])) > w: return False else: return True if x_list[0] > x_list[num_of_rows-1]: x_list = x_list[::-1] y_list = y_list[::-1] x_new = x_list - x_list[0] y_new = y_list - y_list[0] theta = -1*np.arctan(y_new[num_of_rows-1]/x_new[num_of_rows-1]) divvv = y_new[num_of_rows-1]/x_new[num_of_rows-1] costheta = np.cos(theta) sintheta = np.sin(theta) a_matrix = np.array(([[costheta, -1*sintheta], [sintheta, costheta]])) temp_matrix = np.array((x_new[1:num_of_rows-1], y_new[1:num_of_rows-1])) rotation_pts = np.matmul(a_matrix, temp_matrix) if np.max(np.abs(rotation_pts[1])) > w: return False else: return True def lat_long_to_xy(df,R=6371000.0): #1000.000 df1 = df[df[0] == 1.0].copy() lat_v,lon_v = df1[2].values, df1[3].values th0 = np.min(lon_v) th1 = np.max(lon_v) ph0 = np.min(lat_v) ph1 = np.max(lat_v) pi = math.pi d1 = 2*pi*R*((ph1-ph0)*2*pi/360)/(2*pi) d1 = 2*pi*R*((ph1-ph0)*2*pi/360)/(2*pi) d2 = 2*pi*(R*np.sin(pi/2-ph1*2*pi/360))*((th1-th0)*2*pi/360)/(2*pi) d3 = 2*pi*(R*np.sin(pi/2-ph0*2*pi/360))*((th1-th0)*2*pi/360)/(2*pi) for idx, row in df.iterrows(): case = row[0] latitude = row[2] longitude = row[3] if case == 1: w1=(latitude-ph0)/(ph1-ph0) w2=(longitude-th0)/(th1-th0) # df[4][idx]= w1 * np.abs(d3-d2)/2+w2*(d3*(1-w1)+d2*w1) df.loc[idx, 4] = w1 * np.abs(d3-d2)/2+w2*(d3*(1-w1)+d2*w1) # df[5][idx]= w1 * d1 * np.sin(np.arccos(np.abs((d3-d2)/(2*d1)))) df.loc[idx, 5] = w1 * d1 * np.sin(np.arccos(np.abs((d3-d2)/(2*d1)))) return df def extract_flights(matrix, r, w): np_matrix = np.array(matrix) if len(np_matrix) == 1: row = [[3, np_matrix[0][4], np_matrix[0][5], np_matrix[0][1], float("NaN"), float("NaN"), float("NaN")]] return row cur_idx = 0 output = [] timestamp = np_matrix[cur_idx][1] x = np_matrix[cur_idx][4] y = np_matrix[cur_idx][5] distance_line = [x, y, timestamp, float("NaN"), float("NaN"), float("NaN")] flight_stat = False last_pause_time = 0 last_pause_idx = -1 while True: next_idx = cur_idx + 1 if next_idx == len(np_matrix)-1: distance_line[3] = np_matrix[next_idx][4] distance_line[4] = np_matrix[next_idx][5] distance_line[5] = np_matrix[next_idx][1] output.append(distance_line) break while True: part_of_matrix = np_matrix[cur_idx:next_idx+1] x_s = part_of_matrix[:,4] y_s = part_of_matrix[:,5] flight_stat = is_flight(x_s, y_s, r, w) if not flight_stat: last_pause_time = part_of_matrix[len(part_of_matrix)-1][1] last_pause_idx = next_idx break next_idx += 1 if next_idx >= len(np_matrix): break if next_idx == cur_idx + 1 and next_idx < len(np_matrix): distance_line[2] = np_matrix[next_idx][1] np_matrix = np.delete(np_matrix, (next_idx), axis=0) else: distance_line[3] = np_matrix[next_idx-1][4] distance_line[4] = np_matrix[next_idx-1][5] distance_line[5] = np_matrix[next_idx-1][1] output.append(distance_line) cur_idx = next_idx - 1 distance_line = [np_matrix[cur_idx][4], np_matrix[cur_idx][5], np_matrix[cur_idx][1], float("NaN"), float("NaN"), float("NaN")] if next_idx >= len(np_matrix): break if len(output) == 0: last_timestamp_in_matrix = np_matrix[len(np_matrix)-1][1] row = [2, x, y, np_matrix[0][1], float("NaN"), float("NaN"), last_timestamp_in_matrix] return [row] result = [] if timestamp < output[0][2]: row = [2, x, y, timestamp, float("NaN"), float("NaN"), output[0][2]] result.append(row) if len(output) == 1: result[0][6] = output[0][5] return result if len(output) == 1: result = [[1] + output[0]] return result for idx, row in enumerate(output): if result != []: cur_time = row[2] prev_row = result[len(result)-1] if prev_row[0] == 2: #is a pause if cur_time == prev_row[6]: row[0] = prev_row[1] row[1] = prev_row[2] result.append([1] + row) else: print("error sanity check") #sanity check else: if prev_row[6] != cur_time: #add pause prev_x = prev_row[4] prev_y = prev_row[5] prev_t = prev_row[6] pause_row = [2, prev_x, prev_y, prev_t, float("NaN"), float("NaN"), cur_time] result.append(pause_row) flight_row = [1, prev_x, prev_y, cur_time, row[3], row[4], row[5]] result.append(flight_row) else: result.append([1] + row) else: result.append([1] + row) last_row = result[len(result)-1] last_timestamp = last_row[6] result = np.array(result) ID_flight = np.where(result[:,0] == 1)[0] ID_pause = np.where(((result[ID_flight,1] - result[ID_flight,4]) ** 2 + (result[ID_flight,2]-result[ID_flight,5])**2) == 0)[0] if len(ID_pause) > 0: for i in len(ID_pause): result[ID_flight[ID_pause], 0] = 2 result[ID_flight[ID_pause], 4] = float("nan") result[ID_flight[ID_pause], 5] = float("nan") result = list(result) if last_timestamp < last_pause_time: status = last_row[0] if status == 1: x_0 = last_row[4] y_0 = last_row[5] else: x_0 = last_row[1] y_0 = last_row[2] x_1 = np_matrix[last_pause_idx][4] y_1 = np_matrix[last_pause_idx][5] flight_stat = is_flight(np.array([x_0, x_1]), np.array([y_0, y_1]), r, w) if flight_stat: end_row = [1, x_0, y_0, last_timestamp, x_1, y_1, last_pause_time] else: end_row = [2, x_0, y_0, last_timestamp, float("NaN"), float("NaN"), last_pause_time] result.append(end_row) last_row_in_matrix = np_matrix[len(np_matrix)-1] last_timestamp_in_matrix = last_row_in_matrix[1] last_row = result[len(result)-1] last_timestamp = last_row[6] if last_timestamp < last_timestamp_in_matrix: status = last_row[0] if status == 1: x_0 = last_row[4] y_0 = last_row[5] else: x_0 = last_row[1] y_0 = last_row[2] x_1 = last_row_in_matrix[4] y_1 = last_row_in_matrix[5] flight_stat = is_flight(np.array([x_0, x_1]), np.array([y_0, y_1]), r, w) if flight_stat: end_row = [1, x_0, y_0, last_timestamp, x_1, y_1, last_timestamp_in_matrix] result.append(end_row) else: if result[len(result)-1][0] == 2: result[len(result)-1][6] = last_timestamp_in_matrix else: end_row = [2, x_0, y_0, last_timestamp, float("NaN"), float("NaN"), last_timestamp_in_matrix] result.append(end_row) return result def guess_pause(matrix, min_duration=300, pause_rad=75): print("Inferring pauses...") np_matrix = np.array(matrix) flatmat = get_flatmat(np_matrix, min_duration, pause_rad) result = collapse_pause_in_matrix(np_matrix, flatmat) return result def collapse_pause_in_matrix(matrix, flatmat): if len(flatmat) == 0: return matrix else: output_mat = [] if flatmat[0][0] > 1: output_mat = list(matrix[0:flatmat[0][0]-1,]) for i in range(len(flatmat)): start_idx = flatmat[i][0] end_idx = flatmat[i][1] result = collapse_to_pause(matrix[start_idx:end_idx+1]) output_mat.append(result) if i+1 < len(flatmat) and flatmat[i][1] < flatmat[i+1][1] - 1: another_start = flatmat[i][1]+1 another_end = flatmat[i+1][0] rows = matrix[another_start:another_end] for row in rows: output_mat.append(row.tolist()) if flatmat[-1][1] < len(matrix)-1: the_idx = flatmat[-1][1]+1 rows = matrix[the_idx:len(matrix),] for row in rows: output_mat.append(row.tolist()) if len(output_mat) == 1: return output_mat else: # Group together adjacent pauses, averaging their location flatmat2 = [] output_mat2 = [] collapse = False cstart = 0 for i in range(1, len(output_mat)): if output_mat[i][0] != 2 and not collapse: continue elif output_mat[i][0] != 2 and collapse: collapse = False flatmat2.append([cstart, i-1]) elif output_mat[i][0] == 2 and output_mat[i-1][0]==2 and not collapse: cstart = i-1 collapse = True elif output_mat[i][0] == 2 and collapse: continue if collapse and output_mat[-1][0] == 2: flatmat2.append([cstart, len(output_mat)-1]) if flatmat2 == []: output_mat2 = output_mat else: output_mat2 = [] if flatmat2[0][0] > 1: end_idx = flatmat2[0][0] output_mat2 = output_mat[0: end_idx] for i in range(len(flatmat2)): start_idx = flatmat2[i][0] end_idx = flatmat2[i][1]+1 temp = output_mat[start_idx:end_idx] result = collapse_to_pause(temp) output_mat2.append(result) if i < len(flatmat2)-1 and flatmat2[i][1] < flatmat2[i+1][0]-1: init_idx = flatmat2[i][1]+1 end_idx = flatmat2[i+1][0] result2 = output_mat[init_idx:end_idx] temp_result2 = output_mat[init_idx:end_idx+1] output_mat2 += result2 if flatmat2[-1][1] < len(output_mat): begin = (flatmat2[-1][1]+1) end = len(output_mat) temp = output_mat[begin:len(output_mat)] for t in temp: output_mat2.append(t) # Set flight endpoints equal to pause endpoints if output_mat2[0][0]== 1 and output_mat2[1][0] == 2: output_mat2[0][4] = output_mat2[1][1] output_mat2[0][5] = output_mat2[1][2] for i in range(1, len(output_mat2)-1): if output_mat2[i][0] == 1 and output_mat2[i-1][0] == 2: output_mat2[i][1] = output_mat2[i-1][1] output_mat2[i][2] = output_mat2[i-1][2] if output_mat2[i][0] == 1 and output_mat2[i+1][0] == 2: output_mat2[i][4] = output_mat2[i+1][1] output_mat2[i][5] = output_mat2[i+1][2] if output_mat2[-2][0] == 2 and output_mat2[-1][0] == 1: output_mat2[-1][1] = output_mat2[-2][1] output_mat2[-1][2] = output_mat2[-2][2] return output_mat2 def get_flatmat(np_matrix, min_duration=300, pause_rad=60): index = 0 incr = 1 t_current = np_matrix[index][3] collapse = False flatmat = [] counter_here = 0 while True: sec_time = np_matrix[index+incr][6] if sec_time: t_next = sec_time else: t_next = np_matrix[index+incr][3] tmp_again = t_next - t_current if str(tmp_again) == "nan": tmp_again = min_duration if tmp_again >= min_duration: tmp = index+incr if np_matrix[index+incr][0] == 1: #combine counter_here += 1 left_cols = np_matrix[index:(index+incr+1),1:3] right_cols = np_matrix[index:(index+incr+1),4:6] combined = np.concatenate((left_cols, right_cols)) max_rad = get_max_radius(combined) else: left_cols = np_matrix[index:(index+incr+1),1:3] max_rad = get_max_radius(left_cols) if max_rad > pause_rad and not collapse: index += 1 sec_time = np_matrix[index][6] t_current = np_matrix[index][0] incr = 0 if max_rad > pause_rad and collapse: is_else = False if np_matrix[index+incr-1, 0] == 4: if index < index+incr - 2: flatmat.append([index, index+incr-2]) else: end_idx = index + incr - 1 if index != end_idx: flatmat.append([index, end_idx]) is_else = True index += incr sec_time = np_matrix[index][6] t_current = np_matrix[index][0] incr = 0 collapse = False if max_rad <= pause_rad: collapse = True incr += 1 if index + incr >= len(np_matrix): if max_rad <= pause_rad and t_next - t_current >= min_duration: flatmat.append([index, index+incr-1]) break return flatmat def collapse_to_pause(matrix): if len(matrix) == 1: return matrix[0] #check if there is None for i, row in enumerate(matrix): for j, cell in enumerate(row): if not cell: matrix[i][j] = float("NaN") matrix = np.array(matrix) center = np.nanmean(matrix[:,1:3], axis=0) if str(matrix[-1][6]) != "nan": row = [2, center[0], center[1], matrix[0][3], float("NaN"), float("NaN"), matrix[-1][6]] else: row = [2, center[0], center[1], matrix[0][3], float("NaN"), float("NaN"), matrix[-1][3]] return row def get_max_radius(matrix): center_of_points = np.nanmean(matrix, axis=0) x_list = matrix[:,0] y_list = matrix[:,1] dists = np.sqrt((x_list - center_of_points[0])**2 + (y_list - center_of_points[1])**2) max_dist = np.nanmax(dists) return max_dist def initialize_params(matrix): np_matrix = np.array(matrix) one = np.where(np_matrix[:,0] == 1) two = np.where(np_matrix[:,0] == 2) three = np.where(np_matrix[:,0] == 3) four = np.where(np_matrix[:,0] == 4) if len(one) > 1: ID1p1 = one[0]+1 condition1 = one[0][len(one[0])-1] if len(one[0]) > 0 and condition1 == len(np_matrix)-1: ID1p1 = ID1p1[:len(ID1p1)-1] all_timestamp = np.apply_along_axis(np.mean, 1, np_matrix[:,[3,6]]) all_x = np_matrix[:, 1] all_y = np_matrix[:, 2] #which code 1 is followed by 1 ind11 = ID1p1[np.where(np_matrix[ID1p1,0] == 1)] #which flight(1) followed by pause (2) ind12 = ID1p1[np.where(np_matrix[ID1p1,0] == 2)] l1 = len(ind11) l2 = len(ind12) if (l1 + l2) > 0: phatall = l2/(l1+l2) if (l1+l2) == 0: phatall = len(two)/(len(one) + len(two)) # ------ flight distances & times & pauses ------- flight_distances = [] flight_times = [] pause_times = [] for row in np_matrix: if row[0] == 1: distance = np.sqrt((row[1]-row[4])**2 + (row[2] - row[5])**2) flight_distances.append(distance) times = row[6] - row[3] flight_times.append(times) if row[0] == 2: pause = row[6] - row[3] pause_times.append(pause) fxs = np_matrix[one][:,1] fys = np_matrix[one][:,2] fa = np.zeros(len(one[0])) yvals = np_matrix[one][:,5] - np_matrix[one][:,2] xvals = np_matrix[one][:,4] - np_matrix[one][:,1] IDyg0 = np.where(yvals[:] >=0)[0] IDxg0 = np.where(xvals[:] >=0)[0] IDyl0 = np.where(yvals[:] < 0)[0] IDxl0 = np.where(xvals[:] < 0)[0] IDgg = list(set(IDyg0) & set(IDxg0)) IDlg = list(set(IDyg0) & set(IDxl0)) IDgl = list(set(IDyl0) & set(IDxg0)) IDll = list(set(IDyl0) & set(IDxl0)) IDgg.sort() IDlg.sort() IDgl.sort() IDll.sort() fa[IDgg] = np.arctan(list(yvals[IDgg]/xvals[IDgg])) fa[IDgl] = np.arctan(list(yvals[IDgl]/xvals[IDgl]))+2*math.pi fa[IDlg] = np.arctan(list(yvals[IDlg]/xvals[IDlg])) +math.pi fa[IDll] = np.arctan(list(yvals[IDll]/xvals[IDll])) +math.pi #flight timestamps flight_timestamps = np_matrix[one][:,3] pxs = np_matrix[two][:,1] #x pys = np_matrix[two][:,2] #y pts = np_matrix[two][:,3] #pause timestamps the_dict = {'ID1': one, "ID2": two, "ID3": three, "ID4":four, "ID1p1": ID1p1, "allts": all_timestamp, "ind11": ind11, "ind12": ind12, "phatall": phatall, "fd": flight_distances, "ft": flight_times, "fa": fa, "fts": flight_timestamps, "pt": pause_times, "pts": pts, "fxs": fxs, "fys": fys, "pxs": pxs, "pys": pys, "allxs": all_x, "allys": all_y} return the_dict else: print("No flight") return None #simulate_mobility_gaps #impute the missing gaps hot-tech computation def simulate_mobility_gaps(matrix, obj, wtype, spread_pars): ind11 = obj['ind11'] ind12 = obj['ind12'] fd = obj['fd'] ft = obj['ft'] fts = obj['fts'] fa = obj['fa'] pts = obj['pts'] allts = obj['allts'] phatall = obj['phatall'] fxs = obj['fxs'] fys = obj['fys'] pxs = obj['pxs'] pys = obj['pys'] allxs = obj['allxs'] allys = obj['allys'] pt = obj['pt'] if len(matrix) == 0: return matrix f_outmat = [] for i in tqdm(range(len(matrix)), total=len(matrix)): if matrix[i][0] == 1: cur_x = matrix[i][4] cur_y = matrix[i][5] f_outmat.append(matrix[i]) elif matrix[i][0] <= 3: cur_x = matrix[i][1] cur_y = matrix[i][2] f_outmat.append(matrix[i]) else: #check here if i > 0 and i < len(matrix) - 1: varmult = 1 while True: #----- fw ------ denominator = varmult*(matrix[i][6]-matrix[i][3]) the_mean = np.mean([matrix[i][3],matrix[i][6]]) temp_fw = (fts-the_mean)/denominator temp_pw = (pts-the_mean)/denominator temp_allw = (allts-the_mean)/denominator fw = norm.pdf(list(temp_fw)) pw = norm.pdf(list(temp_pw)) allw = norm.pdf(list(temp_allw)) if len(pts)>0 and len(fts)>0 and np.sum(fw)>0 and np.sum(pw)>0: break if len(pts) == 0 and len(fts)>0 and np.sum(fw)>0: break if len(fts) == 0 and len(pts)>0 and np.sum(pw)>0: break varmult = varmult*2 s11 = np.nansum(allw[ind11]) s12 = np.nansum(allw[ind12]) if (s11 + s12) == 0: phatcur = phatall else: phatcur = s12/float(s11+s12) if wtype == "LI": new_row = [1, cur_x, cur_y, matrix[i][3], matrix[i+1][1], matrix[i+1][2], matrix[i][6]] f_outmat.append(new_row) else: rb_out = random_bridge(x0=cur_x,y0=cur_y,x1=matrix[i+1][1],y1=matrix[i+1][2],t0=matrix[i][3],t1=matrix[i][6],fd=fd,ft=ft,fts=fts,fa=fa,fw=fw,probp=phatcur,pt=pt,pts=pts,pw=pw,allts=allts,allw=allw,ind11=ind11,ind12=ind12,i_ind=i,pxs=pxs,pys=pys,fxs=fxs,fys=fys,allxs=allxs,allys=allys,wtype=wtype,canpause=matrix[i-1][0]==1,spread_pars=spread_pars,niter=100) old_foutmat = f_outmat f_outmat = f_outmat + rb_out return f_outmat def get_weights(fxs, cur_x, fys, cur_y, varmult, spread_pars, fts, cur_t, t1, t0, weight_type): if weight_type == "TL": calc = spread_pars[0]*(fts-cur_t)/(varmult*(t1-t0)) t_score = t.pdf(calc, df=spread_pars[1]) return t_score elif weight_type == "GL": pow_xs = (fxs-cur_x)**2 pow_ys = (fys-cur_y)**2 sum_x_y = np.array(pow_xs + pow_ys, dtype=np.float64) distance = np.sqrt(sum_x_y) calc = spread_pars[0]*distance/(50*varmult) t_score = t.pdf(calc,df=spread_pars[1]) return t_score elif weight_type == "GLR": pow_xs = (fxs-cur_x)**2 pow_ys = (fys-cur_y)**2 sum_x_y = np.array(pow_xs + pow_ys, dtype=np.float64) distance = np.sqrt(sum_x_y) calc = spread_pars[0]*distance/(50*varmult) left = t.pdf(calc,df=spread_pars[1]) temp1 = np.abs((fts-cur_t))%(60*60*24) temp2 = (60*60*24)-np.abs((fts-cur_t))%(60*60*24) min_num = np.minimum(temp1, temp2) top = np.array(spread_pars[0]* min_num) bottom = np.array(varmult*(t1-t0)) top_div_bottom = list(top/bottom) right = t.pdf(top_div_bottom,df=spread_pars[1]) fw = left * right return fw def random_bridge(x0,y0,x1,y1,t0,t1,fd,ft,fts,fa,fw,probp,pt,pts,pw,allts,allw,ind11,ind12,i_ind,pxs,pys,fxs,fys,allxs,allys,wtype,canpause,spread_pars,niter=100): success = False for i in range(niter): outmat = [] cur_x = x0 cur_y = y0 cur_t = t0 t_arrive = t0 while True: varmult = 1 while True: fw = get_weights(fxs, cur_x, fys, cur_y, varmult, spread_pars, fts, cur_t, t1, t0, weight_type=wtype) pw = get_weights(pxs, cur_x, pys, cur_y, varmult, spread_pars, pts, cur_t, t1, t0, weight_type=wtype) allw = get_weights(allxs, cur_x, allys, cur_y, varmult, spread_pars, allts, cur_t, t1, t0, weight_type=wtype) if len(pts)>0 and len(fts)>0 and np.sum(fw)>0 and np.sum(pw)>0: break if len(pts) == 0 and len(fts)>0 and np.sum(fw)>0: break if len(fts) == 0 and len(pts)>0 and np.sum(pw)>0: break varmult = varmult*2 s11 = np.nansum(allw[ind11]) s12 = np.nansum(allw[ind12]) if (s11 + s12) == 0: phatcur = probp else: phatcur = s12/float(s11+s12) probp = phatcur uniform_sample = np.random.uniform(0, 1, 1) if canpause and uniform_sample < probp: canpause = False normalized_pw = pw/np.sum(pw) p_samp = np.random.choice(pt, 1, replace=False, p=normalized_pw) if (cur_t + p_samp) < 1: next_line = [2, cur_x, cur_y, cur_t, float("Nan"), float("Nan"), cur_t + p_samp] cur_t = cur_t + p_samp outmat.append(next_line) else: break else: canpause = True the_list = list(range(0,len(fa))) normalized_fw = fw/np.sum(fw) IDsamp = np.random.choice(the_list, 1, replace=False, p=normalized_fw)[0] a_samp = fa[IDsamp] d_samp = fd[IDsamp] t_samp = ft[IDsamp] if cur_t + t_samp < t1: next_x = cur_x + np.cos(a_samp)*d_samp next_y = cur_y + np.sin(a_samp) * d_samp if str(next_x) == "nan": break next_line = [1, cur_x, cur_y, cur_t, next_x, next_y, cur_t + t_samp] cur_t = cur_t + t_samp cur_x = next_x cur_y = next_y outmat.append(next_line) t_arrive = cur_t else: break if t_arrive > t0: success = True break if not success: return [[1, x0, y0, t0, x1, y1, t1]] else: cur_x = x0 cur_y = y0 for i in range(len(outmat)): if outmat[i][0] == 1: outmat[i][1] = cur_x outmat[i][2] = cur_y cur_t = outmat[i][6] w = (t_arrive-cur_t)/(t_arrive - t0) outmat[i][4]=outmat[i][4]*w+x1*(1-w) outmat[i][5]=outmat[i][5]*w+y1*(1-w) cur_x=outmat[i][4] cur_y=outmat[i][5] else: cur_t=outmat[i][3] outmat[i][1]=cur_x outmat[i][2]=cur_y outmat[-1][6] = t1 return outmat #get significant locations def sig_locs(mobmat, obj, center_rad=125, timezone="", min_pause_time=600): np_matrix = np.array(mobmat, dtype=np.float64) obj = initialize_params(np_matrix) ID2_from_matrix = obj['ID2'][0] if len(ID2_from_matrix) == 0: print("No pauses in mobmat within function sig locs") return None elif len(ID2_from_matrix) == 1: x_for_outmat = [np_matrix[ID2_from_matrix[1]][1]] y_for_outmat = [np_matrix[ID2_from_matrix[1]][2]] timepresent_for_outmat = np.array([0]) home_for_outmat = np.array([1]) outmat=[x_for_outmat, y_for_outmat, timepresent_for_outmat, home_for_outmat] else: pt_div_min = np.array(obj["pt"])/min_pause_time ptred = np.floor(pt_div_min) pause_ids = np.where(ptred > 0)[0] if len(pause_ids) < 2: print("No pauses long enough in mobmat within function SigLocs!") return None pmat = [] for i in range(len(ID2_from_matrix)): if ptred[i] > 0: row_i = np_matrix[ID2_from_matrix[i]][1:3] duplicate_row_i = [row_i] * int(ptred[i]) pmat += duplicate_row_i kmeansk_v = list(range(2, len(pause_ids)+1)) previous_fit = [] for i in range(len(kmeansk_v)): kmeansk = kmeansk_v[i] kmeans_fit = KMeans(n_clusters=kmeansk, random_state=23).fit(pmat) the_distances = pdist(kmeans_fit.cluster_centers_, 'euclidean') min_dist = np.min(the_distances) if min_dist < center_rad*2 or i == len(kmeansk_v): if i > 0: kmeansk = kmeansk_v[i-1] kmeans_fit = previous_fit break previous_fit = kmeans_fit cluster_centers = kmeans_fit.cluster_centers_ num_row_centers = len(cluster_centers) outmat = [cluster_centers[:,0], cluster_centers[:,1], np.zeros(num_row_centers), np.zeros(num_row_centers)] #Determine time spent at these significant locations for i in range(len(ID2_from_matrix)): for j in range(num_row_centers): euc_dist = np.sqrt((np_matrix[ID2_from_matrix[i]][1] - outmat[0][j])**2 + (np_matrix[ID2_from_matrix[i]][2] - outmat[1][j])**2) if euc_dist < center_rad: outmat[2][j] = outmat[2][j] + obj['pt'][i] #Determine which is home (where is the night spent) for i in range(len(ID2_from_matrix)): avg_time = (np_matrix[ID2_from_matrix[i]][6]+np_matrix[ID2_from_matrix[i]][3])/2 if timezone == "": hour_of_day = datetime.datetime.fromtimestamp(avg_time).hour else: hour_of_day = datetime.datetime.fromtimestamp(avg_time, tz=tz.gettz(timezone)).hour if hour_of_day >= 21 or hour_of_day < 6: for j in range(num_row_centers): euc_dist = np.sqrt((np_matrix[ID2_from_matrix[i]][1] - outmat[0][j])**2 + (np_matrix[ID2_from_matrix[i]][2] - outmat[1][j])**2) if euc_dist < center_rad: outmat[3][j] = outmat[3][j] + obj['pt'][i] IDmax = np.argmax(outmat[3]) outmat[3][IDmax] = 1 transposed_outmat = np.array(outmat).transpose() zero_ids = np.where(transposed_outmat[:,2] ==0)[0] new_outmat = np.delete(transposed_outmat, zero_ids, axis=0) sorted_outmat = new_outmat[new_outmat[:,2].argsort()[::-1]] return sorted_outmat def home_time(matrix, slout, center_rad): IDhome = np.where(slout[:, 3] == 1)[0] x_center = slout[IDhome, 0] y_center = slout[IDhome, 1] tot_time = 0 for i in range(len(matrix)): if matrix[i][0]==2 and np.sqrt((matrix[i][1]-x_center)**2+(matrix[i][2]-y_center)**2) 0: matrix = np.delete(matrix, IDskip, axis=0) N = len(matrix) w_v = np.zeros(N) x_v = np.zeros(N) y_v = np.zeros(N) for i in range(N): if matrix[i][0] == 4: continue if matrix[i][0] == 3: x_v[i] = matrix[i][1] y_v[i] = matrix[i][2] w_v[i] = interval if matrix[i][0] == 1: x_v[i] = np.mean([matrix[i][1], matrix[i][4]]) y_v[i] = np.mean([matrix[i][2], matrix[i][5]]) w_v[i] = matrix[i][6]-matrix[i][3] if matrix[i][0] == 2: x_v[i] = matrix[i][1] y_v[i] = matrix[i][2] w_v[i] = matrix[i][6]-matrix[i][3] sum_w_v = np.sum(w_v) x_avg = np.sum(w_v*x_v)/sum_w_v y_avg = np.sum(w_v*y_v)/sum_w_v result = np.sqrt(np.sum(((x_v-x_avg)**2+(y_v-y_avg)**2)*w_v)/sum_w_v) return result def max_diameter(matrix): IDmv = np.where(matrix[:,0] <=2)[0] if len(IDmv) < 2: return 0 else: the_distances = pdist(matrix[IDmv,1:3], 'euclidean') max_dist = np.max(the_distances) return max_dist def max_home_distance(matrix, homex, homey): IDmv = np.where(matrix[:,0] <=2)[0] if len(IDmv) == 0: return None else: dfhome = np.zeros(len(IDmv)) for i in range(len(IDmv)): temp = np.sqrt((matrix[IDmv[i]][1]-homex)**2+(matrix[IDmv[i]][2]-homey)**2) dfhome[i] = temp return np.max(dfhome) def sig_locs_visited(matrix, slout, center_rad): places_visited = np.zeros(len(slout)) for i in range(len(matrix)): if matrix[i][0] <=3: for j in range(len(slout)): temp = np.sqrt((slout[j][0]-matrix[i][1])**2 + (slout[j][1]-matrix[i][2])**2) if temp < center_rad: places_visited[j] = 1 result = np.sum(places_visited) return result def avg_flight(matrix, avg_type): #avg_type = "length" or "duration" num = 0.0 total = 0.0 for i in range(len(matrix)): if matrix[i][0] == 1: added_number = 0 if avg_type == "length": added_number = np.sqrt((matrix[i][4]-matrix[i][1])**2 + (matrix[i][5]-matrix[i][2])**2) else: added_number = matrix[i][6] - matrix[i][3] total += added_number num += 1 if num == 0: return 0 else: return total/num def std_flight(matrix, std_type): #std_type = "length" or "duration" ID1 = np.where(matrix[:,0]==1)[0] if len(ID1) <= 1: return 0 else: if std_type == "length": temp = ((matrix[ID1,5]-matrix[ID1,2])**2 + (matrix[ID1, 4] - matrix[ID1, 1])**2) dist = np.sqrt(list((matrix[ID1,5]-matrix[ID1,2])**2 + (matrix[ID1, 4] - matrix[ID1, 1])**2)) std_result = np.std(dist) else: std_result = np.std(matrix[ID1, 6] - matrix[ID1,3]) return std_result def prob_pause(matrix): t_pause = 0.0 t_flight = 0.0 for i in range(len(matrix)): diff = matrix[i][6] - matrix[i][3] if matrix[i][0] == 1: t_flight += diff elif matrix[i][0] == 2: t_pause += diff result = t_pause/(t_pause+t_flight) return result def sig_loc_entropy(matrix, slout, center_rad): tp = np.zeros(len(slout)) for i in range(len(matrix)): if matrix[i][0] ==2: for j in range(len(slout)): temp = np.sqrt((slout[j][0]-matrix[i][1])**2 + (slout[j][1]-matrix[i][2])**2) if temp < center_rad: tp[j] += matrix[i][6]-matrix[i][3] total = 0 sum_tp = np.sum(tp) if sum_tp == 0: return 0 else: for i in range(len(slout)): p = tp[i]/sum_tp if p > 0: total = total - p*np.log(p) return total def mins_missing(matrix): total = 0.0 for i in range(len(matrix)): if matrix[i][0] == 4: total += matrix[i][6] - matrix[i][3] return total/60.0 def location_at(matrix, the_time): for i in range(len(matrix)): if matrix[i][0] <= 2: if matrix[i][3] <= the_time and matrix[i][6] >= the_time: return matrix[i][1], matrix[i][2] elif matrix[i][2] == 3: if matrix[i][3] == the_time: return matrix[i][1], matrix[i][2] return None def day_dist(i1, i2, mobmat, subset_inds_v,subset_start_time_v,center_rad): multiplier = np.arange(30*60, 60*60*24, 60*60) checkpoints = subset_start_time_v[i1] + multiplier checkpoints2 = subset_start_time_v[i2] + multiplier matrix1 = mobmat[subset_inds_v[i1]] matrix2 = mobmat[subset_inds_v[i2]] same_place = np.empty(24) same_place[:] = np.nan for i in range(24): loc1 = location_at(matrix1, checkpoints[i]) if loc1 == None: continue abs_diff = np.abs(matrix2[:,3]-checkpoints[i])%(60*60*24) smaller = list(np.where(abs_diff < 30*60)[0]) greater = list(np.where(abs_diff > (60*60*24)-30*60)[0]) IDs = smaller + greater if len(IDs) > 0: can_be_zero = False for j in range(len(IDs)): if matrix2[IDs[j]][0] <= 3: can_be_zero = True temp = np.sqrt((matrix2[IDs[j]][1]-loc1[0])**2 + (matrix2[IDs[j]][2]-loc1[1])**2) if temp < center_rad: same_place[i] = 1 if can_be_zero and np.isnan(same_place[i]): same_place[i] = 0 else: for j in range(len(matrix2)): if not np.isnan(matrix2[j][3]) and not np.isnan(matrix2[j][6]) and matrix2[j][3] < checkpoints2[i] and matrix2[j][6] > checkpoints2[i]: if matrix2[j][0] !=4 and np.sqrt((matrix2[j][1]-loc1[0])**2 + (matrix2[j][2]-loc1[1])**2) < center_rad: same_place[i] = 1 else: same_place[i] = 0 break if np.isnan(same_place).all(): return None else: return np.nanmean(same_place) def daily_routine_index(idx, mobmat, subset_inds_v, subset_day_of_week_v, subset_start_time_v, center_rad): submat = mobmat[subset_inds_v[idx]] daydist_v = np.ones(len(subset_inds_v)) daydist_v[:] = np.nan for i in range(len(subset_inds_v)): if i == idx: continue daydist_v[i] = day_dist(idx, i, mobmat, subset_inds_v, subset_start_time_v, center_rad) if len(np.where(daydist_v != -1)[0]) == 0: circ_score = None else: circ_score = np.nanmean(daydist_v) if subset_day_of_week_v[idx] == "Saturday" or subset_day_of_week_v[idx] == "Sunday": IDcompare = np.concatenate([np.where(subset_day_of_week_v == "Saturday")[0], np.where(subset_day_of_week_v == "Sunday")[0]]) else: IDcompare = np.concatenate([np.where(subset_day_of_week_v == "Monday")[0], np.where(subset_day_of_week_v == "Tuesday")[0], np.where(subset_day_of_week_v == "Wednesday")[0], np.where(subset_day_of_week_v == "Thursday")[0], np.where(subset_day_of_week_v == "Friday")[0]]) day_d_compare = daydist_v[IDcompare] not_nan = day_d_compare[~np.isnan(day_d_compare)] if len(IDcompare) == 0 or len(not_nan) == 0: week_score = None else: week_score = np.nanmean(daydist_v[IDcompare]) return circ_score, week_score def get_mobility_features(mobmat, obj, mobmatmiss, timezone, center_rad, interval): print("Get mobility features...") mobmat = np.array(mobmat) mobmatmiss = np.array(mobmatmiss) slout = sig_locs(mobmat, obj, center_rad, timezone=timezone) #x, y, timepresent, home IDhome = np.where(slout[:,3] ==1)[0] if len(IDhome) == 0: IDhome = 0 homex = slout[IDhome,0] homey = slout[IDhome,1] if timezone != "": date = datetime.datetime.fromtimestamp(mobmat[0][3], tz=tz.gettz(timezone)) else: date = datetime.datetime.fromtimestamp(mobmat[0][3]) current_year = date.year current_month = date.month current_day = date.day current_hour = date.hour current_minute = date.minute current_second = date.second weekday_mapping = {0: "Monday", 1: "Tuesday", 2: "Wednesday", 3: "Thursday", 4: "Friday", 5: "Saturday", 6: "Sunday"} subset_inds_v = {} subset_day_of_week_v = [] subset_start_time_v = [] cur_date = (current_year, current_month, current_day) daystr_v = [cur_date] day_ind=0 subset_inds = [0] if(len(mobmat)<2): outmat = None return outmat, slout else: for i in range(1, len(mobmat)): next_date_raw = datetime.datetime.fromtimestamp(mobmat[i][3]) next_date = (next_date_raw.year, next_date_raw.month, next_date_raw.day) if next_date == cur_date and i < len(mobmat)-1: if str(mobmat[i][6]) != "nan": end_date_raw = datetime.datetime.fromtimestamp(mobmat[i][6]) end_date = (end_date_raw.year, end_date_raw.month, end_date_raw.day) subset_inds.append(i) else: subset_inds_v[day_ind] = subset_inds cur_weekday = weekday_mapping[date.weekday()] subset_day_of_week_v.append(cur_weekday) to_timestamp = datetime.datetime.strptime("{}-{}-{}".format(cur_date[0], cur_date[1], cur_date[2]), "%Y-%m-%d").timestamp() subset_start_time_v.append(to_timestamp) day_ind += 1 if mobmat[i-1][0] == 2 and (mobmat[i-1][6]-mobmat[i-1][3])/(60*60*24)>1: ii = 1 temp_mid = mobmat[i-1][3]+(60*60*24)*ii mid_date_raw = datetime.datetime.fromtimestamp(temp_mid) mid_date = (mid_date_raw.year, mid_date_raw.month, mid_date_raw.day) while mid_date != next_date: subset_day_of_week_v.append(weekday_mapping[mid_date_raw.weekday()]) middate_numeric = datetime.datetime.strptime("{}-{}-{}".format(mid_date[0], mid_date[1], mid_date[2]), "%Y-%m-%d").timestamp() subset_start_time_v.append(middate_numeric) subset_inds_v[day_ind] = [i-1] day_ind += 1 if mid_date != daystr_v[-1]: daystr_v.append(mid_date) ii += 1 temp_mid = mobmat[i-1][3]+(60*60*24)*ii mid_date_raw = datetime.datetime.fromtimestamp(temp_mid) mid_date = (mid_date_raw.year, mid_date_raw.month, mid_date_raw.day) cur_date = next_date date = next_date_raw subset_inds = [i-1, i] if cur_date != daystr_v[-1] and not (len(daystr_v) == len(subset_inds_v) and len(mobmat) == i+1): daystr_v.append(cur_date) #### Partition mobmatmiss data into daily subsets: create subsetinds_v and daystr_v. cur_date_raw = datetime.datetime.fromtimestamp(mobmatmiss[0][3]) cur_date_miss = (cur_date_raw.year, cur_date_raw.month, cur_date_raw.day) cur_time_miss = (cur_date_raw.hour, cur_date_raw.month, cur_date_raw.second) miss_subset_inds_v = {} miss_daystr_v = [cur_date_miss] miss_dayind = 0 subset_inds = [0] for i in range(1, len(mobmatmiss)): next_date_raw = datetime.datetime.fromtimestamp(mobmatmiss[i][3]) next_date_miss = (next_date_raw.year, next_date_raw.month, next_date_raw.day) if next_date_miss == cur_date_miss and i < len(mobmatmiss)-1: subset_inds.append(i) else: cur_date_miss = next_date_miss miss_subset_inds_v[miss_dayind] = subset_inds if cur_date_miss != miss_daystr_v[-1] and not (len(miss_daystr_v) == len(miss_subset_inds_v) and i == len(mobmatmiss)-1): miss_daystr_v.append(cur_date_miss) subset_inds = [i] miss_dayind += 1 if mobmatmiss[i-1][0] == 2 and (mobmatmiss[i-1][6]-mobmatmiss[i-1][3])/(60*60*24)>1: ii = 1 temp_mid = mobmatmiss[i-1][3]+(60*60*24)*ii mid_date_raw = datetime.datetime.fromtimestamp(temp_mid) mid_date = (mid_date_raw.year, mid_date_raw.month, mid_date_raw.day) while mid_date != next_date_miss: miss_subset_inds_v[miss_dayind] = i-1 miss_dayind += 1 if mid_date != miss_daystr_v[-1]: miss_daystr_v.append(mid_date) ii += 1 temp_mid = mobmatmiss[i-1][3]+(60*60*24)*ii mid_date_raw = datetime.datetime.fromtimestamp(temp_mid) mid_date = (mid_date_raw.year, mid_date_raw.month, mid_date_raw.day) ##### intersect mobmat and mobmatmiss to ignore missing data IDkeep = [] IDkeepmiss = [] for i in range(len(daystr_v)): for j in range(len(miss_daystr_v)): if daystr_v[i] == miss_daystr_v[j]: IDkeep.append(i) IDkeepmiss.append(j) break if len(IDkeep) == 0: outmat = None return outmat, slout daystr_v = np.array(daystr_v)[IDkeep] subset_day_of_week_v = np.array(subset_day_of_week_v)[IDkeep] subset_start_time_v = np.array(subset_start_time_v)[IDkeep] miss_daystr_v = np.array(miss_daystr_v)[IDkeep] num_of_features = 15 outmat = np.zeros((len(daystr_v), num_of_features)) submat = [] temp_last = mobmat[subset_inds_v[IDkeep[len(daystr_v)-1]]] for i in range(len(daystr_v)): submat = mobmat[subset_inds_v[IDkeep[i]]] if submat[0][0] == 2 and submat[0][3] < subset_start_time_v[i]: submat[0][3] = subset_start_time_v[i] if submat[-1][0] == 2 and submat[-1][6] > subset_start_time_v[i]+60*60*24: submat[-1][6] = subset_start_time_v[i]+60*60*24 submat_miss = mobmatmiss[miss_subset_inds_v[IDkeep[i]]] if submat_miss[0][0] == 4 and submat_miss[0][3] < subset_start_time_v[i]: submat_miss[0][3] = subset_start_time_v[i] if submat_miss[-1][0] == 4 and submat_miss[-1][6] > subset_start_time_v[i]+60*60*24: submat_miss[-1][6] = subset_start_time_v[i]+60*60*24 if len(submat) == 0 or len(np.where(slout[:,3] ==1)[0]) == 0: outmat[i] = [float("NaN")]*15 outmat[i][12] = 1440 else: outmat[i][0] = home_time(submat, slout, center_rad=200) outmat[i][1] = distance_traveled(submat) outmat[i][2] = radius_of_gyration(submat, interval) outmat[i][3] = max_diameter(submat) outmat[i][4] = max_home_distance(submat, homex, homey) outmat[i][5] = sig_locs_visited(submat, slout, center_rad) outmat[i][6] = avg_flight(submat, "length") outmat[i][7] = std_flight(submat, "length") outmat[i][8] = avg_flight(submat, "duration") outmat[i][9] = std_flight(submat, "duration") outmat[i][10] = prob_pause(submat) outmat[i][11] = sig_loc_entropy(submat, slout, center_rad) outmat[i][12] = mins_missing(submat_miss) dri_output = daily_routine_index(i, mobmat, subset_inds_v, subset_day_of_week_v, subset_start_time_v, center_rad) outmat[i][13] = dri_output[0] #seven days equally circadian routine outmat[i][14] = dri_output[1] #separately return outmat, slout, daystr_v #daystr_v = row names