From 13a6537b1420b0f13badc066ca4dfacd3bd40a92 Mon Sep 17 00:00:00 2001 From: JulioV Date: Tue, 5 Nov 2019 15:17:20 -0500 Subject: [PATCH] Add Barnett's location features --- Snakefile | 1 + config.yaml | 12 +- rules/features.snakefile | 13 +- src/features/location_barnett/AvgFlightDur.R | 13 ++ src/features/location_barnett/AvgFlightLen.R | 13 ++ .../location_barnett/Collapse2Pause.R | 9 ++ .../location_barnett/DailyMobilityPlots.R | 42 +++++ .../location_barnett/DailyRoutineIndex.R | 25 +++ src/features/location_barnett/DayDist.R | 42 +++++ .../location_barnett/DistanceTravelled.R | 10 ++ .../location_barnett/ExtractFlights.R | 71 ++++++++ .../location_barnett/ExtractTimePeriod.R | 8 + src/features/location_barnett/GPS2MobMat.R | 78 +++++++++ .../location_barnett/GPSmobility-internal.R | 127 +++++++++++++++ .../location_barnett/GetMobilityFeaturesMat.R | 151 ++++++++++++++++++ src/features/location_barnett/GuessPause.R | 134 ++++++++++++++++ src/features/location_barnett/Hometime.R | 13 ++ .../location_barnett/InitializeParams.R | 58 +++++++ src/features/location_barnett/IsFlight.R | 33 ++++ src/features/location_barnett/LatLong2XY.R | 19 +++ src/features/location_barnett/LocationAt.R | 15 ++ src/features/location_barnett/MaxDiam.R | 6 + .../MaxDistBetweenTrajectories.R | 29 ++++ src/features/location_barnett/MaxHomeDist.R | 10 ++ src/features/location_barnett/MaxRadius.R | 5 + src/features/location_barnett/MinsMissing.R | 10 ++ .../location_barnett/MobilityFeatures.R | 56 +++++++ .../location_barnett/MobmatQualityOK.R | 14 ++ src/features/location_barnett/ProbPause.R | 14 ++ src/features/location_barnett/ProgressBar.R | 30 ++++ .../location_barnett/RadiusOfGyration.R | 36 +++++ src/features/location_barnett/RandomBridge.R | 101 ++++++++++++ src/features/location_barnett/SigLocEntropy.R | 22 +++ src/features/location_barnett/SigLocs.R | 68 ++++++++ .../location_barnett/SigLocsVisited.R | 14 ++ .../location_barnett/SimulateMobilityGaps.R | 41 +++++ src/features/location_barnett/StdFlightDur.R | 11 ++ src/features/location_barnett/StdFlightLen.R | 11 ++ .../WriteSurveyAnswers2File.R | 35 ++++ src/features/location_barnett/plot.flights.R | 96 +++++++++++ src/features/location_barnett/plotlimits.R | 14 ++ src/features/location_barnett_metrics.R | 22 +++ 42 files changed, 1529 insertions(+), 3 deletions(-) create mode 100644 src/features/location_barnett/AvgFlightDur.R create mode 100644 src/features/location_barnett/AvgFlightLen.R create mode 100644 src/features/location_barnett/Collapse2Pause.R create mode 100644 src/features/location_barnett/DailyMobilityPlots.R create mode 100644 src/features/location_barnett/DailyRoutineIndex.R create mode 100644 src/features/location_barnett/DayDist.R create mode 100644 src/features/location_barnett/DistanceTravelled.R create mode 100644 src/features/location_barnett/ExtractFlights.R create mode 100644 src/features/location_barnett/ExtractTimePeriod.R create mode 100644 src/features/location_barnett/GPS2MobMat.R create mode 100644 src/features/location_barnett/GPSmobility-internal.R create mode 100644 src/features/location_barnett/GetMobilityFeaturesMat.R create mode 100644 src/features/location_barnett/GuessPause.R create mode 100644 src/features/location_barnett/Hometime.R create mode 100644 src/features/location_barnett/InitializeParams.R create mode 100644 src/features/location_barnett/IsFlight.R create mode 100644 src/features/location_barnett/LatLong2XY.R create mode 100644 src/features/location_barnett/LocationAt.R create mode 100644 src/features/location_barnett/MaxDiam.R create mode 100644 src/features/location_barnett/MaxDistBetweenTrajectories.R create mode 100644 src/features/location_barnett/MaxHomeDist.R create mode 100644 src/features/location_barnett/MaxRadius.R create mode 100644 src/features/location_barnett/MinsMissing.R create mode 100644 src/features/location_barnett/MobilityFeatures.R create mode 100644 src/features/location_barnett/MobmatQualityOK.R create mode 100644 src/features/location_barnett/ProbPause.R create mode 100644 src/features/location_barnett/ProgressBar.R create mode 100644 src/features/location_barnett/RadiusOfGyration.R create mode 100644 src/features/location_barnett/RandomBridge.R create mode 100644 src/features/location_barnett/SigLocEntropy.R create mode 100644 src/features/location_barnett/SigLocs.R create mode 100644 src/features/location_barnett/SigLocsVisited.R create mode 100644 src/features/location_barnett/SimulateMobilityGaps.R create mode 100644 src/features/location_barnett/StdFlightDur.R create mode 100644 src/features/location_barnett/StdFlightLen.R create mode 100644 src/features/location_barnett/WriteSurveyAnswers2File.R create mode 100644 src/features/location_barnett/plot.flights.R create mode 100644 src/features/location_barnett/plotlimits.R create mode 100644 src/features/location_barnett_metrics.R diff --git a/Snakefile b/Snakefile index 4d64cfc6..5976cc0a 100644 --- a/Snakefile +++ b/Snakefile @@ -24,6 +24,7 @@ rule all: call_type = config["COM_CALL"]["CALL_TYPE_TAKEN"], segment = config["COM_CALL"]["DAY_SEGMENTS"], metric = config["COM_CALL"]["METRICS_TAKEN"]), + expand("data/processed/{pid}/location_barnett_metrics.csv", pid=config["PIDS"]), # Reports expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]), expand("reports/figures/{pid}/compliance_heatmap.html", pid=config["PIDS"], sensor=config["SENSORS"]), diff --git a/config.yaml b/config.yaml index 66b251b8..48ff0c3d 100644 --- a/config.yaml +++ b/config.yaml @@ -10,13 +10,17 @@ PIDS: [p01, p02] DAY_SEGMENTS: &day_segments [daily, morning, afternoon, evening, night] +# Global timezone +TIMEZONE: &timezone + EST + # Download data config DOWNLOAD_DATASET: GROUP: AAPECS # Readable datetime config READABLE_DATETIME: - FIXED_TIMEZONE: EST + FIXED_TIMEZONE: *timezone # Communication SMS features config COM_SMS: @@ -36,4 +40,8 @@ COM_CALL: PHONE_VALID_SENSED_DAYS: BIN_SIZE: 5 # (in minutes) MIN_VALID_HOURS: 20 # (out of 24) - MIN_BINS_PER_HOUR: 8 # (out of 60min/BIN_SIZE bins) \ No newline at end of file + MIN_BINS_PER_HOUR: 8 # (out of 60min/BIN_SIZE bins) + +BARNETT_LOCATION: + ACCURACY_LIMIT: 51 # filters location coordinates with an accuracy higher than this + TIMEZONE: *timezone \ No newline at end of file diff --git a/rules/features.snakefile b/rules/features.snakefile index b1ced2b7..14544319 100644 --- a/rules/features.snakefile +++ b/rules/features.snakefile @@ -28,4 +28,15 @@ rule battery_deltas: output: "data/processed/{pid}/battery_deltas.csv" script: - "../src/features/battery_deltas.R" \ No newline at end of file + "../src/features/battery_deltas.R" + +rule location_barnett_metrics: + input: + "data/raw/{pid}/locations_with_datetime.csv" + params: + accuracy_limit = config["BARNETT_LOCATION"]["ACCURACY_LIMIT"], + timezone = config["BARNETT_LOCATION"]["TIMEZONE"] + output: + "data/processed/{pid}/location_barnett_metrics.csv" + script: + "../src/features/location_barnett_metrics.R" \ No newline at end of file diff --git a/src/features/location_barnett/AvgFlightDur.R b/src/features/location_barnett/AvgFlightDur.R new file mode 100644 index 00000000..02667081 --- /dev/null +++ b/src/features/location_barnett/AvgFlightDur.R @@ -0,0 +1,13 @@ +AvgFlightDur <- +function(mat){ + num=0 + tot=0 + for(i in 1:nrow(mat)){ + if(mat[i,1]==1){ + tot=tot+mat[i,7]-mat[i,4] + num=num+1 + } + } + if(num==0){return(0)} + return(tot/num) +} diff --git a/src/features/location_barnett/AvgFlightLen.R b/src/features/location_barnett/AvgFlightLen.R new file mode 100644 index 00000000..41fe1b25 --- /dev/null +++ b/src/features/location_barnett/AvgFlightLen.R @@ -0,0 +1,13 @@ +AvgFlightLen <- +function(mat){ + num=0 + tot=0 + for(i in 1:nrow(mat)){ + if(mat[i,1]==1){ + tot=tot+sqrt((mat[i,5]-mat[i,2])^2+(mat[i,6]-mat[i,3])^2) + num=num+1 + } + } + if(num==0){return(0)} + return(tot/num) +} diff --git a/src/features/location_barnett/Collapse2Pause.R b/src/features/location_barnett/Collapse2Pause.R new file mode 100644 index 00000000..0cac7777 --- /dev/null +++ b/src/features/location_barnett/Collapse2Pause.R @@ -0,0 +1,9 @@ +Collapse2Pause <- +function(mat){ + cent=colMeans(mat[,2:3],na.rm=TRUE) + if(!is.na(mat[nrow(mat),7])){ + return(c(2,cent[1],cent[2],mat[1,4],NA,NA,mat[nrow(mat),7])) + }else{ + return(c(2,cent[1],cent[2],mat[1,4],NA,NA,mat[nrow(mat),4])) + } +} diff --git a/src/features/location_barnett/DailyMobilityPlots.R b/src/features/location_barnett/DailyMobilityPlots.R new file mode 100644 index 00000000..d0f3498f --- /dev/null +++ b/src/features/location_barnett/DailyMobilityPlots.R @@ -0,0 +1,42 @@ +DailyMobilityPlots <- +function(mobmat,obj,tz,filename){ + curdate=strsplit(as.character(as.POSIXct(mobmat[1,4],tz=tz,origin="1970-01-01"))," ")[[1]][1] + curtime=strsplit(strsplit(as.character(as.POSIXct(mobmat[1,4],tz=tz,origin="1970-01-01"))," ")[[1]][2],":") + subsetinds_v = list() + daystr_v = c(curdate) + dayind=1 + subsetinds = c(1) + for(i in 2:nrow(mobmat)){ + nexdate=strsplit(as.character(as.POSIXct(mobmat[i,4],tz=tz,origin="1970-01-01"))," ")[[1]][1] + if(nexdate == curdate && i1){ + ii=1 + middate = strsplit(as.character(as.POSIXct(mobmat[i-1,4]+(60*60*24)*ii,tz=tz,origin="1970-01-01"))," ")[[1]][1] + while(middate!=nexdate){ + subsetinds_v[[dayind]]=i-1 + dayind=dayind+1 + if(middate!=daystr_v[length(daystr_v)]){ + daystr_v=c(daystr_v,middate) + } + ii=ii+1 + middate = strsplit(as.character(as.POSIXct(mobmat[i-1,4]+(60*60*24)*ii,tz=tz,origin="1970-01-01"))," ")[[1]][1] + } + } + curdate=nexdate + subsetinds=c(i-1,i) + if(curdate!=daystr_v[length(daystr_v)] && length(daystr_v)(60*60*24)-30*60)) + if(length(IDs)>0){ + CanBe0=FALSE + for(j in 1:length(IDs)){ + if(mat2[IDs[j],1]<=3){ + CanBe0 = TRUE + if(sqrt((mat2[IDs[j],2]-loc1$x)^2+(mat2[IDs[j],3]-loc1$y)^2)chkpts2[i]){ + if(mat2[j,1]!=4 && sqrt((mat2[j,2]-loc1$x)^2+(mat2[j,3]-loc1$y)^2)nrow(mat)){ + break + } + } + if(nexind==curind+1 && curind!=nrow(mat)){ + nextline[3]=mat[nexind,3] + mat=mat[-nexind,] + }else{ + nextline[4:6]=mat[nexind-1,] + out=rbind(out,nextline) + nextline=rep(NA,6) + curind=nexind-1 + nextline[1:3]=mat[curind,] + } + if(nexind>nrow(mat)){break} + } + outp=c() + if(out[1,3]>min(mat[,3])){ + outp=rbind(outp,c(2,out[1,1],out[1,2],min(mat[,3]),NA,NA,out[1,3])) + if(nrow(out)==1){ + outp[1,7]=out[1,6] + return(outp) + } + } + if(nrow(out)==1){ + outp=rbind(outp,c(1,out)) + return(outp) + } + for(i in 1:(nrow(out)-1)){ + outp=rbind(outp,c(1,out[i,])) + if(out[i,6]0){ + for(i in 1:length(IDp)){ + outp[IDf[IDp[i]],1]=2 + outp[IDf[IDp[i]],5]=NA + outp[IDf[IDp[i]],6]=NA + } + } + outp=rbind(outp,c(1,out[nrow(out),])) + colnames(outp)=c("code","lon1","lat1","t1","lon2","lat2","t2") + return(outp) +} diff --git a/src/features/location_barnett/ExtractTimePeriod.R b/src/features/location_barnett/ExtractTimePeriod.R new file mode 100644 index 00000000..9ba5a006 --- /dev/null +++ b/src/features/location_barnett/ExtractTimePeriod.R @@ -0,0 +1,8 @@ +ExtractTimePeriod <- +function(tstart,tend,out){ + tstart = as.POSIXct(tstart) + tend = as.POSIXct(tend) + INDs=intersect(which(apply(out[,c(4,7)],1,function(x) max(x,na.rm=T))>=tstart),which(out[,4]<=tend)) + suboutmat=out[INDs,] + return(suboutmat) +} diff --git a/src/features/location_barnett/GPS2MobMat.R b/src/features/location_barnett/GPS2MobMat.R new file mode 100644 index 00000000..89eeab7a --- /dev/null +++ b/src/features/location_barnett/GPS2MobMat.R @@ -0,0 +1,78 @@ +GPS2MobMat <- +function(locations_df,itrvl=10,accuracylim=51,r=NULL,w=NULL,tint_m=NULL,tint_k=NULL){ + if(is.null(r)){ + r=sqrt(itrvl) + } + cat("Read GPS coordinates...\n") + mat=as.matrix(locations_df) + colnames(mat)=c("timestamp","latitude","longitude","altitude","accuracy") + mat=data.frame(mat) + mat = mat[order(mat[,1]),] + mat=mat[which(mat$accuracy0){ + #avgmat = rbind(avgmat,c(4,tstart+itrvl/2,tstart+itrvl*(nummiss+1)+itrvl/2,NA)) + avgmat[IDam,] = c(4,tstart+itrvl/2,tstart+itrvl*(nummiss+1)+itrvl/2,NA) + count=count+1 + IDam=IDam+1 + } + tstart=tstart+itrvl*(nummiss+1) + nextline[1]=1 + nextline[2]=tstart+itrvl/2 + nextline[3]=mat[i,2] + nextline[4]=mat[i,3] + numitrvl=1 + } + } + avgmat = avgmat[1:count,] + avgmat=cbind(avgmat[,1:4],NA,NA) + ID1 = which(avgmat[,1]==1) + cat("Convert from Lat/Lon to X/Y...\n") + obj=LatLong2XY(avgmat[ID1,3],avgmat[ID1,4]) + avgmat[ID1,5:6]=cbind(obj$x_v,obj$y_v) + outmat=c() + curind=1 + cat("Convert from X/Y to flights/pauses...\n") + for(i in 1:nrow(avgmat)){ + #ProgressBar(nrow(avgmat),i) + if(avgmat[i,1]==4){ + outmat=rbind(outmat,ExtractFlights(avgmat[curind:(i-1),c(5,6,2)],r,w), + c(avgmat[i,1],NA,NA,avgmat[i,2],NA,NA,avgmat[i,3])) + curind=i+1 + } + } + if(curind<=nrow(avgmat)){ + outmat=rbind(outmat,ExtractFlights(avgmat[curind:nrow(avgmat),c(4,3,2)],r,w)) + } + rownames(outmat)=NULL + colnames(outmat)=c("Code","x0","y0","t0","x1","y1","t1") + return(outmat) +} diff --git a/src/features/location_barnett/GPSmobility-internal.R b/src/features/location_barnett/GPSmobility-internal.R new file mode 100644 index 00000000..29820ac3 --- /dev/null +++ b/src/features/location_barnett/GPSmobility-internal.R @@ -0,0 +1,127 @@ +.Random.seed <- +c(403L, 532L, 1313347924L, 1871810394L, 1011383459L, 542971573L, +-1187392539L, -1841977675L, 1217131604L, -1944452603L, 1098034341L, +-804520486L, 840805590L, -1928576924L, -1430251380L, -1139436677L, +-2136175183L, 1399219124L, 983913715L, -1958100421L, 819814570L, +-101387085L, 1988446300L, 1773434296L, 1028871771L, 1264709853L, +1233822260L, -352907293L, -1874091233L, 1376956845L, -1760093132L, +-1522448913L, -1077291149L, 21512807L, 1374310655L, -216158555L, +1032372804L, -2131685284L, -1247877548L, -195661280L, -1193461112L, +406537232L, 1366304989L, -154045753L, -918621168L, 769800744L, +1093857437L, 1837589316L, 959891772L, -719808962L, 764802012L, +-714351040L, 113797869L, 1889521760L, -1691668836L, 1180793697L, +-839820112L, -991859193L, 1862820205L, -695036218L, 1812812603L, +575668592L, 1297789652L, 1615165201L, 1872370822L, 1275602355L, +238960436L, 217263229L, -1743699477L, -336430177L, 1946276898L, +-51047926L, 335686119L, 1868745363L, -1879114386L, -239507944L, +-123202664L, -1086065649L, 520396406L, 85978089L, -1175046236L, +1884875031L, -1192695774L, 1266617047L, 1536889729L, -764534926L, +-921373109L, 1047329928L, -2075286222L, -322845886L, 1550309448L, +-1443985874L, 426960922L, 329268207L, -1292289121L, -733595876L, +-47705416L, 1538462075L, 461864706L, 901229296L, -949594389L, +-1372822103L, 1078727042L, -852223136L, 2064479292L, 469194534L, +-1692101348L, -1326676204L, -1167590146L, 1447819969L, 1740561448L, +310324356L, 1159333394L, -1649710625L, 1309005753L, 1620056043L, +-276036353L, -1947036044L, -167957757L, -1073647714L, -2116038119L, +-805885834L, -744848943L, 366767435L, -352340781L, 1599535855L, +514109103L, 323296404L, -335287805L, -224216873L, -161023865L, +786897141L, -130409155L, 1160446270L, 556864385L, 1119301421L, +1686985943L, -258984081L, -1980102978L, -467100411L, -209450057L, +1139741027L, 1425551935L, -172315197L, 1696991401L, 1544576071L, +-1235589375L, -651734299L, 1211549043L, -1804251286L, -1833360148L, +-984096802L, -1628949810L, 8538312L, 432153794L, 305942613L, +1808431964L, 853733524L, -1605692859L, -385367678L, 2141939616L, +889773407L, -1560609642L, 670905649L, 334411293L, 1986402167L, +2033870206L, 2134300301L, -2064582382L, -626346565L, 1439629109L, +-1488737583L, 226757300L, -1344407788L, -276329690L, -1609546781L, +-1931070875L, -1080666228L, -1697864201L, -740486497L, 957639L, +835570953L, 84725091L, -1091466133L, 91751578L, -1466541146L, +28398023L, -279877742L, -1912375291L, 2143554198L, 1895134001L, +859721695L, -1642408008L, 148459404L, 1279722084L, 11639361L, +-1186157268L, -2049545183L, 1929906479L, 479992801L, -1233036366L, +1132495183L, -1717869394L, 1192119769L, 701016453L, -353462329L, +1849679222L, -88610627L, 1268564785L, 1780888828L, 2125555696L, +-479454132L, 381014479L, 1731827411L, -1497293413L, 1765610629L, +-257146116L, -1672646416L, 279572202L, 306061705L, 363542578L, +2043684836L, -2087600401L, -18100679L, 130429363L, -206253433L, +-1877476275L, -1110886980L, 370895704L, 1937535833L, -458899417L, +1933513839L, 1851745682L, -1759551850L, -1424097819L, -1409590771L, +-1445940234L, 1476939385L, -75749314L, -512187962L, -246840201L, +-1402437621L, -712433900L, 748518695L, -1206528520L, 1269096214L, +-1424522002L, 306993153L, -1748232238L, -1953861210L, 1854731988L, +1949973704L, 2113990654L, -1035297672L, -982113110L, 1947747532L, +-445683723L, -1422336527L, 342451150L, -507937006L, 835723234L, +-1305861749L, -2076961L, 1963086836L, 1777323305L, -603589976L, +1080406293L, -1525958639L, -2102281422L, -1949428444L, 534768422L, +-956054234L, 1217023252L, 1166857033L, 623089757L, 2063298512L, +-620443337L, 57617896L, -1412861617L, -2143192173L, 1221810917L, +-1831504418L, 433245121L, -1494205908L, -1595423097L, 618652748L, +1823284534L, -1975790459L, 944624583L, -1978478274L, -253539083L, +-462197135L, -1114266908L, -922829841L, 1205417697L, 646277154L, +1729713966L, 355692987L, 132643960L, 1888913730L, 2132596438L, +795730174L, 2087034140L, -791602328L, 835529196L, 1599434274L, +-561592179L, 1725668202L, 1560611807L, -2012209521L, -1629575171L, +-458089249L, -23267003L, 964024514L, -1272882513L, 1236391732L, +1392262569L, 1719968820L, -816708679L, -253696196L, -1999483729L, +-879828673L, 774026514L, 2032852514L, 265849025L, -1032488828L, +888611850L, -557908513L, 242929886L, -853928924L, 1795396519L, +-645381594L, -1265672393L, -48545467L, 1751878167L, -1176229327L, +1712417278L, -192564525L, 1961264818L, -251913527L, -17410384L, +-482501633L, 735216203L, 1651195944L, 1487942672L, -1690451385L, +981921524L, 1875073328L, -1902904317L, -1276058861L, 124430661L, +1201133121L, -36499579L, -109105456L, 392425457L, 897127536L, +-1279061377L, -1363233155L, -2007340730L, -905842946L, -887240161L, +1479417822L, 1500778929L, 1466912977L, 812638707L, -2057320565L, +2068831285L, -1049121129L, 1998379119L, 207096865L, 447981423L, +-937392999L, 1943901795L, -107458212L, -138672881L, -1023681436L, +-1446415999L, 66593794L, -274569338L, -2142851735L, 1962274868L, +-260423225L, 848068533L, -1542630154L, -868285626L, 638924723L, +-854628924L, 1125942533L, -649860011L, -973015008L, 590638971L, +-158072121L, 294744765L, 4475861L, -1168788891L, 1088711993L, +1866282938L, -1972286214L, 730559500L, 476356180L, -643462965L, +1145283670L, -1789320019L, 1394756070L, 1360865552L, 442757964L, +482432621L, 866748070L, 1098009718L, 1829802919L, 1430291759L, +-1092585109L, 1585044133L, -2048575248L, -1733890113L, 1959791537L, +-1554152768L, -1232796444L, 55241962L, 618750999L, -1604134765L, +-334997900L, -329001808L, -131647435L, -1173666250L, -390668357L, +-1435906115L, -1800810938L, 515985026L, 2135250434L, -1131126307L, +1500178268L, -1419074199L, -2097621264L, -1595537173L, -119195356L, +-933183808L, 1258750368L, -1576216741L, -2109603222L, -114161474L, +407866045L, 143180735L, -1955314723L, -563023849L, 943100547L, +-525904233L, 1203626173L, 298193165L, 243465545L, 1781374980L, +193452035L, 1680045763L, -1527622795L, -1795637087L, -741238918L, +-1464754509L, 393604555L, 815707509L, 394882594L, -1224578437L, +-1812068853L, -326236336L, 884326570L, -242406610L, -796869644L, +-2009573940L, -1929478690L, -24546421L, 1645754250L, 1758904072L, +-625256378L, 1484952523L, 51300200L, 2097643165L, -644469584L, +1463679013L, 1230080733L, -225241322L, 1340024590L, -906536550L, +267804114L, -2053227290L, 1280884626L, -1509434552L, 715641015L, +-1230219346L, -2080901838L, 1526982232L, -961182793L, 1354426796L, +-1819048544L, 402152231L, 1364528591L, 1804221413L, -391129488L, +1767561636L, -327907125L, 1136370555L, -1993797245L, 575464991L, +-1344405105L, 596664572L, -179105035L, 1371915085L, 1160648991L, +-1278296665L, 1038267328L, 2124553203L, 1844569236L, -1935370541L, +768719328L, -862231222L, 2006606051L, 1600764073L, -160569050L, +-641177427L, 1450448111L, 964835699L, -168653407L, -235867833L, +-2140989170L, -979222517L, 1847316285L, -1375890997L, -619527280L, +-268442869L, -555700851L, -947320017L, -117270963L, 905734377L, +2040206464L, 189927684L, 1946772916L, 2063644288L, 1236821922L, +356581126L, 961632549L, 1477105165L, 2021674991L, -201833932L, +-1674640108L, -374676286L, 900859425L, -1120503690L, 858929571L, +580402156L, 1850701053L, 482648387L, -1887578264L, -1272735428L, +1847053487L, 1682931620L, 91199275L, -698201210L, 1455847850L, +1617511191L, -2010753409L, -138736895L, 1011671890L, 1358821823L, +535141707L, 902747138L, -1671815282L, 1725905177L, 942921105L, +-1944705495L, -1123750932L, -1814790220L, 811191462L, 1136178868L, +2108013845L, 2036310821L, 1647796877L, 737275416L, 448693843L, +551943530L, -295170845L, -1213943211L, 1569163441L, -1826759528L, +115090139L, 388763308L, -131506358L, -1491736754L, -1496442366L, +1060591815L, -1737500855L, -772006308L, -458085092L, 1697885227L, +-617111090L, -1379906226L, -1815127813L, 1111219806L, -1255421555L, +999218138L, 1977126068L, 781672536L, 1205250271L, 1414748230L, +1331187882L, 1481475622L, 892770178L, -1889921295L, -1870961089L, +-2137441515L, 1504371917L, 1834797012L, 1264278039L, 1648781970L, +-2140624793L, 53578265L, 1201061792L, 1516041549L, -216621021L, +1529156071L, 1880973897L, 993084056L, -2099800818L, -786845703L, +-1773470008L, -1939021033L, 1778465361L, 532321917L, 1286568102L +) diff --git a/src/features/location_barnett/GetMobilityFeaturesMat.R b/src/features/location_barnett/GetMobilityFeaturesMat.R new file mode 100644 index 00000000..3a1642ce --- /dev/null +++ b/src/features/location_barnett/GetMobilityFeaturesMat.R @@ -0,0 +1,151 @@ +GetMobilityFeaturesMat <- +function(mobmat,obj,mobmatmiss,tz,CENTERRAD,ITRVL){ + #### Get significant locations + slout=SigLocs(mobmat,obj,CENTERRAD,tz=tz) + IDhome=which(slout[,4]==1) + if(length(IDhome)==0){IDhome=1} + homex=slout[IDhome,1];homey=slout[IDhome,2] + #### Partion mobmat data into daily subsets: create subsetinds_v and daystr_v. + curdate=strsplit(as.character(as.POSIXct(mobmat[1,4],tz=tz,origin="1970-01-01"))," ")[[1]][1] + curtime=strsplit(strsplit(as.character(as.POSIXct(mobmat[1,4],tz=tz,origin="1970-01-01"))," ")[[1]][2],":") + subsetinds_v = list() + subsetdayofweek_v = c() + subsetstarttime_v = c() + daystr_v = c(curdate) + dayind=1 + subsetinds = c(1) + if(nrow(mobmat)<2){ + outmat=NULL + return(list(outmat,slout)) + } + for(i in 2:nrow(mobmat)){ + nexdate=strsplit(as.character(as.POSIXct(mobmat[i,4],tz=tz,origin="1970-01-01"))," ")[[1]][1] + if(nexdate == curdate && i1){ + ii=1 + middate = strsplit(as.character(as.POSIXct(mobmat[i-1,4]+(60*60*24)*ii,tz=tz,origin="1970-01-01"))," ")[[1]][1] + while(middate!=nexdate){ + subsetdayofweek_v = c(subsetdayofweek_v,weekdays(as.Date(middate))) + subsetstarttime_v = c(subsetstarttime_v,as.numeric(as.POSIXct(paste(middate," 00:00:00",sep=""),tz=tz,origin="1970-01-01"))) + subsetinds_v[[dayind]]=i-1 + dayind=dayind+1 + if(middate!=daystr_v[length(daystr_v)]){ + daystr_v=c(daystr_v,middate) + } + ii=ii+1 + middate = strsplit(as.character(as.POSIXct(mobmat[i-1,4]+(60*60*24)*ii,tz=tz,origin="1970-01-01"))," ")[[1]][1] + } + } + curdate=nexdate + subsetinds=c(i-1,i) + if(curdate!=daystr_v[length(daystr_v)]&& !(length(daystr_v)==length(subsetinds_v) && nrow(mobmat)==i)){ + daystr_v=c(daystr_v,curdate) + } + } + } + #### Partion mobmatmiss data into daily subsets: create subsetinds_v and daystr_v. + curdate=strsplit(as.character(as.POSIXct(mobmatmiss[1,4],tz=tz,origin="1970-01-01"))," ")[[1]][1] + curtime=strsplit(strsplit(as.character(as.POSIXct(mobmatmiss[1,4],tz=tz,origin="1970-01-01"))," ")[[1]][2],":") + subsetindsmiss_v = list() + daystrmiss_v=c(curdate) + dayind=1 + subsetinds = c(1) + for(i in 2:nrow(mobmatmiss)){ + nexdate=strsplit(as.character(as.POSIXct(mobmatmiss[i,4],tz=tz,origin="1970-01-01"))," ")[[1]][1] + if(nexdate == curdate && i1){ + ii=1 + middate = strsplit(as.character(as.POSIXct(mobmatmiss[i-1,4]+(60*60*24)*ii,tz=tz,origin="1970-01-01"))," ")[[1]][1] + while(middate!=nexdate){ + subsetindsmiss_v[[dayind]]=i-1 + dayind=dayind+1 + if(middate!=daystrmiss_v[length(daystrmiss_v)]){ + daystrmiss_v=c(daystrmiss_v,middate) + } + ii=ii+1 + middate = strsplit(as.character(as.POSIXct(mobmatmiss[i-1,4]+(60*60*24)*ii,tz=tz,origin="1970-01-01"))," ")[[1]][1] + } + } + } + } + ##### intersect mobmat and mobmatmiss to ignore missing data + IDkeep=c() + IDkeepmiss=c() + for(i in 1:length(daystr_v)){ + for(j in 1:length(daystrmiss_v)){ + if(daystr_v[i]==daystrmiss_v[j]){ + IDkeep=c(IDkeep,i) + IDkeepmiss=c(IDkeepmiss,j) + break + } + } + } + if(length(IDkeep)==0){ + outmat=NULL + return(list(outmat,slout)) + } + daystr_v=daystr_v[IDkeep] + subsetinds_v=subsetinds_v[IDkeep] + subsetdayofweek_v=subsetdayofweek_v[IDkeep] + subsetstarttime_v=subsetstarttime_v[IDkeep] + daystrmiss_v=daystrmiss_v[IDkeepmiss] + subsetindsmiss_v=subsetindsmiss_v[IDkeepmiss] + ##### Compute mobility features for each day + Nfeatures=15 + outmat=matrix(,nrow=length(daystr_v),ncol=Nfeatures) + colnames(outmat)=c("Hometime","DistTravelled","RoG","MaxDiam","MaxHomeDist","SigLocsVisited","AvgFlightLen","StdFlightLen","AvgFlightDur","StdFlightDur","ProbPause","SigLocEntropy","MinsMissing","CircdnRtn","WkEndDayRtn") + rownames(outmat)=daystr_v + for(i in 1:length(daystr_v)){ + if(length(subsetinds_v[[i]])==0){next} + submat=matrix(mobmat[subsetinds_v[[i]],],ncol=7) + if(submat[1,1]==2 && submat[1,4]subsetstarttime_v[[i]]+60*60*24){ + submat[nrow(submat),7]=subsetstarttime_v[[i]]+60*60*24 + } + submatmiss=matrix(mobmatmiss[subsetindsmiss_v[[i]],],ncol=7) + if(submatmiss[1,1]==4 && submatmiss[1,4]subsetstarttime_v[[i]]+60*60*24){ + submatmiss[nrow(submatmiss),7]=subsetstarttime_v[[i]]+60*60*24 + } + if(nrow(submat)==0 || length(which(slout$home==1))==0){ + outmat[i,]=c(rep(NA,12),1440,rep(NA,2)) + }else{ + outmat[i,1]=Hometime(submat,slout,CENTERRAD=200) + outmat[i,2]=DistanceTravelled(submat) + outmat[i,3]=RadiusOfGyration(submat,ITRVL) + outmat[i,4]=MaxDiam(submat) + outmat[i,5]=MaxHomeDist(submat,homex,homey) + outmat[i,6]=SigLocsVisited(submat,slout,CENTERRAD) + outmat[i,7]=AvgFlightLen(submat) + outmat[i,8]=StdFlightLen(submat) + outmat[i,9]=AvgFlightDur(submat) + outmat[i,10]=StdFlightDur(submat) + outmat[i,11]=ProbPause(submat) + outmat[i,12]=SigLocEntropy(submat,slout,CENTERRAD) + outmat[i,13]=MinsMissing(submatmiss) + DRIout=DailyRoutineIndex(i,mobmat,subsetinds_v,subsetdayofweek_v,subsetstarttime_v,tz,CENTERRAD) + outmat[i,14]=DRIout$cscore + outmat[i,15]=DRIout$wscore + } + } + return(list(outmat,slout)) +} diff --git a/src/features/location_barnett/GuessPause.R b/src/features/location_barnett/GuessPause.R new file mode 100644 index 00000000..997dba0e --- /dev/null +++ b/src/features/location_barnett/GuessPause.R @@ -0,0 +1,134 @@ +GuessPause <- +function(mat,mindur=300,r=75){ + cat("Inferring pauses...\n") + flatmat=c() + collapse=FALSE + inds=1 + incr=1 + tcur=mat[inds,4] + while(TRUE){ + if(!is.na(mat[inds+incr,7])){ + tnex=mat[inds+incr,7] + }else{ + tnex=mat[inds+incr,4] + } + if(tnex-tcur>=mindur){ + if(mat[inds+incr,1]==1){ + maxr=MaxRadius(rbind(mat[inds:(inds+incr),2:3],mat[inds:(inds+incr),5:6])) + }else{ + maxr=MaxRadius(mat[inds:(inds+incr),2:3]) + } + if(maxr>r && !collapse){ + inds=inds+1 + tcur=mat[inds,1] + incr=0 + } + if(maxr>r && collapse){ + if(mat[inds+incr-1,1]==4){ + if(indsnrow(mat)){ + if(maxr<=r && tnex-tcur>=mindur){ + flatmat=rbind(flatmat,c(inds,inds+incr-1)) + } + break + } + } + if(nrow(flatmat)==0){ + return(mat) + }else{ + outmat=c() + if(flatmat[1,1]>1){ + outmat=mat[1:(flatmat[1,1]-1),] + } + for(i in 1:nrow(flatmat)){ + #ProgressBar(nrow(flatmat),i) + outmat=rbind(outmat,Collapse2Pause(mat[flatmat[i,1]:flatmat[i,2],])) + if(i1){ + outmat2=outmat[1:(flatmat2[1,1]-1),] + } + for(i in 1:nrow(flatmat2)){ + outmat2=rbind(outmat2,Collapse2Pause(outmat[flatmat2[i,1]:flatmat2[i,2],])) + if(i0 && ID1[length(ID1)]==nrow(out)){ + ID1p1=ID1p1[-length(ID1p1)] + } + allts=apply(out,1,function(xx) mean(xx[c(4,7)])) + allxs=out[,2] + allys=out[,3] + ind11=ID1p1[which(out[ID1p1,1]==1)] + ind12=ID1p1[which(out[ID1p1,1]==2)] + l1=length(ind11) + l2=length(ind12) + if(l1+l2>0){ + phatall=l2/(l1+l2) + } + if(l1+l2==0){phatall=length(ID2)/(length(ID1)+length(ID2))} + #flight distances + fd=apply(out[ID1,],1,function(xx) sqrt((xx[2]-xx[5])^2+(xx[3]-xx[6])^2)) + + # flight times: ft + ft=apply(out[ID1,],1,function(xx) (xx[7]-xx[4])) + fxs=out[ID1,2] + fys=out[ID1,3] + # flight angles range [0,2pi]: fa + #fa=apply(out[ID1,],1,function(xx) atan((xx[6]-xx[3])/(xx[5]-xx[2]))-((sign(xx[6]-xx[3])-1)/2)*pi) + fa=rep(0,length(ID1)) + yvals=out[ID1,6]-out[ID1,3] + xvals=out[ID1,5]-out[ID1,2] + IDyg0=which(yvals>=0) + IDxg0=which(xvals>=0) + IDyl0=which(yvals<0) + IDxl0=which(xvals<0) + IDgg=intersect(IDyg0,IDxg0) + IDlg=intersect(IDyg0,IDxl0) + IDgl=intersect(IDyl0,IDxg0) + IDll=intersect(IDyl0,IDxl0) + fa[IDgg]=atan(yvals[IDgg]/xvals[IDgg]) + fa[IDgl]=atan(yvals[IDgl]/xvals[IDgl])+2*pi + fa[IDlg]=atan(yvals[IDlg]/xvals[IDlg])+pi + fa[IDll]=atan(yvals[IDll]/xvals[IDll])+pi + # flight time stamps: fts + fts=out[ID1,4] + + # pause times + pt=apply(matrix(out[ID2,],ncol=7),1,function(xx) xx[7]-xx[4]) + pxs=out[ID2,2] + pys=out[ID2,3] + #pause time stamp: pts + pts=out[ID2,4] + return(list(ID1=ID1,ID2=ID2,ID3=ID3,ID4=ID4,ID1p1=ID1p1,allts=allts,ind11=ind11,ind12=ind12,phatall=phatall,fd=fd,ft=ft,fa=fa,fts=fts,pt=pt,pts=pts,fxs=fxs,fys=fys,pxs=pxs,pys=pys,allxs=allxs,allys=allys)) +} diff --git a/src/features/location_barnett/IsFlight.R b/src/features/location_barnett/IsFlight.R new file mode 100644 index 00000000..c11f2dce --- /dev/null +++ b/src/features/location_barnett/IsFlight.R @@ -0,0 +1,33 @@ +IsFlight <- +function(mat,r,w){ + num=nrow(mat) + if(sqrt((mat[1,1]-mat[num,1])^2+(mat[1,2]-mat[num,2])^2)w){ + return(FALSE) + }else{ + return(TRUE) + } + } + if(mat[1,1]>mat[num,1]){ + mat=mat[num:1,] + } + mat[,1]=mat[,1]-mat[1,1] + mat[,2]=mat[,2]-mat[1,2] + theta=-atan(mat[num,2]/mat[num,1]) + A=matrix(c(cos(theta),-sin(theta),sin(theta),cos(theta)),nrow=2,byrow=TRUE) + rotpts=A%*%t(matrix(mat[2:(num-1),1:2],ncol=2)) + if(max(abs(rotpts[2,]))>w){ + return(FALSE) + }else{ + return(TRUE) + } +} diff --git a/src/features/location_barnett/LatLong2XY.R b/src/features/location_barnett/LatLong2XY.R new file mode 100644 index 00000000..4fcff459 --- /dev/null +++ b/src/features/location_barnett/LatLong2XY.R @@ -0,0 +1,19 @@ +LatLong2XY <- +function(lat_v,lon_v,R=6.371*10^6){ + th0 = min(lon_v) + th1 = max(lon_v) + ph0 = min(lat_v) + ph1 = max(lat_v) + d1 = 2*pi*R*((ph1-ph0)*2*pi/360)/(2*pi) + d2 = 2*pi*(R*sin(pi/2-ph1*2*pi/360))*((th1-th0)*2*pi/360)/(2*pi) + d3 = 2*pi*(R*sin(pi/2-ph0*2*pi/360))*((th1-th0)*2*pi/360)/(2*pi) + x_v=rep(0,length(lon_v)) + y_v=rep(0,length(lat_v)) + for(i in 1:length(lat_v)){ + w1=(lat_v[i]-ph0)/(ph1-ph0) + w2=(lon_v[i]-th0)/(th1-th0) + x_v[i]=w1*abs(d3-d2)/2+w2*(d3*(1-w1)+d2*w1) + y_v[i]=w1*d1*sin(acos(abs((d3-d2)/(2*d1)))) + } + return(list("x_v"=x_v,"y_v"=y_v)) +} diff --git a/src/features/location_barnett/LocationAt.R b/src/features/location_barnett/LocationAt.R new file mode 100644 index 00000000..7b8f6649 --- /dev/null +++ b/src/features/location_barnett/LocationAt.R @@ -0,0 +1,15 @@ +LocationAt <- +function(mat,tt){ + for(i in 1:nrow(mat)){ + if(mat[i,1]<=2){ + if(mat[i,4]<=tt && mat[i,7]>=tt){ + return(list('x'=mat[i,2],'y'=mat[i,3])) + } + }else if(mat[i,1]==3){ + if(mat[i,4]==tt){ + return(list('x'=mat[i,2],'y'=mat[i,3])) + } + } + } + return(NULL) +} diff --git a/src/features/location_barnett/MaxDiam.R b/src/features/location_barnett/MaxDiam.R new file mode 100644 index 00000000..fd9ae0a5 --- /dev/null +++ b/src/features/location_barnett/MaxDiam.R @@ -0,0 +1,6 @@ +MaxDiam <- +function(mat){ + IDmv=which(mat[,1]<=2) + if(length(IDmv)<2){return(0)} + return(max(dist(mat[IDmv,2:3]))) +} diff --git a/src/features/location_barnett/MaxDistBetweenTrajectories.R b/src/features/location_barnett/MaxDistBetweenTrajectories.R new file mode 100644 index 00000000..f854b6bc --- /dev/null +++ b/src/features/location_barnett/MaxDistBetweenTrajectories.R @@ -0,0 +1,29 @@ +MaxDistBetweenTrajectories <- +function(mat1,mat2,t_gap=1){ + mat1=matrix(mat1,ncol=7);mat2=matrix(mat2,ncol=7) + t0=mat1[1,4];t1=mat2[nrow(mat2),7] + t_mesh = seq(t0,t1,t_gap) + d_v = rep(0,length(t_mesh)) + for(i in 1:length(t_mesh)){ + ID1=intersect(which(mat1[,4]<=t_mesh[i]),which(mat1[,7]>=t_mesh[i]))[1] + if(mat1[ID1,1]==2){ + x1=mat1[ID1,2] + y1=mat1[ID1,3] + }else{ + w1=(t_mesh[i]-mat1[ID1,4])/(mat1[ID1,7]-mat1[ID1,4]) + x1=mat1[ID1,2]*(1-w1)+mat1[ID1,5]*w1 + y1=mat1[ID1,3]*(1-w1)+mat1[ID1,6]*w1 + } + ID2=intersect(which(mat2[,4]<=t_mesh[i]),which(mat2[,7]>=t_mesh[i]))[1] + if(mat2[ID2,1]==2){ + x2=mat2[ID2,2] + y2=mat2[ID2,3] + }else{ + w2=(t_mesh[i]-mat2[ID2,4])/(mat2[ID2,7]-mat2[ID2,4]) + x2=mat2[ID2,2]*(1-w2)+mat2[ID2,5]*w2 + y2=mat2[ID2,3]*(1-w2)+mat2[ID2,6]*w2 + } + d_v[i] = sqrt((x1-x2)^2+(y1-y2)^2) + } + return(max(d_v)) +} diff --git a/src/features/location_barnett/MaxHomeDist.R b/src/features/location_barnett/MaxHomeDist.R new file mode 100644 index 00000000..b263b38d --- /dev/null +++ b/src/features/location_barnett/MaxHomeDist.R @@ -0,0 +1,10 @@ +MaxHomeDist <- +function(mat,homex,homey){ + IDmv=which(mat[,1]<=2) + if(length(IDmv)==0){return(NA)} + dfhome=rep(NA,length(IDmv)) + for(i in 1:length(IDmv)){ + dfhome[i]=sqrt((mat[IDmv[i],2]-homex)^2+(mat[IDmv[i],3]-homey)^2) + } + return(max(dfhome)) +} diff --git a/src/features/location_barnett/MaxRadius.R b/src/features/location_barnett/MaxRadius.R new file mode 100644 index 00000000..bd7c1075 --- /dev/null +++ b/src/features/location_barnett/MaxRadius.R @@ -0,0 +1,5 @@ +MaxRadius <- +function(mat){ + cent=colMeans(mat,na.rm=TRUE) + return(max(apply(mat,1,function(x) sqrt((x[1]-cent[1])^2+(x[2]-cent[2])^2)),na.rm=TRUE)) +} diff --git a/src/features/location_barnett/MinsMissing.R b/src/features/location_barnett/MinsMissing.R new file mode 100644 index 00000000..5d63678c --- /dev/null +++ b/src/features/location_barnett/MinsMissing.R @@ -0,0 +1,10 @@ +MinsMissing <- +function(mat){ + tot=0 + for(i in 1:nrow(mat)){ + if(mat[i,1]==4){ + tot = tot+mat[i,7]-mat[i,4] + } + } + return(tot/60) +} diff --git a/src/features/location_barnett/MobilityFeatures.R b/src/features/location_barnett/MobilityFeatures.R new file mode 100644 index 00000000..312bd7bd --- /dev/null +++ b/src/features/location_barnett/MobilityFeatures.R @@ -0,0 +1,56 @@ +MobilityFeatures <- +function(locations_df, + ACCURACY_LIM=51, ### meters GPS accuracy + ITRVL=10, ### seconds (data concatenation) + nreps=1, ### simulate missing data numer of times + tz="", ### time zone of data, defaults to current time zone + CENTERRAD=200, ### meters radius from significant locations considered + wtype="GLR", + spread_pars=c(10,1), + minpausedur=300, + minpausedist=60, + rad_fp=NULL, + wid_fp=NULL +){ + mobmatmiss=GPS2MobMat(locations_df,itrvl=ITRVL,accuracylim=ACCURACY_LIM,r=rad_fp,w=wid_fp) + mobmat = GuessPause(mobmatmiss,mindur=minpausedur,r=minpausedist) + obj=InitializeParams(mobmat) + qOKmsg=MobmatQualityOK(mobmat,obj) + if(qOKmsg!=""){ + cat(qOKmsg,"\n") + return(NULL) + } + lsmf = list() + lssigloc = list() + for(repnum in 1:nreps){ + if(repnum==1){ + cat("Sim #: 1") + }else if(repnum<=nreps-1){ + cat(paste(" ",repnum,sep="")) + }else{ + cat(paste(" ",nreps,"\n",sep="")) + } + out3=SimulateMobilityGaps(mobmat,obj,wtype,spread_pars) + IDundef=which(out3[,1]==3) + if(length(IDundef)>0){ + out3=out3[-IDundef,] + } + obj3=InitializeParams(out3) + out_GMFM=GetMobilityFeaturesMat(out3,obj3,mobmatmiss,tz,CENTERRAD,ITRVL) + lsmf[[repnum]]=out_GMFM[[1]] + lssigloc[[repnum]]=out_GMFM[[2]] + } + cat("\n\n") + if(length(lsmf)!=0){ + featavg = lsmf[[1]] + if(nreps>1){ + for(i in 2:nreps){ + featavg=featavg+lsmf[[i]] + } + featavg=featavg/nreps + } + }else{ + featavg=NULL + } + return(list('mobmat'=mobmat,'mobmatmiss'=mobmatmiss,'featsims'=lsmf,'siglocsims'=lssigloc,'featavg'=featavg)) +} diff --git a/src/features/location_barnett/MobmatQualityOK.R b/src/features/location_barnett/MobmatQualityOK.R new file mode 100644 index 00000000..07acc3f3 --- /dev/null +++ b/src/features/location_barnett/MobmatQualityOK.R @@ -0,0 +1,14 @@ +MobmatQualityOK <- +function(mobmat,obj){ + msg="" + if(!is.matrix(mobmat)){ + msg = "Mobmat not a matrix. Removing individual from analysis.\n" + } + if(length(obj$ID1)==0){ + msg= "No flights in mobmat. Removing individual from analysis.\n" + } + if(length(obj$ID2)==0){ + msg= "No pauses in mobmat. Removing individual from analysis.\n" + } + return(msg) +} diff --git a/src/features/location_barnett/ProbPause.R b/src/features/location_barnett/ProbPause.R new file mode 100644 index 00000000..41c8a4d3 --- /dev/null +++ b/src/features/location_barnett/ProbPause.R @@ -0,0 +1,14 @@ +ProbPause <- +function(mat){ + tpause = 0 + tflight = 0 + for(i in 1:nrow(mat)){ + if(mat[i,1]==1){ + tflight = tflight + mat[i,7]-mat[i,4] + } + if(mat[i,1]==2){ + tpause = tpause + mat[i,7]-mat[i,4] + } + } + return(tpause/(tpause+tflight)) +} diff --git a/src/features/location_barnett/ProgressBar.R b/src/features/location_barnett/ProgressBar.R new file mode 100644 index 00000000..0b0652f1 --- /dev/null +++ b/src/features/location_barnett/ProgressBar.R @@ -0,0 +1,30 @@ +ProgressBar <- +function (maxn, ind){ + if(maxn<51){ + if (ind == 1) { + cat("|1%--------------------50%--------------------100%|\n") + cat("|") + return() + }else{ + numprint=floor(50*ind/maxn)-floor(50*(ind-1)/maxn) + if(numprint>0){ + for(i in 1:numprint){ + cat("|") + } + } + } + }else{ + if (ind == 1) { + cat("|1%--------------------50%--------------------100%|\n") + cat("|") + return() + } + if (maxn == ind) { + cat("|\n") + return() + } + if (floor(50 * ind/maxn) != floor(50 * (ind - 1)/maxn)) { + cat("|") + } + } +} diff --git a/src/features/location_barnett/RadiusOfGyration.R b/src/features/location_barnett/RadiusOfGyration.R new file mode 100644 index 00000000..a117d982 --- /dev/null +++ b/src/features/location_barnett/RadiusOfGyration.R @@ -0,0 +1,36 @@ +RadiusOfGyration <- +function(mat,ITRVL){ + mat=matrix(mat,ncol=7) + IDskip=which(mat[,1]==4) + if(length(IDskip)>0){ + mat=matrix(mat[-IDskip,],ncol=7) + } + N=nrow(mat) + w_v=rep(0,N) + x_v=rep(0,N) + y_v=rep(0,N) + for(i in 1:N){ + if(mat[i,1]==4){ + next + } + if(mat[i,1]==3){ + x_v[i]=mat[i,2] + y_v[i]=mat[i,3] + w_v[i]=ITRVL + } + if(mat[i,1]==1){ + x_v[i]=mean(mat[i,c(2,5)]) + y_v[i]=mean(mat[i,c(3,6)]) + w_v[i]=mat[i,7]-mat[i,4] + } + if(mat[i,1]==2){ + x_v[i]=mat[i,2] + y_v[i]=mat[i,3] + w_v[i]=mat[i,7]-mat[i,4] + } + } + sumw_v=sum(w_v) + xavg=sum(w_v*x_v)/sumw_v + yavg=sum(w_v*y_v)/sumw_v + return(sqrt(sum(((x_v-xavg)^2+(y_v-yavg)^2)*w_v)/sumw_v)) +} diff --git a/src/features/location_barnett/RandomBridge.R b/src/features/location_barnett/RandomBridge.R new file mode 100644 index 00000000..7ec63bca --- /dev/null +++ b/src/features/location_barnett/RandomBridge.R @@ -0,0 +1,101 @@ +RandomBridge <- +function(x0,y0,x1,y1,t0,t1,fd,ft,fts,fa,fw,probp,pt,pts,pw,allts,allw,ind11,ind12,i_ind,pxs,pys,fxs,fys,allxs,allys,wtype,canpause,niter=100,spread_pars){ + success=FALSE + for(i in 1:niter){ + outmat=c() + curx=x0 + cury=y0 + curt=t0 + tarrive=t0 + while(TRUE){ + ## new + varmult=1 + while(TRUE){ + if(wtype=="TL"){ + fw=dt(spread_pars[1]*(fts-curt)/(varmult*(t1-t0)),df=spread_pars[2]) + pw=dt(spread_pars[1]*(pts-curt)/(varmult*(t1-t0)),df=spread_pars[2]) + allw=dt(spread_pars[1]*(allts-curt)/(varmult*(t1-t0)),df=spread_pars[2]) + }else if(wtype=="GL"){ + fw=dt(spread_pars[1]*sqrt((fxs-curx)^2+(fys-cury)^2)/(50*varmult),df=spread_pars[2]) + pw=dt(spread_pars[1]*sqrt((pxs-curx)^2+(pys-cury)^2)/(50*varmult),df=spread_pars[2]) + allw=dt(spread_pars[1]*sqrt((allxs-curx)^2+(allys-cury)^2)/(50*varmult),df=spread_pars[2]) + }else if(wtype=="GLR"){ + fw=dt(spread_pars[1]*sqrt((fxs-curx)^2+(fys-cury)^2)/(50*varmult),df=spread_pars[2])*dt(spread_pars[1]*apply(cbind(abs((fts-curt))%%(60*60*24),(60*60*24)-abs((fts-curt))%%(60*60*24)),1,min)/(varmult*(t1-t0)),df=spread_pars[2]) + pw=dt(spread_pars[1]*sqrt((pxs-curx)^2+(pys-cury)^2)/(50*varmult),df=spread_pars[2])*dt(spread_pars[1]*apply(cbind(abs((pts-curt))%%(60*60*24),(60*60*24)-abs((pts-curt))%%(60*60*24)),1,min)/(varmult*(t1-t0)),df=spread_pars[2]) + allw=dt(spread_pars[1]*sqrt((allxs-curx)^2+(allys-cury)^2)/(50*varmult),df=spread_pars[2])*dt(spread_pars[1]*apply(cbind(abs((allts-curt))%%(60*60*24),(60*60*24)-abs((allts-curt))%%(60*60*24)),1,min)/(varmult*(t1-t0)),df=spread_pars[2]) + } + if(length(pts)>0 && length(fts)>0 && sum(fw)>0 && sum(pw)>0){break} + if(length(pts)==0 && length(fts)>0 && sum(fw)>0){break} + if(length(fts)==0 && length(pts)>0 && sum(pw)>0){break} + varmult=varmult*2 + } + s11=sum(allw[ind11],na.rm=T) + s12=sum(allw[ind12],na.rm=T) + if(s11+s12==0){phatcur=probp}else{phatcur=s12/(s11+s12)} + probp=phatcur + ## end + + if(canpause && runif(1)t0){ + success=TRUE + break + } + } + if(!success){ + return(c(1,x0,y0,t0,x1,y1,t1)) + return(NULL) + }else{ + curx=x0 + cury=y0 + for(i in 1:nrow(outmat)){ + if(outmat[i,1]==1){ + outmat[i,2]=curx + outmat[i,3]=cury + curt=outmat[i,7] + w=(tarrive-curt)/(tarrive-t0) + outmat[i,5]=outmat[i,5]*w+x1*(1-w) + outmat[i,6]=outmat[i,6]*w+y1*(1-w) + curx=outmat[i,5] + cury=outmat[i,6] + }else{ + curt=outmat[i,4] + outmat[i,2]=curx + outmat[i,3]=cury + } + } + outmat[nrow(outmat),7]=t1 + rownames(outmat)=NULL + colnames(outmat)=c("Code","x0","y0","t0","x1","y1","t1") + return(outmat) + } +} diff --git a/src/features/location_barnett/SigLocEntropy.R b/src/features/location_barnett/SigLocEntropy.R new file mode 100644 index 00000000..801806e0 --- /dev/null +++ b/src/features/location_barnett/SigLocEntropy.R @@ -0,0 +1,22 @@ +SigLocEntropy <- +function(mat,slout,CENTERRAD){ + tp = rep(0,nrow(slout)) + for(i in 1:nrow(mat)){ + if(mat[i,1]==2){ + for(j in 1:nrow(slout)){ + if(sqrt((slout[j,1]-mat[i,2])^2+(slout[j,2]-mat[i,3])^2)0){ + tot=tot-p*log(p) + } + } + return(tot) +} diff --git a/src/features/location_barnett/SigLocs.R b/src/features/location_barnett/SigLocs.R new file mode 100644 index 00000000..4385f9d3 --- /dev/null +++ b/src/features/location_barnett/SigLocs.R @@ -0,0 +1,68 @@ +SigLocs <- +function(mobmat,obj,CENTERRAD=125,MINPAUSETIME=600,tz=""){ + if(length(obj$ID2)==0){ + warning("No pauses in mobmat within function SigLocs!") + return(NULL) + }else if(length(obj$ID2)==1){ + outmat=data.frame('x'=mobmat[obj$ID2[1],2],'y'=mobmat[obj$ID2[1],3],'timepresent'=c(0),'home'=c(1)) + nrowfc=1 + }else{ + ptred=floor(obj$pt/MINPAUSETIME) + if(length(which(ptred>0))<2){ + warning("No pauses long enough in mobmat within function SigLocs!") + return(NULL) + } + pmat=c() + for(i in 1:length(obj$ID2)){ + if(ptred[i]>0){ + pmat=rbind(pmat,matrix(rep(mobmat[obj$ID2[i],2:3],ptred[i]),ncol=2,byrow=T)) + } + } + kmeansk_v=2:length(which(ptred>0)) + lsfit = list() + for(i in 1:length(kmeansk_v)){ + kmeansk = kmeansk_v[i] + fit = kmeans(pmat,centers=kmeansk) + lsfit[[i]]=fit + if(min(dist(fit$centers))1){ + kmeansk=kmeansk_v[i-1] + fit = lsfit[[i-1]] + } + break + } + } + nrowfc = nrow(fit$centers) + outmat=data.frame('x'=fit$centers[,1],'y'=fit$centers[,2],'timepresent'=rep(0,nrow(fit$centers)),'home'=rep(0,nrow(fit$centers))) + } + #Determine time spent at these significant locations + for(i in 1:length(obj$ID2)){ + for(j in 1:nrowfc){ + if(sqrt((mobmat[obj$ID2[i],2]-outmat$x[j])^2+(mobmat[obj$ID2[i],3]-outmat$y[j])^2)=21 || hourofday<6){ + for(j in 1:nrowfc){ + if(sqrt((mobmat[obj$ID2[i],2]-outmat$x[j])^2+(mobmat[obj$ID2[i],3]-outmat$y[j])^2)0){ + outmat=outmat[-IDrm,] + } + rownames(outmat)=NULL + return(outmat) +} diff --git a/src/features/location_barnett/SigLocsVisited.R b/src/features/location_barnett/SigLocsVisited.R new file mode 100644 index 00000000..0c046d92 --- /dev/null +++ b/src/features/location_barnett/SigLocsVisited.R @@ -0,0 +1,14 @@ +SigLocsVisited <- +function(mat,slout,CENTERRAD){ + places_visited = rep(0,nrow(slout)) + for(i in 1:nrow(mat)){ + if(mat[i,1]<=3){ + for(j in 1:nrow(slout)){ + if(sqrt((slout[j,1]-mat[i,2])^2 + (slout[j,2]-mat[i,3])^2)1 && i0 && length(fts)>0 && sum(fw)>0 && sum(pw)>0){break} + if(length(pts)==0 && length(fts)>0 && sum(fw)>0){break} + if(length(fts)==0 && length(pts)>0 && sum(pw)>0){break} + varmult=varmult*2 + } + s11=sum(allw[ind11],na.rm=T) + s12=sum(allw[ind12],na.rm=T) + if(s11+s12==0){phatcur=phatall}else{phatcur=s12/(s11+s12)} + if(wtype=="LI"){ + foutmat=rbind(foutmat,c(1,curx,cury,suboutmat[i,4],suboutmat[i+1,2],suboutmat[i+1,3],suboutmat[i,7])) + }else{ + rbout=matrix(RandomBridge(x0=curx,y0=cury,x1=suboutmat[i+1,2],y1=suboutmat[i+1,3],t0=suboutmat[i,4],t1=suboutmat[i,7],fd=fd,ft=ft,fts=fts,fa=fa,fw=fw,probp=phatcur,pt=pt,pts=pts,pw=pw,allts=allts,allw=allw,ind11=ind11,ind12=ind12,i_ind=i,pxs=pxs,pys=pys,fxs=fxs,fys=fys,allxs=allxs,allys=allys,wtype=wtype,canpause=suboutmat[i-1,1]==1,niter=100,spread_pars=spread_pars),ncol=7) + foutmat=rbind(foutmat,rbout) + } + } + } + return(foutmat) +} diff --git a/src/features/location_barnett/StdFlightDur.R b/src/features/location_barnett/StdFlightDur.R new file mode 100644 index 00000000..3caadd2e --- /dev/null +++ b/src/features/location_barnett/StdFlightDur.R @@ -0,0 +1,11 @@ +StdFlightDur <- +function(mat){ + ID1=which(mat[,1]==1) + if(length(ID1)==0){return(0)} + try1=try(sd(as.numeric(mat[ID1,7]-mat[ID1,4])),silent=TRUE) + if(class(try1) == "try-error" || is.na(try1)){ + return(0) + }else{ + return(try1) + } +} diff --git a/src/features/location_barnett/StdFlightLen.R b/src/features/location_barnett/StdFlightLen.R new file mode 100644 index 00000000..38a8f182 --- /dev/null +++ b/src/features/location_barnett/StdFlightLen.R @@ -0,0 +1,11 @@ +StdFlightLen <- +function(mat){ + ID1=which(mat[,1]==1) + if(length(ID1)<=1){return(0)} + try1=try(sd(as.numeric(sqrt((mat[ID1,6]-mat[ID1,3])^2+(mat[ID1,5]-mat[ID1,2])^2))),silent=TRUE) + if(class(try1) == "try-error" || is.na(try1)){ + return(0) + }else{ + return(try1) + } +} diff --git a/src/features/location_barnett/WriteSurveyAnswers2File.R b/src/features/location_barnett/WriteSurveyAnswers2File.R new file mode 100644 index 00000000..5660d81f --- /dev/null +++ b/src/features/location_barnett/WriteSurveyAnswers2File.R @@ -0,0 +1,35 @@ +WriteSurveyAnswers2File <- +function(fildir,survey_id,SID){ + try1=try(setwd(fildir),silent=TRUE) + if(class(try1) == "try-error"){ + warning(paste(fildir,"does not exist.")) + return(NULL) + } + filelist <- list.files(pattern = "\\.csv$") + if(length(filelist)==0){return(NULL)} + date_v=substr(filelist,1,10) + ## keep only the last survey of each day + IDkeep=length(date_v) + curdate = date_v[length(date_v)] + for(i in length(date_v):1){ + nexdate=date_v[i] + if(curdate!=nexdate){ + IDkeep=c(IDkeep,i) + curdate=nexdate + } + } + IDkeep=rev(IDkeep) + + outmat=c() + for(i in IDkeep){ + x=read.csv(filelist[i],fileEncoding="UTF-8") + outmat=rbind(outmat,c(date_v[i],x$answer)) + } + try1=try(colnames(outmat)<-c("Date",paste(as.character(x$question.text)," (",as.character(x$question.answer.options),")",sep="")),silent=TRUE) + if(class(try1) == "try-error"){ + warning(paste("Survey non-constant over time for ID:",SID)) + return(NULL) + } + write.table(outmat,paste("SurveyAnswers_",survey_id,"_",SID,".txt",sep=""),quote=F,col.names=T,row.names=F,sep="\t") + return("success") +} diff --git a/src/features/location_barnett/plot.flights.R b/src/features/location_barnett/plot.flights.R new file mode 100644 index 00000000..0daf2771 --- /dev/null +++ b/src/features/location_barnett/plot.flights.R @@ -0,0 +1,96 @@ +plot.flights <- +function(mat,xrang=NULL,yrang=NULL,diminch=6,add2plot=FALSE,addlegend=TRUE,outfile=NULL,title=NULL){ + #col24hour_v=c("#253494","#2c7fb8","#41b6c4","#7fcdbb","#c7e9b4","#ffffcc","#fed976","#feb24c","#fd8d3c","#fc4e2a","#e31a1c","#b10026","#7a0177","#ae017e","#dd3497","#f768a1","#fa9fb5","#fcc5c0","#edf8fb","#ccece6","#99d8c9","#66c2a4","#2ca25f","#006d2c") + col24hour_v=c(c("#08306b","#08519c","#2171b5","#4292c6","#6baed6","#9ecae1","#fcbba1","#fc9272","#fb6a4a","#ef3b2c","#cb181d","#99000d"),rev(c("#08306b","#08519c","#2171b5","#4292c6","#6baed6","#9ecae1","#fcbba1","#fc9272","#fb6a4a","#ef3b2c","#cb181d","#99000d"))) + if(nrow(mat)==0){ + return(NULL) + } + if(add2plot){outfile=NULL} + write2file=FALSE + if(!is.null(outfile)){ + write2file=TRUE + } + if(write2file){ + pdf(outfile,width=diminch*11/10,height=diminch) + } + if(is.null(xrang)){ + xrang=plotlimits(mat)$xrang + } + if(is.null(yrang)){ + yrang=plotlimits(mat)$yrang + } + if(xrang[2]-xrang[1]>yrang[2]-yrang[1]){ + dif=((xrang[2]-xrang[1])-(yrang[2]-yrang[1])) + yrang[2] = yrang[2]+dif/2 + yrang[1] = yrang[1]-dif/2 + }else{ + dif=((yrang[2]-yrang[1])-(xrang[2]-xrang[1])) + xrang[2] = xrang[2]+dif/2 + xrang[1] = xrang[1]-dif/2 + } + xrang[1]=xrang[1]-(xrang[2]-xrang[1])/10 + if(!add2plot){ + if(!is.null(title)){ + par(mai=c(0,0,.4,0)) + plot(NA,xlim=xrang + ,ylim=yrang + ,xaxt="n" + ,yaxt="n" + ,xlab="" + ,ylab="" + ,bty="n" + ,main=title) + }else{ + par(mai=c(0,0,0,0)) + plot(NA,xlim=xrang + ,ylim=yrang + ,xaxt="n" + ,yaxt="n" + ,xlab="" + ,ylab="" + ,bty="n" + ,main="") + } + xleg1=xrang[1] + xleg2=xleg1+(xrang[2]-xrang[1])/30 + yincr=(yrang[2]-yrang[1])/40 + legtext=c(" 6AM"," 12PM"," 6PM"," 12AM") + if(addlegend){ + for(i in 1:24){ + polygon(c(xleg1,xleg1,xleg2,xleg2),c(yrang[1]+(i-1)*yincr,yrang[1]+i*yincr,yrang[1]+i*yincr,yrang[1]+(i-1)*yincr),col=col24hour_v[i]) + if(i%%6==0){ + text(xleg2,yrang[1]+i*yincr,legtext[floor(i/6)],adj=0,cex=.5) + } + } + points(xleg1,yrang[1]+26*yincr,cex=2,pch=16) + text(xleg2,yrang[1]+26*yincr,">4 hrs",adj=0,cex=.5) + points(xleg1,yrang[1]+28*yincr,cex=1,pch=16) + text(xleg2,yrang[1]+28*yincr,"1 hr",adj=0,cex=.5) + points(xleg1,yrang[1]+30*yincr,cex=.5,pch=16) + text(xleg2,yrang[1]+30*yincr,"<30 mins",adj=0,cex=.5) + #text(xleg1,yrang[1]+32.3*yincr,"Pause\nDuration",adj=0,cex=.5) + legdist=10^floor(log10((yrang[2]-yrang[1])/5)) + lines(c(xrang[1],xrang[1]),c(yrang[2],yrang[2]-legdist)) + lines(c(xrang[1]-(xrang[2]-xrang[1])/120,xrang[1]+(xrang[2]-xrang[1])/120),c(yrang[2],yrang[2])) + lines(c(xrang[1]-(xrang[2]-xrang[1])/120,xrang[1]+(xrang[2]-xrang[1])/120),c(yrang[2]-legdist,yrang[2]-legdist)) + if(log10(legdist)<3){ + text(xrang[1]+(xrang[2]-xrang[1])/50,yrang[2]-legdist/2,paste(as.character(legdist),"m"),cex=.5,srt=270,adj=c(.5,0)) + }else{ + text(xrang[1]+(xrang[2]-xrang[1])/50,yrang[2]-legdist/2,paste(as.character(legdist/1000),"km"),cex=.5,srt=270,adj=c(.5,0)) + } + } + } + for(i in 1:nrow(mat)){ + hour=as.numeric(strsplit(strsplit(as.character(as.POSIXct(mean(c(mat[i,4],mat[i,7]),na.rm=T),origin='1970-01-01'))," ")[[1]][2],":")[[1]][1]) + if(mat[i,1]==1){ + lines(c(mat[i,2],mat[i,5]),c(mat[i,3],mat[i,6]),col=col24hour_v[hour+1]) + } + if(mat[i,1]==2){ + pwidth=max(.5,min(sqrt(2*(mat[i,7]-mat[i,4])/7200),2)) + points(mat[i,2],mat[i,3],pch=19,col=paste(col24hour_v[hour+1],"CC",sep=""),cex=pwidth) + } + } + if(write2file){ + dev.off() + } +} diff --git a/src/features/location_barnett/plotlimits.R b/src/features/location_barnett/plotlimits.R new file mode 100644 index 00000000..d15035f6 --- /dev/null +++ b/src/features/location_barnett/plotlimits.R @@ -0,0 +1,14 @@ +plotlimits <- +function(mat,defaultdist=100){ + xrang=range(c(mat[which(mat[,1]<=2),2],mat[which(mat[,1]<=1),5])) + yrang=range(c(mat[which(mat[,1]<=2),3],mat[which(mat[,1]<=1),6])) + if(xrang[2]==xrang[1]){ + xrang[2]=xrang[2]+defaultdist/2 + xrang[1]=xrang[1]-defaultdist/2 + } + if(yrang[2]==yrang[1]){ + yrang[2]=yrang[2]+defaultdist/2 + yrang[1]=yrang[1]-defaultdist/2 + } + return(list(xrang=xrang,yrang=yrang)) +} diff --git a/src/features/location_barnett_metrics.R b/src/features/location_barnett_metrics.R new file mode 100644 index 00000000..ff646c7e --- /dev/null +++ b/src/features/location_barnett_metrics.R @@ -0,0 +1,22 @@ +library(dplyr) + +# Load Ian Barnett's code. Taken from https://scholar.harvard.edu/ibarnett/software/gpsmobility +file.sources = list.files(c("src/features/location_barnett"), pattern="*.R$", full.names=TRUE, ignore.case=TRUE) +sapply(file.sources,source,.GlobalEnv) + +accuracy_limit <- snakemake@params[["accuracy_limit"]] +timezone <- snakemake@params[["timezone"]] + +location <- read.csv(snakemake@input[[1]], stringsAsFactors = F) %>% + select(timestamp, latitude = double_latitude, longitude = double_longitude, altitude = double_altitude, accuracy) + +if (nrow(location) > 0){ + features <- MobilityFeatures(location, ACCURACY_LIM = accuracy_limit, tz = timezone) + + # Copy index (dates) as a column + outmatrix = cbind(rownames(features$featavg), features$featavg) + colnames(outmatrix)=c("local_date",tolower(colnames(features$featavg))) + write.csv(outmatrix,snakemake@output[[1]], row.names = F) +} else { + write.csv(data.frame(),snakemake@output[[1]]) +} \ No newline at end of file