import numpy as np
import pandas as pd


def convertInputInto2d(input, windowLength, overlap=0):
    """Split a one-dimensional sequence into rows of ``windowLength`` samples.

    Consecutive rows start ``windowLength - overlap`` samples apart. Every row
    that runs past the end of the data is padded with zeros, so all rows have
    exactly ``windowLength`` entries. An empty input gives an empty result.

    :param input: the one dimensional array (a list, or an object providing
        ``tolist()`` such as a numpy array, or any other iterable)
    :param windowLength: window length, expressed in number of samples
    :param overlap: number of samples shared by consecutive windows; has to be
        smaller than window length
    :return: 2D matrix; a list of lists when ``input`` was a list, otherwise a
        numpy array
    :raises ValueError: if ``overlap`` is not smaller than ``windowLength``
    """
    if windowLength <= overlap:
        raise ValueError("Overlap has to be smaller than window length")

    inputWasList = isinstance(input, list)
    if inputWasList:
        samples = input
    elif hasattr(input, "tolist"):
        samples = input.tolist()
    else:
        samples = list(input)

    step = windowLength - overlap
    out = []
    for start in range(0, len(samples), step):
        # Slicing copies, so padding below never mutates the caller's data.
        window = samples[start:start + windowLength]
        # With overlap > 0 several trailing windows can be short, not only the
        # last one; pad each of them so the result stays rectangular.
        window.extend([0] * (windowLength - len(window)))
        out.append(window)

    return out if inputWasList else np.asarray(out)


def convertInputInto2dTime(input, timeThreshold):
    """Group the samples of a dataframe into rows by elapsed time.

    Samples are appended to the current row until the time elapsed since the
    start of that row reaches ``timeThreshold``; the sample that reaches the
    threshold becomes the first element of the next row.

    :param input: the pandas dataframe with columns "time" and "data"
    :param timeThreshold: the threshold with which the row width is defined
    :return: tuple ``(outData, outTime)`` of two lists of lists holding the
        "data" values and the matching "time" values, row by row. For an empty
        dataframe both are ``[[]]``.
    """
    outData = [[]]
    outTime = [[]]
    startTime = 0

    for _, row in input.iterrows():
        t = row["time"]
        data = row["data"]

        if t - startTime >= timeThreshold:
            startTime = t
            # startTime begins at 0, so with absolute timestamps the very
            # first sample already crosses the threshold. In that case only
            # re-anchor the window; opening a new row would leave an empty
            # leading row in the output.
            if outData[-1]:
                outData.append([])
                outTime.append([])

        outData[-1].append(data)
        outTime[-1].append(t)

    return outData, outTime


def convert1DEmpaticaToArray(pathToEmpaticaCsvFile):
    """Convert 1D empatica file to array.

    The file is read as a single column without a header: the first row is
    taken as the starting timestamp, the second row as the sample rate and all
    remaining rows as the data.

    :param pathToEmpaticaCsvFile: path to Empatica csv file
    :return: array of data, starting timestamp of data, sample rate of data
    """
    column = pd.read_csv(pathToEmpaticaCsvFile, names=["name"])["name"]
    startTimeStamp = column.iloc[0]
    sampleRate = column.iloc[1]
    # to_numpy() replaces Series.ravel(), which newer pandas versions deprecate.
    data = column.iloc[2:].to_numpy()
    return data, startTimeStamp, sampleRate


def convert3DEmpaticaToArray(pathToEmpaticaCsvFile):
    """Convert 3D empatica file to array.

    The file is read as three columns (x, y, z) without a header: the first
    row of the x column is taken as the starting timestamp, the second row of
    the x column as the sample rate and all remaining rows as the data.

    :param pathToEmpaticaCsvFile: path to Empatica csv file
    :return: array of data with one row per axis (x, y, z), starting timestamp
        of data, sample rate of data
    """
    df = pd.read_csv(pathToEmpaticaCsvFile, names=["x", "y", "z"])
    startTimeStamp = df["x"].iloc[0]
    sampleRate = df["x"].iloc[1]
    # to_numpy() replaces Series.ravel(), which newer pandas versions deprecate.
    samples = df.iloc[2:]
    data = np.vstack((
        samples["x"].to_numpy(),
        samples["y"].to_numpy(),
        samples["z"].to_numpy(),
    ))
    return data, startTimeStamp, sampleRate


def checkForFeature(featureName, featureNames):
    """Tell whether a feature is selected.

    :param featureName: name of the feature to look for
    :param featureNames: collection of selected feature names, or None
    :return: True if ``featureNames`` is None or contains ``featureName``
    """
    return featureNames is None or featureName in featureNames


# Feature-name lists, one per feature group (the variable names suggest the
# signal type each group belongs to).
frequencyFeatureNames = [
    "fqHighestPeakFreqs",
    "fqHighestPeaks",
    "fqEnergyFeat",
    "fqEntropyFeat",
    "fqHistogramBins",
    "fqAbsMean",
    "fqSkewness",
    "fqKurtosis",
    "fqInterquart",
]

genericFeatureNames = [
    "autocorrelations",
    "countAboveMean",
    "countBelowMean",
    "maximum",
    "minimum",
    "meanAbsChange",
    "longestStrikeAboveMean",
    "longestStrikeBelowMean",
    "stdDev",
    "median",
    "meanChange",
    "numberOfZeroCrossings",
    "absEnergy",
    "linearTrendSlope",
    "ratioBeyondRSigma",
    "binnedEntropy",
    "numOfPeaksAutocorr",
    "numberOfZeroCrossingsAutocorr",
    "areaAutocorr",
    "calcMeanCrossingRateAutocorr",
    "countAboveMeanAutocorr",
    "sumPer",
    "sumSquared",
    "squareSumOfComponent",
    "sumOfSquareComponents",
]

accelerometerFeatureNames = [
    "meanLow",
    "areaLow",
    "totalAbsoluteAreaBand",
    "totalMagnitudeBand",
    "entropyBand",
    "skewnessBand",
    "kurtosisBand",
    "postureDistanceLow",
    "absoluteMeanBand",
    "absoluteAreaBand",
    "quartilesBand",
    "interQuartileRangeBand",
    "varianceBand",
    "coefficientOfVariationBand",
    "amplitudeBand",
    "totalEnergyBand",
    "dominantFrequencyEnergyBand",
    "meanCrossingRateBand",
    "correlationBand",
    "quartilesMagnitudesBand",
    "interQuartileRangeMagnitudesBand",
    "areaUnderAccelerationMagnitude",
    "peaksDataLow",
    "sumPerComponentBand",
    "velocityBand",
    "meanKineticEnergyBand",
    "totalKineticEnergyBand",
    "squareSumOfComponent",
    "sumOfSquareComponents",
    "averageVectorLength",
    "averageVectorLengthPower",
    "rollAvgLow",
    "pitchAvgLow",
    "rollStdDevLow",
    "pitchStdDevLow",
    "rollMotionAmountLow",
    "rollMotionRegularityLow",
    "manipulationLow",
    "rollPeaks",
    "pitchPeaks",
    "rollPitchCorrelation",
]

gyroscopeFeatureNames = [
    "meanLow",
    "areaLow",
    "totalAbsoluteAreaLow",
    "totalMagnitudeLow",
    "entropyLow",
    "skewnessLow",
    "kurtosisLow",
    "quartilesLow",
    "interQuartileRangeLow",
    "varianceLow",
    "coefficientOfVariationLow",
    "amplitudeLow",
    "totalEnergyLow",
    "dominantFrequencyEnergyLow",
    "meanCrossingRateLow",
    "correlationLow",
    "quartilesMagnitudeLow",
    "interQuartileRangeMagnitudesLow",
    "areaUnderMagnitude",
    "peaksCountLow",
    "averageVectorLengthLow",
    "averageVectorLengthPowerLow",
]

gsrFeatureNames = [
    'mean',
    'std',
    'q25',
    'q75',
    'qd',
    'deriv',
    'power',
    'numPeaks',
    'ratePeaks',
    'powerPeaks',
    'sumPosDeriv',
    'propPosDeriv',
    'derivTonic',
    'sigTonicDifference',
    'freqFeats',
    'maxPeakAmplitudeChangeBefore',
    'maxPeakAmplitudeChangeAfter',
    'avgPeakAmplitudeChangeBefore',
    'avgPeakAmplitudeChangeAfter',
    'avgPeakChangeRatio',
    'maxPeakIncreaseTime',
    'maxPeakDecreaseTime',
    'maxPeakDuration',
    'maxPeakChangeRatio',
    'avgPeakIncreaseTime',
    'avgPeakDecreaseTime',
    'avgPeakDuration',
    'maxPeakResponseSlopeBefore',
    'maxPeakResponseSlopeAfter',
    'signalOverallChange',
    'changeDuration',
    'changeRate',
    'significantIncrease',
    'significantDecrease',
]

hrvFeatureNames = [
    'meanHr',
    'ibi',
    'sdnn',
    'sdsd',
    'rmssd',
    'pnn20',
    'pnn50',
    'sd',
    'sd2',
    'sd1/sd2',
    'numRR',
]