Merge branch 'master' of https://github.com/carissalow/rapids into docker

commit 29eb56155e
@@ -199,8 +199,9 @@ CONVERSATION:
     IOS: plugin_studentlife_audio
     DAY_SEGMENTS: *day_segments
     FEATURES: ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
-        "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","sumenergy",
-        "avgenergy","sdenergy","minenergy","maxenergy","silencesensedfraction","noisesensedfraction",
+        "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","noisesumenergy",
+        "noiseavgenergy","noisesdenergy","noiseminenergy","noisemaxenergy","voicesumenergy",
+        "voiceavgenergy","voicesdenergy","voiceminenergy","voicemaxenergy","silencesensedfraction","noisesensedfraction",
         "voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction",
         "unknownexpectedfraction","countconversation"]
     RECORDINGMINUTES: 1

@@ -842,11 +842,16 @@ avgconversationduration minutes Average duration of all conversa
 sdconversationduration    minutes    Standard Deviation of the duration of all conversations
 timefirstconversation     minutes    Minutes since midnight when the first conversation for a day segment was detected
 timelastconversation      minutes    Minutes since midnight when the last conversation for a day segment was detected
-sumenergy                 L2-norm    Sum of all energy values
-avgenergy                 L2-norm    Average of all energy values
-sdenergy                  L2-norm    Standard Deviation of all energy values
-minenergy                 L2-norm    Minimum of all energy values
-maxenergy                 L2-norm    Maximum of all energy values
+noisesumenergy            L2-norm    Sum of all energy values when inference is noise
+noiseavgenergy            L2-norm    Average of all energy values when inference is noise
+noisesdenergy             L2-norm    Standard Deviation of all energy values when inference is noise
+noiseminenergy            L2-norm    Minimum of all energy values when inference is noise
+noisemaxenergy            L2-norm    Maximum of all energy values when inference is noise
+voicesumenergy            L2-norm    Sum of all energy values when inference is voice
+voiceavgenergy            L2-norm    Average of all energy values when inference is voice
+voicesdenergy             L2-norm    Standard Deviation of all energy values when inference is voice
+voiceminenergy            L2-norm    Minimum of all energy values when inference is voice
+voicemaxenergy            L2-norm    Maximum of all energy values when inference is voice
 silencesensedfraction                Ratio between minutessilence and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown)
 noisesensedfraction                  Ratio between minutesnoise and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown)
 voicesensedfraction                  Ratio between minutesvoice and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown)

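The sensed-fraction rows above are plain ratios over the four per-day minute counts. As an illustrative sketch with invented numbers (not the pipeline's actual code):

.. code-block:: python

    import pandas as pd

    # Hypothetical per-day minute counts; columns mirror the feature names above.
    minutes = pd.DataFrame({"minutessilence": [30.0], "minutesnoise": [10.0],
                            "minutesvoice": [15.0], "minutesunknown": [5.0]})

    total_sensed = minutes.sum(axis=1)             # 60 sensed minutes in this example
    fractions = minutes.div(total_sensed, axis=0)  # silencesensedfraction = 30/60 = 0.5
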
@@ -8,6 +8,8 @@ RAPIDS
 
 **R**\ eproducible **A**\ nalysis **Pi**\ pline for **D**\ ata **S**\ treams
 
+Do you want to keep up to date with new functionality or have a question? Join the #rapids channel in AWARE Framework's slack_
+
 Contents:
 
 .. toctree::
@@ -37,3 +39,5 @@ Contents:
    develop/contributors
    develop/testing
    develop/test_cases
+
+.. _slack: http://awareframework.com:3000/

@@ -15,12 +15,12 @@ This is a quick guide for creating and running a simple pipeline to analysis an
 - If you are trying to connect to a local MySQL server from our docker container set your host according to this link_.
 - You can name your database any way you want, for example ``rapids_example``
 
-.. code-block::
+.. code-block:: bash
 
     [MY_GROUP]
     user=rapids
     password=rapids
-    host=127.0.0.1 # or use host.docker.internal from our docker container
+    host=127.0.0.1
     port=3306
     database=rapids_example
 
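For reference, the credentials block above is standard INI syntax read with Python's ``configparser``, exactly as the restore script later in this diff does. A minimal sketch (the file path is illustrative; RAPIDS passes the real one in via ``snakemake.input``):

.. code-block:: python

    import configparser

    config = configparser.ConfigParser()
    config.read(".env")  # illustrative path to the credentials file
    group = "MY_GROUP"

    host = config[group]["host"]         # "127.0.0.1"
    port = int(config[group]["port"])    # values are strings until cast
    database = config[group]["database"]
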
@@ -46,10 +46,11 @@ macOS (tested on Catalina 10.15)
    - ``brew install mysql``
    - ``brew services start mysql``
 
-#. Install R 4.0 and pandoc. If you have other instances of R, we recommend uninstalling them.
+#. Install R 4.0, pandoc and rmarkdown. If you have other instances of R, we recommend uninstalling them.
 
    - ``brew install r``
    - ``brew install pandoc``
+   - ``Rscript --vanilla -e 'install.packages("rmarkdown", repos="http://cran.us.r-project.org")'``
 
 #. Install miniconda:
 
@@ -102,9 +103,10 @@ Linux (tested on Ubuntu 18.04 & 20.04)
    - ``sudo apt update``
    - ``sudo apt install r-base``
 
-#. Install Pandoc
+#. Install Pandoc and rmarkdown
 
    - ``sudo apt install pandoc``
+   - ``Rscript --vanilla -e 'install.packages("rmarkdown", repos="http://cran.us.r-project.org")'``
 
 #. Install GIT
 
@@ -7,6 +7,8 @@ At the moment, mobile data can be collected using different sensing frameworks (
 
 We recommend reading Snakemake_ docs, but the main idea behind the pipeline is that every link in the analysis chain is a rule with an input and an output. Input and output are files, which can be manipulated using any programming language (although Snakemake_ has wrappers for Julia_, Python_, and R_ that can make development slightly more comfortable). Snakemake_ also allows the pipeline rules to be executed in parallel on multiple cores without any code changes. This can drastically reduce the time needed to complete an analysis.
 
+Do you want to keep up to date with new functionality or have a question? Join the #rapids channel in AWARE Framework's slack_
+
 Available features:
 
 - :ref:`accelerometer-sensor-doc`
@@ -39,3 +41,4 @@ We are updating these docs constantly, but if you think something needs clarific
 .. _Fitbit: https://www.fitbit.com/us/home
 .. _Python: https://www.python.org/
 .. _Julia: https://julialang.org/
+.. _slack: http://awareframework.com:3000/

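The rule/input/output idea in the paragraph above maps onto Snakemake syntax roughly as follows; the rule and file names here are illustrative, not taken from the RAPIDS Snakefile:

.. code-block:: python

    # Snakefile (illustrative): one link in the analysis chain.
    rule extract_features:
        input:
            "data/raw/{pid}/sensor.csv"          # produced by an upstream rule
        output:
            "data/processed/{pid}/features.csv"  # consumed by a downstream rule
        script:
            "src/features/extract_features.py"   # scripts can be Python, R, or Julia

Because rules only communicate through files, independent rules can run in parallel with ``snakemake --cores 4`` without code changes, which is the speedup the paragraph refers to.
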
@@ -1,9 +1,10 @@
 name: rapids202007
 channels:
-  - conda-forge
   - anaconda
+  - conda-forge
   - defaults
 dependencies:
+  - _py-xgboost-mutex=2.0
   - appdirs=1.4.3
   - arrow=0.15.2
   - asn1crypto=1.2.0
@@ -12,7 +13,7 @@ dependencies:
   - binaryornot=0.4.4
   - blas=1.0
   - bzip2=1.0.8
-  - ca-certificates=2020.6.24
+  - ca-certificates=2020.6.20
   - certifi=2020.6.20
   - cffi=1.13.1
   - chardet=3.0.4
@@ -25,16 +26,22 @@ dependencies:
   - gitdb2=2.0.6
   - gitpython=3.0.4
   - idna=2.8
+  - imbalanced-learn=0.6.2
   - importlib_metadata=0.23
   - intel-openmp=2019.4
   - jinja2=2.10.3
   - jinja2-time=0.2.0
   - joblib=0.16.0
   - jsonschema=3.1.1
+  - libblas=3.8.0
+  - libcblas=3.8.0
   - libcxx=9.0.0
   - libedit=3.1.20181209
   - libffi=3.2.1
   - libgfortran
+  - liblapack=3.8.0
+  - libxgboost=0.90
+  - lightgbm=2.3.0
   - llvm-openmp=10.0.0
   - markupsafe=1.1.1
   - mkl=2019.4
@@ -52,11 +59,13 @@ dependencies:
   - plotly=4.2.1
   - poyo=0.5.0
   - psutil=5.6.3
+  - py-xgboost=0.90
   - pycparser=2.19
   - pyopenssl=19.0.0
   - pysocks=1.7.1
   - python=3.7.3
   - python-dateutil=2.8.0
+  - python_abi=3.7
   - pytz=2019.3
   - pyyaml=5.1.2
   - readline=8.0
@@ -73,6 +82,7 @@ dependencies:
   - wheel=0.33.6
   - whichcraft=0.6.1
   - wrapt=1.11.2
+  - xgboost=0.90
   - xz=5.2.4
   - yaml=0.1.7
   - zipp=0.6.0

@@ -311,9 +311,3 @@ PARAMS_FOR_ANALYSIS:
         {"clf__learning_rate": [0.01, 0.1, 1], "clf__n_estimators": [5, 10, 100, 200], "clf__num_leaves": [5, 16, 31, 62]}
       LightGBM:
         {"clf__learning_rate": [0.01, 0.1, 1], "clf__n_estimators": [5, 10, 100, 200], "clf__num_leaves": [5, 16, 31, 62]}
-
-
-  # Target Settings:
-  # 1 => TARGETS_RATIO_THRESHOLD (ceiling) or more of available CESD scores were TARGETS_VALUE_THRESHOLD or higher; 0 => otherwise
-  TARGETS_RATIO_THRESHOLD: 0.5
-  TARGETS_VALUE_THRESHOLD: 16

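The ``clf__`` prefix in these grids follows scikit-learn's Pipeline convention: each key routes a hyperparameter to the pipeline step named ``clf``. A minimal sketch of how such a grid is consumed (the pipeline steps and scoring are illustrative, not RAPIDS' exact setup):

.. code-block:: python

    from lightgbm import LGBMClassifier
    from sklearn.model_selection import GridSearchCV
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    # "clf__num_leaves" becomes LGBMClassifier(num_leaves=...) inside the pipeline.
    pipeline = Pipeline([("scaler", StandardScaler()), ("clf", LGBMClassifier())])
    param_grid = {"clf__learning_rate": [0.01, 0.1, 1],
                  "clf__n_estimators": [5, 10, 100, 200],
                  "clf__num_leaves": [5, 16, 31, 62]}
    search = GridSearchCV(pipeline, param_grid, cv=5, scoring="roc_auc")
    # search.fit(features, targets)  # features/targets come from earlier rules

This is also why ``lightgbm``, ``libxgboost``, ``py-xgboost`` and ``xgboost`` appear in the environment.yml additions above.
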
@@ -17,9 +17,7 @@ rule targets:
         participant_info = "data/raw/{pid}/" + config["PARAMS_FOR_ANALYSIS"]["TARGET_TABLE"] + "_raw.csv"
     params:
         pid = "{pid}",
-        summarised = "{summarised}",
-        targets_ratio_threshold = config["PARAMS_FOR_ANALYSIS"]["TARGETS_RATIO_THRESHOLD"],
-        targets_value_threshold = config["PARAMS_FOR_ANALYSIS"]["TARGETS_VALUE_THRESHOLD"]
+        summarised = "{summarised}"
     output:
         "data/processed/{pid}/targets_{summarised}.csv"
     script:
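Values declared under ``params:`` surface inside the rule's script through the ``snakemake`` object that Snakemake injects at run time, which is how the parsing script at the end of this diff reads them. A minimal sketch (only meaningful when executed by Snakemake, not standalone):

.. code-block:: python

    # Inside the script a rule points to, Snakemake provides a global `snakemake` object.
    pid = snakemake.params["pid"]
    summarised = snakemake.params["summarised"]
    output_path = snakemake.output[0]  # wildcards like {pid} already resolved
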
@@ -1,5 +1,6 @@
 import pandas as pd
 import configparser
+import subprocess
 import os
 
 # read database credentials
@@ -8,14 +9,20 @@ config = configparser.ConfigParser()
 config.read(snakemake.input["db_credentials"])
 
 # bash command to create table and restore tables from sql file
-checkdb_cmd = "mysql -h " + config[group]["host"] + " -u " + config[group]["user"] + " -p" + config[group]["password"] + " -e \"use " + config[group]["database"] + "\""
+checkdb_cmd = "mysql -h " + config[group]["host"] + " -u " + config[group]["user"] + " -p" + config[group]["password"] + " -e use " + config[group]["database"]
 create_cmd = "mysql -h " + config[group]["host"] + " -u " + config[group]["user"] + " -p" + config[group]["password"] + " -e \"CREATE DATABASE IF NOT EXISTS " + config[group]["database"] + ";\""
-restore_cmd = "mysql -h " + config[group]["host"] + " -u " + config[group]["user"] + " -p" + config[group]["password"] + " " + config[group]["database"] + " < data/external/" + config[group]["database"] + ".sql"
+restore_cmd = "mysql -h " + config[group]["host"] + " -u " + config[group]["user"] + " -p" + config[group]["password"] + " " + config[group]["database"] + " < data/external/rapids_example.sql"
 
 try:
-    os.system(checkdb_cmd)
-except:
-    print(config[group]["database"] + " DB already exists.")
-else:
+    print("Checking if " + config[group]["database"] + " database exists")
+    subprocess.run(checkdb_cmd.split(), check = True, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
+except subprocess.CalledProcessError:
+    print(config[group]["database"] + " database does not exist")
+    print("Creating " + config[group]["database"] + " database")
     os.system(create_cmd)
+    print(config[group]["database"] + " database created")
+    print("Restoring rapids_example.sql")
     os.system(restore_cmd)
+    print("rapids_example.sql restored in " + config[group]["database"] + " database")
+else:
+    raise ValueError(config[group]["database"] + " DB already exists")

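The switch from ``os.system(checkdb_cmd)`` to ``subprocess.run(..., check=True)`` is what makes the existence check actually branch: ``os.system`` returns the exit status instead of raising, so the old ``except`` could never fire, while ``check=True`` converts a non-zero status into ``CalledProcessError``. A standalone sketch of the pattern (command is illustrative):

.. code-block:: python

    import subprocess

    try:
        # Raises CalledProcessError if mysql exits non-zero (e.g. unknown database).
        subprocess.run(["mysql", "-e", "use some_db"], check=True,
                       stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError:
        print("database missing: create and restore it")
    else:
        print("database already exists")

Note that ``checkdb_cmd.split()`` only works because the new command string drops the quoted ``"use db"`` argument; ``subprocess`` does not interpret shell quoting.
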
@@ -3,8 +3,9 @@ import pandas as pd
 def base_conversation_features(conversation_data, day_segment, requested_features,recordingMinutes,pausedMinutes,expectedMinutes):
     # name of the features this function can compute
     base_features_names = ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
-        "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","sumenergy",
-        "avgenergy","sdenergy","minenergy","maxenergy","silencesensedfraction","noisesensedfraction",
+        "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","noisesumenergy",
+        "noiseavgenergy","noisesdenergy","noiseminenergy","noisemaxenergy","voicesumenergy",
+        "voiceavgenergy","voicesdenergy","voiceminenergy","voicemaxenergy","silencesensedfraction","noisesensedfraction",
         "voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction",
         "unknownexpectedfraction","countconversation"]
 
@@ -96,21 +97,35 @@ def base_conversation_features(conversation_data, day_segment, requested_feature
         else:
             conversation_features["conversation_" + day_segment + "_timelastconversation"] = 0
 
-        if "sumenergy" in features_to_compute:
-            conversation_features["conversation_" + day_segment + "_sumenergy"] = conversation_data.groupby(["local_date"])["double_energy"].sum()
+        if "noisesumenergy" in features_to_compute:
+            conversation_features["conversation_" + day_segment + "_noisesumenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_date"])["double_energy"].sum()
 
-        if "avgenergy" in features_to_compute:
-            conversation_features["conversation_" + day_segment + "_avgenergy"] = conversation_data.groupby(["local_date"])["double_energy"].mean()
+        if "noiseavgenergy" in features_to_compute:
+            conversation_features["conversation_" + day_segment + "_noiseavgenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_date"])["double_energy"].mean()
 
-        if "sdenergy" in features_to_compute:
-            conversation_features["conversation_" + day_segment + "_sdenergy"] = conversation_data.groupby(["local_date"])["double_energy"].std()
+        if "noisesdenergy" in features_to_compute:
+            conversation_features["conversation_" + day_segment + "_noisesdenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_date"])["double_energy"].std()
 
-        if "minenergy" in features_to_compute:
-            conversation_features["conversation_" + day_segment + "_minenergy"] = conversation_data.groupby(["local_date"])["double_energy"].min()
+        if "noiseminenergy" in features_to_compute:
+            conversation_features["conversation_" + day_segment + "_noiseminenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_date"])["double_energy"].min()
 
-        if "maxenergy" in features_to_compute:
-            conversation_features["conversation_" + day_segment + "_maxenergy"] = conversation_data.groupby(["local_date"])["double_energy"].max()
+        if "noisemaxenergy" in features_to_compute:
+            conversation_features["conversation_" + day_segment + "_noisemaxenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_date"])["double_energy"].max()
 
+        if "voicesumenergy" in features_to_compute:
+            conversation_features["conversation_" + day_segment + "_voicesumenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_date"])["double_energy"].sum()
+
+        if "voiceavgenergy" in features_to_compute:
+            conversation_features["conversation_" + day_segment + "_voiceavgenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_date"])["double_energy"].mean()
+
+        if "voicesdenergy" in features_to_compute:
+            conversation_features["conversation_" + day_segment + "_voicesdenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_date"])["double_energy"].std()
+
+        if "voiceminenergy" in features_to_compute:
+            conversation_features["conversation_" + day_segment + "_voiceminenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_date"])["double_energy"].min()
+
+        if "voicemaxenergy" in features_to_compute:
+            conversation_features["conversation_" + day_segment + "_voicemaxenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_date"])["double_energy"].max()
 
     conversation_features = conversation_features.reset_index()

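The pattern repeated throughout this hunk: keep only the rows whose ``inference`` code matches the category named in the feature (1 = noise, 2 = voice, per the renames above), then aggregate ``double_energy`` per day. A condensed, loop-based sketch of the same logic (illustrative, not the literal RAPIDS code):

.. code-block:: python

    import pandas as pd

    def energy_features(conversation_data: pd.DataFrame, day_segment: str) -> pd.DataFrame:
        features = {}
        # Inference codes follow the feature renames above: 1 = noise, 2 = voice.
        for prefix, code in [("noise", 1), ("voice", 2)]:
            energy = (conversation_data[conversation_data["inference"] == code]
                      .groupby("local_date")["double_energy"])
            for name, agg in [("sum", "sum"), ("avg", "mean"), ("sd", "std"),
                              ("min", "min"), ("max", "max")]:
                features["conversation_" + day_segment + "_" + prefix + name + "energy"] = energy.agg(agg)
        return pd.DataFrame(features)
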
@@ -49,7 +49,10 @@ def getMetrics(pred_y, pred_y_prob, true_y):
     metrics = {}
     # metrics for all categories
     metrics["accuracy"] = accuracy_score(true_y, pred_y)
-    metrics["auc"] = roc_auc_score(true_y, pred_y_prob)
+    try:
+        metrics["auc"] = roc_auc_score(true_y, pred_y_prob)
+    except:
+        metrics["auc"] = None
     metrics["kappa"] = cohen_kappa_score(true_y, pred_y)
     # metrics for label 0
     metrics["precision0"] = precision_score(true_y, pred_y, average=None, labels=[0,1], zero_division=0)[0]
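Wrapping ``roc_auc_score`` like this is the usual guard for folds whose test split contains a single class, where scikit-learn raises ``ValueError`` because AUC is undefined. A quick illustration (values invented):

.. code-block:: python

    from sklearn.metrics import roc_auc_score

    try:
        # Only one class in y_true: AUC is undefined and sklearn raises ValueError.
        auc = roc_auc_score([1, 1, 1], [0.2, 0.7, 0.9])
    except ValueError:
        auc = None  # recorded as missing, mirroring the patched getMetrics
    print(auc)  # None
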
@@ -3,19 +3,10 @@ import numpy as np
 
 pid = snakemake.params["pid"]
 summarised = snakemake.params["summarised"]
-targets_ratio_threshold = snakemake.params["targets_ratio_threshold"]
-targets_value_threshold = snakemake.params["targets_value_threshold"]
 participant_info = pd.read_csv(snakemake.input["participant_info"])
 
 if summarised == "summarised":
-    targets = pd.DataFrame(columns=["pid", "target"])
-
-    if not participant_info.empty:
-        cesds = participant_info.loc[0, ["preop_cesd_total", "inpatient_cesd_total", "postop_cesd_total", "3month_cesd_total"]]
-        # targets: 1 => 50% (ceiling) or more of available CESD scores were 16 or higher; 0 => otherwise
-        num_threshold = int((cesds.count() + 1) * targets_ratio_threshold)
-        target = 1 if cesds.apply(lambda x : 1 if x >= targets_value_threshold else 0).sum() >= num_threshold else 0
-        targets.loc[0, :] = [pid, target]
+    raise ValueError("Do not support summarised features for example dataset.")
 
 elif summarised == "notsummarised":
     targets = participant_info[["local_date", "target"]]
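For reference, the removed target rule is easiest to see with numbers. With ``TARGETS_RATIO_THRESHOLD: 0.5`` and ``TARGETS_VALUE_THRESHOLD: 16``, a participant with three available CESD scores needs ``int((3 + 1) * 0.5) = 2`` of them at 16 or above to be labeled 1; the ``+ 1`` inside ``int(...)`` implements the ceiling of the 50% cutoff. A standalone sketch of that arithmetic (scores invented):

.. code-block:: python

    import pandas as pd

    cesds = pd.Series([20, 10, 18, None])  # one missing score, so cesds.count() == 3
    ratio, value = 0.5, 16
    num_threshold = int((cesds.count() + 1) * ratio)  # int(4 * 0.5) = 2
    target = 1 if (cesds >= value).sum() >= num_threshold else 0
    print(num_threshold, target)  # 2 1  (20 and 18 are >= 16)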