Add merge module for demographic features and target
parent
eac721de84
commit
5696b4f6d4
11
Snakefile
11
Snakefile
|
@ -72,20 +72,23 @@ rule all:
|
||||||
pid=config["PIDS"],
|
pid=config["PIDS"],
|
||||||
segment = config["WIFI"]["DAY_SEGMENTS"]),
|
segment = config["WIFI"]["DAY_SEGMENTS"]),
|
||||||
# Models
|
# Models
|
||||||
expand("data/processed/{pid}/features_for_individual_model/{source}_{day_segment}_original.csv",
|
expand("data/processed/{pid}/data_for_individual_model/{source}_{day_segment}_original.csv",
|
||||||
pid = config["PIDS"],
|
pid = config["PIDS"],
|
||||||
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
||||||
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
|
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
|
||||||
expand("data/processed/features_for_population_model/{source}_{day_segment}_original.csv",
|
expand("data/processed/data_for_population_model/{source}_{day_segment}_original.csv",
|
||||||
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
||||||
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
|
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
|
||||||
expand("data/processed/{pid}/features_for_individual_model/{source}_{day_segment}_clean.csv",
|
expand("data/processed/{pid}/data_for_individual_model/{source}_{day_segment}_clean.csv",
|
||||||
pid = config["PIDS"],
|
pid = config["PIDS"],
|
||||||
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
||||||
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
|
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
|
||||||
expand("data/processed/features_for_population_model/{source}_{day_segment}_clean.csv",
|
expand("data/processed/data_for_population_model/{source}_{day_segment}_clean.csv",
|
||||||
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
||||||
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
|
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
|
||||||
|
expand("data/processed/data_for_population_model/demographic_features.csv"),
|
||||||
|
expand("data/processed/data_for_population_model/targets_{summarised}.csv",
|
||||||
|
summarised = config["PARAMS_FOR_ANALYSIS"]["SUMMARISED"]),
|
||||||
# Visualisations
|
# Visualisations
|
||||||
expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]),
|
expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]),
|
||||||
expand("reports/figures/{pid}/compliance_heatmap.html", pid=config["PIDS"]),
|
expand("reports/figures/{pid}/compliance_heatmap.html", pid=config["PIDS"]),
|
||||||
|
|
|
@ -156,6 +156,8 @@ PARAMS_FOR_ANALYSIS:
|
||||||
ROWS_NAN_THRESHOLD: 0.5
|
ROWS_NAN_THRESHOLD: 0.5
|
||||||
PARTICIPANTS_DAY_THRESHOLD: 7
|
PARTICIPANTS_DAY_THRESHOLD: 7
|
||||||
|
|
||||||
|
SUMMARISED: ["summarised"] # "summarised" or "notsummarised"
|
||||||
|
|
||||||
# Target Settings:
|
# Target Settings:
|
||||||
# 1 => TARGETS_RATIO_THRESHOLD (ceiling) or more of available CESD scores were TARGETS_VALUE_THRESHOLD or higher; 0 => otherwise
|
# 1 => TARGETS_RATIO_THRESHOLD (ceiling) or more of available CESD scores were TARGETS_VALUE_THRESHOLD or higher; 0 => otherwise
|
||||||
TARGETS_RATIO_THRESHOLD: 0.5
|
TARGETS_RATIO_THRESHOLD: 0.5
|
||||||
|
|
|
@ -29,15 +29,31 @@ rule merge_features_for_individual_model:
|
||||||
params:
|
params:
|
||||||
source = "{source}"
|
source = "{source}"
|
||||||
output:
|
output:
|
||||||
"data/processed/{pid}/features_for_individual_model/{source}_{day_segment}_original.csv"
|
"data/processed/{pid}/data_for_individual_model/{source}_{day_segment}_original.csv"
|
||||||
script:
|
script:
|
||||||
"../src/models/merge_features_for_individual_model.R"
|
"../src/models/merge_features_for_individual_model.R"
|
||||||
|
|
||||||
|
# Population-level merge of sensor features: takes the per-participant
# "{source}_{day_segment}_original.csv" file of every pid listed in
# config["PIDS"] and hands them to the R script below, which is expected to
# write the single population file declared under `output`.
# The doubled braces ({{source}}, {{day_segment}}) keep those two names as
# rule wildcards while expand() fills in only {pid}.
rule merge_features_for_population_model:
|
||||||
|
input:
|
||||||
|
feature_files = expand("data/processed/{pid}/data_for_individual_model/{{source}}_{{day_segment}}_original.csv", pid=config["PIDS"])
|
||||||
|
output:
|
||||||
|
"data/processed/data_for_population_model/{source}_{day_segment}_original.csv"
|
||||||
|
script:
|
||||||
|
"../src/models/merge_features_for_population_model.R"
|
||||||
|
|
||||||
|
# Population-level merge of demographic features: collects the
# "demographic_features.csv" file of every pid listed in config["PIDS"] and
# passes them (as the named input `data_files`) to the generic Python merge
# script, which writes the single population file declared under `output`.
rule merge_demographicfeatures_for_population_model:
|
||||||
|
input:
|
||||||
|
data_files = expand("data/processed/{pid}/demographic_features.csv", pid=config["PIDS"])
|
||||||
|
output:
|
||||||
|
"data/processed/data_for_population_model/demographic_features.csv"
|
||||||
|
script:
|
||||||
|
"../src/models/merge_data_for_population_model.py"
|
||||||
|
|
||||||
rule merge_targets_for_population_model:
|
rule merge_targets_for_population_model:
|
||||||
input:
|
input:
|
||||||
data_files = expand("data/processed/{pid}/targets_{{summarised}}.csv", pid=config["PIDS"])
|
data_files = expand("data/processed/{pid}/targets_{{summarised}}.csv", pid=config["PIDS"])
|
||||||
output:
|
output:
|
||||||
"data/processed/features_for_population_model/targets_{summarised}.csv"
|
"data/processed/data_for_population_model/targets_{summarised}.csv"
|
||||||
script:
|
script:
|
||||||
"../src/models/merge_data_for_population_model.py"
|
"../src/models/merge_data_for_population_model.py"
|
||||||
|
|
||||||
|
@ -50,7 +66,7 @@ rule clean_features_for_individual_model:
|
||||||
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
|
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
|
||||||
participants_day_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANTS_DAY_THRESHOLD"]
|
participants_day_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANTS_DAY_THRESHOLD"]
|
||||||
output:
|
output:
|
||||||
"data/processed/{pid}/features_for_individual_model/{source}_{day_segment}_clean.csv"
|
"data/processed/{pid}/data_for_individual_model/{source}_{day_segment}_clean.csv"
|
||||||
script:
|
script:
|
||||||
"../src/models/clean_features_for_model.R"
|
"../src/models/clean_features_for_model.R"
|
||||||
|
|
||||||
|
@ -63,7 +79,7 @@ rule clean_features_for_population_model:
|
||||||
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
|
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
|
||||||
participants_day_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANTS_DAY_THRESHOLD"]
|
participants_day_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANTS_DAY_THRESHOLD"]
|
||||||
output:
|
output:
|
||||||
"data/processed/features_for_population_model/{source}_{day_segment}_clean.csv"
|
"data/processed/data_for_population_model/{source}_{day_segment}_clean.csv"
|
||||||
script:
|
script:
|
||||||
"../src/models/clean_features_for_model.R"
|
"../src/models/clean_features_for_model.R"
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
"""Stack per-participant CSV files into one population-level CSV.

Run by Snakemake through a ``script:`` directive, so the ``snakemake``
object is injected at runtime:

* ``snakemake.input["data_files"]`` -- paths of the per-participant CSV files.
* ``snakemake.output[0]`` -- path of the merged CSV to write.

Rows are appended in the order the input files are listed; the row index is
not written to the output file.
"""
import pandas as pd

# Read every participant file first and concatenate once. Growing a frame
# with pd.concat inside the loop re-copies all accumulated rows on each
# iteration, and seeding it with an empty DataFrame is something recent
# pandas versions warn about because it can influence the result dtypes.
participant_frames = [
    pd.read_csv(data_file) for data_file in snakemake.input["data_files"]
]

if participant_frames:
    data_all_participants = pd.concat(participant_frames, axis=0)
else:
    # pd.concat raises on an empty list; keep the previous behaviour of
    # writing an empty frame when no input files are given.
    data_all_participants = pd.DataFrame()

data_all_participants.to_csv(snakemake.output[0], index=False)
|
Loading…
Reference in New Issue