Add merge module for demographic features and target
parent
eac721de84
commit
5696b4f6d4
11
Snakefile
11
Snakefile
|
@ -72,20 +72,23 @@ rule all:
|
|||
pid=config["PIDS"],
|
||||
segment = config["WIFI"]["DAY_SEGMENTS"]),
|
||||
# Models
|
||||
expand("data/processed/{pid}/features_for_individual_model/{source}_{day_segment}_original.csv",
|
||||
expand("data/processed/{pid}/data_for_individual_model/{source}_{day_segment}_original.csv",
|
||||
pid = config["PIDS"],
|
||||
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
||||
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
|
||||
expand("data/processed/features_for_population_model/{source}_{day_segment}_original.csv",
|
||||
expand("data/processed/data_for_population_model/{source}_{day_segment}_original.csv",
|
||||
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
||||
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
|
||||
expand("data/processed/{pid}/features_for_individual_model/{source}_{day_segment}_clean.csv",
|
||||
expand("data/processed/{pid}/data_for_individual_model/{source}_{day_segment}_clean.csv",
|
||||
pid = config["PIDS"],
|
||||
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
||||
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
|
||||
expand("data/processed/features_for_population_model/{source}_{day_segment}_clean.csv",
|
||||
expand("data/processed/data_for_population_model/{source}_{day_segment}_clean.csv",
|
||||
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
||||
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
|
||||
expand("data/processed/data_for_population_model/demographic_features.csv"),
|
||||
expand("data/processed/data_for_population_model/targets_{summarised}.csv",
|
||||
summarised = config["PARAMS_FOR_ANALYSIS"]["SUMMARISED"]),
|
||||
# Visualizations
|
||||
expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]),
|
||||
expand("reports/figures/{pid}/compliance_heatmap.html", pid=config["PIDS"]),
|
||||
|
|
|
@ -156,6 +156,8 @@ PARAMS_FOR_ANALYSIS:
|
|||
ROWS_NAN_THRESHOLD: 0.5
|
||||
PARTICIPANTS_DAY_THRESHOLD: 7
|
||||
|
||||
SUMMARISED: ["summarised"] # "summarised" or "notsummarised"
|
||||
|
||||
# Target Settings:
|
||||
# 1 => TARGETS_RATIO_THRESHOLD (ceiling) or more of available CESD scores were TARGETS_VALUE_THRESHOLD or higher; 0 => otherwise
|
||||
TARGETS_RATIO_THRESHOLD: 0.5
|
||||
|
|
|
@ -29,15 +29,31 @@ rule merge_features_for_individual_model:
|
|||
params:
|
||||
source = "{source}"
|
||||
output:
|
||||
"data/processed/{pid}/features_for_individual_model/{source}_{day_segment}_original.csv"
|
||||
"data/processed/{pid}/data_for_individual_model/{source}_{day_segment}_original.csv"
|
||||
script:
|
||||
"../src/models/merge_features_for_individual_model.R"
|
||||
|
||||
# Merge the per-participant "original" feature files into one population-level file.
# Input `feature_files`: one data_for_individual_model CSV per pid in config["PIDS"];
# the doubled braces keep {source} and {day_segment} as rule wildcards, so only
# pid is expanded here.
# Output: data/processed/data_for_population_model/{source}_{day_segment}_original.csv,
# produced by the R script named under `script:`.
rule merge_features_for_population_model:
|
||||
input:
|
||||
feature_files = expand("data/processed/{pid}/data_for_individual_model/{{source}}_{{day_segment}}_original.csv", pid=config["PIDS"])
|
||||
output:
|
||||
"data/processed/data_for_population_model/{source}_{day_segment}_original.csv"
|
||||
script:
|
||||
"../src/models/merge_features_for_population_model.R"
|
||||
|
||||
# Merge every participant's demographic features into one population-level file.
# Input `data_files`: data/processed/{pid}/demographic_features.csv for each pid
# in config["PIDS"]. The rule has no wildcards, so it yields a single output file.
# Output: data/processed/data_for_population_model/demographic_features.csv,
# produced by the Python script named under `script:` (the same script the
# targets merge rule uses).
rule merge_demographicfeatures_for_population_model:
|
||||
input:
|
||||
data_files = expand("data/processed/{pid}/demographic_features.csv", pid=config["PIDS"])
|
||||
output:
|
||||
"data/processed/data_for_population_model/demographic_features.csv"
|
||||
script:
|
||||
"../src/models/merge_data_for_population_model.py"
|
||||
|
||||
# Merge every participant's target file into one population-level file.
# Input `data_files`: data/processed/{pid}/targets_{summarised}.csv for each pid
# in config["PIDS"]; the doubled braces keep {summarised} as a rule wildcard.
# Output: the population-level targets_{summarised}.csv, produced by the Python
# script named under `script:`.
# NOTE(review): two output paths are listed below (features_for_population_model/
# and data_for_population_model/). `rule all` requests the data_for_population_model
# path, so the features_for_population_model line is presumably a stale
# pre-rename line -- confirm and drop it if so.
rule merge_targets_for_population_model:
|
||||
input:
|
||||
data_files = expand("data/processed/{pid}/targets_{{summarised}}.csv", pid=config["PIDS"])
|
||||
output:
|
||||
"data/processed/features_for_population_model/targets_{summarised}.csv"
|
||||
"data/processed/data_for_population_model/targets_{summarised}.csv"
|
||||
script:
|
||||
"../src/models/merge_data_for_population_model.py"
|
||||
|
||||
|
@ -50,7 +66,7 @@ rule clean_features_for_individual_model:
|
|||
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
|
||||
participants_day_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANTS_DAY_THRESHOLD"]
|
||||
output:
|
||||
"data/processed/{pid}/features_for_individual_model/{source}_{day_segment}_clean.csv"
|
||||
"data/processed/{pid}/data_for_individual_model/{source}_{day_segment}_clean.csv"
|
||||
script:
|
||||
"../src/models/clean_features_for_model.R"
|
||||
|
||||
|
@ -63,7 +79,7 @@ rule clean_features_for_population_model:
|
|||
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
|
||||
participants_day_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANTS_DAY_THRESHOLD"]
|
||||
output:
|
||||
"data/processed/features_for_population_model/{source}_{day_segment}_clean.csv"
|
||||
"data/processed/data_for_population_model/{source}_{day_segment}_clean.csv"
|
||||
script:
|
||||
"../src/models/clean_features_for_model.R"
|
||||
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
"""Concatenate per-participant CSV files into one population-level CSV.

Run by Snakemake through a ``script:`` directive, which injects the global
``snakemake`` object. Reads every path in ``snakemake.input["data_files"]``
and writes the row-wise concatenation to ``snakemake.output[0]`` without the
index column.
"""
import pandas as pd

# Read everything first and concatenate once: calling pd.concat inside the
# loop re-copies all previously accumulated rows on every iteration.
participant_frames = [
    pd.read_csv(data_file) for data_file in snakemake.input["data_files"]
]

# pd.concat raises ValueError on an empty list; fall back to an empty frame so
# the declared output file is still written when there are no input files.
if participant_frames:
    data_all_participants = pd.concat(participant_frames, axis=0)
else:
    data_all_participants = pd.DataFrame()

data_all_participants.to_csv(snakemake.output[0], index=False)
|
Loading…
Reference in New Issue