diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7bbe0b9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,113 @@
+# CPC 预后分类（影像组学 + NSE）
+
+基于 CT 影像组学特征与 NSE 实验室指标，对心脏骤停患者 CPC 评分进行二分类（CPC 1–2 vs 3–5），支持多种机器学习模型与 LASSO 特征筛选。
+
+## 仓库结构
+
+```
+code/cls/
+├── config/           # 实验默认配置（标签分箱、列名等）
+├── models/           # 分类器工厂（统一超参入口）
+├── pipeline/         # 数据准备 + 训练评估主流程
+├── utils/            # 工具函数（LASSO、指标、结果保存）
+│   ├── pre4data.py
+│   ├── util.py
+│   └── tools/        # 数据对齐、NSE 处理等脚本
+├── classify_lab1.py  # 固定 train/test + 可选 NSE（lab1）
+├── classify_single.py # 单文件划分 train/test
+├── classify_kfold.py # 5 折交叉验证（无 NSE）
+├── classify_kfold_nse.py # 5 折 + NSE
+├── run_experiment.py # 统一 CLI（任意 profile）
+├── config/experiments.yaml # 路径与实验配置（外置）
+├── feature_process.py # 特征表合并与清洗（数据预处理）
+├── new_data_process.py
+├── train_test.py     # 划分并导出 train/test Excel
+└── requirements.txt
+```
+
+## 环境
+
+```bash
+cd code/cls
+pip install -r requirements.txt
+```
+
+## 配置路径（YAML + 环境变量）
+
+所有实验参数集中在 `code/cls/config/experiments.yaml`。路径使用占位符，无需改 Python 代码：
+
+| 变量 | 含义 | 默认值 |
+|------|------|--------|
+| `CLS_DATA_ROOT` | Excel 数据目录 | `./data` |
+| `CLS_RESULTS_ROOT` | 结果输出根目录 | `./results` |
+| `CLS_EXPERIMENT` | 默认 profile | 随入口脚本而定（如 `classify_lab1.py` 为 `lab1`） |
+
+YAML 内可使用 `{data_root}/文件名.xlsx`，或 `${CLS_DATA_ROOT:-./data}/文件名.xlsx`。
+
+**Windows 示例**
+
+```bat
+set CLS_DATA_ROOT=D:\thrid_beijing_hospital_data
+set CLS_RESULTS_ROOT=D:\thrid_beijing_hospital_data
+python classify_lab1.py
+```
+
+**Linux / macOS 示例**
+
+```bash
+export CLS_DATA_ROOT=/path/to/your/data
+export CLS_RESULTS_ROOT=/path/to/your/results
+python run_experiment.py -p kfold_nse
+```
+
+查看可用 profile：
+
+```bash
+python run_experiment.py --list-profiles
+```
+
+| Profile | 说明 |
+|---------|------|
+| `lab1` | 固定 train/test，含 NSE |
+| `single` | 单表 8:2 分层划分 |
+| `kfold` | 5 折交叉验证（仅 CT 特征） |
+| `kfold_nse` | 5 折 + NSE（每折独立 LASSO 与 NSE 拼接） |
+
+修改或新增实验：编辑 `config/experiments.yaml` 中 `experiments` 节点，或复制一份自定义 YAML 并用 `-c` 指定。
+
+## 运行实验
+
+在 `code/cls` 目录下执行：
+
+```bash
+pip install -r requirements.txt
+python classify_lab1.py          # 等同: python run_experiment.py -p lab1
+python classify_single.py
+python classify_kfold.py
+python classify_kfold_nse.py       # 5 折 + NSE
+python run_experiment.py -p lab1 -c config/experiments.yaml
+```
+
+### 可选分类器
+
+`classifier` 取值：`svm` | `logistic` | `gaussian_nb` | `xgboost` | `lightgbm` | `catboost`
+
+### 标签定义
+
+默认：CPC 1–2 → 0，CPC 3–5 → 1。可在 `ExperimentConfig` 中调整 `cpc_bins` / `cpc_labels`。
+
+## 设计说明
+
+- **配置与逻辑分离**：路径、模型、是否使用 NSE 等集中在 `ExperimentConfig`，避免在多个脚本中复制数百行训练代码。
+- **统一流水线**：`pipeline.experiment.run_experiment` 负责 LASSO、标准化、训练、指标汇总与结果写入。
+- **可扩展**：新增模型只需在 `models/factory.py` 注册；新增实验类型可复用 `pipeline.data` 与 `_run_single_fold`。
+
+## 数据预处理脚本
+
+| 脚本 | 用途 |
+|------|------|
+| `feature_process.py` | 多时间点影像组学表合并、剔除 diagnostics 列 |
+| `feature_process.py` / `new_data_process.py` | 院内数据流水线（需配置本地 Excel 路径） |
+| `utils/tools/*` | CTid–姓名–NSE 对齐等 |
+
+预处理脚本（`feature_process.py`、`utils/tools/*`）尚未接入上述配置，仍可能含历史绝对路径，运行前请先改为本地路径；建议同样改为读取 `CLS_DATA_ROOT`，或沿用 `config/experiments.yaml` 中 `paths` 段的路径约定。
diff --git a/code/cls/.gitignore b/code/cls/.gitignore
new file mode 100644
index 0000000..a2c0a65
--- /dev/null
+++ b/code/cls/.gitignore
@@ -0,0 +1,9 @@
+__pycache__/
+*.py[cod]
+.pytest_cache/
+.venv/
+venv/
+results/
+results_*/
+*.xlsx
+!data/.gitkeep
diff --git a/code/cls/classify_kfold.py b/code/cls/classify_kfold.py
index e6e94f7..2e1cce1 100644
--- a/code/cls/classify_kfold.py
+++ b/code/cls/classify_kfold.py
@@ -1,207 +1,6 @@
-import pandas as pd
-from sklearn.naive_bayes import GaussianNB
-from sklearn.preprocessing import StandardScaler
-import numpy as np
-from sklearn.linear_model import LogisticRegression
-import os
-from utils.pre4data import lasso_dimension_reduction, if_same
-import xgboost as xgb
-from sklearn.svm import SVC
-from lightgbm import LGBMClassifier
-from catboost import CatBoostClassifier
-from sklearn.model_selection import train_test_split
+"""5-fold cross-validation without NSE (profile: kfold)."""
 
-from utils.util import get_next_result_folder,  save_results, calculate_metrics
-
-
-def main():
-    data_train_path = './0721/0728data_delete.xlsx' 
-    ct_mode = data_train_path.split('data')[0].split('/')[-1]
-    data_train = pd.read_excel(data_train_path)
-
-    # CPC 1-2 --> label 0,  CPC3-5 --> label 1  这里你可以参照classify_lab1.py设置不同的label
-    data_train['label'] = pd.cut(data_train['CPC'], bins=[0, 2, 5], labels=[1, 0])
-    label0_num = len(data_train[data_train['label'] == 0])
-    label1_num = len(data_train[data_train['label'] == 1])
-    data_train = data_train.drop(['CPC'], axis=1)  
-    print(f"训练集形状: {data_train.shape}, 训练集时间: {ct_mode}")
-    result_folder = get_next_result_folder(base_path='./results_0728_delete')
-
-    y_index_s = data_train.iloc[:, -1]
-    scale_pos_weight = len(y_index_s[y_index_s == 0]) / len(y_index_s[y_index_s == 1])
-    scaler = StandardScaler()
-    print(f"数据划分完毕")
-
-   # 初始化分类器
-    ratio0 = len([x for x in data_train['label'].tolist() if x == 0]) / len(data_train['label'].tolist())
-    ratio1 = len([x for x in data_train['label'].tolist() if x == 1]) / len(data_train['label'].tolist())
-    clf = GaussianNB(priors=[ratio0, ratio1])    # priors=[0.5, 0.5]
-    clf = LogisticRegression()
-
-    lgbm_params = {
-        'objective': 'binary',       # 二分类任务
-        'metric': 'binary_logloss',  # 使用logloss作为评价指标
-        'learning_rate': 0.016,
-        'max_depth': 6,
-        'n_estimators': 500,
-        'subsample': 0.7,            # 构建每棵树时使用的样本比例
-        'colsample_bytree': 0.7,     # 每棵树使用的特征比例
-        'scale_pos_weight': scale_pos_weight,  # 根据数据不平衡调整正负样本的权重
-        'random_state': 42,
-    }
-    clf = LGBMClassifier(**lgbm_params)
-
-    catboost_params = {
-        'iterations': 500,           # 迭代次数
-        'depth': 6,                  # 树的深度
-        'learning_rate': 0.01,       # 学习率
-        'loss_function': 'Logloss',  # 损失函数
-        'eval_metric': 'AUC',        # AUC作为评价指标
-        'scale_pos_weight': scale_pos_weight,  # 样本不平衡的调整
-        'random_seed': 42,
-        'verbose': 0                 # 不输出训练过程
-    }
-    clf = CatBoostClassifier(**catboost_params)
-
-    xgb_params = {
-        'objective': 'binary:logistic', 'eval_metric': ['logloss'],
-        'learning_rate': 0.016,
-        'max_depth': 6,
-        'n_estimators': 600,
-        'subsample': 0.72,           # 用于构建每棵树的样本比例
-        'colsample_bytree': 0.705,   # 控制每棵树在构建时使用的特征比例
-        # 'scale_pos_weight': 1.4,   # 根据实际正负样本比例设置权重 len(y[y==0]) / len(y[y==1]),
-        'gamma': 0.1,
-        'min_child_weight': 1,       # 降低以增加模型灵活性
-        'scale_pos_weight': scale_pos_weight,
-        'random_state': 42,
-        # 'tree_method': 'hist',  
-        # 'device': 'cuda',
-    }
-    clf = xgb.XGBClassifier(**xgb_params)
-
-    clf = SVC(
-        kernel='rbf',              # 使用RBF核函数
-        C=10,                      # 正则化参数
-        gamma='auto',              # scale
-        probability=True,          # 启用概率估计
-        class_weight='balanced',   # 处理类别不平衡 
-        random_state=42)  
-    
-    mode = clf.__class__.__name__
-    parameter_clf = clf.get_params()
-    print(f"classifier is: {mode}")
-    acc_scores = []
-    recall_scores = []
-    specificity_scores = []
-    precision_scores = []
-    npv_scores = []
-    auc_scores = []
-    selected_features_all = []
-    save_path = './results/roc_curve_{}_time_{}'.format(mode, ct_mode)
-    os.makedirs(save_path, exist_ok=True)
-
-    random_states = [3, 13, 42, 87, 1307]
-    results = "classifier mode is: {}\n\nrandom_states: {}\n".format(mode, random_states)
-    for fold in range(1, 6):
-        print(f"==============第 {fold} 次实验==============")
-        X = data_train.drop('label', axis=1)
-        y = data_train['label']
-        X_train, X_val, y_train, y_val = train_test_split(
-            X, y, 
-            test_size=0.2,                         # 20% 验证集
-            stratify=y,   
-            shuffle=True,  
-            random_state=random_states[fold-1]     # 用于复现以及设置不同的实验
-        ) 
-        train_index = X_train.index
-        lasso_train = data_train.iloc[train_index]
-        results += "\ntrain index: \n{}\ntest index: \n{}\n".format(X_train.index.tolist(), X_val.index.tolist())
-        results += f"train: label 0 num: {y_train.values.tolist().count(0)}, label 1 num: {y_train.values.tolist().count(1)}\n"
-        results += f"test : label 0 num: {y_val.values.tolist().count(0)}, label 1 num: {y_val.values.tolist().count(1)}\n\n\n"
-
-        # 特征降维, 需要先根据原始CT特征进行降维, 然后再把其他需要增加的特征与降维后的特征进行拼接(需要通过CTid对齐人名, 可通过pd.merge实现)
-        print(f"data for {fold} time, train shape is : {X_train.shape}")
-        data_train_lasso, selected_features, best_alphas = lasso_dimension_reduction(lasso_train)
-        X_train = data_train_lasso.iloc[:, :-1] 
-        y_train = data_train_lasso.iloc[:, -1]
-        print(f"data train shape is : {X_train.shape}")
-
-        print(f"data for {fold} time, selected features num is : {len(selected_features)}")
-        X_val = X_val[selected_features]
-
-        t = if_same(X_train, X_val)
-        if t:
-            print(f"X_train and X_val is same")
-        else:
-            print(f"X_train is not same as X_val")
-        print(f"for {fold} time, X_test.shape: {X_val.shape}, X_train.shape: {X_train.shape}")
-
-        # 标准化
-        X_train_scaled = scaler.fit_transform(X_train)  
-        X_val_scaled = scaler.transform(X_val)
-
-        sample_weights = np.ones(len(y_train))
-        sample_weights[y_train == 0] = len(y_train) / (2 * (y_train == 0).sum())
-        sample_weights[y_train == 1] = len(y_train) / (2 * (y_train == 1).sum())
-
-        print(f"训练开始")
-        clf.fit(X_train_scaled, y_train, sample_weight=sample_weights)         #贝叶斯分类器 , logic分类器
-        # clf.fit(X_train_scaled, y_train)   #SVM, xgb, lgbm, catboost分类器
-        print(f"训练完成")
-
-        y_pred = clf.predict(X_val_scaled)
-        y_prob = clf.predict_proba(X_val_scaled)[:, -1]
-
-        ACC, Recall, Specificity, Precision, NPV, roc_auc = calculate_metrics(y_val, y_pred, y_prob, save_roc_path=save_path, mode=mode)
-
-        acc_scores.append(ACC)
-        recall_scores.append(Recall)
-        specificity_scores.append(Specificity)
-        precision_scores.append(Precision)
-        npv_scores.append(NPV)
-        auc_scores.append(roc_auc) 
-        selected_features_all.append(f"次数: {fold}, number: {len(selected_features)}, features: {selected_features}\n")
-
-        print(f"第 {fold} 次实验ACC:{ACC:.3f}")
-        print(f"第 {fold} 次实验Recall:{Recall:.3f}")
-        print(f"第 {fold} 次实验Specificity:{Specificity:.3f}")
-        print(f"第 {fold} 次实验Precision:{Precision:.3f}")
-        print(f"第 {fold} 次实验NPV:{NPV:.3f}")
-        print(f"第 {fold} 次实验AUC:{roc_auc:.3f}")
-                
-    final_ACC = np.mean(acc_scores)
-    final_Recall = np.mean(recall_scores)
-    final_Specificity = np.mean(specificity_scores)
-    final_Precision = np.mean(precision_scores)
-    final_NPV = np.mean(npv_scores)
-    final_AUC = np.mean(auc_scores)
-    print("\n最终测试集的具体指标值:")
-    print(f"准确率 (ACC): {final_ACC:.3f} ± {np.std(acc_scores):.3f}")
-    print(f"召回率 (Recall): {final_Recall:.3f} ± {np.std(recall_scores):.3f}")
-    print(f"特异性 (Specificity): {final_Specificity:.3f} ± {np.std(specificity_scores):.3f}")
-    print(f"精确率 (PPV): {final_Precision:.3f} ± {np.std(precision_scores):.3f}")
-    print(f"阴性预测值 (NPV): {final_NPV:.3f}  ± {np.std(npv_scores):.3f}")
-    print(f"AUC值: {final_AUC:.3f} ± {np.std(auc_scores):.3f}")
-
-    final_results = {
-        'Recall': f"{final_Recall:.3f} ± {np.std(recall_scores):.3f}",
-        'Specificity': f"{final_Specificity:.3f} ± {np.std(specificity_scores):.3f}",
-        'ACC': f"{final_ACC:.3f} ± {np.std(acc_scores):.3f}",
-        'PPV': f"{final_Precision:.3f} ± {np.std(precision_scores):.3f}",
-        'NPV': f"{final_NPV:.3f}  ± {np.std(npv_scores):.3f}",
-        'AUC': f"{final_AUC:.3f} ± {np.std(auc_scores):.3f}\n", }
-    
-    results += "all dataset label 0 num: {}, label 1 num: {}\n".format(label0_num, label1_num)
-    print(f"all dataset label 0 num: {label0_num}, label 1 num: {label1_num}\n")
-    for metric, value in final_results.items():
-        results += f"\n{metric}: {value}\n"
-    for feature in selected_features_all:
-        results += f"{feature}\n"
-    results += "\n\n"
-    for pm in parameter_clf:
-        results += "\nparameter: {} \n{} values is: {}\n".format(pm, pm, parameter_clf[pm])
-    save_results(results, result_folder)
+from config.cli import main
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main(default_profile="kfold")
diff --git a/code/cls/classify_kfold_nse.py b/code/cls/classify_kfold_nse.py
new file mode 100644
index 0000000..fc1cf4e
--- /dev/null
+++ b/code/cls/classify_kfold_nse.py
@@ -0,0 +1,6 @@
+"""5-fold cross-validation with NSE features (profile: kfold_nse)."""
+
+from config.cli import main
+
+if __name__ == "__main__":
+    main(default_profile="kfold_nse")
diff --git a/code/cls/classify_lab1.py b/code/cls/classify_lab1.py
index df7534d..2e8fa65 100644
--- a/code/cls/classify_lab1.py
+++ b/code/cls/classify_lab1.py
@@ -1,248 +1,6 @@
-import pandas as pd
-from sklearn.naive_bayes import GaussianNB
-from sklearn.preprocessing import StandardScaler
-import numpy as np
-from sklearn.linear_model import LogisticRegression
-import os
-from datetime import datetime
-from utils.pre4data import lasso_dimension_reduction, if_same
-import xgboost as xgb
-import matplotlib.pyplot as plt
-from sklearn.svm import SVC
-from lightgbm import LGBMClassifier
-from catboost import CatBoostClassifier
+"""Lab1: fixed train/test split with optional NSE (profile: lab1)."""
 
-from utils.util import get_next_result_folder,  save_results, calculate_metrics
-
-
-def main():
-    nseif = True
-
-    data_train_path = 'D:/thrid_beijing_hospital_data/0804lab1-train.xlsx'
-    data_test_path  = 'D:/thrid_beijing_hospital_data/0804lab1-test.xlsx'
-    base_dir        = 'D:/thrid_beijing_hospital_data/results_0804_lab1'
-    ct_mode = data_train_path.split('-')[0].split('/')[-1]
-    data_train = pd.read_excel(data_train_path)
-    data_test  = pd.read_excel(data_test_path)
-    if nseif:
-        lab_describe = f'cpc1-2=0_cpc3-5=1_lab1_withnse'
-        train_nse = data_train[['nse极值', 'nse极值差']]
-        test_nse = data_test[['nse极值', 'nse极值差']]
-        data_train = data_train.drop(columns=['CTid', 'name', 'nse极值', 'nse极值差'])
-        data_test = data_test.drop(columns=['CTid', 'name', 'nse极值', 'nse极值差'])
-    else:
-        data_train = data_train.drop(columns=['CTid', 'name'])
-        data_test = data_test.drop(columns=['CTid', 'name'])
-        lab_describe = f'cpc1-2=0_cpc3-5=1_lab1_withoutnse'
-    
-    # without cpc5  ------->  means dead people data
-    # train_df1 = train_df1[train_df1['CPC'] != 5]
-    # test_df1 = test_df1[test_df1['CPC'] != 5]
-
-    # ============================================= set dataset cpc split =============================================
-    # CPC 1-2 --> label 0,  CPC3-5 --> label 1
-    data_train['label'] = pd.cut(data_train['CPC'], bins=[0, 2, 5], labels=[0, 1])
-    data_test['label']  = pd.cut(data_test['CPC'],  bins=[0, 2, 5], labels=[0, 1])
-
-    # CPC 1-4 --> label 0,  CPC5   --> label 1
-    # train_df1['label'] = pd.cut(train_df1['CPC'], bins=[0, 4, 5], labels=[0, 1])
-    # test_df1['label'] = pd.cut(test_df1['CPC'], bins=[0, 4, 5], labels=[0, 1])
-
-    # CPC 1-2 --> label 0,  CPC3-4 --> label 1
-    # train_df1['label'] = pd.cut(train_df1['CPC'], bins=[0, 2, 4], labels=[0, 1])
-    # test_df1['label'] = pd.cut(test_df1['CPC'], bins=[0, 2, 4], labels=[0, 1])
-    # ============================================= set dataset cpc split =============================================
-
-    data_train = data_train.drop(['CPC'], axis=1) 
-    data_test = data_test.drop(['CPC'], axis=1) 
-
-    # dataset 
-    print(f"训练集形状: {data_train.shape}, 训练集时间: {ct_mode}")
-    result_folder = get_next_result_folder(base_path=base_dir)
-
-    y_index_s = data_train.iloc[:, -1]
-    scale_pos_weight = len(y_index_s[y_index_s == 0]) / len(y_index_s[y_index_s == 1])
-    scaler = StandardScaler()
-    print(f"数据划分完毕")
-
-    # 初始化分类器
-    ratio0 = len([x for x in data_train['label'].tolist() if x == 0]) / len(data_train['label'].tolist())
-    ratio1 = len([x for x in data_train['label'].tolist() if x == 1]) / len(data_train['label'].tolist())
-    clf = GaussianNB(priors=[ratio0, ratio1])    # priors=[0.5, 0.5]
-    clf = LogisticRegression()
-
-    lgbm_params = {
-        'objective': 'binary',       # 二分类任务
-        'metric': 'binary_logloss',  # 使用logloss作为评价指标
-        'learning_rate': 0.016,
-        'max_depth': 6,
-        'n_estimators': 500,
-        'subsample': 0.7,            # 构建每棵树时使用的样本比例
-        'colsample_bytree': 0.7,     # 每棵树使用的特征比例
-        'scale_pos_weight': scale_pos_weight,  # 根据数据不平衡调整正负样本的权重
-        'random_state': 42,
-    }
-    clf = LGBMClassifier(**lgbm_params)
-
-    catboost_params = {
-        'iterations': 500,           # 迭代次数
-        'depth': 6,                  # 树的深度
-        'learning_rate': 0.01,       # 学习率
-        'loss_function': 'Logloss',  # 损失函数
-        'eval_metric': 'AUC',        # AUC作为评价指标
-        'scale_pos_weight': scale_pos_weight,  # 样本不平衡的调整
-        'random_seed': 42,
-        'verbose': 0                 # 不输出训练过程
-    }
-    clf = CatBoostClassifier(**catboost_params)
-
-    xgb_params = {
-        'objective': 'binary:logistic', 'eval_metric': ['logloss'],
-        'learning_rate': 0.016,
-        'max_depth': 6,
-        'n_estimators': 600,
-        'subsample': 0.72,           # 用于构建每棵树的样本比例
-        'colsample_bytree': 0.705,   # 控制每棵树在构建时使用的特征比例
-        # 'scale_pos_weight': 1.4,   # 根据实际正负样本比例设置权重 len(y[y==0]) / len(y[y==1]),
-        'gamma': 0.1,
-        'min_child_weight': 1,       # 降低以增加模型灵活性
-        'scale_pos_weight': scale_pos_weight,
-        'random_state': 42,
-        # 'tree_method': 'hist',  
-        # 'device': 'cuda',
-    }
-    clf = xgb.XGBClassifier(**xgb_params)
-
-    clf = SVC(
-        kernel='rbf',              # 使用RBF核函数
-        C=10,                      # 正则化参数
-        gamma='auto',              # scale
-        probability=True,          # 启用概率估计
-        class_weight='balanced',   # 处理类别不平衡 
-        random_state=42)    
-                                                    
-    random_var = [42, 46, 52]
-
-    # 参数网格
-    param_grid = {
-        'learning_rate': [0.01, 0.05, 0.1],
-        'max_depth': [5, 6, 7],
-        'n_estimators': [100, 200, 500],
-        'subsample': [0.8, 0.9, 1.0],
-        'colsample_bytree': [0.8, 0.9, 1.0],
-    }
-    mode = clf.__class__.__name__
-    parameter_clf = clf.get_params()
-    print(f"classifier is: {mode}")
-    acc_scores = []
-    recall_scores = []
-    specificity_scores = []
-    precision_scores = []
-    npv_scores = []
-    auc_scores = []
-    selected_features_all = []
-    save_path = os.path.join(result_folder, 'roc_curve_{}_time_{}_{}_{}'.format(mode, ct_mode, lab_describe, datetime.now().strftime('%Y-%m-%d_%H-%M-%S')))
-    os.makedirs(save_path, exist_ok=True)
-    random_states = 1307
-
-    results = "lab: {}\nclassifier mode is: {}\n\nrandom_states: {}, train num: {}, test num: {}\n".format(lab_describe, mode, random_states, len(data_train), len(data_test))
-    X_train = data_train.drop('label', axis=1)
-    y_train = data_train['label']
-    X_test = data_test.drop('label', axis=1)
-    y_test = data_test['label']
-
-    # save progress
-    results += "\ntrain index: \n{}\ntest index: \n{}\n".format(X_train.index.tolist(), X_test.index.tolist())
-    results += f"train: label 0 num: {y_train.values.tolist().count(0)} ratio: {y_train.values.tolist().count(0) / len(y_train):.4f}, label 1 num: {y_train.values.tolist().count(1)} ratio: {y_train.values.tolist().count(1) / len(y_train):.4f}\n"
-    results += f"test : label 0 num: {y_test.values.tolist().count(0)} ratio: {y_test.values.tolist().count(0) / len(y_test):.4f}, label 1 num: {y_test.values.tolist().count(1)} ratio: {y_test.values.tolist().count(1) / len(y_test):.4f}\n\n\n"
-
-    # 特征降维
-    print(f"data train shape is : {X_train.shape}")
-    data_train_lasso, selected_features, best_alphas = lasso_dimension_reduction(data_train)
-    if nseif:
-        print(f"..........with nse data training...")
-        results+=f"..........with nse data training..."
-        data_train_withnse = pd.concat([data_train_lasso, train_nse], axis=1)
-        cols = [c for c in data_train_withnse.columns if c != 'label'] + ['label']
-        data_train_final = data_train_withnse[cols]
-        X_test = X_test[selected_features]
-        X_test = pd.concat([X_test, test_nse], axis=1)
-
-    else:
-        print(f"..........without nse data training...")
-        results+=f"..........without nse data training..."
-        data_train_final = data_train_lasso
-        X_test = X_test[selected_features]
-
-    X_train = data_train_final.iloc[:, :-1] 
-    y_train = data_train_final.iloc[:, -1]
-    print(f"data train shape is : {X_train.shape}")
-    print(f"data selected features num is : {len(selected_features)}")
-
-    t = if_same(X_train, X_test)
-    if t:
-        print(f"X_train and X_val is same")
-    else:
-        print(f"X_train is not same as X_val")
-    print(f"X_test.shape: {X_test.shape}, X_train.shape: {X_train.shape}")
-
-    # 标准化
-    X_train_scaled = scaler.fit_transform(X_train)  
-    X_val_scaled = scaler.transform(X_test)
-
-    sample_weights = np.ones(len(y_train))
-    sample_weights[y_train == 0] = len(y_train) / (2 * (y_train == 0).sum())
-    sample_weights[y_train == 1] = len(y_train) / (2 * (y_train == 1).sum())
-
-    print(f"训练开始")
-    clf.fit(X_train_scaled, y_train, sample_weight=sample_weights)         #贝叶斯分类器 , logic分类器
-    # clf.fit(X_train_scaled, y_train)                                     #SVM, xgb, lgbm, catboost分类器
-    print(f"训练完成")
-
-    y_pred = clf.predict(X_val_scaled)
-    y_prob = clf.predict_proba(X_val_scaled)[:, -1]
-
-    ACC, Recall, Specificity, Precision, NPV, roc_auc = calculate_metrics(y_test, y_pred, y_prob, save_roc_path=save_path, mode=mode)
-
-    acc_scores.append(ACC)
-    recall_scores.append(Recall)
-    specificity_scores.append(Specificity)
-    precision_scores.append(Precision)
-    npv_scores.append(NPV)
-    auc_scores.append(roc_auc) 
-    selected_features_all.append(f"number: {len(selected_features)}, features: {selected_features}\n")
-            
-    final_ACC = np.mean(acc_scores)
-    final_Recall = np.mean(recall_scores)
-    final_Specificity = np.mean(specificity_scores)
-    final_Precision = np.mean(precision_scores)
-    final_NPV = np.mean(npv_scores)
-    final_AUC = np.mean(auc_scores)
-    print("\n最终测试集的具体指标值:")
-    print(f"准确率 (ACC): {final_ACC:.3f} ± {np.std(acc_scores):.3f}")
-    print(f"召回率 (Recall): {final_Recall:.3f} ± {np.std(recall_scores):.3f}")
-    print(f"特异性 (Specificity): {final_Specificity:.3f} ± {np.std(specificity_scores):.3f}")
-    print(f"精确率 (PPV): {final_Precision:.3f} ± {np.std(precision_scores):.3f}")
-    print(f"阴性预测值 (NPV): {final_NPV:.3f}  ± {np.std(npv_scores):.3f}")
-    print(f"AUC值: {final_AUC:.3f} ± {np.std(auc_scores):.3f}")
-    print(f"classifier is: {mode}")
-
-    final_results = {
-        'Recall': f"{final_Recall:.3f} ± {np.std(recall_scores):.3f}",
-        'Specificity': f"{final_Specificity:.3f} ± {np.std(specificity_scores):.3f}",
-        'ACC': f"{final_ACC:.3f} ± {np.std(acc_scores):.3f}",
-        'PPV': f"{final_Precision:.3f} ± {np.std(precision_scores):.3f}",
-        'NPV': f"{final_NPV:.3f}  ± {np.std(npv_scores):.3f}",
-        'AUC': f"{final_AUC:.3f} ± {np.std(auc_scores):.3f}\n",
-    }
-    for metric, value in final_results.items():
-        results += f"\n{metric}: {value}\n"
-    for feature in selected_features_all:
-        results += f"{feature}\n"
-    results += "\n\n"
-    for pm in parameter_clf:
-        results += "\nparameter: {} \n{} values is: {}\n".format(pm, pm, parameter_clf[pm])
-    save_results(results, result_folder)
+from config.cli import main
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main(default_profile="lab1")
diff --git a/code/cls/classify_single.py b/code/cls/classify_single.py
index 045d5b9..59abec2 100644
--- a/code/cls/classify_single.py
+++ b/code/cls/classify_single.py
@@ -1,335 +1,6 @@
-import pandas as pd
-from sklearn.naive_bayes import GaussianNB
-from sklearn.preprocessing import StandardScaler
-import numpy as np
-from sklearn.metrics import roc_auc_score, confusion_matrix
-from sklearn.linear_model import LogisticRegression
-from sklearn.model_selection import StratifiedKFold
-import os
-from datetime import datetime
-from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score
-from utils.pre4data import lasso_dimension_reduction, if_same
-import xgboost as xgb
-import matplotlib.pyplot as plt
-from sklearn.svm import SVC
-from lightgbm import LGBMClassifier
-from catboost import CatBoostClassifier
-from sklearn.model_selection import StratifiedShuffleSplit
-from sklearn.model_selection import train_test_split
+"""Single-table stratified split (profile: single)."""
 
-def get_next_result_folder(base_path='D:/PycharmProject/classification/results_nse'):
-    if not os.path.exists(base_path):
-        os.makedirs(base_path)
-        return os.path.join(base_path, 'results_1')
-    
-    # 查找现有的results_i文件夹
-    existing_folders = [d for d in os.listdir(base_path) 
-                       if os.path.isdir(os.path.join(base_path, d)) 
-                       and d.startswith('results_')]
-    
-    if not existing_folders:
-        return os.path.join(base_path, 'results_1')
-    
-    # 获取现有文件夹的最大编号
-    max_num = max([int(f.split('_')[1]) for f in existing_folders])
-    
-    # 返回下一个编号的文件夹路径
-    return os.path.join(base_path, f'results_{max_num + 1}')
-
-def save_results(results_text, result_folder):
-    """保存结果到results.txt文件"""
-    os.makedirs(result_folder, exist_ok=True)
-    
-    result_file = os.path.join(result_folder, 'results.txt')
-    
-    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-    results_with_timestamp = f"实验时间: {timestamp}\n\n{results_text}"
-    
-    with open(result_file, 'w', encoding='utf-8') as f:
-        f.write(results_with_timestamp)
-    
-    print(f"\n结果已保存到: {result_file}")
-
-def calculate_metrics(y_true, y_pred, y_prob, save_roc_path=None, mode=None, fold=None):
-    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
-    
-    ACC = (tp + tn) / (tp + tn + fp + fn)
-    Recall = tp / (tp + fn) if (tp + fn) != 0 else 0  
-    Specificity = tn / (tn + fp) if (tn + fp) != 0 else 0  
-    Precision = tp / (tp + fp) if (tp + fp) != 0 else 0  
-    NPV = tn / (tn + fn) if (tn + fn) != 0 else 0  
-    # ROC曲线绘制逻辑
-    roc_auc = None
-    if y_prob is not None:
-        roc_auc = roc_auc_score(y_true, y_prob)
-        
-        # 内置绘制ROC曲线
-        fpr, tpr, _ = roc_curve(y_true, y_prob)
-        plt.figure()
-        plt.plot(fpr, tpr, color='darkorange', lw=2, 
-                label=f'ROC curve (AUC = {roc_auc:.2f})')
-        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
-        plt.xlim([0.0, 1.0])
-        plt.ylim([0.0, 1.05])
-        plt.xlabel('False Positive Rate')
-        plt.ylabel('True Positive Rate')
-        plt.title('Receiver Operating Characteristic (ROC)')
-        plt.legend(loc="lower right")
-        
-        if fold:
-            plt.savefig(os.path.join(save_roc_path, 'model-{}_fold-{}'.format(mode, fold)), dpi=300, bbox_inches='tight')
-        else:
-            plt.savefig(os.path.join(save_roc_path, 'model-{}'.format(mode)), dpi=300, bbox_inches='tight')
-        plt.close()
-
-    
-    roc_auc = roc_auc_score(y_true, y_prob) if y_prob is not None else None
-    return ACC, Recall, Specificity, Precision, NPV, roc_auc
-
-
-def balanced_train_test_split(X, y, test_size=0.4, random_state=None):
-    print("初始各类别比例  :", np.unique(y, return_counts=True)[1]/len(y))
-    sss = StratifiedShuffleSplit(n_splits=1, test_size=test_size, random_state=random_state)
-    for train_index, val_index in sss.split(X, y):
-        X_train = X.iloc[train_index]
-        X_val = X.iloc[val_index]
-        y_train = y.iloc[train_index]
-        y_val = y.iloc[val_index]
-    
-    # 验证各类别比例是否相同
-    print("验证集各类别比例:", np.unique(y_val, return_counts=True)[1]/len(y_val))
-    print("训练集各类别比例:", np.unique(y_train, return_counts=True)[1]/len(y_train))
-    
-    return X_train, X_val, y_train, y_val
-
-
-def main():
-    data_train_path = './data/0804data_0.xlsx' 
-    ct_mode = data_train_path.split('data')[0].split('/')[-1]
-    data_train = pd.read_excel(data_train_path)
-    lab_describe = 'cpc1-2=0_cpc3-5=1'
-
-    train_df1, test_df1 = train_test_split(
-        data_train,
-        test_size=0.2,               # 20% 作为测试集
-        stratify=data_train['CPC'],  # 分层抽样
-        random_state=42)
-    
-    # without cpc5  ------->  means dead people data
-    # train_df1 = train_df1[train_df1['CPC'] != 5]
-    # test_df1 = test_df1[test_df1['CPC'] != 5]
-
-    # ============================================= set dataset cpc split =============================================
-    # CPC 1-2 --> label 0,  CPC3-5 --> label 1
-    train_df1['label'] = pd.cut(train_df1['CPC'], bins=[0, 2, 5], labels=[0, 1])
-    test_df1['label'] = pd.cut(test_df1['CPC'], bins=[0, 2, 5], labels=[0, 1])
-
-    # # # CPC 1-4 --> label 0,  CPC5 --> label 1
-    # train_df1['label'] = pd.cut(train_df1['CPC'], bins=[0, 4, 5], labels=[0, 1])
-    # test_df1['label'] = pd.cut(test_df1['CPC'], bins=[0, 4, 5], labels=[0, 1])
-
-    # # CPC 1-2 --> label 0,  CPC3-4 --> label 1
-    # train_df1['label'] = pd.cut(train_df1['CPC'], bins=[0, 2, 4], labels=[0, 1])
-    # test_df1['label'] = pd.cut(test_df1['CPC'], bins=[0, 2, 4], labels=[0, 1])
-    # ============================================= set dataset cpc split =============================================
-
-    train_df1 = train_df1.drop(['CPC'], axis=1) 
-    test_df1 = test_df1.drop(['CPC'], axis=1) 
-
-    # dataset 
-    label1_num = len(data_train[data_train['CPC'] == 1])
-    label2_num = len(data_train[data_train['CPC'] == 2])
-    label3_num = len(data_train[data_train['CPC'] == 3])
-    label4_num = len(data_train[data_train['CPC'] == 4])
-    label5_num = len(data_train[data_train['CPC'] == 5])
-    print(f"训练集形状: {train_df1.shape}, 训练集时间: {ct_mode}")
-    result_folder = get_next_result_folder(base_path='./results_0728_delete_new')
-
-    y_index_s = train_df1.iloc[:, -1]
-    scale_pos_weight = len(y_index_s[y_index_s == 0]) / len(y_index_s[y_index_s == 1])
-    scaler = StandardScaler()
-    print(f"数据划分完毕")
-
-    # 初始化分类器
-    ratio0 = len([x for x in data_train['label'].tolist() if x == 0]) / len(data_train['label'].tolist())
-    ratio1 = len([x for x in data_train['label'].tolist() if x == 1]) / len(data_train['label'].tolist())
-    clf = GaussianNB(priors=[ratio0, ratio1])    # priors=[0.5, 0.5]
-    clf = LogisticRegression()
-
-    lgbm_params = {
-        'objective': 'binary',       # 二分类任务
-        'metric': 'binary_logloss',  # 使用logloss作为评价指标
-        'learning_rate': 0.016,
-        'max_depth': 6,
-        'n_estimators': 500,
-        'subsample': 0.7,            # 构建每棵树时使用的样本比例
-        'colsample_bytree': 0.7,     # 每棵树使用的特征比例
-        'scale_pos_weight': scale_pos_weight,  # 根据数据不平衡调整正负样本的权重
-        'random_state': 42,
-    }
-    clf = LGBMClassifier(**lgbm_params)
-
-    catboost_params = {
-        'iterations': 500,           # 迭代次数
-        'depth': 6,                  # 树的深度
-        'learning_rate': 0.01,       # 学习率
-        'loss_function': 'Logloss',  # 损失函数
-        'eval_metric': 'AUC',        # AUC作为评价指标
-        'scale_pos_weight': scale_pos_weight,  # 样本不平衡的调整
-        'random_seed': 42,
-        'verbose': 0                 # 不输出训练过程
-    }
-    clf = CatBoostClassifier(**catboost_params)
-
-    xgb_params = {
-        'objective': 'binary:logistic', 'eval_metric': ['logloss'],
-        'learning_rate': 0.016,
-        'max_depth': 6,
-        'n_estimators': 600,
-        'subsample': 0.72,           # 用于构建每棵树的样本比例
-        'colsample_bytree': 0.705,   # 控制每棵树在构建时使用的特征比例
-        # 'scale_pos_weight': 1.4,   # 根据实际正负样本比例设置权重 len(y[y==0]) / len(y[y==1]),
-        'gamma': 0.1,
-        'min_child_weight': 1,       # 降低以增加模型灵活性
-        'scale_pos_weight': scale_pos_weight,
-        'random_state': 42,
-        # 'tree_method': 'hist',  
-        # 'device': 'cuda',
-    }
-    clf = xgb.XGBClassifier(**xgb_params)
-
-    clf = SVC(
-        kernel='rbf',              # 使用RBF核函数
-        C=10,                      # 正则化参数
-        gamma='auto',              # scale
-        probability=True,          # 启用概率估计
-        class_weight='balanced',   # 处理类别不平衡 
-        random_state=42)  
-
-    # 参数网格
-    param_grid = {
-        'learning_rate': [0.01, 0.05, 0.1],
-        'max_depth': [5, 6, 7],
-        'n_estimators': [100, 200, 500],
-        'subsample': [0.8, 0.9, 1.0],
-        'colsample_bytree': [0.8, 0.9, 1.0],
-    }
-    mode = clf.__class__.__name__
-    parameter_clf = clf.get_params()
-    print(f"classifier is: {mode}")
-    acc_scores = []
-    recall_scores = []
-    specificity_scores = []
-    precision_scores = []
-    npv_scores = []
-    auc_scores = []
-    selected_features_all = []
-    save_path = './results/roc_curve_{}_time_{}_{}_{}'.format(mode, ct_mode, lab_describe, datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
-    os.makedirs(save_path, exist_ok=True)
-
-    metrics_history = {
-        'ACC': [], 'Recall': [], 'Specificity': [], 'Precision': [], 'NPV': [], 'AUC': [], 
-    }
-    random_states = 1307
-    results = "lab: {}\nclassifier mode is: {}\n\nrandom_states: {}, train num: {}, test num: {}\n".format(lab_describe, mode, random_states, len(train_df1), len(test_df1))
-    X_train = train_df1.drop('label', axis=1)
-    y_train = train_df1['label']
-    X_test = test_df1.drop('label', axis=1)
-    y_test = test_df1['label']
-    # X_train, X_val, y_train, y_val = train_test_split(
-    #     X, y, 
-    #     test_size=0.4,  # 40% 验证集, 76 val  113 train
-    #     stratify=y,   
-    #     shuffle=True,  
-    #     random_state=random_states[fold-1]  
-    # ) 
-    # X_train, X_val, y_train, y_val = balanced_train_test_split(
-    #     X, y, 
-    #     test_size=0.4,  # 40% 验证集, 76 val  113 train
-    #     random_state=random_states
-    # )
-    # train_index = X_train.index
-    # lasso_train = data_train.iloc[train_index]
-    results += "\ntrain index: \n{}\ntest index: \n{}\n".format(X_train.index.tolist(), X_test.index.tolist())
-    results += f"train: label 0 num: {y_train.values.tolist().count(0)} ratio: {y_train.values.tolist().count(0) / len(y_train):.4f}, label 1 num: {y_train.values.tolist().count(1)} ratio: {y_train.values.tolist().count(1) / len(y_train):.4f}\n"
-    results += f"test : label 0 num: {y_test.values.tolist().count(0)} ratio: {y_test.values.tolist().count(0) / len(y_test):.4f}, label 1 num: {y_test.values.tolist().count(1)} ratio: {y_test.values.tolist().count(1) / len(y_test):.4f}\n\n\n"
-
-    # 特征降维
-    print(f"data train shape is : {X_train.shape}")
-    data_train_lasso, selected_features, best_alphas = lasso_dimension_reduction(train_df1)
-    X_train = data_train_lasso.iloc[:, :-1] 
-    y_train = data_train_lasso.iloc[:, -1]
-    print(f"data train shape is : {X_train.shape}")
-
-
-    print(f"data selected features num is : {len(selected_features)}")
-    X_test = X_test[selected_features]
-
-    t = if_same(X_train, X_test)
-    if t:
-        print(f"X_train and X_val is same")
-    else:
-        print(f"X_train is not same as X_val")
-    print(f"X_test.shape: {X_test.shape}, X_train.shape: {X_train.shape}")
-
-    # 标准化
-    X_train_scaled = scaler.fit_transform(X_train)  
-    X_val_scaled = scaler.transform(X_test)
-
-    sample_weights = np.ones(len(y_train))
-    sample_weights[y_train == 0] = len(y_train) / (2 * (y_train == 0).sum())
-    sample_weights[y_train == 1] = len(y_train) / (2 * (y_train == 1).sum())
-
-    print(f"训练开始")
-    clf.fit(X_train_scaled, y_train, sample_weight=sample_weights)         #贝叶斯分类器 , logic分类器
-    # clf.fit(X_train_scaled, y_train)   #SVM, xgb, lgbm, catboost分类器
-    print(f"训练完成")
-
-    y_pred = clf.predict(X_val_scaled)
-    y_prob = clf.predict_proba(X_val_scaled)[:, -1]
-
-    ACC, Recall, Specificity, Precision, NPV, roc_auc = calculate_metrics(y_test, y_pred, y_prob, save_roc_path=save_path, mode=mode)
-
-    acc_scores.append(ACC)
-    recall_scores.append(Recall)
-    specificity_scores.append(Specificity)
-    precision_scores.append(Precision)
-    npv_scores.append(NPV)
-    auc_scores.append(roc_auc) 
-    selected_features_all.append(f"number: {len(selected_features)}, features: {selected_features}\n")
-            
-    final_ACC = np.mean(acc_scores)
-    final_Recall = np.mean(recall_scores)
-    final_Specificity = np.mean(specificity_scores)
-    final_Precision = np.mean(precision_scores)
-    final_NPV = np.mean(npv_scores)
-    final_AUC = np.mean(auc_scores)
-    print("\n最终测试集的具体指标值:")
-    print(f"准确率 (ACC): {final_ACC:.3f} ± {np.std(acc_scores):.3f}")
-    print(f"召回率 (Recall): {final_Recall:.3f} ± {np.std(recall_scores):.3f}")
-    print(f"特异性 (Specificity): {final_Specificity:.3f} ± {np.std(specificity_scores):.3f}")
-    print(f"精确率 (PPV): {final_Precision:.3f} ± {np.std(precision_scores):.3f}")
-    print(f"阴性预测值 (NPV): {final_NPV:.3f}  ± {np.std(npv_scores):.3f}")
-    print(f"AUC值: {final_AUC:.3f} ± {np.std(auc_scores):.3f}")
-
-    final_results = {
-        'Recall': f"{final_Recall:.3f} ± {np.std(recall_scores):.3f}",
-        'Specificity': f"{final_Specificity:.3f} ± {np.std(specificity_scores):.3f}",
-        'ACC': f"{final_ACC:.3f} ± {np.std(acc_scores):.3f}",
-        'PPV': f"{final_Precision:.3f} ± {np.std(precision_scores):.3f}",
-        'NPV': f"{final_NPV:.3f}  ± {np.std(npv_scores):.3f}",
-        'AUC': f"{final_AUC:.3f} ± {np.std(auc_scores):.3f}\n",
-    }
-    # results = "classifier mode is: {}\n\ntrain index: \n{}\n\ntest index: \n{}".format(mode, X_train.index, X_val.index)
-    results += "all dataset label 1 num: {}, label 2 num: {}, label 3 num: {}, label 4 num: {}, label 5 num: {}\n".format(label1_num, label2_num, label3_num, label4_num, label5_num)
-    for metric, value in final_results.items():
-        results += f"\n{metric}: {value}\n"
-    for feature in selected_features_all:
-        results += f"{feature}\n"
-    results += "\n\n"
-    for pm in parameter_clf:
-        results += "\nparameter: {} \n{} values is: {}\n".format(pm, pm, parameter_clf[pm])
-    save_results(results, result_folder)
+from config.cli import main
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main(default_profile="single")
diff --git a/code/cls/config/__init__.py b/code/cls/config/__init__.py
new file mode 100644
index 0000000..70f87f2
--- /dev/null
+++ b/code/cls/config/__init__.py
@@ -0,0 +1,16 @@
+from config.defaults import (
+    CPC_LABEL_BINS,
+    ID_COLUMNS,
+    NSE_COLUMNS,
+    ExperimentConfig,
+)
+from config.load_config import list_profiles, load_experiment_config
+
+__all__ = [
+    "CPC_LABEL_BINS",
+    "ID_COLUMNS",
+    "NSE_COLUMNS",
+    "ExperimentConfig",
+    "list_profiles",
+    "load_experiment_config",
+]
diff --git a/code/cls/config/cli.py b/code/cls/config/cli.py
new file mode 100644
index 0000000..6154bde
--- /dev/null
+++ b/code/cls/config/cli.py
@@ -0,0 +1,65 @@
+"""CLI entry for running experiments from YAML profiles."""
+
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+from pathlib import Path
+
+from config.load_config import list_profiles, load_experiment_config
+
+
+def build_parser(*, default_profile: str) -> argparse.ArgumentParser:
+    """Build the CLI parser; the profile default is CLS_EXPERIMENT or *default_profile*."""
+    # An empty CLS_EXPERIMENT counts as unset so "" is never used as a profile name.
+    env_default = os.environ.get("CLS_EXPERIMENT") or default_profile
+    parser = argparse.ArgumentParser(
+        description="Run CPC classification experiment from a YAML profile.",
+    )
+    parser.add_argument(
+        "--profile", "-p",
+        default=env_default,
+        help=f"Experiment profile name (default: {env_default!r}, env CLS_EXPERIMENT)",
+    )
+    parser.add_argument(
+        "--config", "-c",
+        type=Path,
+        default=None,
+        help="Path to experiments.yaml (default: config/experiments.yaml)",
+    )
+    parser.add_argument(
+        "--list-profiles",
+        action="store_true",
+        help="List available profile names and exit",
+    )
+    return parser
+
+
+def main(*, default_profile: str = "single") -> None:
+    """Parse CLI arguments, then list the profiles or run the selected experiment.
+
+    A missing config file or unknown profile is reported on stderr and the
+    process exits with status 1 (no traceback), for both modes.
+    """
+    args = build_parser(default_profile=default_profile).parse_args()
+    try:
+        if args.list_profiles:
+            for name in list_profiles(args.config):
+                print(name)
+            return
+        config = load_experiment_config(args.profile, args.config)
+    except (FileNotFoundError, KeyError) as exc:
+        # str(KeyError) is the repr of its message; print the message itself.
+        print(exc.args[0] if isinstance(exc, KeyError) and exc.args else exc, file=sys.stderr)
+        sys.exit(1)
+    print(f"Profile: {args.profile}")
+    print(f"  train: {config.train_path}")
+    print(f"  test:  {config.test_path}")
+    print(f"  use_nse={config.use_nse}, n_folds={config.n_folds}, classifier={config.classifier}")
+    from pipeline.experiment import run_experiment  # imported late: not needed for listing/errors
+    run_experiment(config)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/code/cls/config/defaults.py b/code/cls/config/defaults.py
new file mode 100644
index 0000000..b06f794
--- /dev/null
+++ b/code/cls/config/defaults.py
@@ -0,0 +1,64 @@
+"""Shared defaults for CPC classification experiments."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field, fields
+from pathlib import Path
+from typing import Literal, Sequence
+
+# CPC 1–2 → 0, CPC 3–5 → 1 (good vs poor outcome)
+CPC_LABEL_BINS: tuple[int, int, int] = (0, 2, 5)
+CPC_LABELS: tuple[int, int] = (0, 1)
+
+ID_COLUMNS: Sequence[str] = ("CTid", "name")
+NSE_COLUMNS: Sequence[str] = ("nse极值", "nse极值差")
+
+ClassifierName = Literal[
+    "svm",
+    "logistic",
+    "gaussian_nb",
+    "xgboost",
+    "lightgbm",
+    "catboost",
+]
+
+
+@dataclass
+class ExperimentConfig:
+    """Settings that describe one training/evaluation run (one YAML profile)."""
+
+    train_path: Path | str  # required; the only field without a default
+    test_path: Path | str | None = None
+    results_base_dir: Path | str = "./results"
+    lab_describe: str = "cpc1-2=0_cpc3-5=1"
+    use_nse: bool = False
+    classifier: ClassifierName = "svm"
+    random_state: int = 42
+    # K-fold mode: when test_path is None and n_folds > 1, split from train_path
+    n_folds: int = 1
+    fold_random_states: Sequence[int] = field(
+        default_factory=lambda: (3, 13, 42, 87, 1307)
+    )
+    test_size: float = 0.2
+    exclude_cpc5: bool = False
+    cpc_bins: tuple[int, int, int] = CPC_LABEL_BINS
+    cpc_labels: tuple[int, int] = CPC_LABELS
+
+    def resolve_paths(self) -> None:
+        """Convert the path fields to ``Path`` objects with ``~`` expanded, in place."""
+        self.train_path = Path(self.train_path).expanduser()
+        self.results_base_dir = Path(self.results_base_dir).expanduser()
+        if self.test_path is not None:
+            self.test_path = Path(self.test_path).expanduser()
+
+    @classmethod
+    def from_mapping(cls, data: dict) -> ExperimentConfig:
+        """Create a config from a YAML profile dict, skipping unknown keys and None values."""
+        known = {f.name for f in fields(cls)}
+        kwargs = {}
+        for key, value in data.items():
+            if key in known and value is not None:
+                # YAML lists become tuples for the sequence-valued fields.
+                is_seq = key in ("fold_random_states", "cpc_bins", "cpc_labels")
+                kwargs[key] = tuple(value) if is_seq else value
+        return cls(**kwargs)
diff --git a/code/cls/config/experiments.yaml b/code/cls/config/experiments.yaml
new file mode 100644
index 0000000..f9d864a
--- /dev/null
+++ b/code/cls/config/experiments.yaml
@@ -0,0 +1,62 @@
+# 实验配置：路径与超参外置
+#
+# 路径占位符：
+#   {data_root}、{results_root} — 来自下方 paths（支持环境变量展开）
+#   ${CLS_DATA_ROOT:-./data}   — 直接写环境变量亦可
+#
+# 环境变量（推荐）：
+#   CLS_DATA_ROOT     数据目录（Excel）
+#   CLS_RESULTS_ROOT  结果输出根目录
+#   CLS_EXPERIMENT    默认 profile 名（可被 --profile 覆盖）
+#
+# 示例（Windows）：
+#   set CLS_DATA_ROOT=D:\thrid_beijing_hospital_data
+#   set CLS_RESULTS_ROOT=D:\thrid_beijing_hospital_data
+
+paths:
+  data_root: ${CLS_DATA_ROOT:-./data}
+  results_root: ${CLS_RESULTS_ROOT:-./results}
+
+experiments:
+  lab1:
+    train_path: "{data_root}/0804lab1-train.xlsx"
+    test_path: "{data_root}/0804lab1-test.xlsx"
+    results_base_dir: "{results_root}/results_0804_lab1"
+    lab_describe: cpc1-2=0_cpc3-5=1_lab1
+    use_nse: true
+    classifier: svm
+    n_folds: 1
+    random_state: 42
+
+  single:
+    train_path: "{data_root}/0804data_0.xlsx"
+    test_path: null
+    results_base_dir: "{results_root}/results_single"
+    lab_describe: cpc1-2=0_cpc3-5=1
+    use_nse: false
+    classifier: svm
+    n_folds: 1
+    test_size: 0.2
+    random_state: 42
+
+  kfold:
+    train_path: "{data_root}/0728data_delete.xlsx"
+    test_path: null
+    results_base_dir: "{results_root}/results_kfold"
+    lab_describe: cpc1-2=0_cpc3-5=1_kfold
+    use_nse: false
+    classifier: svm
+    n_folds: 5
+    test_size: 0.2
+    fold_random_states: [3, 13, 42, 87, 1307]
+
+  kfold_nse:
+    train_path: "{data_root}/0804lab1-CTdata_withCTidname_nse.xlsx"
+    test_path: null
+    results_base_dir: "{results_root}/results_kfold_nse"
+    lab_describe: cpc1-2=0_cpc3-5=1_kfold_nse
+    use_nse: true
+    classifier: svm
+    n_folds: 5
+    test_size: 0.2
+    fold_random_states: [3, 13, 42, 87, 1307]
diff --git a/code/cls/config/feature_columns.py b/code/cls/config/feature_columns.py
new file mode 100644
index 0000000..2729000
--- /dev/null
+++ b/code/cls/config/feature_columns.py
@@ -0,0 +1,72 @@
+"""Columns to drop from PyRadiomics exports before modeling."""
+
+RADIOMICS_DROP_COLUMNS: list[str] = [  # diagnostics_* metadata plus duplicated ID/CPC columns
+    "diagnostics_Image-original_Hash",
+    "diagnostics_Image-original_Hash_1",
+    "diagnostics_Image-original_Hash_2",
+    "diagnostics_Image-original_Hash_3",
+    "diagnostics_Mask-original_Hash",
+    "diagnostics_Mask-original_Hash_1",
+    "diagnostics_Mask-original_Hash_2",
+    "diagnostics_Mask-original_Hash_3",
+    "diagnostics_Image-original_Spacing",
+    "diagnostics_Image-original_Spacing_1",
+    "diagnostics_Image-original_Spacing_2",
+    "diagnostics_Image-original_Spacing_3",
+    "diagnostics_Image-original_Size",
+    "diagnostics_Image-original_Size_1",
+    "diagnostics_Image-original_Size_2",
+    "diagnostics_Image-original_Size_3",
+    "diagnostics_Mask-original_Spacing",
+    "diagnostics_Mask-original_Spacing_1",
+    "diagnostics_Mask-original_Spacing_2",
+    "diagnostics_Mask-original_Spacing_3",
+    "diagnostics_Mask-original_Size",
+    "diagnostics_Mask-original_Size_1",
+    "diagnostics_Mask-original_Size_2",
+    "diagnostics_Mask-original_Size_3",
+    "diagnostics_Mask-original_BoundingBox",
+    "diagnostics_Mask-original_BoundingBox_1",
+    "diagnostics_Mask-original_BoundingBox_2",
+    "diagnostics_Mask-original_BoundingBox_3",
+    "diagnostics_Mask-original_CenterOfMassIndex",
+    "diagnostics_Mask-original_CenterOfMassIndex_1",
+    "diagnostics_Mask-original_CenterOfMassIndex_2",
+    "diagnostics_Mask-original_CenterOfMassIndex_3",
+    "diagnostics_Mask-original_CenterOfMass",
+    "diagnostics_Mask-original_CenterOfMass_1",
+    "diagnostics_Mask-original_CenterOfMass_2",
+    "diagnostics_Mask-original_CenterOfMass_3",
+    "diagnostics_Mask-original_BoundingBox.1",
+    "diagnostics_Mask-original_BoundingBox.1_1",
+    "diagnostics_Mask-original_BoundingBox.1_2",
+    "diagnostics_Mask-original_BoundingBox.1_3",
+    "CPC_1",
+    "CPC_2",
+    "CPC_3",
+    "CTid_1",
+    "CTid_2",
+    "CTid_3",
+    "name_1",
+    "name_2",
+    "name_3",
+    "diagnostics_Mask-corrected_Spacing",
+    "diagnostics_Mask-corrected_Size",
+    "diagnostics_Mask-corrected_BoundingBox",
+    "diagnostics_Mask-corrected_VoxelNum",
+    "diagnostics_Mask-corrected_VolumeNum",
+    "diagnostics_Mask-corrected_CenterOfMassIndex",
+    "diagnostics_Mask-corrected_CenterOfMass",
+    "diagnostics_Mask-corrected_Mean",
+    "diagnostics_Mask-corrected_Minimum",
+    "diagnostics_Mask-corrected_Maximum",
+    "live",
+    "diagnostics_Versions_PyRadiomics",
+    "diagnostics_Versions_Numpy",
+    "diagnostics_Versions_SimpleITK",
+    "diagnostics_Versions_PyWavelet",
+    "diagnostics_Versions_Python",
+    "diagnostics_Configuration_Settings",
+    "diagnostics_Configuration_EnabledImageTypes",
+    "diagnostics_Image-original_Dimensionality",
+]
diff --git a/code/cls/config/load_config.py b/code/cls/config/load_config.py
new file mode 100644
index 0000000..7995274
--- /dev/null
+++ b/code/cls/config/load_config.py
@@ -0,0 +1,89 @@
+"""Load ExperimentConfig from YAML profiles and environment variables."""
+
+from __future__ import annotations
+
+import os
+import re
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+from config.defaults import ExperimentConfig
+
+_ENV_PATTERN = re.compile(
+    r"\$\{([^}:]+)(?::-([^}]*))?\}"
+)
+
+
+def _expand_env_string(value: str) -> str:
+    """Replace ``${VAR}`` and ``${VAR:-default}`` in a string.
+
+    As in POSIX shells, ``:-`` falls back to the default when VAR is unset
+    *or empty*; a bare ``${VAR}`` that is unset expands to "".
+    """
+    def repl(match: re.Match[str]) -> str:
+        name, default = match.group(1), match.group(2)
+        current = os.environ.get(name, "")
+        return current if current or default is None else default
+
+    return _ENV_PATTERN.sub(repl, value)
+
+
+def _resolve_string(value: str, context: dict[str, str]) -> str:
+    """Expand env references in *value*, then fill its ``{key}`` placeholders from *context*."""
+    return _expand_env_string(value).format(**context)
+
+
+def _resolve_value(value: Any, context: dict[str, str]) -> Any:
+    """Recursively resolve every string inside nested dicts/lists; other values pass through."""
+    if isinstance(value, dict):
+        return {key: _resolve_value(item, context) for key, item in value.items()}
+    if isinstance(value, list):
+        return [_resolve_value(item, context) for item in value]
+    # Scalars: only strings carry placeholders.
+    return _resolve_string(value, context) if isinstance(value, str) else value
+
+
+def _default_config_path() -> Path:
+    return Path(__file__).resolve().with_name("experiments.yaml")  # sits next to this module
+
+
+def list_profiles(config_path: Path | str | None = None) -> list[str]:
+    """Return sorted profile names; an empty/null ``experiments`` node gives []."""
+    path = Path(config_path) if config_path else _default_config_path()
+    with path.open(encoding="utf-8") as f:
+        data = yaml.safe_load(f) or {}
+    return sorted((data.get("experiments") or {}).keys())
+
+
+def load_experiment_config(
+    profile: str,
+    config_path: Path | str | None = None,
+) -> ExperimentConfig:
+    """
+    Build the ExperimentConfig for one named profile of the YAML file.
+
+    The ``paths`` section is resolved first, in file order, and its entries
+    become the ``{key}`` placeholders available to the profile's values.
+
+    Raises:
+        FileNotFoundError: config file missing
+        KeyError: unknown profile name
+    """
+    path = _default_config_path() if not config_path else Path(config_path)
+    if not path.is_file():
+        raise FileNotFoundError(f"Config not found: {path}")
+    with path.open(encoding="utf-8") as f:
+        data = yaml.safe_load(f) or {}
+
+    experiments = data.get("experiments") or {}
+    if profile not in experiments:
+        available = ", ".join(sorted(experiments)) or "(none)"
+        raise KeyError(f"Unknown profile {profile!r}. Available: {available}")
+
+    context: dict[str, str] = {}
+    for key, raw in (data.get("paths") or {}).items():
+        # Later path entries may reference earlier ones.
+        context[key] = _resolve_string(str(raw), context)
+    return ExperimentConfig.from_mapping(_resolve_value(experiments[profile], context))
diff --git a/code/cls/data/.gitkeep b/code/cls/data/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/code/cls/models/__init__.py b/code/cls/models/__init__.py
new file mode 100644
index 0000000..a8c710f
--- /dev/null
+++ b/code/cls/models/__init__.py
@@ -0,0 +1,3 @@
+from models.factory import build_classifier
+
+__all__ = ["build_classifier"]
diff --git a/code/cls/models/factory.py b/code/cls/models/factory.py
new file mode 100644
index 0000000..1367568
--- /dev/null
+++ b/code/cls/models/factory.py
@@ -0,0 +1,80 @@
+"""Classifier construction with shared hyperparameters."""
+
+from __future__ import annotations
+
+from catboost import CatBoostClassifier
+from lightgbm import LGBMClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.naive_bayes import GaussianNB
+from sklearn.svm import SVC
+import xgboost as xgb
+
+from config.defaults import ClassifierName
+
+
+def build_classifier(
+    name: ClassifierName,
+    *,
+    scale_pos_weight: float,
+    class_priors: tuple[float, float] | None = None,
+    random_state: int = 42,
+):
+    """Create a new, unfitted classifier for *name* with this project's fixed hyperparameters.
+
+    ``scale_pos_weight`` is forwarded to the boosted-tree models only and
+    ``class_priors`` to GaussianNB only (equal priors when omitted).
+    Raises ValueError when *name* matches none of the supported classifiers.
+    """
+    # --- scikit-learn estimators ---
+    if name == "svm":
+        return SVC(
+            kernel="rbf",
+            C=10,
+            gamma="auto",
+            probability=True,
+            class_weight="balanced",
+            random_state=random_state,
+        )
+    if name == "logistic":
+        return LogisticRegression(random_state=random_state)
+    if name == "gaussian_nb":
+        return GaussianNB(priors=list(class_priors) if class_priors else [0.5, 0.5])
+    # --- gradient-boosted trees (all take scale_pos_weight) ---
+    if name == "xgboost":
+        return xgb.XGBClassifier(
+            objective="binary:logistic",
+            eval_metric=["logloss"],
+            learning_rate=0.016,
+            max_depth=6,
+            n_estimators=600,
+            subsample=0.72,
+            colsample_bytree=0.705,
+            gamma=0.1,
+            min_child_weight=1,
+            scale_pos_weight=scale_pos_weight,
+            random_state=random_state,
+        )
+    if name == "lightgbm":
+        return LGBMClassifier(
+            objective="binary",
+            metric="binary_logloss",
+            learning_rate=0.016,
+            max_depth=6,
+            n_estimators=500,
+            subsample=0.7,
+            colsample_bytree=0.7,
+            scale_pos_weight=scale_pos_weight,
+            random_state=random_state,
+        )
+    if name == "catboost":
+        return CatBoostClassifier(
+            iterations=500,
+            depth=6,
+            learning_rate=0.01,
+            loss_function="Logloss",
+            eval_metric="AUC",
+            scale_pos_weight=scale_pos_weight,
+            random_seed=random_state,
+            verbose=0,
+        )
+    raise ValueError(f"Unknown classifier: {name!r}")
diff --git a/code/cls/new_data_process.py b/code/cls/new_data_process.py
index 733cc52..2565b46 100644
--- a/code/cls/new_data_process.py
+++ b/code/cls/new_data_process.py
@@ -1,6 +1,7 @@
 import pandas as pd
 import os
 
+from config.feature_columns import RADIOMICS_DROP_COLUMNS
 from utils.pre4data import drop_columns
 from sklearn.model_selection import train_test_split
 
@@ -30,29 +31,8 @@
 # data_1 = pd.concat([data0_1, data1_1, data2_1, data3_1])
 data_1 = pd.concat([data0_1, data1_1, data2_1])
 
-# dropdata and dropdata0 是不需要的特征, 且这些特征在进行降维时的值会导致无法读取, 字符串类型或者其他不可使用的类型
-dropdata = ['diagnostics_Image-original_Hash', 'diagnostics_Imag e-original_Hash_1', 'diagnostics_Image-original_Hash_2', 'diagnostics_Image-original_Hash_3',
-            'diagnostics_Mask-original_Hash', 'diagnostics_Mask-original_Hash_1', 'diagnostics_Mask-original_Hash_2', 'diagnostics_Mask-original_Hash_3',
-            'diagnostics_Image-original_Spacing', 'diagnostics_Image-original_Spacing_1', 'diagnostics_Image-original_Spacing_2','diagnostics_Image-original_Spacing_3',
-            'diagnostics_Image-original_Size', 'diagnostics_Image-original_Size_1', 'diagnostics_Image-original_Size_2', 'diagnostics_Image-original_Size_3',
-            'diagnostics_Mask-original_Spacing', 'diagnostics_Mask-original_Spacing_1', 'diagnostics_Mask-original_Spacing_2', 'diagnostics_Mask-original_Spacing_3',
-            'diagnostics_Mask-original_Size', 'diagnostics_Mask-original_Size_1', 'diagnostics_Mask-original_Size_2', 'diagnostics_Mask-original_Size_3',
-            'diagnostics_Mask-original_BoundingBox', 'diagnostics_Mask-original_BoundingBox_1', 'diagnostics_Mask-original_BoundingBox_2', 'diagnostics_Mask-original_BoundingBox_3',
-            'diagnostics_Mask-original_CenterOfMassIndex', 'diagnostics_Mask-original_CenterOfMassIndex_1', 'diagnostics_Mask-original_CenterOfMassIndex_2', 'diagnostics_Mask-original_CenterOfMassIndex_3',
-            'diagnostics_Mask-original_CenterOfMass', 'diagnostics_Mask-original_CenterOfMass_1', 'diagnostics_Mask-original_CenterOfMass_2', 'diagnostics_Mask-original_CenterOfMass_3',
-            'diagnostics_Mask-original_BoundingBox.1', 'diagnostics_Mask-original_BoundingBox.1_1', 'diagnostics_Mask-original_BoundingBox.1_2', 'diagnostics_Mask-original_BoundingBox.1_3',  
-            'diagnostics_Mask-original_BoundingBox', 'diagnostics_Mask-original_BoundingBox_1', 'diagnostics_Mask-original_BoundingBox_2', 'diagnostics_Mask-original_BoundingBox_3',
-            'CPC_1', 'CPC_2', 'CPC_3', 'CTid_1', 'CTid_2', 'CTid_3', 'name_1', 'name_2', 'name_3',
-            'diagnostics_Mask-corrected_Spacing', 'diagnostics_Mask-corrected_Size', 'diagnostics_Mask-corrected_BoundingBox', 'diagnostics_Mask-corrected_VoxelNum',
-            'diagnostics_Mask-corrected_VolumeNum','diagnostics_Mask-corrected_CenterOfMassIndex',
-            'diagnostics_Mask-corrected_CenterOfMass', 'diagnostics_Mask-corrected_Mean', 'diagnostics_Mask-corrected_Minimum',
-            'diagnostics_Mask-corrected_Maximum', 'live', 'diagnostics_Versions_PyRadiomics', 'diagnostics_Versions_Numpy', 'diagnostics_Versions_SimpleITK', 'diagnostics_Versions_PyWavelet',
-            'diagnostics_Versions_Python', 'diagnostics_Configuration_Settings', 'diagnostics_Configuration_EnabledImageTypes', 'diagnostics_Image-original_Dimensionality']
-
-dropdata0 = ['diagnostics_Mask-corrected_Spacing', 'diagnostics_Mask-corrected_Size', 'diagnostics_Mask-corrected_BoundingBox', 
-             'diagnostics_Mask-corrected_VoxelNum', 'diagnostics_Mask-corrected_VolumeNum', 
-             'diagnostics_Mask-corrected_CenterOfMassIndex', 'diagnostics_Mask-corrected_CenterOfMass', 
-             'diagnostics_Mask-corrected_Mean', 'diagnostics_Mask-corrected_Minimum', 'diagnostics_Mask-corrected_Maximum', 'live']
+# 不需要的 diagnostics / 元数据列（见 config/feature_columns.py）
+dropdata = RADIOMICS_DROP_COLUMNS
 
 data0_1 = drop_columns(data0_1, dropdata)
 data1_1 = drop_columns(data1_1, dropdata)
diff --git a/code/cls/pipeline/__init__.py b/code/cls/pipeline/__init__.py
new file mode 100644
index 0000000..027129f
--- /dev/null
+++ b/code/cls/pipeline/__init__.py
@@ -0,0 +1,8 @@
+from pipeline.data import load_split_tables, prepare_labeled_frames
+from pipeline.experiment import run_experiment
+
+__all__ = [
+    "load_split_tables",
+    "prepare_labeled_frames",
+    "run_experiment",
+]
diff --git a/code/cls/pipeline/data.py b/code/cls/pipeline/data.py
new file mode 100644
index 0000000..1da118e
--- /dev/null
+++ b/code/cls/pipeline/data.py
@@ -0,0 +1,112 @@
+"""Data loading and CPC label preparation."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pandas as pd
+from sklearn.model_selection import train_test_split
+
+from config.defaults import CPC_LABEL_BINS, CPC_LABELS, ID_COLUMNS, NSE_COLUMNS
+
+
+def load_excel(path: Path | str) -> pd.DataFrame:
+    return pd.read_excel(io=Path(path))  # read the workbook with pandas' default options
+
+
+def assign_cpc_labels(
+    df: pd.DataFrame,
+    *,
+    bins: tuple[int, int, int] = CPC_LABEL_BINS,
+    labels: tuple[int, int] = CPC_LABELS,
+    exclude_cpc5: bool = False,
+) -> pd.DataFrame:
+    """Return a copy of *df* with ``CPC`` replaced by a binned ``label`` column."""
+    labeled = df.copy()
+    # Already labeled and no CPC left: nothing to derive.
+    if "CPC" not in df.columns and "label" in df.columns:
+        return labeled
+    if exclude_cpc5:
+        labeled = labeled[labeled["CPC"] != 5]
+    labeled["label"] = pd.cut(labeled["CPC"], bins=list(bins), labels=list(labels))
+    return labeled.drop(columns=["CPC"])
+
+
+def strip_id_columns(
+    df: pd.DataFrame,
+    *,
+    id_columns: tuple[str, ...] = tuple(ID_COLUMNS),
+    nse_columns: tuple[str, ...] = tuple(NSE_COLUMNS),
+    keep_nse: bool = False,
+) -> tuple[pd.DataFrame, pd.DataFrame | None]:
+    """Drop ID columns; with ``keep_nse`` also move the NSE columns into a side table.
+
+    Returns ``(features, nse_df)``; ``nse_df`` is None unless ``keep_nse`` is
+    set and at least one NSE column exists in *df*.
+    """
+    unwanted = [*id_columns, *nse_columns] if keep_nse else list(id_columns)
+    found_nse = [col for col in nse_columns if keep_nse and col in df.columns]
+    nse_df = df[found_nse].copy() if found_nse else None
+    features = df.drop(columns=[col for col in unwanted if col in df.columns])
+    return features, nse_df
+
+
+def prepare_labeled_frames(
+    train_df: pd.DataFrame,
+    test_df: pd.DataFrame,
+    *,
+    use_nse: bool = False,
+    exclude_cpc5: bool = False,
+    bins: tuple[int, int, int] = CPC_LABEL_BINS,
+    labels: tuple[int, int] = CPC_LABELS,
+) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame | None, pd.DataFrame | None]:
+    """Label both tables, strip ID columns; return (train_x, test_x, train_nse, test_nse)."""
+    label_kwargs = {"bins": bins, "labels": labels, "exclude_cpc5": exclude_cpc5}
+    features, nse_tables = [], []
+    for frame in (train_df, test_df):
+        feats, nse = strip_id_columns(
+            assign_cpc_labels(frame, **label_kwargs), keep_nse=use_nse
+        )
+        features.append(feats)
+        nse_tables.append(nse)
+    return features[0], features[1], nse_tables[0], nse_tables[1]
+
+
+def load_split_tables(
+    train_path: Path | str,
+    test_path: Path | str | None = None,
+    *,
+    test_size: float = 0.2,
+    random_state: int = 42,
+    stratify_column: str = "CPC",
+) -> tuple[pd.DataFrame, pd.DataFrame]:
+    """Return ``(train_df, test_df)`` from two Excel files, or from a stratified split of one."""
+    train_path = Path(train_path)
+    if test_path is None:
+        # Single table: hold out ``test_size`` of the rows, stratified on one column.
+        full = load_excel(train_path)
+        train_df, test_df = train_test_split(
+            full,
+            test_size=test_size,
+            stratify=full[stratify_column],
+            random_state=random_state,
+        )
+        return train_df, test_df
+    return load_excel(train_path), load_excel(test_path)
+
+
+def merge_nse_features(
+    train_features: pd.DataFrame,
+    test_features: pd.DataFrame,
+    train_nse: pd.DataFrame,
+    test_nse: pd.DataFrame,
+    selected_features: list[str],
+) -> tuple[pd.DataFrame, pd.DataFrame]:
+    """Keep the LASSO-selected CT columns, append the NSE columns, re-attach the train label."""
+    # errors="ignore": "label" is normally not among the selected features.
+    train_ct = train_features[selected_features].drop(columns=["label"], errors="ignore")
+    train_with_nse = pd.concat([train_ct, train_nse], axis=1)
+    train_with_nse["label"] = train_features["label"].values
+    test_with_nse = pd.concat([test_features[selected_features], test_nse], axis=1)
+
+    return train_with_nse, test_with_nse
diff --git a/code/cls/pipeline/experiment.py b/code/cls/pipeline/experiment.py
new file mode 100644
index 0000000..9149e13
--- /dev/null
+++ b/code/cls/pipeline/experiment.py
@@ -0,0 +1,206 @@
+"""End-to-end training and evaluation for one experiment configuration."""
+
+from __future__ import annotations
+
+import os
+from datetime import datetime
+
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+
+from config.defaults import ExperimentConfig
+from models.factory import build_classifier
+from pipeline.data import (
+    load_excel,
+    load_split_tables,
+    merge_nse_features,
+    prepare_labeled_frames,
+)
+from utils.pre4data import if_same, lasso_dimension_reduction
+from utils.util import calculate_metrics, get_next_result_folder, save_results
+
+METRIC_KEYS = ("ACC", "Recall", "Specificity", "Precision", "NPV", "AUC")
+
+
+def _balanced_sample_weights(y: pd.Series) -> np.ndarray:
+    """Weight labels 0/1 by ``len(y) / (2 * count)``; everything else stays 1.0."""
+    balanced = np.ones(len(y))
+    for members in (np.asarray(y == cls) for cls in (0, 1)):
+        if members.any():
+            balanced[members] = len(y) / (2 * members.sum())
+    return balanced
+
+
+def _fit_classifier(clf, X_train, y_train, sample_weights) -> None:
+    """Fit ``clf``; only GaussianNB/LogisticRegression-named classes get weights."""
+    kind = clf.__class__.__name__.lower()
+    weighted = any(tag in kind for tag in ("gaussiannb", "logisticregression"))
+    fit_kwargs = {"sample_weight": sample_weights} if weighted else {}
+    clf.fit(X_train, y_train, **fit_kwargs)
+
+
+def _run_single_fold(
+    train_df: pd.DataFrame,
+    test_df: pd.DataFrame,
+    config: ExperimentConfig,
+    *,
+    fold: int | None = None,
+    save_roc_dir: str | None = None,
+) -> tuple[dict[str, float], list[str], str]:
+    """Train and evaluate one train/test split.
+
+    Steps: label preparation, LASSO selection on the train frame, optional
+    NSE merge, standardisation, classifier fit, then scoring through
+    ``calculate_metrics``.
+
+    Returns:
+        ``(metrics, selected_features, log)``; a falsy AUC (e.g. ``None``)
+        is stored as ``0.0`` in ``metrics``.
+    """
+    train_x, test_x, train_nse, test_nse = prepare_labeled_frames(
+        train_df,
+        test_df,
+        use_nse=config.use_nse,
+        exclude_cpc5=config.exclude_cpc5,
+        bins=config.cpc_bins,
+        labels=config.cpc_labels,
+    )
+    y_test = test_x["label"]
+    data_lasso, selected_features, _ = lasso_dimension_reduction(train_x)
+
+    # NSE columns are merged only when requested and present for both splits.
+    nse_available = train_nse is not None and test_nse is not None
+    if config.use_nse and nse_available:
+        train_final, test_final = merge_nse_features(
+            train_x, test_x, train_nse, test_nse, selected_features
+        )
+    else:
+        train_final = data_lasso
+        test_final = test_x.drop(columns=["label"])[selected_features]
+
+    y_train = train_final["label"]
+    X_train = train_final.drop(columns=["label"])
+    if_same(X_train, test_final)
+
+    # The scaler is fitted on train and merely applied to test.
+    scaler = StandardScaler()
+    X_train_scaled = scaler.fit_transform(X_train)
+    X_test_scaled = scaler.transform(test_final)
+
+    is_neg, is_pos = y_train == 0, y_train == 1
+    clf = build_classifier(
+        config.classifier,
+        scale_pos_weight=float(is_neg.sum() / max(is_pos.sum(), 1)),
+        class_priors=(is_neg.mean(), is_pos.mean()),
+        random_state=config.random_state,
+    )
+    _fit_classifier(clf, X_train_scaled, y_train, _balanced_sample_weights(y_train))
+
+    y_pred = clf.predict(X_test_scaled)
+    y_prob = clf.predict_proba(X_test_scaled)[:, -1]  # last column is the score
+    mode = clf.__class__.__name__
+    acc, recall, spec, prec, npv, auc = calculate_metrics(
+        y_test, y_pred, y_prob, save_roc_path=save_roc_dir, mode=mode, fold=fold
+    )
+    metrics = {
+        "ACC": acc,
+        "Recall": recall,
+        "Specificity": spec,
+        "Precision": prec,
+        "NPV": npv,
+        "AUC": auc or 0.0,
+    }
+    summary = (
+        f"fold={fold}\n"
+        f"train size={len(X_train)}, test size={len(test_final)}\n"
+        f"selected features ({len(selected_features)}): {selected_features}\n"
+    )
+    return metrics, selected_features, summary
+
+
+def run_experiment(config: ExperimentConfig) -> str:
+    """Run the configured experiment, save the text report, return the result folder."""
+    config.resolve_paths()
+    lab = (
+        f"{config.lab_describe}_withnse"
+        if config.use_nse
+        else f"{config.lab_describe}_withoutnse"
+    )
+
+    result_folder = get_next_result_folder(base_path=str(config.results_base_dir))
+    mode = build_classifier(config.classifier, scale_pos_weight=1.0).__class__.__name__
+    ct_mode = config.train_path.stem  # file name stem used to tag the output folder
+    save_path = os.path.join(
+        result_folder,
+        f"roc_curve_{mode}_time_{ct_mode}_{lab}_{datetime.now():%Y-%m-%d_%H-%M-%S}",
+    )
+    os.makedirs(save_path, exist_ok=True)
+
+    all_metrics: dict[str, list[float]] = {k: [] for k in METRIC_KEYS}
+    feature_logs: list[str] = []
+    split_logs: list[str] = []
+
+    if config.n_folds > 1:
+        # Split the raw rows (CPC, NSE, CTid kept) so every fold reruns the full preprocessing.
+        full_raw = load_excel(config.train_path)
+        if config.exclude_cpc5:  # CPC 5 rows are dropped before stratifying
+            full_raw = full_raw[full_raw["CPC"] != 5].copy()
+
+        for fold in range(1, config.n_folds + 1):  # repeated hold-outs, not K-fold
+            print(f"============== Fold {fold} ==============")
+            rs = config.fold_random_states[fold - 1]  # needs >= n_folds seeds
+            train_fold, test_fold = train_test_split(
+                full_raw,
+                test_size=config.test_size,
+                stratify=full_raw["CPC"],
+                shuffle=True,
+                random_state=rs,
+            )
+            metrics, features, log = _run_single_fold(
+                train_fold,
+                test_fold,
+                config,
+                fold=fold,
+                save_roc_dir=save_path,
+            )
+            for k in METRIC_KEYS:
+                all_metrics[k].append(metrics[k])
+            feature_logs.append(f"fold {fold}: {features}\n")
+            split_logs.append(log)
+    else:  # single split: fixed train/test files or one stratified split
+        train_raw, test_raw = load_split_tables(
+            config.train_path,
+            config.test_path,
+            test_size=config.test_size,
+            random_state=config.random_state,
+        )
+        metrics, features, log = _run_single_fold(
+            train_raw, test_raw, config, save_roc_dir=save_path
+        )
+        for k in METRIC_KEYS:
+            all_metrics[k].append(metrics[k])
+        feature_logs.append(str(features))
+        split_logs.append(log)
+
+    results = (
+        f"lab: {lab}\nclassifier: {mode}\n"
+        f"random_state: {config.random_state}\nn_folds: {config.n_folds}\n\n"
+    )
+    results += "".join(split_logs)
+
+    for k in METRIC_KEYS:
+        vals = all_metrics[k]  # np.std below is the population std (ddof=0)
+        results += f"\n{k}: {np.mean(vals):.3f} ± {np.std(vals):.3f}\n"
+        print(f"{k}: {np.mean(vals):.3f} ± {np.std(vals):.3f}")
+
+    for fl in feature_logs:
+        results += fl
+
+    probe = build_classifier(config.classifier, scale_pos_weight=1.0)
+    for pm, val in probe.get_params().items():  # NOTE(review): probe, not a fitted model
+        results += f"\nparameter {pm}: {val}\n"  # so scale_pos_weight logs as 1.0
+
+    save_results(results, result_folder)
+    return result_folder
diff --git a/code/cls/requirements.txt b/code/cls/requirements.txt
new file mode 100644
index 0000000..d122411
--- /dev/null
+++ b/code/cls/requirements.txt
@@ -0,0 +1,9 @@
+pandas>=2.0.0
+numpy>=1.24.0
+scikit-learn>=1.3.0
+xgboost>=2.0.0
+lightgbm>=4.0.0
+catboost>=1.2.0
+matplotlib>=3.7.0
+openpyxl>=3.1.0
+PyYAML>=6.0.0
diff --git a/code/cls/run_experiment.py b/code/cls/run_experiment.py
new file mode 100644
index 0000000..c0ea5c5
--- /dev/null
+++ b/code/cls/run_experiment.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+"""
+Unified CLI for all experiment profiles.
+
+Examples:
+    python run_experiment.py --list-profiles
+    python run_experiment.py -p lab1
+    python run_experiment.py -p kfold_nse --config config/experiments.yaml
+    CLS_DATA_ROOT=/path/to/data python run_experiment.py -p kfold_nse
+"""
+
+from config.cli import main
+
+if __name__ == "__main__":
+    main(default_profile="single")  # presumably the fallback without -p; confirm in config.cli
diff --git a/code/cls/utils/__init__.py b/code/cls/utils/__init__.py
new file mode 100644
index 0000000..8d33d98
--- /dev/null
+++ b/code/cls/utils/__init__.py
@@ -0,0 +1 @@
+"""Shared utilities for classification experiments."""
diff --git a/code/cls/utils/util.py b/code/cls/utils/util.py
index 6763ba5..1e82581 100644
--- a/code/cls/utils/util.py
+++ b/code/cls/utils/util.py
@@ -36,8 +36,6 @@ def calculate_metrics(y_true, y_pred, y_prob, save_roc_path=None, mode=None, fol
             plt.savefig(os.path.join(save_roc_path, 'model-{}'.format(mode)), dpi=300, bbox_inches='tight')
         plt.close()
 
-    
-    roc_auc = roc_auc_score(y_true, y_prob) if y_prob is not None else None
     return ACC, Recall, Specificity, Precision, NPV, roc_auc