From 7baea6e6250c0c4f7bfc02c980846de14e47767b Mon Sep 17 00:00:00 2001 From: Hamish Gordon Date: Tue, 7 Dec 2021 17:43:29 -0500 Subject: [PATCH 001/188] add basemap --- training/gordon-group/seed/ActionRuntime/py-gordon_1_0_0.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/training/gordon-group/seed/ActionRuntime/py-gordon_1_0_0.json b/training/gordon-group/seed/ActionRuntime/py-gordon_1_0_0.json index e6318f0c..572e6311 100644 --- a/training/gordon-group/seed/ActionRuntime/py-gordon_1_0_0.json +++ b/training/gordon-group/seed/ActionRuntime/py-gordon_1_0_0.json @@ -6,7 +6,9 @@ "conda.netcdf4": "=1.5.7", "conda.numpy": "=1.17.0", "conda.pandas": "=1.1.5", - "conda.cartopy":"=0.18.0" + "conda.cartopy":"=0.18.0", + "conda.basemap":"=1.2.2", + "conda.iris":"=3.1.1" }, "repositories": [ "https://repo.continuum.io/pkgs/main", From decde04b07349df61f9e890a49f944c4d7331b66 Mon Sep 17 00:00:00 2001 From: Hamish Gordon Date: Tue, 7 Dec 2021 17:51:41 -0500 Subject: [PATCH 002/188] typo --- training/gordon-group/seed/ActionRuntime/py-gordon_1_0_0.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/seed/ActionRuntime/py-gordon_1_0_0.json b/training/gordon-group/seed/ActionRuntime/py-gordon_1_0_0.json index 572e6311..2cb29fbd 100644 --- a/training/gordon-group/seed/ActionRuntime/py-gordon_1_0_0.json +++ b/training/gordon-group/seed/ActionRuntime/py-gordon_1_0_0.json @@ -8,7 +8,7 @@ "conda.pandas": "=1.1.5", "conda.cartopy":"=0.18.0", "conda.basemap":"=1.2.2", - "conda.iris":"=3.1.1" + "conda.iris":"=3.1.0" }, "repositories": [ "https://repo.continuum.io/pkgs/main", From 525d4d3900dc9d160655fb15df389693ca7dc9d7 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 20 Jan 2022 12:42:10 -0600 Subject: [PATCH 003/188] just moving files --- .../src/entity/{ => Observations}/ObservationOutput.c3typ | 0 .../src/entity/{ => Observations}/ObservationOutputFile.c3typ | 0 .../src/entity/{ => 
Observations}/ObservationOutputFile.py | 0 .../src/entity/{ => Observations}/ObservationOutputSeries.c3typ | 0 .../src/entity/{ => Observations}/ObservationSet.c3typ | 0 .../gordon-group/src/entity/{ => Observations}/ObservationSet.js | 0 .../src/entity/{ => Simulations}/SimulationEnsemble.c3typ | 0 .../src/entity/{ => Simulations}/SimulationEnsemble.js | 0 .../src/entity/{ => Simulations}/SimulationModelOutput.c3typ | 0 .../entity/{ => Simulations}/SimulationModelOutputSeries.c3typ | 0 .../src/entity/{ => Simulations}/SimulationModelParameters.c3typ | 0 .../src/entity/{ => Simulations}/SimulationOutputFile.c3typ | 0 .../src/entity/{ => Simulations}/SimulationOutputFile.py | 0 .../src/entity/{ => Simulations}/SimulationSample.c3typ | 0 .../gordon-group/src/entity/{ => Simulations}/SimulationSample.js | 0 15 files changed, 0 insertions(+), 0 deletions(-) rename training/gordon-group/src/entity/{ => Observations}/ObservationOutput.c3typ (100%) rename training/gordon-group/src/entity/{ => Observations}/ObservationOutputFile.c3typ (100%) rename training/gordon-group/src/entity/{ => Observations}/ObservationOutputFile.py (100%) rename training/gordon-group/src/entity/{ => Observations}/ObservationOutputSeries.c3typ (100%) rename training/gordon-group/src/entity/{ => Observations}/ObservationSet.c3typ (100%) rename training/gordon-group/src/entity/{ => Observations}/ObservationSet.js (100%) rename training/gordon-group/src/entity/{ => Simulations}/SimulationEnsemble.c3typ (100%) rename training/gordon-group/src/entity/{ => Simulations}/SimulationEnsemble.js (100%) rename training/gordon-group/src/entity/{ => Simulations}/SimulationModelOutput.c3typ (100%) rename training/gordon-group/src/entity/{ => Simulations}/SimulationModelOutputSeries.c3typ (100%) rename training/gordon-group/src/entity/{ => Simulations}/SimulationModelParameters.c3typ (100%) rename training/gordon-group/src/entity/{ => Simulations}/SimulationOutputFile.c3typ (100%) rename 
training/gordon-group/src/entity/{ => Simulations}/SimulationOutputFile.py (100%) rename training/gordon-group/src/entity/{ => Simulations}/SimulationSample.c3typ (100%) rename training/gordon-group/src/entity/{ => Simulations}/SimulationSample.js (100%) diff --git a/training/gordon-group/src/entity/ObservationOutput.c3typ b/training/gordon-group/src/entity/Observations/ObservationOutput.c3typ similarity index 100% rename from training/gordon-group/src/entity/ObservationOutput.c3typ rename to training/gordon-group/src/entity/Observations/ObservationOutput.c3typ diff --git a/training/gordon-group/src/entity/ObservationOutputFile.c3typ b/training/gordon-group/src/entity/Observations/ObservationOutputFile.c3typ similarity index 100% rename from training/gordon-group/src/entity/ObservationOutputFile.c3typ rename to training/gordon-group/src/entity/Observations/ObservationOutputFile.c3typ diff --git a/training/gordon-group/src/entity/ObservationOutputFile.py b/training/gordon-group/src/entity/Observations/ObservationOutputFile.py similarity index 100% rename from training/gordon-group/src/entity/ObservationOutputFile.py rename to training/gordon-group/src/entity/Observations/ObservationOutputFile.py diff --git a/training/gordon-group/src/entity/ObservationOutputSeries.c3typ b/training/gordon-group/src/entity/Observations/ObservationOutputSeries.c3typ similarity index 100% rename from training/gordon-group/src/entity/ObservationOutputSeries.c3typ rename to training/gordon-group/src/entity/Observations/ObservationOutputSeries.c3typ diff --git a/training/gordon-group/src/entity/ObservationSet.c3typ b/training/gordon-group/src/entity/Observations/ObservationSet.c3typ similarity index 100% rename from training/gordon-group/src/entity/ObservationSet.c3typ rename to training/gordon-group/src/entity/Observations/ObservationSet.c3typ diff --git a/training/gordon-group/src/entity/ObservationSet.js b/training/gordon-group/src/entity/Observations/ObservationSet.js similarity index 
100% rename from training/gordon-group/src/entity/ObservationSet.js rename to training/gordon-group/src/entity/Observations/ObservationSet.js diff --git a/training/gordon-group/src/entity/SimulationEnsemble.c3typ b/training/gordon-group/src/entity/Simulations/SimulationEnsemble.c3typ similarity index 100% rename from training/gordon-group/src/entity/SimulationEnsemble.c3typ rename to training/gordon-group/src/entity/Simulations/SimulationEnsemble.c3typ diff --git a/training/gordon-group/src/entity/SimulationEnsemble.js b/training/gordon-group/src/entity/Simulations/SimulationEnsemble.js similarity index 100% rename from training/gordon-group/src/entity/SimulationEnsemble.js rename to training/gordon-group/src/entity/Simulations/SimulationEnsemble.js diff --git a/training/gordon-group/src/entity/SimulationModelOutput.c3typ b/training/gordon-group/src/entity/Simulations/SimulationModelOutput.c3typ similarity index 100% rename from training/gordon-group/src/entity/SimulationModelOutput.c3typ rename to training/gordon-group/src/entity/Simulations/SimulationModelOutput.c3typ diff --git a/training/gordon-group/src/entity/SimulationModelOutputSeries.c3typ b/training/gordon-group/src/entity/Simulations/SimulationModelOutputSeries.c3typ similarity index 100% rename from training/gordon-group/src/entity/SimulationModelOutputSeries.c3typ rename to training/gordon-group/src/entity/Simulations/SimulationModelOutputSeries.c3typ diff --git a/training/gordon-group/src/entity/SimulationModelParameters.c3typ b/training/gordon-group/src/entity/Simulations/SimulationModelParameters.c3typ similarity index 100% rename from training/gordon-group/src/entity/SimulationModelParameters.c3typ rename to training/gordon-group/src/entity/Simulations/SimulationModelParameters.c3typ diff --git a/training/gordon-group/src/entity/SimulationOutputFile.c3typ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ similarity index 100% rename from 
training/gordon-group/src/entity/SimulationOutputFile.c3typ rename to training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ diff --git a/training/gordon-group/src/entity/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py similarity index 100% rename from training/gordon-group/src/entity/SimulationOutputFile.py rename to training/gordon-group/src/entity/Simulations/SimulationOutputFile.py diff --git a/training/gordon-group/src/entity/SimulationSample.c3typ b/training/gordon-group/src/entity/Simulations/SimulationSample.c3typ similarity index 100% rename from training/gordon-group/src/entity/SimulationSample.c3typ rename to training/gordon-group/src/entity/Simulations/SimulationSample.c3typ diff --git a/training/gordon-group/src/entity/SimulationSample.js b/training/gordon-group/src/entity/Simulations/SimulationSample.js similarity index 100% rename from training/gordon-group/src/entity/SimulationSample.js rename to training/gordon-group/src/entity/Simulations/SimulationSample.js From a644269d50777c8bab46bf2b29a06530d958da31 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 20 Jan 2022 12:46:36 -0600 Subject: [PATCH 004/188] moving more files --- training/gordon-group/src/{ => BatchJobs}/UpsertData.c3typ | 0 training/gordon-group/src/{ => BatchJobs}/UpsertData.js | 0 training/gordon-group/src/{ => BatchJobs}/UpsertDataBatch.c3typ | 0 training/gordon-group/src/{ => BatchJobs}/UpsertDataOptions.c3typ | 0 training/gordon-group/src/{ => BatchJobs}/UpsertObsData.c3typ | 0 training/gordon-group/src/{ => BatchJobs}/UpsertObsData.js | 0 .../gordon-group/src/{ => BatchJobs}/UpsertObsDataBatch.c3typ | 0 training/gordon-group/src/{ => NetCDF}/NetCDFUtil.c3typ | 0 training/gordon-group/src/{ => NetCDF}/NetCDFUtil.py | 0 9 files changed, 0 insertions(+), 0 deletions(-) rename training/gordon-group/src/{ => BatchJobs}/UpsertData.c3typ (100%) rename training/gordon-group/src/{ => BatchJobs}/UpsertData.js (100%) 
rename training/gordon-group/src/{ => BatchJobs}/UpsertDataBatch.c3typ (100%) rename training/gordon-group/src/{ => BatchJobs}/UpsertDataOptions.c3typ (100%) rename training/gordon-group/src/{ => BatchJobs}/UpsertObsData.c3typ (100%) rename training/gordon-group/src/{ => BatchJobs}/UpsertObsData.js (100%) rename training/gordon-group/src/{ => BatchJobs}/UpsertObsDataBatch.c3typ (100%) rename training/gordon-group/src/{ => NetCDF}/NetCDFUtil.c3typ (100%) rename training/gordon-group/src/{ => NetCDF}/NetCDFUtil.py (100%) diff --git a/training/gordon-group/src/UpsertData.c3typ b/training/gordon-group/src/BatchJobs/UpsertData.c3typ similarity index 100% rename from training/gordon-group/src/UpsertData.c3typ rename to training/gordon-group/src/BatchJobs/UpsertData.c3typ diff --git a/training/gordon-group/src/UpsertData.js b/training/gordon-group/src/BatchJobs/UpsertData.js similarity index 100% rename from training/gordon-group/src/UpsertData.js rename to training/gordon-group/src/BatchJobs/UpsertData.js diff --git a/training/gordon-group/src/UpsertDataBatch.c3typ b/training/gordon-group/src/BatchJobs/UpsertDataBatch.c3typ similarity index 100% rename from training/gordon-group/src/UpsertDataBatch.c3typ rename to training/gordon-group/src/BatchJobs/UpsertDataBatch.c3typ diff --git a/training/gordon-group/src/UpsertDataOptions.c3typ b/training/gordon-group/src/BatchJobs/UpsertDataOptions.c3typ similarity index 100% rename from training/gordon-group/src/UpsertDataOptions.c3typ rename to training/gordon-group/src/BatchJobs/UpsertDataOptions.c3typ diff --git a/training/gordon-group/src/UpsertObsData.c3typ b/training/gordon-group/src/BatchJobs/UpsertObsData.c3typ similarity index 100% rename from training/gordon-group/src/UpsertObsData.c3typ rename to training/gordon-group/src/BatchJobs/UpsertObsData.c3typ diff --git a/training/gordon-group/src/UpsertObsData.js b/training/gordon-group/src/BatchJobs/UpsertObsData.js similarity index 100% rename from 
training/gordon-group/src/UpsertObsData.js rename to training/gordon-group/src/BatchJobs/UpsertObsData.js diff --git a/training/gordon-group/src/UpsertObsDataBatch.c3typ b/training/gordon-group/src/BatchJobs/UpsertObsDataBatch.c3typ similarity index 100% rename from training/gordon-group/src/UpsertObsDataBatch.c3typ rename to training/gordon-group/src/BatchJobs/UpsertObsDataBatch.c3typ diff --git a/training/gordon-group/src/NetCDFUtil.c3typ b/training/gordon-group/src/NetCDF/NetCDFUtil.c3typ similarity index 100% rename from training/gordon-group/src/NetCDFUtil.c3typ rename to training/gordon-group/src/NetCDF/NetCDFUtil.c3typ diff --git a/training/gordon-group/src/NetCDFUtil.py b/training/gordon-group/src/NetCDF/NetCDFUtil.py similarity index 100% rename from training/gordon-group/src/NetCDFUtil.py rename to training/gordon-group/src/NetCDF/NetCDFUtil.py From a2c772ecc7264e2b727f323c7cec7dcc27f24a76 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 20 Jan 2022 12:58:21 -0600 Subject: [PATCH 005/188] method to clean Observation data --- .../src/entity/Observations/ObservationSet.c3typ | 2 ++ .../src/entity/Observations/ObservationSet.js | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/training/gordon-group/src/entity/Observations/ObservationSet.c3typ b/training/gordon-group/src/entity/Observations/ObservationSet.c3typ index 3be791ed..3183f979 100644 --- a/training/gordon-group/src/entity/Observations/ObservationSet.c3typ +++ b/training/gordon-group/src/entity/Observations/ObservationSet.c3typ @@ -21,4 +21,6 @@ entity type ObservationSet mixes MetricEvaluatable schema name "OBS_ST" { // Upsert data into {@link ObservationOutput} for all {@link ObservationOutputFile}s corresponding to this {@link ObservationSet} @dependency(include = "this, outputFiles") upsertObservationData: member function(): int js server + // Cleans all seeded data + removeAllSeededData: function() : int js server } \ No newline at end of file diff --git 
a/training/gordon-group/src/entity/Observations/ObservationSet.js b/training/gordon-group/src/entity/Observations/ObservationSet.js index 2766642d..289903ea 100644 --- a/training/gordon-group/src/entity/Observations/ObservationSet.js +++ b/training/gordon-group/src/entity/Observations/ObservationSet.js @@ -82,4 +82,18 @@ function upsertObservationData() { return total; }; - \ No newline at end of file + + + +/** + * removeAllSeededData() + * Removes all seeded data for this project. + */ + function removeAllSeededData() { + ObservationSet.removeAll(); + ObservationOutput.removeAll(); + ObservationOutputFile.removeAll(); + ObservationOutputSeries.removeAll(); + + return 0; +} \ No newline at end of file From 5cccbd478cf7d639d888e0a3587afa7bbd09be23 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 20 Jan 2022 13:01:55 -0600 Subject: [PATCH 006/188] fix doStart for batch processing --- training/gordon-group/src/BatchJobs/UpsertData.js | 2 +- training/gordon-group/src/BatchJobs/UpsertObsData.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/src/BatchJobs/UpsertData.js b/training/gordon-group/src/BatchJobs/UpsertData.js index 189bf716..193dd992 100644 --- a/training/gordon-group/src/BatchJobs/UpsertData.js +++ b/training/gordon-group/src/BatchJobs/UpsertData.js @@ -15,7 +15,7 @@ function doStart(job, options) { while(dataset.hasNext()) { batch.push(dataset.next()); - if (dataset.length >= options.batchSize || !dataset.hasNext()) { + if (batch.length >= options.batchSize || !dataset.hasNext()) { var batchSpec = UpsertDataBatch.make({values: batch}); job.scheduleBatch(batchSpec); diff --git a/training/gordon-group/src/BatchJobs/UpsertObsData.js b/training/gordon-group/src/BatchJobs/UpsertObsData.js index 72c33d23..62a6c646 100644 --- a/training/gordon-group/src/BatchJobs/UpsertObsData.js +++ b/training/gordon-group/src/BatchJobs/UpsertObsData.js @@ -15,7 +15,7 @@ while(dataset.hasNext()) { batch.push(dataset.next()); - if 
(dataset.length >= options.batchSize || !dataset.hasNext()) { + if (batch.length >= options.batchSize || !dataset.hasNext()) { var batchSpec = UpsertObsDataBatch.make({values: batch}); job.scheduleBatch(batchSpec); From 2cc845604a8fb0972aa1df61321722edc68c9cdf Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 20 Jan 2022 13:19:02 -0600 Subject: [PATCH 007/188] add 99 bins for UHSAdNdlogd --- .../Observations/ObservationOutput.c3typ | 196 ++++++++++++++++++ .../Observations/ObservationOutputFile.py | 2 +- 2 files changed, 197 insertions(+), 1 deletion(-) diff --git a/training/gordon-group/src/entity/Observations/ObservationOutput.c3typ b/training/gordon-group/src/entity/Observations/ObservationOutput.c3typ index c23a5b3e..94134894 100644 --- a/training/gordon-group/src/entity/Observations/ObservationOutput.c3typ +++ b/training/gordon-group/src/entity/Observations/ObservationOutput.c3typ @@ -41,9 +41,205 @@ entity type ObservationOutput mixes TimedDataPoint sche CNgt10: double @ts(treatment='avg') total_Cl: double + + // UHSASdNdlogd variables: 99 bins @ts(treatment='avg') UHSASdNdlogd_bin0: double @ts(treatment='avg') UHSASdNdlogd_bin1: double + @ts(treatment='avg') + UHSASdNdlogd_bin2: double + @ts(treatment='avg') + UHSASdNdlogd_bin3: double + @ts(treatment='avg') + UHSASdNdlogd_bin4: double + @ts(treatment='avg') + UHSASdNdlogd_bin5: double + @ts(treatment='avg') + UHSASdNdlogd_bin6: double + @ts(treatment='avg') + UHSASdNdlogd_bin7: double + @ts(treatment='avg') + UHSASdNdlogd_bin8: double + @ts(treatment='avg') + UHSASdNdlogd_bin9: double + @ts(treatment='avg') + UHSASdNdlogd_bin10: double + @ts(treatment='avg') + UHSASdNdlogd_bin11: double + @ts(treatment='avg') + UHSASdNdlogd_bin12: double + @ts(treatment='avg') + UHSASdNdlogd_bin13: double + @ts(treatment='avg') + UHSASdNdlogd_bin14: double + @ts(treatment='avg') + UHSASdNdlogd_bin15: double + @ts(treatment='avg') + UHSASdNdlogd_bin16: double + @ts(treatment='avg') + UHSASdNdlogd_bin17: double + 
@ts(treatment='avg') + UHSASdNdlogd_bin18: double + @ts(treatment='avg') + UHSASdNdlogd_bin19: double + @ts(treatment='avg') + UHSASdNdlogd_bin20: double + @ts(treatment='avg') + UHSASdNdlogd_bin21: double + @ts(treatment='avg') + UHSASdNdlogd_bin22: double + @ts(treatment='avg') + UHSASdNdlogd_bin23: double + @ts(treatment='avg') + UHSASdNdlogd_bin24: double + @ts(treatment='avg') + UHSASdNdlogd_bin25: double + @ts(treatment='avg') + UHSASdNdlogd_bin26: double + @ts(treatment='avg') + UHSASdNdlogd_bin27: double + @ts(treatment='avg') + UHSASdNdlogd_bin28: double + @ts(treatment='avg') + UHSASdNdlogd_bin29: double + @ts(treatment='avg') + UHSASdNdlogd_bin30: double + @ts(treatment='avg') + UHSASdNdlogd_bin31: double + @ts(treatment='avg') + UHSASdNdlogd_bin32: double + @ts(treatment='avg') + UHSASdNdlogd_bin33: double + @ts(treatment='avg') + UHSASdNdlogd_bin34: double + @ts(treatment='avg') + UHSASdNdlogd_bin35: double + @ts(treatment='avg') + UHSASdNdlogd_bin36: double + @ts(treatment='avg') + UHSASdNdlogd_bin37: double + @ts(treatment='avg') + UHSASdNdlogd_bin38: double + @ts(treatment='avg') + UHSASdNdlogd_bin39: double + @ts(treatment='avg') + UHSASdNdlogd_bin40: double + @ts(treatment='avg') + UHSASdNdlogd_bin41: double + @ts(treatment='avg') + UHSASdNdlogd_bin42: double + @ts(treatment='avg') + UHSASdNdlogd_bin43: double + @ts(treatment='avg') + UHSASdNdlogd_bin44: double + @ts(treatment='avg') + UHSASdNdlogd_bin45: double + @ts(treatment='avg') + UHSASdNdlogd_bin46: double + @ts(treatment='avg') + UHSASdNdlogd_bin47: double + @ts(treatment='avg') + UHSASdNdlogd_bin48: double + @ts(treatment='avg') + UHSASdNdlogd_bin49: double + @ts(treatment='avg') + UHSASdNdlogd_bin50: double + @ts(treatment='avg') + UHSASdNdlogd_bin51: double + @ts(treatment='avg') + UHSASdNdlogd_bin52: double + @ts(treatment='avg') + UHSASdNdlogd_bin53: double + @ts(treatment='avg') + UHSASdNdlogd_bin54: double + @ts(treatment='avg') + UHSASdNdlogd_bin55: double + @ts(treatment='avg') + 
UHSASdNdlogd_bin56: double + @ts(treatment='avg') + UHSASdNdlogd_bin57: double + @ts(treatment='avg') + UHSASdNdlogd_bin58: double + @ts(treatment='avg') + UHSASdNdlogd_bin59: double + @ts(treatment='avg') + UHSASdNdlogd_bin60: double + @ts(treatment='avg') + UHSASdNdlogd_bin61: double + @ts(treatment='avg') + UHSASdNdlogd_bin62: double + @ts(treatment='avg') + UHSASdNdlogd_bin63: double + @ts(treatment='avg') + UHSASdNdlogd_bin64: double + @ts(treatment='avg') + UHSASdNdlogd_bin65: double + @ts(treatment='avg') + UHSASdNdlogd_bin66: double + @ts(treatment='avg') + UHSASdNdlogd_bin67: double + @ts(treatment='avg') + UHSASdNdlogd_bin68: double + @ts(treatment='avg') + UHSASdNdlogd_bin69: double + @ts(treatment='avg') + UHSASdNdlogd_bin70: double + @ts(treatment='avg') + UHSASdNdlogd_bin71: double + @ts(treatment='avg') + UHSASdNdlogd_bin72: double + @ts(treatment='avg') + UHSASdNdlogd_bin73: double + @ts(treatment='avg') + UHSASdNdlogd_bin74: double + @ts(treatment='avg') + UHSASdNdlogd_bin75: double + @ts(treatment='avg') + UHSASdNdlogd_bin76: double + @ts(treatment='avg') + UHSASdNdlogd_bin77: double + @ts(treatment='avg') + UHSASdNdlogd_bin78: double + @ts(treatment='avg') + UHSASdNdlogd_bin79: double + @ts(treatment='avg') + UHSASdNdlogd_bin80: double + @ts(treatment='avg') + UHSASdNdlogd_bin81: double + @ts(treatment='avg') + UHSASdNdlogd_bin82: double + @ts(treatment='avg') + UHSASdNdlogd_bin83: double + @ts(treatment='avg') + UHSASdNdlogd_bin84: double + @ts(treatment='avg') + UHSASdNdlogd_bin85: double + @ts(treatment='avg') + UHSASdNdlogd_bin86: double + @ts(treatment='avg') + UHSASdNdlogd_bin87: double + @ts(treatment='avg') + UHSASdNdlogd_bin88: double + @ts(treatment='avg') + UHSASdNdlogd_bin89: double + @ts(treatment='avg') + UHSASdNdlogd_bin90: double + @ts(treatment='avg') + UHSASdNdlogd_bin91: double + @ts(treatment='avg') + UHSASdNdlogd_bin92: double + @ts(treatment='avg') + UHSASdNdlogd_bin93: double + @ts(treatment='avg') + UHSASdNdlogd_bin94: 
double + @ts(treatment='avg') + UHSASdNdlogd_bin95: double + @ts(treatment='avg') + UHSASdNdlogd_bin96: double + @ts(treatment='avg') + UHSASdNdlogd_bin97: double + @ts(treatment='avg') + UHSASdNdlogd_bin98: double } \ No newline at end of file diff --git a/training/gordon-group/src/entity/Observations/ObservationOutputFile.py b/training/gordon-group/src/entity/Observations/ObservationOutputFile.py index 54ba0053..1310a083 100644 --- a/training/gordon-group/src/entity/Observations/ObservationOutputFile.py +++ b/training/gordon-group/src/entity/Observations/ObservationOutputFile.py @@ -48,7 +48,7 @@ def get_df_from_c3_file(c3file): df[c3_var] = source.variables[nc_var][:] df[c3_var] = pd.to_datetime(df[c3_var],unit='s') elif nc_var == 'UHSASdNdlogd': - for i in range(0,1): + for i in range(0,99): name = c3_var + "_bin" + str(i) try: df[name] = source.variables[nc_var][:,i] From a6ea4317c99eb6e59a9c2d6c45c08ea48d27eb3e Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 27 Jan 2022 15:10:11 -0600 Subject: [PATCH 008/188] adding temperature UOM --- training/gordon-group/src/BatchJobs/UpsertObsData.c3typ | 2 +- .../src/entity/Simulations/SimulationModelOutput.c3typ | 2 +- .../src/entity/Simulations/SimulationSample.c3typ | 6 ++++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/src/BatchJobs/UpsertObsData.c3typ b/training/gordon-group/src/BatchJobs/UpsertObsData.c3typ index fbc99d93..3d6ba12d 100644 --- a/training/gordon-group/src/BatchJobs/UpsertObsData.c3typ +++ b/training/gordon-group/src/BatchJobs/UpsertObsData.c3typ @@ -1,6 +1,6 @@ /** * UpsertObsData.c3typ -* Batch job to upsert data from every {@link SimulationSample} in a {@link SimulationEnsemble} in parallel +* Batch job to upsert data from every {@link ObservationSet} in parallel */ type UpsertObsData extends BatchJob type key 'UPSERTOBSDATA' { diff --git a/training/gordon-group/src/entity/Simulations/SimulationModelOutput.c3typ 
b/training/gordon-group/src/entity/Simulations/SimulationModelOutput.c3typ index 051814f4..b052c7c6 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationModelOutput.c3typ +++ b/training/gordon-group/src/entity/Simulations/SimulationModelOutput.c3typ @@ -21,7 +21,7 @@ entity type SimulationModelOutput mixes TimedDataPoint Date: Mon, 21 Feb 2022 09:57:06 -0600 Subject: [PATCH 009/188] first stab at custom ML pipe --- .../ActionRuntime/py-gordon-ML_1_0_0.json | 17 ++++++++++++++++ .../PrincipalComponentAnalysis.c3typ | 12 +++++++++++ .../PrincipalComponentAnalysis.py | 20 +++++++++++++++++++ .../PrincipalComponentAnalysisTechnique.c3typ | 8 ++++++++ 4 files changed, 57 insertions(+) create mode 100644 training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json create mode 100644 training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.c3typ create mode 100644 training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.py create mode 100644 training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisTechnique.c3typ diff --git a/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json b/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json new file mode 100644 index 00000000..6b8dc8e9 --- /dev/null +++ b/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json @@ -0,0 +1,17 @@ +{ + "language": "Python", + "runtimeVersion": "3.9", + "modules": { + "conda.numpy": "=1.17.0", + "conda.pandas": "=1.1.5", + "conda.scikit-learn":"=1.0.2", + "conda.matplotlib":"=3.5.1" + }, + "repositories": [ + "https://repo.continuum.io/pkgs/main", + "conda-forge" + ], + "runtime": "CPython", + "name": "py-gordon-ML_1_0_0", + "id": "py-gordon-ML_1_0_0" +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.c3typ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.c3typ 
new file mode 100644 index 00000000..bcefbedc --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.c3typ @@ -0,0 +1,12 @@ +/** +* PrincipalComponentAnalysis.c3typ +* Performs Scikit-Learn's PCA for a set of features. +*/ +entity type PrincipalComponentAnalysis extends MLLeafPipe type key 'PCA' { + // the technique used for this pipe + technique: !PrincipalComponentAnalysisTechnique + + // train method (pre compute 'fillers' for usage when process() is called) + @py(env='gordon-ML_1_0_0') + train: ~ +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.py b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.py new file mode 100644 index 00000000..e41e0281 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.py @@ -0,0 +1,20 @@ +def train(this, input): + """ + Performs Scikit-Learn PCA. + """ + from sklearn.decomposition import PCA + + # get number of PCA components + nComps = this.technique.nComponents + + # cast features into pandas df + data = c3.Dataset.toPandas(dataset=input) + + # call pca + pca = PCA(n_components=nComps) + pca.fit_transform(data) + + # serialize this training + this.trainedModel = c3.MLTrainedModelArtifact(model=c3.PythonSerialization.serialize(obj=data)) + + return this \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisTechnique.c3typ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisTechnique.c3typ new file mode 100644 index 00000000..a25968c6 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisTechnique.c3typ @@ -0,0 +1,8 @@ +/** +* PrincipalComponentAnalysisTechnique.c3typ +* Defines hyper parameters for PCA. 
+*/ +type PrincipalComponentAnalysisTechnique mixes MLtechnique { + // the number of PCA components + nComponents: integer +} \ No newline at end of file From 92a878d4317332b18a67ffdd9a3f0d789f9c2f94 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 21 Feb 2022 10:00:27 -0600 Subject: [PATCH 010/188] lil syntax fix --- .../preProcessing/PrincipalComponentAnalysisTechnique.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisTechnique.c3typ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisTechnique.c3typ index a25968c6..2b572734 100644 --- a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisTechnique.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisTechnique.c3typ @@ -2,7 +2,7 @@ * PrincipalComponentAnalysisTechnique.c3typ * Defines hyper parameters for PCA. */ -type PrincipalComponentAnalysisTechnique mixes MLtechnique { +type PrincipalComponentAnalysisTechnique mixes MLTechnique { // the number of PCA components nComponents: integer } \ No newline at end of file From 0f64fa3377f3bbf36a1b39beb4b5c87efab9caf9 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 21 Feb 2022 10:37:23 -0600 Subject: [PATCH 011/188] shrink action runtime --- .../gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json | 5 +---- .../preProcessing/PrincipalComponentAnalysis.c3typ | 4 ++-- .../preProcessing/PrincipalComponentAnalysis.py | 3 ++- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json b/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json index 6b8dc8e9..c6b99c7c 100644 --- a/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json +++ b/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json @@ -2,10 +2,7 @@ "language": "Python", "runtimeVersion": "3.9", 
"modules": { - "conda.numpy": "=1.17.0", - "conda.pandas": "=1.1.5", - "conda.scikit-learn":"=1.0.2", - "conda.matplotlib":"=3.5.1" + "conda.scikit-learn":"=1.0.2" }, "repositories": [ "https://repo.continuum.io/pkgs/main", diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.c3typ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.c3typ index bcefbedc..0841bb42 100644 --- a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.c3typ @@ -1,8 +1,8 @@ /** -* PrincipalComponentAnalysis.c3typ +* PrincipalComponentAnalysisPipe.c3typ * Performs Scikit-Learn's PCA for a set of features. */ -entity type PrincipalComponentAnalysis extends MLLeafPipe type key 'PCA' { +entity type PrincipalComponentAnalysisPipe extends MLLeafPipe type key 'PCA' { // the technique used for this pipe technique: !PrincipalComponentAnalysisTechnique diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.py b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.py index e41e0281..b4caf76b 100644 --- a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.py +++ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.py @@ -1,6 +1,7 @@ def train(this, input): """ - Performs Scikit-Learn PCA. + Performs Scikit-Learn's PCA. 
+ https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html """ from sklearn.decomposition import PCA From a0818f945cf842eb32172da1c213f7888ef9de2c Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 21 Feb 2022 10:41:05 -0600 Subject: [PATCH 012/188] change file names --- ...mponentAnalysis.c3typ => PrincipalComponentAnalysisPipe.c3typ} | 0 ...ipalComponentAnalysis.py => PrincipalComponentAnalysisPipe.py} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename training/gordon-group/src/CustomMLPipeline/preProcessing/{PrincipalComponentAnalysis.c3typ => PrincipalComponentAnalysisPipe.c3typ} (100%) rename training/gordon-group/src/CustomMLPipeline/preProcessing/{PrincipalComponentAnalysis.py => PrincipalComponentAnalysisPipe.py} (100%) diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.c3typ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.c3typ similarity index 100% rename from training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.c3typ rename to training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.c3typ diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.py b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py similarity index 100% rename from training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysis.py rename to training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py From 5a55fb284693308d208ac3c1f732fcb67a09964b Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 21 Feb 2022 13:00:07 -0600 Subject: [PATCH 013/188] add train method variables --- .../preProcessing/PrincipalComponentAnalysisPipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py index b4caf76b..ecca3334 100644 --- a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py +++ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py @@ -1,4 +1,4 @@ -def train(this, input): +def train(this, input, targetOutput, spec): """ Performs Scikit-Learn's PCA. https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html From b7b5cf08dcbb6a6d7403380cd10d772ef3f577fd Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 21 Feb 2022 13:24:50 -0600 Subject: [PATCH 014/188] add pandas --- .../gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json b/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json index c6b99c7c..d62b2cb1 100644 --- a/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json +++ b/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json @@ -2,7 +2,8 @@ "language": "Python", "runtimeVersion": "3.9", "modules": { - "conda.scikit-learn":"=1.0.2" + "conda.scikit-learn":"=1.0.2", + "conda.pandas":"=1.4.1" }, "repositories": [ "https://repo.continuum.io/pkgs/main", From 0eb2338ee608e2f09a2d5814eb6bfed54efc7c2a Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 21 Feb 2022 13:31:26 -0600 Subject: [PATCH 015/188] add dill --- .../gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json b/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json index d62b2cb1..142c320b 100644 --- a/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json +++ 
b/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json @@ -3,7 +3,8 @@ "runtimeVersion": "3.9", "modules": { "conda.scikit-learn":"=1.0.2", - "conda.pandas":"=1.4.1" + "conda.pandas":"=1.4.1", + "conda.dill":"=0.3.4" }, "repositories": [ "https://repo.continuum.io/pkgs/main", From 2cf4a306813527031eebbc00a7be16268ea7c44f Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 21 Feb 2022 14:15:08 -0600 Subject: [PATCH 016/188] try new kernel --- .../gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json b/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json index 142c320b..d35008b6 100644 --- a/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json +++ b/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json @@ -1,9 +1,11 @@ { "language": "Python", - "runtimeVersion": "3.9", + "runtimeVersion": "3.8.10", "modules": { "conda.scikit-learn":"=1.0.2", "conda.pandas":"=1.4.1", + "conda.numpy":"=1.22.2", + "conda.scipy":"1.8.0", "conda.dill":"=0.3.4" }, "repositories": [ From d41e2b41babe9010416098484ec5403da81ca63a Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 21 Feb 2022 14:22:06 -0600 Subject: [PATCH 017/188] scipy fix --- .../gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json b/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json index d35008b6..a8b95590 100644 --- a/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json +++ b/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json @@ -5,7 +5,7 @@ "conda.scikit-learn":"=1.0.2", "conda.pandas":"=1.4.1", "conda.numpy":"=1.22.2", - "conda.scipy":"1.8.0", + "conda.scipy":"=1.8.0", "conda.dill":"=0.3.4" }, "repositories": [ From 91e06bc69ceb678e34c189d55e864e2c4701368c Mon Sep 
17 00:00:00 2001 From: babreu-ncsa Date: Mon, 21 Feb 2022 14:33:56 -0600 Subject: [PATCH 018/188] another kernel try --- .../seed/ActionRuntime/py-gordon-ML_1_0_0.json | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json b/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json index a8b95590..22ebb849 100644 --- a/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json +++ b/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json @@ -1,12 +1,18 @@ { "language": "Python", - "runtimeVersion": "3.8.10", + //"runtimeVersion": "3.8.10", + "runtimeVersion": "3.6", "modules": { - "conda.scikit-learn":"=1.0.2", - "conda.pandas":"=1.4.1", - "conda.numpy":"=1.22.2", - "conda.scipy":"=1.8.0", - "conda.dill":"=0.3.4" + //"conda.scikit-learn":"=1.0.2", + //"conda.pandas":"=1.4.1", + //"conda.numpy":"=1.22.2", + //"conda.scipy":"=1.8.0", + //"conda.dill":"=0.3.4" + "conda.scikit-learn":"=0.23.1", + "conda.pandas":"=1.0.1", + "conda.numpy":"=1.18.1", + "conda.scipy":"=1.4.1", + "conda.dill":"=0.2.8.2" }, "repositories": [ "https://repo.continuum.io/pkgs/main", From c910b141105afd09977b041e5fe5b0324f7d4788 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 21 Feb 2022 14:49:40 -0600 Subject: [PATCH 019/188] transform data after fit --- .../preProcessing/PrincipalComponentAnalysisPipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py index ecca3334..db28adf5 100644 --- a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py +++ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py @@ -13,7 +13,7 @@ def train(this, input, targetOutput, spec): # call pca pca = 
PCA(n_components=nComps) - pca.fit_transform(data) + data = pca.fit_transform(data) # serialize this training this.trainedModel = c3.MLTrainedModelArtifact(model=c3.PythonSerialization.serialize(obj=data)) From 2d6cbf0c13af9dfe1ec53607c856640126013b43 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 21 Feb 2022 15:32:47 -0600 Subject: [PATCH 020/188] add process step --- .../PrincipalComponentAnalysisPipe.py | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py index db28adf5..a969c3cc 100644 --- a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py +++ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py @@ -1,6 +1,6 @@ def train(this, input, targetOutput, spec): """ - Performs Scikit-Learn's PCA. + Performs Scikit-Learn's PCA fit(). https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html """ from sklearn.decomposition import PCA @@ -13,9 +13,24 @@ def train(this, input, targetOutput, spec): # call pca pca = PCA(n_components=nComps) - data = pca.fit_transform(data) + #data = pca.fit_transform(data) + pca.fit(data) # serialize this training - this.trainedModel = c3.MLTrainedModelArtifact(model=c3.PythonSerialization.serialize(obj=data)) + #this.trainedModel = c3.MLTrainedModelArtifact(model=c3.PythonSerialization.serialize(obj=data)) + this.trainedModel = c3.MLTrainedModelArtifact(model=c3.PythonSerialization.serialize(obj=pca)) - return this \ No newline at end of file + return this + + +def process(this, input): + """ + Performs Scikit-Learn's PCA transform(). 
+ https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html + """ + + data = c3.Dataset.toPandas(dataset=input) + pca = c3.PythonSerialization.deserialize(serialized=this.trainedModel.model) + data = pca.transform(data) + + return c3.Dataset.fromPython(pythonData=data) \ No newline at end of file From 8cc342ad8104c90eabeae58bb6db11ebb55f9c83 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 21 Feb 2022 16:01:08 -0600 Subject: [PATCH 021/188] adding field? --- .../preProcessing/PrincipalComponentAnalysisPipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py index a969c3cc..157cf159 100644 --- a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py +++ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py @@ -23,7 +23,7 @@ def train(this, input, targetOutput, spec): return this -def process(this, input): +def process(this, input, spec): """ Performs Scikit-Learn's PCA transform(). 
https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html From 5b8ba2686a7e5f6ca6c4f8cfc26de835736af17b Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 21 Feb 2022 16:18:03 -0600 Subject: [PATCH 022/188] now include process method --- .../preProcessing/PrincipalComponentAnalysisPipe.c3typ | 5 +++++ .../preProcessing/PrincipalComponentAnalysisPipe.py | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.c3typ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.c3typ index 0841bb42..48e17537 100644 --- a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.c3typ @@ -7,6 +7,11 @@ entity type PrincipalComponentAnalysisPipe extends MLLeafPipe technique: !PrincipalComponentAnalysisTechnique // train method (pre compute 'fillers' for usage when process() is called) + // this calls the fit() method of sklearn PCA @py(env='gordon-ML_1_0_0') train: ~ + // process method to actually transform data + // this calls the transform() method of sklearn PCA + @py(env='gordon-ML_1_0_0') + process: ~ } \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py index 157cf159..5e9ce5db 100644 --- a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py +++ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py @@ -17,7 +17,6 @@ def train(this, input, targetOutput, spec): pca.fit(data) # serialize this training - #this.trainedModel = c3.MLTrainedModelArtifact(model=c3.PythonSerialization.serialize(obj=data)) this.trainedModel = 
c3.MLTrainedModelArtifact(model=c3.PythonSerialization.serialize(obj=pca)) return this From 3a8057a5132002ea82e240db2300f81628933521 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 06:50:20 -0600 Subject: [PATCH 023/188] add isProcessable field --- .../seed/ActionRuntime/py-gordon-ML_1_0_0.json | 6 ------ .../preProcessing/PrincipalComponentAnalysisPipe.c3typ | 5 +++++ .../preProcessing/PrincipalComponentAnalysisPipe.py | 10 +++++++++- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json b/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json index 22ebb849..80f96da7 100644 --- a/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json +++ b/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json @@ -1,13 +1,7 @@ { "language": "Python", - //"runtimeVersion": "3.8.10", "runtimeVersion": "3.6", "modules": { - //"conda.scikit-learn":"=1.0.2", - //"conda.pandas":"=1.4.1", - //"conda.numpy":"=1.22.2", - //"conda.scipy":"=1.8.0", - //"conda.dill":"=0.3.4" "conda.scikit-learn":"=0.23.1", "conda.pandas":"=1.0.1", "conda.numpy":"=1.18.1", diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.c3typ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.c3typ index 48e17537..72ea55bb 100644 --- a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.c3typ @@ -3,9 +3,11 @@ * Performs Scikit-Learn's PCA for a set of features. 
*/ entity type PrincipalComponentAnalysisPipe extends MLLeafPipe type key 'PCA' { + // ATTRIBUTES // the technique used for this pipe technique: !PrincipalComponentAnalysisTechnique + // METHODS // train method (pre compute 'fillers' for usage when process() is called) // this calls the fit() method of sklearn PCA @py(env='gordon-ML_1_0_0') @@ -14,4 +16,7 @@ entity type PrincipalComponentAnalysisPipe extends MLLeafPipe // this calls the transform() method of sklearn PCA @py(env='gordon-ML_1_0_0') process: ~ + // this is to make sure process is only callable after train + @py(env='gordon-ML_1_0_0') + isProcessable: ~ } \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py index 5e9ce5db..011151f4 100644 --- a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py +++ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py @@ -32,4 +32,12 @@ def process(this, input, spec): pca = c3.PythonSerialization.deserialize(serialized=this.trainedModel.model) data = pca.transform(data) - return c3.Dataset.fromPython(pythonData=data) \ No newline at end of file + return c3.Dataset.fromPython(pythonData=data) + + +def isProcessable(this): + """" + Guarantees that process() can only be called after train() + """ + + return this.isTrained() \ No newline at end of file From bd114471a00cb6352677e59e707c4785a33d8323 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 07:15:55 -0600 Subject: [PATCH 024/188] first fields of GP Reg --- .../GaussianProcessRegressionPipe.py | 19 +++++++++++++++++++ .../GaussianProcessRegressionTechnique.c3typ | 8 ++++++++ .../GaussianProcessesRegressionPipe.c3typ | 14 ++++++++++++++ 3 files changed, 41 insertions(+) create mode 100644 
training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py create mode 100644 training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionTechnique.c3typ create mode 100644 training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessesRegressionPipe.c3typ diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py new file mode 100644 index 00000000..d08198cf --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py @@ -0,0 +1,19 @@ +def train(this, input, targetOutput, spec): + """ + Performs Scikit-Learn's GaussianProcessRegressor's fit(). + https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcessRegressor.html + """ + from sklearn.gaussian_process import GaussianProcessRegressor + + # get data + X = c3.Dataset.toNumpy(dataset=input) + y = c3.Dataset.toNumpy(dataset=targetOutput).flatten() + + # build and train GPR + gp = GaussianProcessRegressor() + gp.fit(X, y) + + # pickle model + this.trainedModel = c3.MLTrainedModelArtifact(model=c3.PythonSerialization.serialize(obj=gp)) + + return this \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionTechnique.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionTechnique.c3typ new file mode 100644 index 00000000..37319cf7 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionTechnique.c3typ @@ -0,0 +1,8 @@ +/** +* GaussianProcessRegressionTechnique.c3typ +* Defines hyper parameters for GP Regression. 
+*/ +type GaussianProcessRegressionTechnique mixes MLTechnique { + // random seed + randomState: integer +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessesRegressionPipe.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessesRegressionPipe.c3typ new file mode 100644 index 00000000..2ef24174 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessesRegressionPipe.c3typ @@ -0,0 +1,14 @@ +/** +* GaussianProcessRegressionPipe.c3typ +* Performs Scikit-Learn's GP Regression for a set of inputs-outputs. +*/ +entity type GaussianProcessRegressionPipe extends MLLeafPipe type key 'GPREG' { + // ATTRIBUTES + // the technique for this regression + technique: !GaussianProcessRegressionTechnique + + // METHODS + // train the model + @py(env='gordon-ML_1_0_0') + train: ~ +} From 3a4d099ebdce444185b51b37e337162936978bfd Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 07:20:42 -0600 Subject: [PATCH 025/188] filename fix --- ...esRegressionPipe.c3typ => GaussianProcessRegressionPipe.c3typ} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename training/gordon-group/src/CustomMLPipeline/machineLearning/{GaussianProcessesRegressionPipe.c3typ => GaussianProcessRegressionPipe.c3typ} (100%) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessesRegressionPipe.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.c3typ similarity index 100% rename from training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessesRegressionPipe.c3typ rename to training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.c3typ From 3fe80b36bc46da21e4abc082f204cc357f83d3e5 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 07:30:21 -0600 Subject: [PATCH 026/188] add process method to GPR --- 
.../GaussianProcessRegressionPipe.c3typ | 3 +++ .../GaussianProcessRegressionPipe.py | 16 +++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.c3typ index 2ef24174..8b0de92c 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.c3typ @@ -11,4 +11,7 @@ entity type GaussianProcessRegressionPipe extends MLLeafPipe t // train the model @py(env='gordon-ML_1_0_0') train: ~ + // process data, ie predictions + @py(env='gordon-ML_1_0_0') + process: ~ } diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py index d08198cf..e4cf25bb 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py @@ -16,4 +16,18 @@ def train(this, input, targetOutput, spec): # pickle model this.trainedModel = c3.MLTrainedModelArtifact(model=c3.PythonSerialization.serialize(obj=gp)) - return this \ No newline at end of file + return this + + +def process(this, input) + """ + Performs Scikit-Learn's GaussianProcessRegressor's predict(). 
+ https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcessRegressor.html + """ + # unpickle the model + gp = c3.PythonSerialization.deserialize(serialized=this.trainedModel.model) + + # format data + X = c3.Dataset.toNumpy(dataset=input) + + return c3.Dataset.fromPython(pythonData=gp.predict(X)) \ No newline at end of file From 5d0773f54ce051ead3de42a04640c1a5cfdcfd87 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 07:34:35 -0600 Subject: [PATCH 027/188] forgot a collon !!! --- .../machineLearning/GaussianProcessRegressionPipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py index e4cf25bb..8382cada 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py @@ -19,7 +19,7 @@ def train(this, input, targetOutput, spec): return this -def process(this, input) +def process(this, input): """ Performs Scikit-Learn's GaussianProcessRegressor's predict(). https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcessRegressor.html From f1d96d93236d5f31a0445142ac5f8b276e0d0c11 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 07:41:53 -0600 Subject: [PATCH 028/188] add spec? 
--- .../machineLearning/GaussianProcessRegressionPipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py index 8382cada..4a73d35b 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py @@ -19,7 +19,7 @@ def train(this, input, targetOutput, spec): return this -def process(this, input): +def process(this, input, spec): """ Performs Scikit-Learn's GaussianProcessRegressor's predict(). https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcessRegressor.html From e1b5ebabdfbc74cc2c5af5c570d8c7185a12fd41 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 07:51:19 -0600 Subject: [PATCH 029/188] add isProcessable --- .../GaussianProcessRegressionPipe.c3typ | 3 +++ .../machineLearning/GaussianProcessRegressionPipe.py | 10 +++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.c3typ index 8b0de92c..2735cfbd 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.c3typ @@ -14,4 +14,7 @@ entity type GaussianProcessRegressionPipe extends MLLeafPipe t // process data, ie predictions @py(env='gordon-ML_1_0_0') process: ~ + // guarantee that process() is only allowed after train() + @py(env='gordon-ML_1_0_0') + isProcessable: ~ } diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py 
b/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py index 4a73d35b..6da9ed11 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py @@ -30,4 +30,12 @@ def process(this, input, spec): # format data X = c3.Dataset.toNumpy(dataset=input) - return c3.Dataset.fromPython(pythonData=gp.predict(X)) \ No newline at end of file + return c3.Dataset.fromPython(pythonData=gp.predict(X)) + + +def isProcessable(this): + """" + Guarantees that process() can only be called after train() + """ + + return this.isTrained() From ad3b3661ecca41073e57af97166dadbb24218ece Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 08:38:42 -0600 Subject: [PATCH 030/188] organizing files --- .../{ => GPRegression}/GaussianProcessRegressionPipe.c3typ | 0 .../{ => GPRegression}/GaussianProcessRegressionPipe.py | 0 .../{ => GPRegression}/GaussianProcessRegressionTechnique.c3typ | 0 .../preProcessing/{ => PCA}/PrincipalComponentAnalysisPipe.c3typ | 0 .../preProcessing/{ => PCA}/PrincipalComponentAnalysisPipe.py | 0 .../{ => PCA}/PrincipalComponentAnalysisTechnique.c3typ | 0 6 files changed, 0 insertions(+), 0 deletions(-) rename training/gordon-group/src/CustomMLPipeline/machineLearning/{ => GPRegression}/GaussianProcessRegressionPipe.c3typ (100%) rename training/gordon-group/src/CustomMLPipeline/machineLearning/{ => GPRegression}/GaussianProcessRegressionPipe.py (100%) rename training/gordon-group/src/CustomMLPipeline/machineLearning/{ => GPRegression}/GaussianProcessRegressionTechnique.c3typ (100%) rename training/gordon-group/src/CustomMLPipeline/preProcessing/{ => PCA}/PrincipalComponentAnalysisPipe.c3typ (100%) rename training/gordon-group/src/CustomMLPipeline/preProcessing/{ => PCA}/PrincipalComponentAnalysisPipe.py (100%) rename training/gordon-group/src/CustomMLPipeline/preProcessing/{ => 
PCA}/PrincipalComponentAnalysisTechnique.c3typ (100%) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ similarity index 100% rename from training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.c3typ rename to training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.py similarity index 100% rename from training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionPipe.py rename to training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.py diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionTechnique.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ similarity index 100% rename from training/gordon-group/src/CustomMLPipeline/machineLearning/GaussianProcessRegressionTechnique.c3typ rename to training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.c3typ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PCA/PrincipalComponentAnalysisPipe.c3typ similarity index 100% rename from training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.c3typ rename to training/gordon-group/src/CustomMLPipeline/preProcessing/PCA/PrincipalComponentAnalysisPipe.c3typ diff --git 
a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py b/training/gordon-group/src/CustomMLPipeline/preProcessing/PCA/PrincipalComponentAnalysisPipe.py similarity index 100% rename from training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisPipe.py rename to training/gordon-group/src/CustomMLPipeline/preProcessing/PCA/PrincipalComponentAnalysisPipe.py diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisTechnique.c3typ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PCA/PrincipalComponentAnalysisTechnique.c3typ similarity index 100% rename from training/gordon-group/src/CustomMLPipeline/preProcessing/PrincipalComponentAnalysisTechnique.c3typ rename to training/gordon-group/src/CustomMLPipeline/preProcessing/PCA/PrincipalComponentAnalysisTechnique.c3typ From 79fe6b6bdf595f09dfa0b65c9a188ac7c53ef14b Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 08:48:21 -0600 Subject: [PATCH 031/188] trying ml annotation --- .../GPRegression/GaussianProcessRegressionTechnique.c3typ | 1 + 1 file changed, 1 insertion(+) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ index 37319cf7..6d99dd9e 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ @@ -4,5 +4,6 @@ */ type GaussianProcessRegressionTechnique mixes MLTechnique { // random seed + @ml(hyperParameter=true) randomState: integer } \ No newline at end of file From 1ec4aa60e41af87c30f5bc6549dbb2d9dcbd6582 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 08:51:05 -0600 Subject: [PATCH 032/188] trying 
again --- .../GPRegression/GaussianProcessRegressionTechnique.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ index 6d99dd9e..9de10920 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ @@ -4,6 +4,6 @@ */ type GaussianProcessRegressionTechnique mixes MLTechnique { // random seed - @ml(hyperParameter=true) + @ML(hyperParameter=true) randomState: integer } \ No newline at end of file From ad5f083f3e50cc260298866f9ec091af734158c4 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 10:05:13 -0600 Subject: [PATCH 033/188] first stab at kernels --- .../GPRegression/kernels/SklearnGPRKernel.c3typ | 13 +++++++++++++ .../kernels/SklearnGPRKernelConstant.c3typ | 13 +++++++++++++ .../kernels/SklearnGPRKernelConstant.py | 10 ++++++++++ 3 files changed, 36 insertions(+) create mode 100644 training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ create mode 100644 training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ create mode 100644 training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ new file mode 100644 index 00000000..129a1365 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ 
-0,0 +1,13 @@ +/** +* SklearnGPRKernel.c3typ +* Stepping stone for Scikit-Learn Gaussian Process kernels. +* See: https://scikit-learn.org/stable/modules/classes.html#module-sklearn.gaussian_process +*/ +type SklearnGPRKernel { + // the name of the kernel + name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') + // the kernel parameters + hyperParameters: [double] + // the pickled kernel + pickledKernel: PythonSerialization +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ new file mode 100644 index 00000000..11baa951 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ @@ -0,0 +1,13 @@ +/** +* SklearnGPRKernelConstant.c3typ +* Scikit-Learn Gaussian Process Constant Kernel. 
+* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.ConstantKernel.html#sklearn.gaussian_process.kernels.ConstantKernel +*/ +type SklearnGPRKernelConstant mixes SklearnGPRKernel { + // Constant value that defines the kernel value + constantValue: !double + + // METHODS + @py(env='gordon-ML_1_0_0') + build: ~ +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py new file mode 100644 index 00000000..f83d51fd --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -0,0 +1,10 @@ +def build(this): + + from sklearn.gaussian_process.kernel import ConstantKernel + + this.name = 'Constant' + kernel = ConstantKernel(this.constantValue) + + this.pickledKernel = c3.PythonSerialization.serialize(obj=kernel) + + return \ No newline at end of file From ee082854b0d911803468b18125a6b18ffa76e003 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 10:09:42 -0600 Subject: [PATCH 034/188] adding method to basis type --- .../GPRegression/kernels/SklearnGPRKernel.c3typ | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 129a1365..2d7f4667 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -10,4 +10,8 @@ type SklearnGPRKernel { hyperParameters: [double] // the pickled kernel pickledKernel: PythonSerialization + + // build method to be overridden later + @py() + build: 
member function(): any } \ No newline at end of file From 1fd5fcb327890a7d4de30e94984dd9747d348ff8 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 10:11:53 -0600 Subject: [PATCH 035/188] placeholder function --- .../machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ | 1 - 1 file changed, 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 2d7f4667..bdf058a8 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -12,6 +12,5 @@ type SklearnGPRKernel { pickledKernel: PythonSerialization // build method to be overridden later - @py() build: member function(): any } \ No newline at end of file From 47ddac91c396462de935ebba0c4f1ecbe85d65f7 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 10:17:33 -0600 Subject: [PATCH 036/188] tiny tiny tiny syntax fix --- .../GPRegression/kernels/SklearnGPRKernelConstant.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index f83d51fd..7b896800 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -1,6 +1,6 @@ def build(this): - from sklearn.gaussian_process.kernel import ConstantKernel + from sklearn.gaussian_process.kernels import ConstantKernel this.name = 'Constant' kernel = ConstantKernel(this.constantValue) From 
e110a23a5c54bfb7783e4dd9518b099102b734a7 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 10:21:44 -0600 Subject: [PATCH 037/188] pickling returns string... --- .../machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index bdf058a8..3324e638 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -9,7 +9,7 @@ type SklearnGPRKernel { // the kernel parameters hyperParameters: [double] // the pickled kernel - pickledKernel: PythonSerialization + pickledKernel: string // build method to be overridden later build: member function(): any From be38511ebe58334ed2bc78927cef4251dc6f1f3f Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 10:25:10 -0600 Subject: [PATCH 038/188] return this --- .../GPRegression/kernels/SklearnGPRKernelConstant.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index 7b896800..54967bd8 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -7,4 +7,4 @@ def build(this): this.pickledKernel = c3.PythonSerialization.serialize(obj=kernel) - return \ No newline at end of file + return this \ No newline at end of file From 310e56549ecb49c7c428a521e9f033f6bb3ebe0d 
Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 10:39:07 -0600 Subject: [PATCH 039/188] member to inline --- .../machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ | 2 +- .../GPRegression/kernels/SklearnGPRKernelConstant.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 3324e638..8b23329c 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -12,5 +12,5 @@ type SklearnGPRKernel { pickledKernel: string // build method to be overridden later - build: member function(): any + build: inline function(): any } \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index 54967bd8..7b896800 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -7,4 +7,4 @@ def build(this): this.pickledKernel = c3.PythonSerialization.serialize(obj=kernel) - return this \ No newline at end of file + return \ No newline at end of file From fb9342a2e80e07e11daf2de5772afe68c3070197 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 10:43:55 -0600 Subject: [PATCH 040/188] back to member --- .../machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 8b23329c..3324e638 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -12,5 +12,5 @@ type SklearnGPRKernel { pickledKernel: string // build method to be overridden later - build: inline function(): any + build: member function(): any } \ No newline at end of file From 9b1ce96868dfb191ae55555d4bf7fa4fa608b691 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 10:54:17 -0600 Subject: [PATCH 041/188] trying to write to type --- .../machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 3324e638..fce61b2d 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -3,7 +3,7 @@ * Stepping stone for Scikit-Learn Gaussian Process kernels. 
* See: https://scikit-learn.org/stable/modules/classes.html#module-sklearn.gaussian_process */ -type SklearnGPRKernel { +type SklearnGPRKernel mixes Writable { // the name of the kernel name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') // the kernel parameters From 65f92540941712e5eacdfdb8aeaeeb288c3969cf Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 11:00:05 -0600 Subject: [PATCH 042/188] merging object --- .../GPRegression/kernels/SklearnGPRKernelConstant.py | 1 + 1 file changed, 1 insertion(+) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index 7b896800..fbdc7e0c 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -6,5 +6,6 @@ def build(this): kernel = ConstantKernel(this.constantValue) this.pickledKernel = c3.PythonSerialization.serialize(obj=kernel) + c3.SklearnGPRKernelConstant.merge(this) return \ No newline at end of file From 79422daea97a18c8d0cebbe4719a2265cd9d60b1 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 11:18:22 -0600 Subject: [PATCH 043/188] back to what it was --- .../machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ | 2 +- .../GPRegression/kernels/SklearnGPRKernelConstant.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index fce61b2d..3324e638 100644 --- 
a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -3,7 +3,7 @@ * Stepping stone for Scikit-Learn Gaussian Process kernels. * See: https://scikit-learn.org/stable/modules/classes.html#module-sklearn.gaussian_process */ -type SklearnGPRKernel mixes Writable { +type SklearnGPRKernel { // the name of the kernel name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') // the kernel parameters diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index fbdc7e0c..7b896800 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -6,6 +6,5 @@ def build(this): kernel = ConstantKernel(this.constantValue) this.pickledKernel = c3.PythonSerialization.serialize(obj=kernel) - c3.SklearnGPRKernelConstant.merge(this) return \ No newline at end of file From db65dc3d0de9cb7e1bfad189cd60412f45e0080e Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 11:23:49 -0600 Subject: [PATCH 044/188] trying merge again --- .../GPRegression/kernels/SklearnGPRKernelConstant.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index 7b896800..9e1b4865 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ 
b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -7,4 +7,6 @@ def build(this): this.pickledKernel = c3.PythonSerialization.serialize(obj=kernel) + this.merge() + return \ No newline at end of file From dc916ec8d6e595d36fe4edef2597322ed17f94f8 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 11:29:24 -0600 Subject: [PATCH 045/188] making kernel persistable --- .../GPRegression/kernels/SklearnGPRKernelConstant.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ index 11baa951..a71f5e43 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ @@ -3,7 +3,7 @@ * Scikit-Learn Gaussian Process Constant Kernel. 
* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.ConstantKernel.html#sklearn.gaussian_process.kernels.ConstantKernel */ -type SklearnGPRKernelConstant mixes SklearnGPRKernel { +entity type SklearnGPRKernelConstant mixes SklearnGPRKernel { // Constant value that defines the kernel value constantValue: !double From 21b7973315f290425eecf68b4f38595989838fb4 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 11:32:38 -0600 Subject: [PATCH 046/188] add schema name --- .../GPRegression/kernels/SklearnGPRKernelConstant.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ index a71f5e43..894aa3b6 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ @@ -3,7 +3,7 @@ * Scikit-Learn Gaussian Process Constant Kernel. 
* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.ConstantKernel.html#sklearn.gaussian_process.kernels.ConstantKernel */ -entity type SklearnGPRKernelConstant mixes SklearnGPRKernel { +entity type SklearnGPRKernelConstant mixes SklearnGPRKernel schema name 'SKLRN_GPR_KRNL_CNSTNT' { // Constant value that defines the kernel value constantValue: !double From 7f905cd223e968f8ae2df0015b829ccef9c3c3c5 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 11:37:15 -0600 Subject: [PATCH 047/188] entity type on the basis type too --- .../machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 3324e638..9189972b 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -3,7 +3,7 @@ * Stepping stone for Scikit-Learn Gaussian Process kernels. * See: https://scikit-learn.org/stable/modules/classes.html#module-sklearn.gaussian_process */ -type SklearnGPRKernel { +entity type SklearnGPRKernel schema name 'SKLRN_GPR_KRNL' { // the name of the kernel name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') // the kernel parameters From 8ebba16efe7c32c6da5d83f608db46e82f22dc88 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 11:41:50 -0600 Subject: [PATCH 048/188] okay... 
no entities then --- .../machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ | 2 +- .../GPRegression/kernels/SklearnGPRKernelConstant.c3typ | 2 +- .../GPRegression/kernels/SklearnGPRKernelConstant.py | 2 -- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 9189972b..3324e638 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -3,7 +3,7 @@ * Stepping stone for Scikit-Learn Gaussian Process kernels. * See: https://scikit-learn.org/stable/modules/classes.html#module-sklearn.gaussian_process */ -entity type SklearnGPRKernel schema name 'SKLRN_GPR_KRNL' { +type SklearnGPRKernel { // the name of the kernel name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') // the kernel parameters diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ index 894aa3b6..11baa951 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ @@ -3,7 +3,7 @@ * Scikit-Learn Gaussian Process Constant Kernel. 
* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.ConstantKernel.html#sklearn.gaussian_process.kernels.ConstantKernel */ -entity type SklearnGPRKernelConstant mixes SklearnGPRKernel schema name 'SKLRN_GPR_KRNL_CNSTNT' { +type SklearnGPRKernelConstant mixes SklearnGPRKernel { // Constant value that defines the kernel value constantValue: !double diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index 9e1b4865..7b896800 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -7,6 +7,4 @@ def build(this): this.pickledKernel = c3.PythonSerialization.serialize(obj=kernel) - this.merge() - return \ No newline at end of file From 31cca0003b01c06da34d4d2d7913633fdf3da257 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 11:55:04 -0600 Subject: [PATCH 049/188] all entities again... --- .../machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ | 2 +- .../GPRegression/kernels/SklearnGPRKernelConstant.c3typ | 2 +- .../GPRegression/kernels/SklearnGPRKernelConstant.py | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 3324e638..e3255644 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -3,7 +3,7 @@ * Stepping stone for Scikit-Learn Gaussian Process kernels. 
* See: https://scikit-learn.org/stable/modules/classes.html#module-sklearn.gaussian_process */ -type SklearnGPRKernel { +extendable entity type SklearnGPRKernel schema name "SKLRN_GPR_KRNL" { // the name of the kernel name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') // the kernel parameters diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ index 11baa951..ee336af5 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ @@ -3,7 +3,7 @@ * Scikit-Learn Gaussian Process Constant Kernel. * See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.ConstantKernel.html#sklearn.gaussian_process.kernels.ConstantKernel */ -type SklearnGPRKernelConstant mixes SklearnGPRKernel { +entity type SklearnGPRKernelConstant extends SklearnGPRKernel type key "SKLRN_GPR_KRNL_CNSTNT" { // Constant value that defines the kernel value constantValue: !double diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index 7b896800..9e1b4865 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -7,4 +7,6 @@ def build(this): this.pickledKernel = c3.PythonSerialization.serialize(obj=kernel) + this.merge() + return \ No newline at end of file From b67f996ca210002a370da7ea7e2952549a6bc9f2 Mon 
Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 12:00:33 -0600 Subject: [PATCH 050/188] add schema name to array field --- .../machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index e3255644..15ce9787 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -7,7 +7,7 @@ extendable entity type SklearnGPRKernel schema name "SKLRN_GPR_KRNL" { // the name of the kernel name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') // the kernel parameters - hyperParameters: [double] + hyperParameters: [double] schema name 'HPRPRMTRS' // the pickled kernel pickledKernel: string From 06c5e0fd6fc3863a456c4dfd16911cfed0323f76 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 12:03:17 -0600 Subject: [PATCH 051/188] using schema suffix instead --- .../GPRegression/kernels/SklearnGPRKernel.c3typ | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 15ce9787..5d8a24f0 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -3,11 +3,11 @@ * Stepping stone for Scikit-Learn Gaussian Process kernels. 
* See: https://scikit-learn.org/stable/modules/classes.html#module-sklearn.gaussian_process */ -extendable entity type SklearnGPRKernel schema name "SKLRN_GPR_KRNL" { +extendable entity type SklearnGPRKernel schema name "SKL_GPR_KRNL" { // the name of the kernel name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') // the kernel parameters - hyperParameters: [double] schema name 'HPRPRMTRS' + hyperParameters: [double] schema suffix 'HPRS' // the pickled kernel pickledKernel: string From 8c086ca4a1678ac7358bfc122e0441dd63f5387c Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 12:15:55 -0600 Subject: [PATCH 052/188] adding after create... --- .../GPRegression/kernels/SklearnGPRKernelConstant.c3typ | 3 +++ .../GPRegression/kernels/SklearnGPRKernelConstant.js | 3 +++ 2 files changed, 6 insertions(+) create mode 100644 training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.js diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ index ee336af5..6a520948 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ @@ -10,4 +10,7 @@ entity type SklearnGPRKernelConstant extends SklearnGPRKernel type key "SKLRN_GP // METHODS @py(env='gordon-ML_1_0_0') build: ~ + // call build when this type is created + @dependency(include = "this") + afterCreate: ~ js server } \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.js 
b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.js new file mode 100644 index 00000000..60ab69ed --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.js @@ -0,0 +1,3 @@ +function afterCreate(objs) { + objs.build() +}; \ No newline at end of file From 456ee41a97bdd19f21dcf5fd5827c4960f53fc6d Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 12:44:31 -0600 Subject: [PATCH 053/188] insert afterCreate again.. --- .../GPRegression/kernels/SklearnGPRKernelConstant.js | 3 ++- .../GPRegression/kernels/SklearnGPRKernelConstant.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.js b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.js index 60ab69ed..474896ce 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.js +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.js @@ -1,3 +1,4 @@ function afterCreate(objs) { - objs.build() + objs.forEach(build()) + return }; \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index 9e1b4865..4f52a8f8 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -7,6 +7,6 @@ def build(this): this.pickledKernel = c3.PythonSerialization.serialize(obj=kernel) - this.merge() + c3.SklearnGPRKernelConstant.merge(this) return 
\ No newline at end of file From 615609150e8ce39ef7813557f68f8db7d8fb273a Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 12:52:30 -0600 Subject: [PATCH 054/188] trying to fix syntax... --- .../GPRegression/kernels/SklearnGPRKernelConstant.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.js b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.js index 474896ce..d65fd821 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.js +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.js @@ -1,4 +1,4 @@ function afterCreate(objs) { - objs.forEach(build()) - return + objs.forEach(build); + return; }; \ No newline at end of file From 8f219ecaf9e8dcc7adb89cd439027ee3d6380af9 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 13:15:02 -0600 Subject: [PATCH 055/188] i don't want entity types --- .../GPRegression/kernels/SklearnGPRKernel.c3typ | 4 ++-- .../GPRegression/kernels/SklearnGPRKernelConstant.c3typ | 5 +---- .../GPRegression/kernels/SklearnGPRKernelConstant.py | 3 +-- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 5d8a24f0..6999ee65 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -3,11 +3,11 @@ * Stepping stone for Scikit-Learn Gaussian Process kernels. 
* See: https://scikit-learn.org/stable/modules/classes.html#module-sklearn.gaussian_process */ -extendable entity type SklearnGPRKernel schema name "SKL_GPR_KRNL" { +type SklearnGPRKernel type key "SKL_GPR_KRNL" { // the name of the kernel name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') // the kernel parameters - hyperParameters: [double] schema suffix 'HPRS' + hyperParameters: [double] // the pickled kernel pickledKernel: string diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ index 6a520948..330912e8 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ @@ -3,14 +3,11 @@ * Scikit-Learn Gaussian Process Constant Kernel. 
* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.ConstantKernel.html#sklearn.gaussian_process.kernels.ConstantKernel */ -entity type SklearnGPRKernelConstant extends SklearnGPRKernel type key "SKLRN_GPR_KRNL_CNSTNT" { +type SklearnGPRKernelConstant mixes SklearnGPRKernel type key "SKLRN_GPR_KRNL_CNSTNT" { // Constant value that defines the kernel value constantValue: !double // METHODS @py(env='gordon-ML_1_0_0') build: ~ - // call build when this type is created - @dependency(include = "this") - afterCreate: ~ js server } \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index 4f52a8f8..d7222a1b 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -3,10 +3,9 @@ def build(this): from sklearn.gaussian_process.kernels import ConstantKernel this.name = 'Constant' + this.hyperParameters.push(this.ConstantValue) kernel = ConstantKernel(this.constantValue) this.pickledKernel = c3.PythonSerialization.serialize(obj=kernel) - c3.SklearnGPRKernelConstant.merge(this) - return \ No newline at end of file From c221c039f35980e36492e6cd266b9fd0190f63a5 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 13:20:30 -0600 Subject: [PATCH 056/188] minor fix? 
--- .../GPRegression/kernels/SklearnGPRKernelConstant.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index d7222a1b..9fdd2f32 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -3,7 +3,7 @@ def build(this): from sklearn.gaussian_process.kernels import ConstantKernel this.name = 'Constant' - this.hyperParameters.push(this.ConstantValue) + this.hyperParameters.push(this.constantValue) kernel = ConstantKernel(this.constantValue) this.pickledKernel = c3.PythonSerialization.serialize(obj=kernel) From 59529f87e8ea0d00ec7926ad41089206cfbbb456 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 13:57:49 -0600 Subject: [PATCH 057/188] hiding push --- .../GPRegression/kernels/SklearnGPRKernelConstant.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index 9fdd2f32..3ddf1e0c 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -3,7 +3,7 @@ def build(this): from sklearn.gaussian_process.kernels import ConstantKernel this.name = 'Constant' - this.hyperParameters.push(this.constantValue) + #this.hyperParameters.push(this.constantValue) kernel = ConstantKernel(this.constantValue) this.pickledKernel = 
c3.PythonSerialization.serialize(obj=kernel) From 5486e071aa689fce31c4f9ed6643dd21e715d604 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 14:18:51 -0600 Subject: [PATCH 058/188] mixing obj --- .../GPRegression/kernels/SklearnGPRKernelConstant.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ index 330912e8..565b0c75 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ @@ -3,7 +3,7 @@ * Scikit-Learn Gaussian Process Constant Kernel. * See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.ConstantKernel.html#sklearn.gaussian_process.kernels.ConstantKernel */ -type SklearnGPRKernelConstant mixes SklearnGPRKernel type key "SKLRN_GPR_KRNL_CNSTNT" { +type SklearnGPRKernelConstant mixes SklearnGPRKernel, Obj type key "SKLRN_GPR_KRNL_CNSTNT" { // Constant value that defines the kernel value constantValue: !double From c7172f30e335f68a1a3e7c230e7cfd9255490108 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 14:44:12 -0600 Subject: [PATCH 059/188] removing obj mixin...... 
--- .../GPRegression/kernels/SklearnGPRKernelConstant.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ index 565b0c75..330912e8 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ @@ -3,7 +3,7 @@ * Scikit-Learn Gaussian Process Constant Kernel. * See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.ConstantKernel.html#sklearn.gaussian_process.kernels.ConstantKernel */ -type SklearnGPRKernelConstant mixes SklearnGPRKernel, Obj type key "SKLRN_GPR_KRNL_CNSTNT" { +type SklearnGPRKernelConstant mixes SklearnGPRKernel type key "SKLRN_GPR_KRNL_CNSTNT" { // Constant value that defines the kernel value constantValue: !double From 9acaee9488627cfe8d9cfd0308942bcb9801936b Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 22 Feb 2022 16:13:53 -0600 Subject: [PATCH 060/188] trying inline + member --- .../GPRegression/kernels/SklearnGPRKernel.c3typ | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 6999ee65..357aaa04 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -12,5 +12,8 @@ type SklearnGPRKernel type key "SKL_GPR_KRNL" { pickledKernel: string // build method to be overridden later - build: member 
function(): any + build: inline member function(): any + + // override fromString + // look into beforeMake() } \ No newline at end of file From cd6c963a4254239afd5055f5c78b2ec08aae7f19 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 06:38:38 -0600 Subject: [PATCH 061/188] returning the object --- .../GPRegression/kernels/SklearnGPRKernelConstant.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index 3ddf1e0c..6e98bc14 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -8,4 +8,4 @@ def build(this): this.pickledKernel = c3.PythonSerialization.serialize(obj=kernel) - return \ No newline at end of file + return this \ No newline at end of file From 4081af8e78244a5e5f23fd0fa52241bdf26c5310 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 07:01:57 -0600 Subject: [PATCH 062/188] adding array field --- .../GPRegression/kernels/SklearnGPRKernelConstant.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index 6e98bc14..b8c84f75 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -1,9 +1,11 @@ def build(this): from sklearn.gaussian_process.kernels import ConstantKernel + import numpy as np this.name = 
'Constant' - #this.hyperParameters.push(this.constantValue) + hyperPars = [this.constantValue] + this.hyperParameters = hyperPars kernel = ConstantKernel(this.constantValue) this.pickledKernel = c3.PythonSerialization.serialize(obj=kernel) From 0774caa381a29f41b016746c651f6bc1151800a8 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 07:14:12 -0600 Subject: [PATCH 063/188] changing type definition a little bit --- .../GPRegression/kernels/SklearnGPRKernelConstant.c3typ | 4 +++- .../GPRegression/kernels/SklearnGPRKernelConstant.py | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ index 330912e8..3a9abbb5 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ @@ -3,9 +3,11 @@ * Scikit-Learn Gaussian Process Constant Kernel. 
* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.ConstantKernel.html#sklearn.gaussian_process.kernels.ConstantKernel */ -type SklearnGPRKernelConstant mixes SklearnGPRKernel type key "SKLRN_GPR_KRNL_CNSTNT" { +type SklearnGPRKernelConstant type key "SKLRN_GPR_KRNL_CNSTNT" { // Constant value that defines the kernel value constantValue: !double + // the SklearnGPRKernel for this object + kernel: SklearnGPRKernel // METHODS @py(env='gordon-ML_1_0_0') diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index b8c84f75..b981d579 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -3,11 +3,11 @@ def build(this): from sklearn.gaussian_process.kernels import ConstantKernel import numpy as np - this.name = 'Constant' + this.kernel.name = 'Constant' hyperPars = [this.constantValue] - this.hyperParameters = hyperPars - kernel = ConstantKernel(this.constantValue) + this.kernel.hyperParameters = hyperPars + sklKernel = ConstantKernel(this.constantValue) - this.pickledKernel = c3.PythonSerialization.serialize(obj=kernel) + this.kernel.pickledKernel = c3.PythonSerialization.serialize(obj=sklKernel) return this \ No newline at end of file From 9480f61ec029e54279c6bb3a3e3728127cc50b71 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 07:20:25 -0600 Subject: [PATCH 064/188] don't override function --- .../machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ | 2 +- .../GPRegression/kernels/SklearnGPRKernelConstant.c3typ | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 357aaa04..cb213e75 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -12,7 +12,7 @@ type SklearnGPRKernel type key "SKL_GPR_KRNL" { pickledKernel: string // build method to be overridden later - build: inline member function(): any + //build: inline member function(): any // override fromString // look into beforeMake() diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ index 3a9abbb5..fcbc6fcf 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ @@ -11,5 +11,5 @@ type SklearnGPRKernelConstant type key "SKLRN_GPR_KRNL_CNSTNT" { // METHODS @py(env='gordon-ML_1_0_0') - build: ~ + build: member function(): any } \ No newline at end of file From be645ecfcade68406936c18bf967ab7959693774 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 07:25:45 -0600 Subject: [PATCH 065/188] add include annotation --- .../GPRegression/kernels/SklearnGPRKernelConstant.c3typ | 1 + 1 file changed, 1 insertion(+) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ index fcbc6fcf..6c2a4a10 100644 --- 
a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ @@ -11,5 +11,6 @@ type SklearnGPRKernelConstant type key "SKLRN_GPR_KRNL_CNSTNT" { // METHODS @py(env='gordon-ML_1_0_0') + @dependency(include="this, kernel") build: member function(): any } \ No newline at end of file From 6282368519f95803f49b89e1d3adb2ed7557583c Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 07:31:37 -0600 Subject: [PATCH 066/188] creating object inside build function --- .../kernels/SklearnGPRKernelConstant.py | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index b981d579..a2b0f65b 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -3,11 +3,23 @@ def build(this): from sklearn.gaussian_process.kernels import ConstantKernel import numpy as np - this.kernel.name = 'Constant' - hyperPars = [this.constantValue] - this.kernel.hyperParameters = hyperPars +# this.kernel.name = 'Constant' +# hyperPars = [this.constantValue] +# this.kernel.hyperParameters = hyperPars +# sklKernel = ConstantKernel(this.constantValue) +# +# this.kernel.pickledKernel = c3.PythonSerialization.serialize(obj=sklKernel) + sklKernel = ConstantKernel(this.constantValue) - this.kernel.pickledKernel = c3.PythonSerialization.serialize(obj=sklKernel) + kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) + kernel_name = 'Constant' + kernel_hyperParameters = [this.constantValue] + + this.kernel 
= c3.SklearnGPRKernel({ + name=kernel_name, + hyperParameters=kernel_hyperParameters, + pickledKernel=kernel_pickled + }) return this \ No newline at end of file From 12c460d3fc6aa525b60d57f81bb38c5efd9f092f Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 07:35:56 -0600 Subject: [PATCH 067/188] it's not a json... --- .../GPRegression/kernels/SklearnGPRKernelConstant.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index a2b0f65b..9e5ebf67 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -16,10 +16,10 @@ def build(this): kernel_name = 'Constant' kernel_hyperParameters = [this.constantValue] - this.kernel = c3.SklearnGPRKernel({ + this.kernel = c3.SklearnGPRKernel( name=kernel_name, hyperParameters=kernel_hyperParameters, pickledKernel=kernel_pickled - }) + ) return this \ No newline at end of file From 6027286759b54c5168788e1e18a7ff859b66d6ac Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 07:41:55 -0600 Subject: [PATCH 068/188] remove dependency annotation --- .../GPRegression/kernels/SklearnGPRKernelConstant.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ index 6c2a4a10..013013fe 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ +++ 
b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ @@ -11,6 +11,6 @@ type SklearnGPRKernelConstant type key "SKLRN_GPR_KRNL_CNSTNT" { // METHODS @py(env='gordon-ML_1_0_0') - @dependency(include="this, kernel") + //@dependency(include="this, kernel") build: member function(): any } \ No newline at end of file From 7fc20b93b746f3647726c46004cd9e5763e71e56 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 07:48:57 -0600 Subject: [PATCH 069/188] just trying something else I guess... --- .../GPRegression/kernels/SklearnGPRKernelConstant.c3typ | 2 +- .../GPRegression/kernels/SklearnGPRKernelConstant.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ index 013013fe..1cc8705b 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ @@ -12,5 +12,5 @@ type SklearnGPRKernelConstant type key "SKLRN_GPR_KRNL_CNSTNT" { // METHODS @py(env='gordon-ML_1_0_0') //@dependency(include="this, kernel") - build: member function(): any + build: inline member function(): any } \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index 9e5ebf67..af86852a 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -1,7 
+1,6 @@ def build(this): from sklearn.gaussian_process.kernels import ConstantKernel - import numpy as np # this.kernel.name = 'Constant' # hyperPars = [this.constantValue] From e424b4d26a6b9deb57c133ffb1d125bddbacdf05 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 07:53:21 -0600 Subject: [PATCH 070/188] ... --- .../GPRegression/kernels/SklearnGPRKernelConstant.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ index 1cc8705b..bb3969b1 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ @@ -11,6 +11,6 @@ type SklearnGPRKernelConstant type key "SKLRN_GPR_KRNL_CNSTNT" { // METHODS @py(env='gordon-ML_1_0_0') - //@dependency(include="this, kernel") + @dependency(include="this, kernel") build: inline member function(): any } \ No newline at end of file From 34426fb13f0e4b5fa38aaf089457d84a9fd407d0 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 08:21:42 -0600 Subject: [PATCH 071/188] looks like this is FINALLY working --- .../GPRegression/kernels/SklearnGPRKernelConstant.c3typ | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ index bb3969b1..30c4dc8e 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ +++ 
b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ @@ -9,8 +9,8 @@ type SklearnGPRKernelConstant type key "SKLRN_GPR_KRNL_CNSTNT" { // the SklearnGPRKernel for this object kernel: SklearnGPRKernel - // METHODS + // constructs the SklearnGPRKernel internal object @py(env='gordon-ML_1_0_0') @dependency(include="this, kernel") - build: inline member function(): any + build: inline member function(): SklearnGPRKernel } \ No newline at end of file From cbeaa251b0d92df9bf4035fa0e60ab882dd238be Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 08:44:35 -0600 Subject: [PATCH 072/188] imputing kernel to technique --- .../GPRegression/GaussianProcessRegressionPipe.py | 5 ++++- .../GaussianProcessRegressionTechnique.c3typ | 3 +++ .../GPRegression/kernels/SklearnGPRKernel.c3typ | 8 +++----- .../GPRegression/kernels/SklearnGPRKernelConstant.py | 11 +++-------- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.py index 6da9ed11..764abb82 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.py @@ -9,8 +9,11 @@ def train(this, input, targetOutput, spec): X = c3.Dataset.toNumpy(dataset=input) y = c3.Dataset.toNumpy(dataset=targetOutput).flatten() + # get kernel object from c3, make it python again + kernel = c3.PythonSerialization.deserialize(serialized=this.technique.kernel.pickledKernel) + # build and train GPR - gp = GaussianProcessRegressor() + gp = GaussianProcessRegressor(kernel=kernel) gp.fit(X, y) # pickle model diff --git 
a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ index 9de10920..50485926 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ @@ -6,4 +6,7 @@ type GaussianProcessRegressionTechnique mixes MLTechnique { // random seed @ML(hyperParameter=true) randomState: integer + // the kernel object + @ML(hyperParameter=true) + kernel: SklearnGPRKernel } \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index cb213e75..3855ecb0 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -11,9 +11,7 @@ type SklearnGPRKernel type key "SKL_GPR_KRNL" { // the pickled kernel pickledKernel: string - // build method to be overridden later - //build: inline member function(): any - - // override fromString - // look into beforeMake() + // C3DS hints + // - override fromString + // - look into beforeMake() } \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index af86852a..bfa271f6 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ 
b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -1,14 +1,9 @@ def build(this): - + """ + This effectively constructs the type instance, by creating the object to be placed in the kernel field. Ideally this should be replaced by a callback function (beforeMake or afterMake). + """ from sklearn.gaussian_process.kernels import ConstantKernel -# this.kernel.name = 'Constant' -# hyperPars = [this.constantValue] -# this.kernel.hyperParameters = hyperPars -# sklKernel = ConstantKernel(this.constantValue) -# -# this.kernel.pickledKernel = c3.PythonSerialization.serialize(obj=sklKernel) - sklKernel = ConstantKernel(this.constantValue) kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) From e86243808470a889158acdd9411a29109f7e815b Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 08:51:17 -0600 Subject: [PATCH 073/188] add schema suffixes --- .../GPRegression/GaussianProcessRegressionPipe.c3typ | 2 +- .../GPRegression/GaussianProcessRegressionTechnique.c3typ | 2 +- .../machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ index 2735cfbd..13913ecf 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ @@ -5,7 +5,7 @@ entity type GaussianProcessRegressionPipe extends MLLeafPipe type key 'GPREG' { // ATTRIBUTES // the technique for this regression - technique: !GaussianProcessRegressionTechnique + technique: !GaussianProcessRegressionTechnique schema suffix 'TCHNQ' // METHODS // train the model diff --git 
a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ index 50485926..c78876c2 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ @@ -8,5 +8,5 @@ type GaussianProcessRegressionTechnique mixes MLTechnique { randomState: integer // the kernel object @ML(hyperParameter=true) - kernel: SklearnGPRKernel + kernel: SklearnGPRKernel schema suffix 'KRNL' } \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 3855ecb0..11177b26 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -7,7 +7,7 @@ type SklearnGPRKernel type key "SKL_GPR_KRNL" { // the name of the kernel name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') // the kernel parameters - hyperParameters: [double] + hyperParameters: [double] schema suffix 'HPRPMS' // the pickled kernel pickledKernel: string From b715b74cec61088d15c4520aa16c322e4062e5bd Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 08:54:44 -0600 Subject: [PATCH 074/188] trying what I can... 
--- .../GPRegression/GaussianProcessRegressionTechnique.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ index c78876c2..202adb5b 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ @@ -8,5 +8,5 @@ type GaussianProcessRegressionTechnique mixes MLTechnique { randomState: integer // the kernel object @ML(hyperParameter=true) - kernel: SklearnGPRKernel schema suffix 'KRNL' + kernel: SklearnGPRKernel' } \ No newline at end of file From bc6ee94cd86b9134a47577f4340b374fde05c954 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 08:55:05 -0600 Subject: [PATCH 075/188] ... 
--- .../GPRegression/GaussianProcessRegressionTechnique.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ index 202adb5b..50485926 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ @@ -8,5 +8,5 @@ type GaussianProcessRegressionTechnique mixes MLTechnique { randomState: integer // the kernel object @ML(hyperParameter=true) - kernel: SklearnGPRKernel' + kernel: SklearnGPRKernel } \ No newline at end of file From e10306ba731933f3d4d7017aa4aaacb55b57608e Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 08:58:12 -0600 Subject: [PATCH 076/188] .... 
--- .../machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 11177b26..3855ecb0 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -7,7 +7,7 @@ type SklearnGPRKernel type key "SKL_GPR_KRNL" { // the name of the kernel name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') // the kernel parameters - hyperParameters: [double] schema suffix 'HPRPMS' + hyperParameters: [double] // the pickled kernel pickledKernel: string From a2c765930aaf79eebd591a467d24c57ea41e893b Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 09:02:23 -0600 Subject: [PATCH 077/188] ............ 
--- .../GPRegression/GaussianProcessRegressionPipe.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ index 13913ecf..2735cfbd 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ @@ -5,7 +5,7 @@ entity type GaussianProcessRegressionPipe extends MLLeafPipe type key 'GPREG' { // ATTRIBUTES // the technique for this regression - technique: !GaussianProcessRegressionTechnique schema suffix 'TCHNQ' + technique: !GaussianProcessRegressionTechnique // METHODS // train the model From 3d931b65bfb33c00177f2cc23fc856e3d5f434cc Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 09:08:38 -0600 Subject: [PATCH 078/188] ... 
desperate commits --- .../machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 3855ecb0..bd5397c7 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -7,7 +7,7 @@ type SklearnGPRKernel type key "SKL_GPR_KRNL" { // the name of the kernel name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') // the kernel parameters - hyperParameters: [double] + hyperParameters: [double] schema suffix 'HPRPRMS' // the pickled kernel pickledKernel: string From 32f5a4193b2f4ed5f1971775d3292ecad4cee39f Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 09:13:19 -0600 Subject: [PATCH 079/188] ... mostly ... 
--- .../GPRegression/GaussianProcessRegressionTechnique.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ index 50485926..c78876c2 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ @@ -8,5 +8,5 @@ type GaussianProcessRegressionTechnique mixes MLTechnique { randomState: integer // the kernel object @ML(hyperParameter=true) - kernel: SklearnGPRKernel + kernel: SklearnGPRKernel schema suffix 'KRNL' } \ No newline at end of file From f55d4a894fbbd9c9ca4418b362535e56984ba52d Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 09:21:45 -0600 Subject: [PATCH 080/188] well, sometimes they work --- .../GPRegression/kernels/SklearnGPRKernelConstant.js | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.js diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.js b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.js deleted file mode 100644 index d65fd821..00000000 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.js +++ /dev/null @@ -1,4 +0,0 @@ -function afterCreate(objs) { - objs.forEach(build); - return; -}; \ No newline at end of file From 1e92e946ae09e1bfbf4057a657306dcd7e51635b Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 09:29:20 -0600 Subject: [PATCH 081/188] add dot product kernel --- 
.../kernels/SklearnGPRKernelDotProduct.c3typ | 16 ++++++++++++++++ .../kernels/SklearnGPRKernelDotProduct.py | 19 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ create mode 100644 training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.py diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ new file mode 100644 index 00000000..cc1aabb4 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ @@ -0,0 +1,16 @@ +/** +* SklearnGPRKernelDotProduct.c3typ +* Scikit-Learn Gaussian Process Dot Product Kernel. +* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.DotProduct.html#sklearn.gaussian_process.kernels.DotProduct +*/ +type SklearnGPRKernelDotProduct type key "SKLRN_GPR_KRNL_DTPRDCT" { + // Constant that defines the kernel inhomogenity + sigmaZero: !double + // the SklearnGPRKernel for this object + kernel: SklearnGPRKernel + + // constructs the SklearnGPRKernel internal object + @py(env='gordon-ML_1_0_0') + @dependency(include="this, kernel") + build: inline member function(): SklearnGPRKernel +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.py new file mode 100644 index 00000000..70430725 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.py @@ -0,0 +1,19 @@ +def build(this): + """ + This 
effectively constructs the type instance, by creating the object to be placed in the kernel field. Ideally this should be replaced by a callback function (beforeMake or afterMake). + """ + from sklearn.gaussian_process.kernels import DotProduct + + sklKernel = DotProduct(sigma_0=this.sigmaZero) + + kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) + kernel_name = 'DotProduct' + kernel_hyperParameters = [this.sigmaZero] + + this.kernel = c3.SklearnGPRKernel( + name=kernel_name, + hyperParameters=kernel_hyperParameters, + pickledKernel=kernel_pickled + ) + + return this \ No newline at end of file From 44e5a71d29fb7c492b064fb18578fa2413e4a248 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 09:40:54 -0600 Subject: [PATCH 082/188] add ExpSineSquared kernel --- .../SklearnGPRKernelExpSineSquared.c3typ | 18 ++++++++++++++++++ .../kernels/SklearnGPRKernelExpSineSquared.py | 19 +++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.c3typ create mode 100644 training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.py diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.c3typ new file mode 100644 index 00000000..b6cdccfa --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.c3typ @@ -0,0 +1,18 @@ +/** +* SklearnGPRKernelExpSineSquared.c3typ +* Scikit-Learn Gaussian Process Exponential Sine Squared Kernel. 
+* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.ExpSineSquared.html#sklearn.gaussian_process.kernels.ExpSineSquared +*/ +type SklearnGPRKernelExpSineSquared type key "SKLRN_GPR_KRNL_XPSNSQRD" { + // Constant that defines the kernel length scale + lengthScale: !double + // Constant that defines the kernel periodicity + periodicity: !double + // the SklearnGPRKernel for this object + kernel: SklearnGPRKernel + + // constructs the SklearnGPRKernel internal object + @py(env='gordon-ML_1_0_0') + @dependency(include="this, kernel") + build: inline member function(): SklearnGPRKernel +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.py new file mode 100644 index 00000000..fde1d4e2 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.py @@ -0,0 +1,19 @@ +def build(this): + """ + This effectively constructs the type instance, by creating the object to be placed in the kernel field. Ideally this should be replaced by a callback function (beforeMake or afterMake). 
+ """ + from sklearn.gaussian_process.kernels import ExpSineSquared + + sklKernel = ExpSineSquared(length_scale=this.lengthScale, periodicity=this.periodicity) + + kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) + kernel_name = 'ExpSineSquared' + kernel_hyperParameters = [this.lengthScale, this.periodicity] + + this.kernel = c3.SklearnGPRKernel( + name=kernel_name, + hyperParameters=kernel_hyperParameters, + pickledKernel=kernel_pickled + ) + + return this \ No newline at end of file From 701409ca69a206fb26956eb825fe633f640b3348 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 11:20:40 -0600 Subject: [PATCH 083/188] add matern kernel --- .../kernels/SklearnGPRKernelMatern.c3typ | 18 ++++++++++++++++++ .../kernels/SklearnGPRKernelMatern.py | 19 +++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.c3typ create mode 100644 training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.py diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.c3typ new file mode 100644 index 00000000..ed406172 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.c3typ @@ -0,0 +1,18 @@ +/** +* SklearnGPRKernelMatern.c3typ +* Scikit-Learn Gaussian Process Matern Kernel. 
+* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.Matern.html#sklearn.gaussian_process.kernels.Matern +*/ +type SklearnGPRKernelMatern type key "SKLRN_GPR_KRNL_MTRN" { + // Constant that defines the kernel length scale + lengthScale: !double + // Gamma and modified Bessel function orders + nu: !double + // the SklearnGPRKernel for this object + kernel: SklearnGPRKernel + + // constructs the SklearnGPRKernel internal object + @py(env='gordon-ML_1_0_0') + @dependency(include="this, kernel") + build: inline member function(): SklearnGPRKernel +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.py new file mode 100644 index 00000000..beef221e --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.py @@ -0,0 +1,19 @@ +def build(this): + """ + This effectively constructs the type instance, by creating the object to be placed in the kernel field. Ideally this should be replaced by a callback function (beforeMake or afterMake). 
+ """ + from sklearn.gaussian_process.kernels import Matern + + sklKernel = Matern(length_scale=this.lengthScale, nu=this.nu) + + kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) + kernel_name = 'Matern' + kernel_hyperParameters = [this.lengthScale, this.nu] + + this.kernel = c3.SklearnGPRKernel( + name=kernel_name, + hyperParameters=kernel_hyperParameters, + pickledKernel=kernel_pickled + ) + + return this \ No newline at end of file From a3bb3c46155a006ce64ae49df2ba698033b33c08 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 11:29:23 -0600 Subject: [PATCH 084/188] add RBF kernel --- .../kernels/SklearnGPRKernelRBF.c3typ | 16 ++++++++++++++++ .../kernels/SklearnGPRKernelRBF.py | 19 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.c3typ create mode 100644 training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.py diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.c3typ new file mode 100644 index 00000000..51484fe2 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.c3typ @@ -0,0 +1,16 @@ +/** +* SklearnGPRKernelRBF.c3typ +* Scikit-Learn Gaussian Process RBF Kernel. 
+* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.RBF.html#sklearn.gaussian_process.kernels.RBF +*/ +type SklearnGPRKernelRBF type key "SKLRN_GPR_KRNL_RBF" { + // Constant that defines the kernel length scale + lengthScale: !double + // the SklearnGPRKernel for this object + kernel: SklearnGPRKernel + + // constructs the SklearnGPRKernel internal object + @py(env='gordon-ML_1_0_0') + @dependency(include="this, kernel") + build: inline member function(): SklearnGPRKernel +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.py new file mode 100644 index 00000000..7b23df00 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.py @@ -0,0 +1,19 @@ +def build(this): + """ + This effectively constructs the type instance, by creating the object to be placed in the kernel field. Ideally this should be replaced by a callback function (beforeMake or afterMake). 
+ """ + from sklearn.gaussian_process.kernels import RBF + + sklKernel = RBF(length_scale=this.lengthScale) + + kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) + kernel_name = 'RBF' + kernel_hyperParameters = [this.lengthScale] + + this.kernel = c3.SklearnGPRKernel( + name=kernel_name, + hyperParameters=kernel_hyperParameters, + pickledKernel=kernel_pickled + ) + + return this \ No newline at end of file From e637d04d2504cb28f3561cbe1fee9cb30d8a1788 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 11:38:03 -0600 Subject: [PATCH 085/188] add rational quadratic kernel --- .../SklearnGPRKernelRationalQuadratic.c3typ | 18 ++++++++++++++++++ .../SklearnGPRKernelRationalQuadratic.py | 19 +++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.c3typ create mode 100644 training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.py diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.c3typ new file mode 100644 index 00000000..c5dbf623 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.c3typ @@ -0,0 +1,18 @@ +/** +* SklearnGPRKernelRationalQuadratic.c3typ +* Scikit-Learn Gaussian Process Rational Quadratic Kernel. 
+* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.RationalQuadratic.html#sklearn.gaussian_process.kernels.RationalQuadratic +*/ +type SklearnGPRKernelRationalQuadratic type key "SKLRN_GPR_KRNL_RTNLQDRTC" { + // Constant that defines the kernel length scale + lengthScale: !double + // Scale mixture parameter > 0 + alpha: !double + // the SklearnGPRKernel for this object + kernel: SklearnGPRKernel + + // constructs the SklearnGPRKernel internal object + @py(env='gordon-ML_1_0_0') + @dependency(include="this, kernel") + build: inline member function(): SklearnGPRKernel +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.py new file mode 100644 index 00000000..f8ca6773 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.py @@ -0,0 +1,19 @@ +def build(this): + """ + This effectively constructs the type instance, by creating the object to be placed in the kernel field. Ideally this should be replaced by a callback function (beforeMake or afterMake). 
+ """ + from sklearn.gaussian_process.kernels import RationalQuadratic + + sklKernel = RationalQuadratic(length_scale=this.lengthScale, alpha=this.alpha) + + kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) + kernel_name = 'RationalQuadratic' + kernel_hyperParameters = [this.lengthScale, this.alpha] + + this.kernel = c3.SklearnGPRKernel( + name=kernel_name, + hyperParameters=kernel_hyperParameters, + pickledKernel=kernel_pickled + ) + + return this \ No newline at end of file From 3fa66356007cdc7c287ec6cfc26aca355326ca60 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 11:47:20 -0600 Subject: [PATCH 086/188] add white kernel --- .../kernels/SklearnGPRKernelWhite.c3typ | 16 ++++++++++++++++ .../kernels/SklearnGPRKernelWhite.py | 19 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.c3typ create mode 100644 training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.py diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.c3typ new file mode 100644 index 00000000..4af68bc7 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.c3typ @@ -0,0 +1,16 @@ +/** +* SklearnGPRKernelWhite.c3typ +* Scikit-Learn Gaussian Process White Kernel. 
+* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.WhiteKernel.html#sklearn.gaussian_process.kernels.WhiteKernel +*/ +type SklearnGPRKernelWhite type key "SKLRN_GPR_KRNL_WHT" { + // Controls the noise level (variance) of kernel + noiseLevel: !double + // the SklearnGPRKernel for this object + kernel: SklearnGPRKernel + + // constructs the SklearnGPRKernel internal object + @py(env='gordon-ML_1_0_0') + @dependency(include="this, kernel") + build: inline member function(): SklearnGPRKernel +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.py new file mode 100644 index 00000000..038ad7c3 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.py @@ -0,0 +1,19 @@ +def build(this): + """ + This effectively constructs the type instance, by creating the object to be placed in the kernel field. Ideally this should be replaced by a callback function (beforeMake or afterMake). 
+ """ + from sklearn.gaussian_process.kernels import WhiteKernel + + sklKernel = WhiteKernel(noise_level=this.noiseLevel) + + kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) + kernel_name = 'White' + kernel_hyperParameters = [this.lengthScale, this.alpha] + + this.kernel = c3.SklearnGPRKernel( + name=kernel_name, + hyperParameters=kernel_hyperParameters, + pickledKernel=kernel_pickled + ) + + return this \ No newline at end of file From cb0d5d35d7adecbccfb72d80b96c0d148dbfc663 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 23 Feb 2022 11:49:55 -0600 Subject: [PATCH 087/188] of course the LAST one had a little typo --- .../GPRegression/kernels/SklearnGPRKernelWhite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.py index 038ad7c3..7ed61fe1 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.py @@ -8,7 +8,7 @@ def build(this): kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) kernel_name = 'White' - kernel_hyperParameters = [this.lengthScale, this.alpha] + kernel_hyperParameters = [this.noiseLevel] this.kernel = c3.SklearnGPRKernel( name=kernel_name, From 7878463e873b244745082e8bcbf3e7e581b9c2f6 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 28 Feb 2022 07:11:41 -0600 Subject: [PATCH 088/188] rmv inline of netcdf call --- training/gordon-group/src/NetCDF/NetCDFUtil.c3typ | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/training/gordon-group/src/NetCDF/NetCDFUtil.c3typ b/training/gordon-group/src/NetCDF/NetCDFUtil.c3typ index 2f2a98fb..817fc34c 100644 --- a/training/gordon-group/src/NetCDF/NetCDFUtil.c3typ 
+++ b/training/gordon-group/src/NetCDF/NetCDFUtil.c3typ @@ -5,7 +5,8 @@ type NetCDFUtil { // Open NetCDF file from external source @py(env='gordon_1_0_0') - openFile: inline function(url: !string, localPath: string = '/tmp'): any + //openFile: inline function(url: !string, localPath: string = '/tmp'): any + openFile: function(url: !string, localPath: string = '/tmp'): any // Close NetCDF file @py(env='gordon_1_0_0') closeFile: inline function(ds: !any, url: !string, localPath: string = '/tmp'): integer From 034d53942cbc632019256f599d2137ead8f2d868 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 28 Feb 2022 07:19:39 -0600 Subject: [PATCH 089/188] back to inline --- training/gordon-group/src/NetCDF/NetCDFUtil.c3typ | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/training/gordon-group/src/NetCDF/NetCDFUtil.c3typ b/training/gordon-group/src/NetCDF/NetCDFUtil.c3typ index 817fc34c..2f2a98fb 100644 --- a/training/gordon-group/src/NetCDF/NetCDFUtil.c3typ +++ b/training/gordon-group/src/NetCDF/NetCDFUtil.c3typ @@ -5,8 +5,7 @@ type NetCDFUtil { // Open NetCDF file from external source @py(env='gordon_1_0_0') - //openFile: inline function(url: !string, localPath: string = '/tmp'): any - openFile: function(url: !string, localPath: string = '/tmp'): any + openFile: inline function(url: !string, localPath: string = '/tmp'): any // Close NetCDF file @py(env='gordon_1_0_0') closeFile: inline function(ds: !any, url: !string, localPath: string = '/tmp'): integer From 3670ff8e16a7e5fad79e59dc940e971a9444b34c Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 28 Feb 2022 07:26:57 -0600 Subject: [PATCH 090/188] add sklearn --- .../seed/ActionRuntime/py-gordon_2_0_0.json | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/seed/ActionRuntime/py-gordon_2_0_0.json b/training/gordon-group/seed/ActionRuntime/py-gordon_2_0_0.json index b7c22bd7..b6065384 100644 --- 
a/training/gordon-group/seed/ActionRuntime/py-gordon_2_0_0.json +++ b/training/gordon-group/seed/ActionRuntime/py-gordon_2_0_0.json @@ -2,8 +2,14 @@ "language": "Python", "runtimeVersion": "3.6", "modules": { - "conda.cartopy":"=0.19.0", - "conda.pandas": "=1.0.1" + "conda.pip": "=21.2.2", + "conda.netcdf4": "=1.5.7", + "conda.numpy": "=1.17.0", + "conda.pandas": "=1.1.5", + "conda.cartopy":"=0.18.0", + "conda.basemap":"=1.2.2", + "conda.iris":"=3.1.0", + "conda.scikit-learn":"=1.0.2" }, "repositories": [ "https://repo.continuum.io/pkgs/main", From 10338215287b0084a88c6b5e366e5340e8cc3931 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 28 Feb 2022 07:40:16 -0600 Subject: [PATCH 091/188] adjust version --- training/gordon-group/seed/ActionRuntime/py-gordon_2_0_0.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/seed/ActionRuntime/py-gordon_2_0_0.json b/training/gordon-group/seed/ActionRuntime/py-gordon_2_0_0.json index b6065384..dd6defd5 100644 --- a/training/gordon-group/seed/ActionRuntime/py-gordon_2_0_0.json +++ b/training/gordon-group/seed/ActionRuntime/py-gordon_2_0_0.json @@ -9,7 +9,7 @@ "conda.cartopy":"=0.18.0", "conda.basemap":"=1.2.2", "conda.iris":"=3.1.0", - "conda.scikit-learn":"=1.0.2" + "conda.scikit-learn":"=0.23.1" }, "repositories": [ "https://repo.continuum.io/pkgs/main", From ac7459f313f0a50e139d71f6850023bbee1d4ad3 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 28 Feb 2022 10:19:12 -0600 Subject: [PATCH 092/188] add serialized netcdf util --- .../src/NetCDF/NetCDFUtilSerialized.c3typ | 12 ++++++ .../src/NetCDF/NetCDFUtilSerialized.py | 38 +++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 training/gordon-group/src/NetCDF/NetCDFUtilSerialized.c3typ create mode 100644 training/gordon-group/src/NetCDF/NetCDFUtilSerialized.py diff --git a/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.c3typ b/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.c3typ new file mode 
100644 index 00000000..07a1ce29 --- /dev/null +++ b/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.c3typ @@ -0,0 +1,12 @@ +/** +* NetCDFUtilSerialized.c3typ +* Utility box to deal with NetCDF files. Methods return serialized python objects and can be run on the server as opposed to local env. +**/ +type NetCDFUtil { + // Open NetCDF file from external source + @py(env='gordon_1_0_0') + openFile: function(url: !string, localPath: string = '/tmp'): any + // Close NetCDF file + @py(env='gordon_1_0_0') + closeFile: function(ds: !any, url: !string, localPath: string = '/tmp'): integer +} \ No newline at end of file diff --git a/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.py b/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.py new file mode 100644 index 00000000..a13efc76 --- /dev/null +++ b/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.py @@ -0,0 +1,38 @@ +def openFile(url, localPath='/tmp'): + """ + Opens a NetCDF file from an external storage path (e.g. Azure blob) + + Arguments: + -url (str): URL to NetCDF file + -localPath (str): Path to the local file + Returns: + -netCDF4.Dataset: A netCDF4 Dataset object + """ + import netCDF4 as nc + import os + filename = os.path.basename(url) + tmp_path = localPath + '/' + filename + c3.Client.copyFilesToLocalClient(url, '/tmp') + pickledNC = c3.PythonSerialization.serialize(obj=nc.Dataset(tmp_path)) + + return pickledNC + + +def closeFile(ds, url, localPath='/tmp'): + """ + Closes a NetCDF file. 
+ + Arguments: + -ds (netCDF4.Dataset): A netCDF4 Dataset object + -url (str): URL to a NetCDF file + -localPath (str): Path to the local file + Returns: + -integer + """ + import os + ds.close() + filename = os.path.basename(url) + tmp_path = localPath + '/' + filename + os.remove(tmp_path) + + return 1 \ No newline at end of file From 6d424a3a8e78ec38449bb5fa42582af121522ec6 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 28 Feb 2022 10:26:49 -0600 Subject: [PATCH 093/188] name fix --- training/gordon-group/src/NetCDF/NetCDFUtilSerialized.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.c3typ b/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.c3typ index 07a1ce29..13010703 100644 --- a/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.c3typ +++ b/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.c3typ @@ -2,7 +2,7 @@ * NetCDFUtilSerialized.c3typ * Utility box to deal with NetCDF files. Methods return serialized python objects and can be run on the server as opposed to local env. 
**/ -type NetCDFUtil { +type NetCDFUtilSerialized { // Open NetCDF file from external source @py(env='gordon_1_0_0') openFile: function(url: !string, localPath: string = '/tmp'): any From 1046ceadef7781466d0a412556d2cf6aca609762 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 28 Feb 2022 10:32:26 -0600 Subject: [PATCH 094/188] add simplified action runtime --- .../seed/ActionRuntime/py-gordon-nc4-_1_0_0.json | 15 +++++++++++++++ .../src/NetCDF/NetCDFUtilSerialized.c3typ | 4 ++-- 2 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 training/gordon-group/seed/ActionRuntime/py-gordon-nc4-_1_0_0.json diff --git a/training/gordon-group/seed/ActionRuntime/py-gordon-nc4-_1_0_0.json b/training/gordon-group/seed/ActionRuntime/py-gordon-nc4-_1_0_0.json new file mode 100644 index 00000000..5a91647f --- /dev/null +++ b/training/gordon-group/seed/ActionRuntime/py-gordon-nc4-_1_0_0.json @@ -0,0 +1,15 @@ +{ + "language": "Python", + "runtimeVersion": "3.6", + "modules": { + "conda.netcdf4": "=1.5.7", + "conda.dill":"=0.2.8.2" + }, + "repositories": [ + "https://repo.continuum.io/pkgs/main", + "conda-forge" + ], + "runtime": "CPython", + "name": "py-gordon-nc4_1_0_0", + "id": "py-gordon-nc4_1_0_0" +} \ No newline at end of file diff --git a/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.c3typ b/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.c3typ index 13010703..514972e3 100644 --- a/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.c3typ +++ b/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.c3typ @@ -4,9 +4,9 @@ **/ type NetCDFUtilSerialized { // Open NetCDF file from external source - @py(env='gordon_1_0_0') + @py(env='gordon-nc4_1_0_0') openFile: function(url: !string, localPath: string = '/tmp'): any // Close NetCDF file - @py(env='gordon_1_0_0') + @py(env='gordon-nc4_1_0_0') closeFile: function(ds: !any, url: !string, localPath: string = '/tmp'): integer } \ No newline at end of file From f2547eebc4a45c74348f9cb3a90b4df0bf4bc626 
Mon Sep 17 00:00:00 2001 From: James Carzon Date: Fri, 4 Mar 2022 12:52:04 -0500 Subject: [PATCH 095/188] Add dill library to kernel The dill library is a dependency of the c3 GP ML pipeline. We include this in the py_gordon_2_0_0 kernel for use with the satellite data. --- training/gordon-group/seed/ActionRuntime/py-gordon_2_0_0.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/training/gordon-group/seed/ActionRuntime/py-gordon_2_0_0.json b/training/gordon-group/seed/ActionRuntime/py-gordon_2_0_0.json index dd6defd5..e77d6e4a 100644 --- a/training/gordon-group/seed/ActionRuntime/py-gordon_2_0_0.json +++ b/training/gordon-group/seed/ActionRuntime/py-gordon_2_0_0.json @@ -9,7 +9,8 @@ "conda.cartopy":"=0.18.0", "conda.basemap":"=1.2.2", "conda.iris":"=3.1.0", - "conda.scikit-learn":"=0.23.1" + "conda.scikit-learn":"=0.23.1", + "conda.dill":"=0.2.8.2" }, "repositories": [ "https://repo.continuum.io/pkgs/main", From 50337fe82877ec0bada89314eaa4752816790b1c Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 4 Mar 2022 13:05:50 -0600 Subject: [PATCH 096/188] comment on possible improvement --- .../GPRegression/kernels/SklearnGPRKernel.c3typ | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index bd5397c7..8c579c4d 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -6,8 +6,8 @@ type SklearnGPRKernel type key "SKL_GPR_KRNL" { // the name of the kernel name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') - // the kernel parameters - hyperParameters: [double] schema suffix 'HPRPRMS' + // the 
kernel parameters (this should probably be a map) + hyperParameters: [double] schema suffix 'HPRPRMS' // the pickled kernel pickledKernel: string From a8abf90e8ffbb7ad224dd7839b3769f31b58bebe Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 8 Mar 2022 13:48:30 -0600 Subject: [PATCH 097/188] rm inline call --- .../GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ index cc1aabb4..c55d3a51 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ @@ -12,5 +12,6 @@ type SklearnGPRKernelDotProduct type key "SKLRN_GPR_KRNL_DTPRDCT" { // constructs the SklearnGPRKernel internal object @py(env='gordon-ML_1_0_0') @dependency(include="this, kernel") - build: inline member function(): SklearnGPRKernel + //build: inline member function(): SklearnGPRKernel + build: member function(): SklearnGPRKernel } \ No newline at end of file From 60196495ed4e324dddfb81e2c2740aff4af71e45 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 8 Mar 2022 13:54:32 -0600 Subject: [PATCH 098/188] got a lil different error now --- .../GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ index c55d3a51..8435d62d 100644 --- 
a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ @@ -11,7 +11,7 @@ type SklearnGPRKernelDotProduct type key "SKLRN_GPR_KRNL_DTPRDCT" { // constructs the SklearnGPRKernel internal object @py(env='gordon-ML_1_0_0') - @dependency(include="this, kernel") + //@dependency(include="this, kernel") //build: inline member function(): SklearnGPRKernel build: member function(): SklearnGPRKernel } \ No newline at end of file From 1880437665eadb705bbbc743055b01ddd3c524b4 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 8 Mar 2022 14:09:12 -0600 Subject: [PATCH 099/188] another dependency --- .../GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ | 1 + 1 file changed, 1 insertion(+) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ index 8435d62d..76e21c1a 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ @@ -12,6 +12,7 @@ type SklearnGPRKernelDotProduct type key "SKLRN_GPR_KRNL_DTPRDCT" { // constructs the SklearnGPRKernel internal object @py(env='gordon-ML_1_0_0') //@dependency(include="this, kernel") + @dependency(include="this") //build: inline member function(): SklearnGPRKernel build: member function(): SklearnGPRKernel } \ No newline at end of file From ef20053bf230df68a6005c9ad7ae218818bc59ff Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 8 Mar 2022 14:12:58 -0600 Subject: [PATCH 100/188] fixing return type? 
--- .../GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ index 76e21c1a..cdbdc760 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ @@ -14,5 +14,5 @@ type SklearnGPRKernelDotProduct type key "SKLRN_GPR_KRNL_DTPRDCT" { //@dependency(include="this, kernel") @dependency(include="this") //build: inline member function(): SklearnGPRKernel - build: member function(): SklearnGPRKernel + build: member function(): SklearnGPRKernelDotProduct } \ No newline at end of file From d03e938adc7c2d395eef0cb61138be7d8914df7a Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 8 Mar 2022 14:19:15 -0600 Subject: [PATCH 101/188] include more stuff --- .../GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ index cdbdc760..8ae217e3 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ @@ -12,7 +12,7 @@ type SklearnGPRKernelDotProduct type key "SKLRN_GPR_KRNL_DTPRDCT" { // constructs the SklearnGPRKernel internal object @py(env='gordon-ML_1_0_0') //@dependency(include="this, kernel") - @dependency(include="this") + 
@dependency(include="this, sigmaZero, kernel") //build: inline member function(): SklearnGPRKernel build: member function(): SklearnGPRKernelDotProduct } \ No newline at end of file From e660070a63da90ed76c072c7524d277db589f114 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 8 Mar 2022 14:35:15 -0600 Subject: [PATCH 102/188] try hiding deps one more time --- .../GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ index 8ae217e3..828f9b2e 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ @@ -12,7 +12,7 @@ type SklearnGPRKernelDotProduct type key "SKLRN_GPR_KRNL_DTPRDCT" { // constructs the SklearnGPRKernel internal object @py(env='gordon-ML_1_0_0') //@dependency(include="this, kernel") - @dependency(include="this, sigmaZero, kernel") + //@dependency(include="this, sigmaZero, kernel") //build: inline member function(): SklearnGPRKernel build: member function(): SklearnGPRKernelDotProduct } \ No newline at end of file From d5637898eb473d91f93a87a66a25ff73e1f94df8 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 8 Mar 2022 14:39:33 -0600 Subject: [PATCH 103/188] add map to hyperPars --- .../GPRegression/kernels/SklearnGPRKernel.c3typ | 3 ++- .../GPRegression/kernels/SklearnGPRKernelDotProduct.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 
8c579c4d..7063ddcd 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -7,7 +7,8 @@ type SklearnGPRKernel type key "SKL_GPR_KRNL" { // the name of the kernel name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') // the kernel parameters (this should probably be a map) - hyperParameters: [double] schema suffix 'HPRPRMS' + //hyperParameters: [double] schema suffix 'HPRPRMS' + hyperParameters: map // the pickled kernel pickledKernel: string diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.py index 70430725..5e938dab 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.py @@ -8,7 +8,8 @@ def build(this): kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) kernel_name = 'DotProduct' - kernel_hyperParameters = [this.sigmaZero] + #kernel_hyperParameters = [this.sigmaZero] + kernel_hyperParameters = c3.c3Make("map", {"sigmaZero":1.0}) this.kernel = c3.SklearnGPRKernel( name=kernel_name, From 31c155966d777d1cb9e33ecb3a5a5b96d4a3f1e6 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 8 Mar 2022 14:41:51 -0600 Subject: [PATCH 104/188] add schema suffix --- .../machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ 
b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 7063ddcd..2f9bf71d 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -8,7 +8,7 @@ type SklearnGPRKernel type key "SKL_GPR_KRNL" { name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') // the kernel parameters (this should probably be a map) //hyperParameters: [double] schema suffix 'HPRPRMS' - hyperParameters: map + hyperParameters: map schema suffix 'HPRPRMS' // the pickled kernel pickledKernel: string From 3c045b587be21d6b14f5887d17713e86209cab9e Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 8 Mar 2022 14:50:37 -0600 Subject: [PATCH 105/188] trying mapp --- .../machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 2f9bf71d..132c6bfb 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -8,7 +8,7 @@ type SklearnGPRKernel type key "SKL_GPR_KRNL" { name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') // the kernel parameters (this should probably be a map) //hyperParameters: [double] schema suffix 'HPRPRMS' - hyperParameters: map schema suffix 'HPRPRMS' + hyperParameters: Mapp schema suffix 'HPRPRMS' // the pickled kernel pickledKernel: string From b833aae4ac2d97745d91dfbfc7ea089b3de65239 Mon Sep 17 00:00:00 2001 
From: babreu-ncsa Date: Tue, 8 Mar 2022 14:54:13 -0600 Subject: [PATCH 106/188] back to map --- .../machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 132c6bfb..2f9bf71d 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -8,7 +8,7 @@ type SklearnGPRKernel type key "SKL_GPR_KRNL" { name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') // the kernel parameters (this should probably be a map) //hyperParameters: [double] schema suffix 'HPRPRMS' - hyperParameters: Mapp schema suffix 'HPRPRMS' + hyperParameters: map schema suffix 'HPRPRMS' // the pickled kernel pickledKernel: string From 60735a475a23e5b7e0d6a850e461b45e8ca183c8 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 8 Mar 2022 14:57:19 -0600 Subject: [PATCH 107/188] back to array..... 
--- .../GPRegression/kernels/SklearnGPRKernel.c3typ | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 2f9bf71d..e5b10a67 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -7,8 +7,8 @@ type SklearnGPRKernel type key "SKL_GPR_KRNL" { // the name of the kernel name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') // the kernel parameters (this should probably be a map) - //hyperParameters: [double] schema suffix 'HPRPRMS' - hyperParameters: map schema suffix 'HPRPRMS' + hyperParameters: [double] schema suffix 'HPRPRMS' + //hyperParameters: map schema suffix 'HPRPRMS' // the pickled kernel pickledKernel: string From d19270fc3f8b6319b6678f4261d1eae1635d838c Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 8 Mar 2022 15:05:28 -0600 Subject: [PATCH 108/188] schema name --- .../GPRegression/kernels/SklearnGPRKernel.c3typ | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index e5b10a67..cadabcfb 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -7,8 +7,8 @@ type SklearnGPRKernel type key "SKL_GPR_KRNL" { // the name of the kernel name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 
'RationalQuadratic', 'White') // the kernel parameters (this should probably be a map) - hyperParameters: [double] schema suffix 'HPRPRMS' - //hyperParameters: map schema suffix 'HPRPRMS' + //hyperParameters: [double] schema suffix 'HPRPRMS' + hyperParameters: map schema name 'HPRPRMS' // the pickled kernel pickledKernel: string From 55ae157bceefb7ce7e01176271d77fde16700292 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 8 Mar 2022 15:08:31 -0600 Subject: [PATCH 109/188] adding a space? --- .../machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index cadabcfb..62c60ab9 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -8,7 +8,7 @@ type SklearnGPRKernel type key "SKL_GPR_KRNL" { name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') // the kernel parameters (this should probably be a map) //hyperParameters: [double] schema suffix 'HPRPRMS' - hyperParameters: map schema name 'HPRPRMS' + hyperParameters: map schema suffix 'HPRPRMS' // the pickled kernel pickledKernel: string From 0198c00c0b59124e0f7beb35fe6c9949952c3fc6 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 8 Mar 2022 16:08:25 -0600 Subject: [PATCH 110/188] change schema name --- .../machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ 
b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 62c60ab9..29a18913 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -8,7 +8,7 @@ type SklearnGPRKernel type key "SKL_GPR_KRNL" { name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') // the kernel parameters (this should probably be a map) //hyperParameters: [double] schema suffix 'HPRPRMS' - hyperParameters: map schema suffix 'HPRPRMS' + hyperParameters: map schema suffix 'HPR' // the pickled kernel pickledKernel: string From e4a8e9f262cc3c93a38befd57d1fd25d83ce6c60 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 8 Mar 2022 16:11:53 -0600 Subject: [PATCH 111/188] schema name on technique --- .../GPRegression/GaussianProcessRegressionPipe.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ index 2735cfbd..2cddeffc 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ @@ -5,7 +5,7 @@ entity type GaussianProcessRegressionPipe extends MLLeafPipe type key 'GPREG' { // ATTRIBUTES // the technique for this regression - technique: !GaussianProcessRegressionTechnique + technique: !GaussianProcessRegressionTechnique schema suffix 'TCNQ' // METHODS // train the model From 02a8e82ce43d361cdc7142d71880c28d266f7045 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 8 Mar 2022 16:15:06 -0600 Subject: [PATCH 112/188] db 
annotation --- .../GPRegression/GaussianProcessRegressionPipe.c3typ | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ index 2cddeffc..39a785cb 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ @@ -5,7 +5,8 @@ entity type GaussianProcessRegressionPipe extends MLLeafPipe type key 'GPREG' { // ATTRIBUTES // the technique for this regression - technique: !GaussianProcessRegressionTechnique schema suffix 'TCNQ' + @db(persistAllFields=true) + technique: !GaussianProcessRegressionTechnique // METHODS // train the model From 4c54f783511c76c5a13bebf7512b3de3bcceb933 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 8 Mar 2022 16:21:18 -0600 Subject: [PATCH 113/188] getting rid of schemas and type keys --- .../GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ index 828f9b2e..6e536583 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ @@ -3,7 +3,7 @@ * Scikit-Learn Gaussian Process Dot Product Kernel. 
* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.DotProduct.html#sklearn.gaussian_process.kernels.DotProduct */ -type SklearnGPRKernelDotProduct type key "SKLRN_GPR_KRNL_DTPRDCT" { +type SklearnGPRKernelDotProduct { // Constant that defines the kernel inhomogenity sigmaZero: !double // the SklearnGPRKernel for this object From 172860accd2c6c6bc9c2868bec7b6c48af34691d Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 8 Mar 2022 16:28:04 -0600 Subject: [PATCH 114/188] it's working now --- .../GPRegression/kernels/SklearnGPRKernel.c3typ | 8 ++------ .../GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ | 3 --- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index 29a18913..ebde71be 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -3,16 +3,12 @@ * Stepping stone for Scikit-Learn Gaussian Process kernels. 
* See: https://scikit-learn.org/stable/modules/classes.html#module-sklearn.gaussian_process */ -type SklearnGPRKernel type key "SKL_GPR_KRNL" { +type SklearnGPRKernel { // the name of the kernel name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') // the kernel parameters (this should probably be a map) //hyperParameters: [double] schema suffix 'HPRPRMS' - hyperParameters: map schema suffix 'HPR' + hyperParameters: map // the pickled kernel pickledKernel: string - - // C3DS hints - // - override fromString - // - look into beforeMake() } \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ index 6e536583..65f4fa85 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ @@ -11,8 +11,5 @@ type SklearnGPRKernelDotProduct { // constructs the SklearnGPRKernel internal object @py(env='gordon-ML_1_0_0') - //@dependency(include="this, kernel") - //@dependency(include="this, sigmaZero, kernel") - //build: inline member function(): SklearnGPRKernel build: member function(): SklearnGPRKernelDotProduct } \ No newline at end of file From ed73dc38ca77b3112f4b437d50b3c0defd14ed80 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 9 Mar 2022 06:40:17 -0600 Subject: [PATCH 115/188] clean up code --- .../GPRegression/GaussianProcessRegressionPipe.c3typ | 2 -- .../GPRegression/GaussianProcessRegressionTechnique.c3typ | 2 +- .../GPRegression/kernels/SklearnGPRKernel.c3typ | 3 +-- .../GPRegression/kernels/SklearnGPRKernelConstant.c3typ | 5 ++--- .../GPRegression/kernels/SklearnGPRKernelConstant.py | 4 +++- 
.../GPRegression/kernels/SklearnGPRKernelDotProduct.py | 5 +++-- .../kernels/SklearnGPRKernelExpSineSquared.c3typ | 5 ++--- .../GPRegression/kernels/SklearnGPRKernelExpSineSquared.py | 6 +++++- .../GPRegression/kernels/SklearnGPRKernelMatern.c3typ | 5 ++--- .../GPRegression/kernels/SklearnGPRKernelMatern.py | 6 +++++- .../GPRegression/kernels/SklearnGPRKernelRBF.c3typ | 3 +-- .../GPRegression/kernels/SklearnGPRKernelRBF.py | 6 +++++- .../kernels/SklearnGPRKernelRationalQuadratic.c3typ | 5 ++--- .../kernels/SklearnGPRKernelRationalQuadratic.py | 6 +++++- .../GPRegression/kernels/SklearnGPRKernelWhite.c3typ | 5 ++--- .../GPRegression/kernels/SklearnGPRKernelWhite.py | 5 ++++- 16 files changed, 43 insertions(+), 30 deletions(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ index 39a785cb..de2d4439 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ @@ -3,12 +3,10 @@ * Performs Scikit-Learn's GP Regression for a set of inputs-outputs. 
*/ entity type GaussianProcessRegressionPipe extends MLLeafPipe type key 'GPREG' { - // ATTRIBUTES // the technique for this regression @db(persistAllFields=true) technique: !GaussianProcessRegressionTechnique - // METHODS // train the model @py(env='gordon-ML_1_0_0') train: ~ diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ index c78876c2..50485926 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ @@ -8,5 +8,5 @@ type GaussianProcessRegressionTechnique mixes MLTechnique { randomState: integer // the kernel object @ML(hyperParameter=true) - kernel: SklearnGPRKernel schema suffix 'KRNL' + kernel: SklearnGPRKernel } \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ index ebde71be..bce84311 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -6,8 +6,7 @@ type SklearnGPRKernel { // the name of the kernel name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') - // the kernel parameters (this should probably be a map) - //hyperParameters: [double] schema suffix 'HPRPRMS' + // the kernel hyper parameters hyperParameters: map // the pickled kernel pickledKernel: string diff --git 
a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ index 30c4dc8e..e50f0452 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ @@ -3,7 +3,7 @@ * Scikit-Learn Gaussian Process Constant Kernel. * See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.ConstantKernel.html#sklearn.gaussian_process.kernels.ConstantKernel */ -type SklearnGPRKernelConstant type key "SKLRN_GPR_KRNL_CNSTNT" { +type SklearnGPRKernelConstant { // Constant value that defines the kernel value constantValue: !double // the SklearnGPRKernel for this object @@ -11,6 +11,5 @@ type SklearnGPRKernelConstant type key "SKLRN_GPR_KRNL_CNSTNT" { // constructs the SklearnGPRKernel internal object @py(env='gordon-ML_1_0_0') - @dependency(include="this, kernel") - build: inline member function(): SklearnGPRKernel + build: member function(): SklearnGPRKernelConstant } \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py index bfa271f6..0bb23aba 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -8,7 +8,9 @@ def build(this): kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) kernel_name = 'Constant' - kernel_hyperParameters = [this.constantValue] + kernel_hyperParameters = c3.c3Make( + "map", {"constantValue": 
this.constantValue} + ) this.kernel = c3.SklearnGPRKernel( name=kernel_name, diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.py index 5e938dab..7613d20b 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.py @@ -8,8 +8,9 @@ def build(this): kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) kernel_name = 'DotProduct' - #kernel_hyperParameters = [this.sigmaZero] - kernel_hyperParameters = c3.c3Make("map", {"sigmaZero":1.0}) + kernel_hyperParameters = c3.c3Make( + "map", {"sigmaZero": this.sigmaZero} + ) this.kernel = c3.SklearnGPRKernel( name=kernel_name, diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.c3typ index b6cdccfa..e0b23e25 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.c3typ @@ -3,7 +3,7 @@ * Scikit-Learn Gaussian Process Exponential Sine Squared Kernel. 
* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.ExpSineSquared.html#sklearn.gaussian_process.kernels.ExpSineSquared */ -type SklearnGPRKernelExpSineSquared type key "SKLRN_GPR_KRNL_XPSNSQRD" { +type SklearnGPRKernelExpSineSquared { // Constant that defines the kernel length scale lengthScale: !double // Constant that defines the kernel periodicity @@ -13,6 +13,5 @@ type SklearnGPRKernelExpSineSquared type key "SKLRN_GPR_KRNL_XPSNSQRD" { // constructs the SklearnGPRKernel internal object @py(env='gordon-ML_1_0_0') - @dependency(include="this, kernel") - build: inline member function(): SklearnGPRKernel + build: member function(): SklearnGPRKernelExpSineSquared } \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.py index fde1d4e2..6b268185 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.py @@ -8,7 +8,11 @@ def build(this): kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) kernel_name = 'ExpSineSquared' - kernel_hyperParameters = [this.lengthScale, this.periodicity] + kernel_hyperParameters = c3.c3Make( + "map", {"lengthScale": this.lengthScale, + "periodicity": this.pediodicity + } + ) this.kernel = c3.SklearnGPRKernel( name=kernel_name, diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.c3typ index ed406172..143d8798 100644 --- 
a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.c3typ @@ -3,7 +3,7 @@ * Scikit-Learn Gaussian Process Matern Kernel. * See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.Matern.html#sklearn.gaussian_process.kernels.Matern */ -type SklearnGPRKernelMatern type key "SKLRN_GPR_KRNL_MTRN" { +type SklearnGPRKernelMatern { // Constant that defines the kernel length scale lengthScale: !double // Gamma and modified Bessel function orders @@ -13,6 +13,5 @@ type SklearnGPRKernelMatern type key "SKLRN_GPR_KRNL_MTRN" { // constructs the SklearnGPRKernel internal object @py(env='gordon-ML_1_0_0') - @dependency(include="this, kernel") - build: inline member function(): SklearnGPRKernel + build: member function(): SklearnGPRKernelMatern } \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.py index beef221e..550df242 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.py @@ -8,7 +8,11 @@ def build(this): kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) kernel_name = 'Matern' - kernel_hyperParameters = [this.lengthScale, this.nu] + kernel_hyperParameters = c3.c3Make( + "map", {"lengthScale": this.lengthScale, + "nu": this.nu + } + ) this.kernel = c3.SklearnGPRKernel( name=kernel_name, diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.c3typ 
b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.c3typ index 51484fe2..a29354c9 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.c3typ @@ -11,6 +11,5 @@ type SklearnGPRKernelRBF type key "SKLRN_GPR_KRNL_RBF" { // constructs the SklearnGPRKernel internal object @py(env='gordon-ML_1_0_0') - @dependency(include="this, kernel") - build: inline member function(): SklearnGPRKernel + build: member function(): SklearnGPRKernelRBF } \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.py index 7b23df00..d8dced50 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.py @@ -8,7 +8,11 @@ def build(this): kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) kernel_name = 'RBF' - kernel_hyperParameters = [this.lengthScale] + kernel_hyperParameters = c3.c3Make( + "map", {"lengthScale": this.lengthScale + } + ) + this.kernel = c3.SklearnGPRKernel( name=kernel_name, diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.c3typ index c5dbf623..43906a44 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.c3typ +++ 
b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.c3typ @@ -3,7 +3,7 @@ * Scikit-Learn Gaussian Process Rational Quadratic Kernel. * See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.RationalQuadratic.html#sklearn.gaussian_process.kernels.RationalQuadratic */ -type SklearnGPRKernelRationalQuadratic type key "SKLRN_GPR_KRNL_RTNLQDRTC" { +type SklearnGPRKernelRationalQuadratic { // Constant that defines the kernel length scale lengthScale: !double // Scale mixture parameter > 0 @@ -13,6 +13,5 @@ type SklearnGPRKernelRationalQuadratic type key "SKLRN_GPR_KRNL_RTNLQDRTC" { // constructs the SklearnGPRKernel internal object @py(env='gordon-ML_1_0_0') - @dependency(include="this, kernel") - build: inline member function(): SklearnGPRKernel + build: member function(): SklearnGPRKernelRationalQuadratic } \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.py index f8ca6773..2e6cc5f6 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.py @@ -8,7 +8,11 @@ def build(this): kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) kernel_name = 'RationalQuadratic' - kernel_hyperParameters = [this.lengthScale, this.alpha] + kernel_hyperParameters = c3.c3Make( + "map", {"lengthScale": this.lengthScale, + "alpha": this.alpha + } + ) this.kernel = c3.SklearnGPRKernel( name=kernel_name, diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.c3typ 
b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.c3typ index 4af68bc7..374bb41d 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.c3typ +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.c3typ @@ -3,7 +3,7 @@ * Scikit-Learn Gaussian Process White Kernel. * See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.WhiteKernel.html#sklearn.gaussian_process.kernels.WhiteKernel */ -type SklearnGPRKernelWhite type key "SKLRN_GPR_KRNL_WHT" { +type SklearnGPRKernelWhite { // Controls the noise level (variance) of kernel noiseLevel: !double // the SklearnGPRKernel for this object @@ -11,6 +11,5 @@ type SklearnGPRKernelWhite type key "SKLRN_GPR_KRNL_WHT" { // constructs the SklearnGPRKernel internal object @py(env='gordon-ML_1_0_0') - @dependency(include="this, kernel") - build: inline member function(): SklearnGPRKernel + build: member function(): SklearnGPRKernelWhite } \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.py index 7ed61fe1..1f73da08 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.py @@ -8,7 +8,10 @@ def build(this): kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) kernel_name = 'White' - kernel_hyperParameters = [this.noiseLevel] + kernel_hyperParameters = c3.c3Make( + "map", {"noiseLevel": this.noiseLevel + } + ) this.kernel = c3.SklearnGPRKernel( name=kernel_name, From 43ba2e136884d0fc040568d47b6024cbc12f7753 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 9 
Mar 2022 06:46:17 -0600 Subject: [PATCH 116/188] tiny fix --- .../GPRegression/kernels/SklearnGPRKernelExpSineSquared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.py index 6b268185..544e10db 100644 --- a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.py +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.py @@ -10,7 +10,7 @@ def build(this): kernel_name = 'ExpSineSquared' kernel_hyperParameters = c3.c3Make( "map", {"lengthScale": this.lengthScale, - "periodicity": this.pediodicity + "periodicity": this.periodicity } ) From 5fa2765f2fccc89dd26c0df29b1f5fc0ace5b56a Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 15 Mar 2022 09:35:23 -0500 Subject: [PATCH 117/188] add container field and methods to upsert depending on that --- .../Simulations/SimulationOutputFile.c3typ | 9 +- .../Simulations/SimulationOutputFile.py | 159 +++++++++++------- 2 files changed, 110 insertions(+), 58 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ index 62b437b4..096a1354 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ @@ -8,11 +8,18 @@ entity type SimulationOutputFile schema name 'SMLTN_OTPT_FL' { dateTag: datetime // The simulation output file file: !File + // The container this file belongs to + container: !string enum('acure-aircraft', 'monthly-mean') // processed processed: boolean post default "false" - // METHODS // Load data from this file into {@link 
SimulationModelOutput} @py(env='gordon_1_0_0') + upsertAcureAircraftData: member function(): boolean + // Load data from this file into {@link SimulationMonthlyMeanOutput} + @py(env='gordon_1_0_0') + upsertMonthlyMeanData: member function(): boolean + // Load data from all containers + @py(env='gordon_1_0_0') upsertData: member function(): boolean } diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index 412cede4..d505d3ac 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -1,73 +1,118 @@ -def upsertData(this): +def upsertAcureAircraftData(this): """ - Function to Open files in the SimulationOutputFile table and then populate SimulationModelOutput and SimulationModelOutputSeries data. + Function to Open files in the SimulationOutputFile table with acure-aircraft container and then populate SimulationModelOutput and SimulationModelOutputSeries data. - Arguments: -this: an instance of SimulationOutputFile - Returns: - -bool: True if file was processed, false if file has already been processed + -bool: True if file was processed, false if file has already been processed or if container type does not match. 
""" from datetime import datetime, timedelta import pandas as pd - # open file - sample = c3.NetCDFUtil.openFile(this.file.url) + # verify file container + if(this.container == 'acure-aircraft'): + #open file + sample = c3.NetCDFUtil.openFile(this.file.url) - # cast it to dataframe - df = pd.DataFrame() - df['time'] = sample.variables['time'][:] - df['longitude'] = sample.variables['longitude'][:] - df['latitude'] = sample.variables['latitude'][:] - df['altitude'] = sample.variables['altitude'][:] - df['model_level_number'] = sample.variables['model_level_number'][:] - df['air_potential_temperature']= sample.variables['air_potential_temperature'][:] - df['air_pressure'] = sample.variables['air_pressure'][:] - df['cloud_flag'] = sample.variables['m01s38i478'][:] - df['cdnc_x_cloud_flag'] = sample.variables['m01s38i479'][:] - df['ambient_extinction_550'] = sample.variables['m01s02i530_550nm'][:] - df['ambient_scattering_550'] = sample.variables['m01s02i532_550nm'][:] - df['num_nuc'] = sample.variables['number_of_particles_per_air_molecule_of_soluble_nucleation_mode_aerosol_in_air'][:] - df['num_Ait'] = sample.variables['number_of_particles_per_air_molecule_of_soluble_aitken_mode_aerosol_in_air'][:] - df['num_acc'] = sample.variables['number_of_particles_per_air_molecule_of_soluble_accumulation_mode_aerosol_in_air'][:] - df['num_cor'] = sample.variables['number_of_particles_per_air_molecule_of_soluble_coarse_mode_aerosol_in_air'][:] - df['num_Aitins'] = sample.variables['number_of_particles_per_air_molecule_of_insoluble_aitken_mode_aerosol_in_air'][:] - df['mass_SU_Ait'] = sample.variables['mass_fraction_of_sulfuric_acid_in_soluble_aitken_mode_dry_aerosol_in_air'][:] - df['mass_SU_acc'] = sample.variables['mass_fraction_of_sulfuric_acid_in_soluble_accumulation_mode_dry_aerosol_in_air'][:] - df['mass_SU_cor'] = sample.variables['mass_fraction_of_sulfuric_acid_in_soluble_coarse_mode_dry_aerosol_in_air'][:] - df['mass_BC_Ait'] = 
sample.variables['mass_fraction_of_black_carbon_in_soluble_aitken_mode_dry_aerosol_in_air'][:] - df['mass_BC_acc'] = sample.variables['mass_fraction_of_black_carbon_in_soluble_accumulation_mode_dry_aerosol_in_air'][:] - df['mass_BC_cor'] = sample.variables['mass_fraction_of_black_carbon_in_soluble_coarse_mode_dry_aerosol_in_air'][:] - df['mass_BC_Aitins'] = sample.variables['mass_fraction_of_black_carbon_in_insoluble_aitken_mode_dry_aerosol_in_air'][:] - df['mass_OC_Ait'] = sample.variables['mass_fraction_of_particulate_organic_matter_in_soluble_aitken_mode_dry_aerosol_in_air'][:] - df['mass_OC_acc'] = sample.variables['mass_fraction_of_particulate_organic_matter_in_soluble_accumulation_mode_dry_aerosol_in_air'][:] - df['mass_OC_cor'] = sample.variables['mass_fraction_of_particulate_organic_matter_in_soluble_coarse_mode_dry_aerosol_in_air'][:] - df['mass_OC_Aitins'] = sample.variables['mass_fraction_of_particulate_organic_matter_in_insoluble_aitken_mode_dry_aerosol_in_air'][:] - df['mass_SS_acc'] = sample.variables['mass_fraction_of_seasalt_in_soluble_accumulation_mode_dry_aerosol_in_air'][:] - df['mass_SS_cor'] = sample.variables['mass_fraction_of_seasalt_in_soluble_coarse_mode_dry_aerosol_in_air'][:] - # a little gymnastic to get Datetime objs - zero_time = datetime(1970,1,1,0,0) - transformed_times = [] - for time in df['time']: - target_time = zero_time + timedelta(hours=time) - transformed_times.append(target_time) - df['start'] = transformed_times - df.drop(columns=['time'], inplace=True) + # cast it to dataframe + df = pd.DataFrame() + df['time'] = sample.variables['time'][:] + df['longitude'] = sample.variables['longitude'][:] + df['latitude'] = sample.variables['latitude'][:] + df['altitude'] = sample.variables['altitude'][:] + df['model_level_number'] = sample.variables['model_level_number'][:] + df['air_potential_temperature']= sample.variables['air_potential_temperature'][:] + df['air_pressure'] = sample.variables['air_pressure'][:] + df['cloud_flag'] = 
sample.variables['m01s38i478'][:] + df['cdnc_x_cloud_flag'] = sample.variables['m01s38i479'][:] + df['ambient_extinction_550'] = sample.variables['m01s02i530_550nm'][:] + df['ambient_scattering_550'] = sample.variables['m01s02i532_550nm'][:] + df['num_nuc'] = sample.variables['number_of_particles_per_air_molecule_of_soluble_nucleation_mode_aerosol_in_air'][:] + df['num_Ait'] = sample.variables['number_of_particles_per_air_molecule_of_soluble_aitken_mode_aerosol_in_air'][:] + df['num_acc'] = sample.variables['number_of_particles_per_air_molecule_of_soluble_accumulation_mode_aerosol_in_air'][:] + df['num_cor'] = sample.variables['number_of_particles_per_air_molecule_of_soluble_coarse_mode_aerosol_in_air'][:] + df['num_Aitins'] = sample.variables['number_of_particles_per_air_molecule_of_insoluble_aitken_mode_aerosol_in_air'][:] + df['mass_SU_Ait'] = sample.variables['mass_fraction_of_sulfuric_acid_in_soluble_aitken_mode_dry_aerosol_in_air'][:] + df['mass_SU_acc'] = sample.variables['mass_fraction_of_sulfuric_acid_in_soluble_accumulation_mode_dry_aerosol_in_air'][:] + df['mass_SU_cor'] = sample.variables['mass_fraction_of_sulfuric_acid_in_soluble_coarse_mode_dry_aerosol_in_air'][:] + df['mass_BC_Ait'] = sample.variables['mass_fraction_of_black_carbon_in_soluble_aitken_mode_dry_aerosol_in_air'][:] + df['mass_BC_acc'] = sample.variables['mass_fraction_of_black_carbon_in_soluble_accumulation_mode_dry_aerosol_in_air'][:] + df['mass_BC_cor'] = sample.variables['mass_fraction_of_black_carbon_in_soluble_coarse_mode_dry_aerosol_in_air'][:] + df['mass_BC_Aitins'] = sample.variables['mass_fraction_of_black_carbon_in_insoluble_aitken_mode_dry_aerosol_in_air'][:] + df['mass_OC_Ait'] = sample.variables['mass_fraction_of_particulate_organic_matter_in_soluble_aitken_mode_dry_aerosol_in_air'][:] + df['mass_OC_acc'] = sample.variables['mass_fraction_of_particulate_organic_matter_in_soluble_accumulation_mode_dry_aerosol_in_air'][:] + df['mass_OC_cor'] = 
sample.variables['mass_fraction_of_particulate_organic_matter_in_soluble_coarse_mode_dry_aerosol_in_air'][:] + df['mass_OC_Aitins'] = sample.variables['mass_fraction_of_particulate_organic_matter_in_insoluble_aitken_mode_dry_aerosol_in_air'][:] + df['mass_SS_acc'] = sample.variables['mass_fraction_of_seasalt_in_soluble_accumulation_mode_dry_aerosol_in_air'][:] + df['mass_SS_cor'] = sample.variables['mass_fraction_of_seasalt_in_soluble_coarse_mode_dry_aerosol_in_air'][:] + # a little gymnastic to get Datetime objs + zero_time = datetime(1970,1,1,0,0) + transformed_times = [] + for time in df['time']: + target_time = zero_time + timedelta(hours=time) + transformed_times.append(target_time) + df['start'] = transformed_times + df.drop(columns=['time'], inplace=True) + + parent_id = "SMOS_" + this.simulationSample.id + df['parent'] = parent_id - parent_id = "SMOS_" + this.simulationSample.id - df['parent'] = parent_id + now_time = datetime.now() + diff_time = (now_time - zero_time) + versionTag= -1 * diff_time.total_seconds() + df['dataVersion'] = versionTag - now_time = datetime.now() - diff_time = (now_time - zero_time) - versionTag= -1 * diff_time.total_seconds() - df['dataVersion'] = versionTag + output_records = df.to_dict(orient="records") - output_records = df.to_dict(orient="records") + # upsert this batch + c3.SimulationModelOutput.upsertBatch(objs=output_records) + + this.processed = True + c3.SimulationOutputFile.merge(this) + return True + else: + return False + - # upsert this batch - c3.SimulationModelOutput.upsertBatch(objs=output_records) - this.processed = True - c3.SimulationOutputFile.merge(this) - return True - \ No newline at end of file +def upsertAcureAircraftData(this): + """ + Function to Open files in the SimulationOutputFile table with monthly-mean container and then populate SimulationMonthlyMeanOutput data. 
+ + - Arguments: + -this: an instance of SimulationOutputFile + + - Returns: + -bool: True if file was processed, false if file has already been processed or if container type does not match. + """ + from datetime import datetime, timedelta + import pandas as pd + + # verify file container + if(this.container == 'monthly-mean'): + return True + #open file + #sample = c3.NetCDFUtil.openFile(this.file.url) + + else: + return False + + +def upsertData(this): + """ + Function to Open files in the SimulationOutputFile table then populates Simulation***Output data. + + - Arguments: + -this: an instance of SimulationOutputFile + + - Returns: + -bool: True if file was processed, false if file has already been processed + """ + if(this.container == 'monthly-mean'): + return this.upsertMonthlyMeanData() + elif(this.container == 'acure-aircraft'): + return this.upsertAcureAircraftData() + else: + return False \ No newline at end of file From c5b7cbfa8afb5a56a4e8f44c547a74c6b9e85c5f Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 15 Mar 2022 11:30:06 -0500 Subject: [PATCH 118/188] just a bogus change --- .../src/entity/Simulations/SimulationModelOutput.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationModelOutput.c3typ b/training/gordon-group/src/entity/Simulations/SimulationModelOutput.c3typ index b052c7c6..88399610 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationModelOutput.c3typ +++ b/training/gordon-group/src/entity/Simulations/SimulationModelOutput.c3typ @@ -20,7 +20,7 @@ entity type SimulationModelOutput mixes TimedDataPoint Date: Tue, 15 Mar 2022 11:31:18 -0500 Subject: [PATCH 119/188] another bogus change --- .../src/entity/Simulations/SimulationModelOutput.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationModelOutput.c3typ 
b/training/gordon-group/src/entity/Simulations/SimulationModelOutput.c3typ index 88399610..b70db2c0 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationModelOutput.c3typ +++ b/training/gordon-group/src/entity/Simulations/SimulationModelOutput.c3typ @@ -20,7 +20,7 @@ entity type SimulationModelOutput mixes TimedDataPoint Date: Tue, 15 Mar 2022 13:55:46 -0500 Subject: [PATCH 120/188] attempt to create entity type --- .../SimulationMonthlyMeanOutput.c3typ | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 training/gordon-group/src/entity/Simulations/SimulationMonthlyMeanOutput.c3typ diff --git a/training/gordon-group/src/entity/Simulations/SimulationMonthlyMeanOutput.c3typ b/training/gordon-group/src/entity/Simulations/SimulationMonthlyMeanOutput.c3typ new file mode 100644 index 00000000..3a7f6e1d --- /dev/null +++ b/training/gordon-group/src/entity/Simulations/SimulationMonthlyMeanOutput.c3typ @@ -0,0 +1,29 @@ +/** +* SimulationMonthlyMeanOutput.c3typ +* A single monthly-mean output taken from a single [SimulationSample](type:SimulationSample) +*/ + +entity type SimulationMonthlyMeanOutput schema name 'SMLTN_MNTHLY_MN_TPT' { + // The {@link SimulationSample} this output belongs to + simulationSample: !SimulationSample + // dust + dust: !double + // soluble aitken mode + solubleAitkenMode: !double + // soluble accumulation mode + solubleAccumulationMode: !double + // soluble coarse mode + solubleCoarseMode: !double + // insoluble aitken mode + insolubleAitkenMode: !double + // insoluble accumulation mode + insolubleAccumulationMode: !double + // insoluble coarse mode + insolubleCoarseMode: !double + // latitude + latitude: !double + // longitude + longitude: !double + // time stamp + time: !datetime +} \ No newline at end of file From fe22bf3f7107d32faea5c69562b597d9519c5c44 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 15 Mar 2022 14:33:16 -0500 Subject: [PATCH 121/188] add js method to create SimulationOutputFile 
objects --- .../Simulations/SimulationOutputFile.py | 16 +++++- .../entity/Simulations/SimulationSample.js | 57 ++++++++++++++++--- 2 files changed, 62 insertions(+), 11 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index d505d3ac..f28b57f2 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -92,9 +92,21 @@ def upsertAcureAircraftData(this): # verify file container if(this.container == 'monthly-mean'): - return True + variable_names = { + "dust" : "atmosphere_optical_thickness_due_to_dust_ambient_aerosol", + "solubleAitkenMode" : "atmosphere_optical_thickness_due_to_soluble_aitken_mode_ambient_aerosol", + "solubleAccumulationMode" : "atmosphere_optical_thickness_due_to_soluble_accumulation_mode_ambient_aerosol", + "solubleCoarseMode" : "atmosphere_optical_thickness_due_to_soluble_coarse_mode_ambient_aerosol", + "insolubleAitkenMode" : "atmosphere_optical_thickness_due_to_insoluble_aitken_mode_ambient_aerosol", + "insolubleAccumulationMode" : "atmosphereOpticalThickness_due_to_insoluble_accumulation_mode_ambient_aerosol", + "insolubleCoarseMode" : "atmosphere_optical_thickness_due_to_insoluble_coarse_mode_ambient_aerosol" + } #open file - #sample = c3.NetCDFUtil.openFile(this.file.url) + sample = c3.NetCDFUtil.openFile(this.file.url) + df = pd.DataFrame() + + + return True else: return False diff --git a/training/gordon-group/src/entity/Simulations/SimulationSample.js b/training/gordon-group/src/entity/Simulations/SimulationSample.js index bee941ed..e5ca011d 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationSample.js +++ b/training/gordon-group/src/entity/Simulations/SimulationSample.js @@ -25,11 +25,11 @@ function afterCreate(objs) { var ensemble = SimulationEnsemble.fetch({ filter: Filter.eq("id",obj.ensemble.id) 
}).objs[0] - // AZURE DIRECTORY PATH HERE: change 'gordon-group; to whatever you need + + // ACURE-AIRCRAFT CONTAINER var ensemblePath = FileSystem.inst().rootUrl() + 'gordon-group/' + ensemble.name + '/'; var prePathToAllFiles = ensemblePath + ensemble.prePathToFiles; var pathToSample = prePathToAllFiles + padStart(String(obj.simulationNumber),3,'0'); - var sampleFiles = FileSystem.inst().listFiles(pathToSample).files; // Remove non-NetCDF files from list for (var i = 0; i < sampleFiles.length; i++) { @@ -38,6 +38,24 @@ function afterCreate(objs) { sampleFiles.splice(i,1); } } + + // MONTHLY-MEAN CONTAINER + var pathToAllFiles = "azure://monthly-mean-simulations/"; + var sampleFiles2 = FileSystem.inst().listFiles(pathToAllFiles.files); + // remove non netcdf stuff + for (var i = 0; i < sampleFiles2.length; i++) { + var sf = sampleFiles2[i]; + if (sf.url.slice(-3) !== ".nc") { + sampleFiles2.splice(i,1); + } + else if (sf.url.slice(-6,-3) !== padStart(String(obj.simulationNumber), 3, '0')) { + sampleFiles2.splice(i,1); + } + } + + // put two containers together + sampleFiles = sampleFiles.concat(sampleFiles2); + return sampleFiles.map(createSimOutFiles); function padStart(text, length, pad) { @@ -45,19 +63,40 @@ function afterCreate(objs) { } function createSimOutFiles(file) { - var year = file.url.slice(-11,-7); - var month = file.url.slice(-7,-5); - var day = file.url.slice(-5,-3); - var date_str = year + "-" + month + "-" + day; - return SimulationOutputFile.make({ + if (file.url.slice(0,32) === "azure://monthly-mean-simulations") { + var year = file.url.slice(42,46); + var month = file.url.slice(46,48); + var day = file.slice(48,50); + var date_str = year + "-" + month + "-" + day; + var container = "monthly-mean"; + return SimulationOutputFile.make({ + "simulationSample": obj, + "file": File.make({ + "url": file.url + }), + "dateTag": DateTime.make({ + "value": date_str + }), + "container": container + }); + } + else { + var year = file.url.slice(-11,-7); + 
var month = file.url.slice(-7,-5); + var day = file.url.slice(-5,-3); + var date_str = year + "-" + month + "-" + day; + var container = "acure-aircraft"; + return SimulationOutputFile.make({ "simulationSample": obj, "file": File.make({ "url": file.url }), "dateTag": DateTime.make({ "value": date_str - }) - }); + }), + "container": container + }); + } } } }; From 2460c9729741734b3d107794b2b28e3091f857bb Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 15 Mar 2022 15:01:22 -0500 Subject: [PATCH 122/188] little fix and adding one fake SimSam to test --- .../SimulationModelParametersMap.csv | 3 ++- .../gordon-group/src/entity/Simulations/SimulationSample.js | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/seed/SourceSimulationModelParametersMap/SimulationModelParametersMap.csv b/training/gordon-group/seed/SourceSimulationModelParametersMap/SimulationModelParametersMap.csv index 4a8f8eb9..4dbc0618 100644 --- a/training/gordon-group/seed/SourceSimulationModelParametersMap/SimulationModelParametersMap.csv +++ b/training/gordon-group/seed/SourceSimulationModelParametersMap/SimulationModelParametersMap.csv @@ -219,4 +219,5 @@ acure_aircraft,u-bs714_ens,1,0.816662113,0.515664036,0.802905339,1,0.566193943,0 acure_aircraft,u-bs714_ens,1,0.028482619,0.137399641,0.238849591,1,0.738906482,0.524010548,0.356609009,0.347079855,0.029771429,0.06404691,1,0.816968681,0.556520504,0.160632002,0.215303267,0.142351462,0.794423186,1,0.271949816,0.753744672,0.803044036,0.061267157,0.413326175,0.297453394,0.660060274,0.016539084,0.660054881,0.768132105,1,0.800717575,0.338075733,0.694526155,0.129864225,0.211885827,0.192071445,0.545154878,0.041307182,0.906801862,0.615796531,0.460932958,0.180696425,0.954293994,0.793580884,0.373458357,0.077677953,0.424398769,0.069870254,0.736601998,0.947263922,0.086535573,0.132397374,0.583853625,0.721476611,0.2252514,0.394430227,0.949027894,0.700238416,0.223646322,0.657565067,217 
acure_aircraft,u-bs714_ens,1,0.679847981,0.425611308,0.751152201,1,0.26866151,0.513832928,0.008653191,0.023502415,0.758724712,0.252429671,1,0.637311818,0.039921547,0.693588059,0.349736295,0.386816947,0.674885205,1,0.900985779,0.872612572,0.103753959,0.957860156,0.847441972,0.610742231,0.076840579,0.490167141,0.881933776,0.159700502,1,0.76825085,0.911707795,0.852736028,0.702110856,0.91108847,0.169301419,0.528540991,0.139189548,0.545399941,0.304925806,0.109389704,0.832041758,0.835475012,0.970205061,0.722445638,0.806407436,0.552126049,0.672105362,0.031637358,0.43583705,0.701781117,0.070954223,0.16445825,0.795320202,0.077112709,0.884923596,0.280834955,0.904690337,0.561142135,0.690033045,218 acure_aircraft,u-bs714_ens,1,0.813528842,0.762109612,0.246023,1,0.742926152,0.218259693,0.983677742,0.824760216,0.640673252,0.303962022,1,0.720878421,0.174521123,0.830359276,0.628510572,0.936381843,0.089146495,1,0.569404831,0.224871143,0.789870718,0.148935719,0.549052101,0.485166421,0.771852798,0.575853325,0.062264454,0.935830161,1,0.977375296,0.341833413,0.654948022,0.601238888,0.166125822,0.182086945,0.500895199,0.635050851,0.899083437,0.890126522,0.375119328,0.15582524,0.98782505,0.369302012,0.361060331,0.336216908,0.835517902,0.931005758,0.678850127,0.969081579,0.056672805,0.378428875,0.002837684,0.84206923,0.409880444,0.994296143,0.467971339,0.970648714,0.607856052,0.835527409,219 
-acure_aircraft,u-bs714_ens,1,0.409227521,0.468954547,0.423125894,1,0.062721093,0.489329108,0.683279961,0.005891246,0.552428136,0.133137421,1,0.4722196,0.284587406,0.117914625,0.68068933,0.530490239,0.058870405,1,0.444773736,0.043036874,0.767396497,0.938332703,0.222275099,0.781485508,0.119328921,0.957459864,0.391272393,0.167829166,1,0.505909211,0.021355555,0.493602899,0.45472629,0.951406771,0.596775603,0.051837809,0.752683635,0.924466545,0.01559588,0.676990166,0.544259482,0.342932513,0.780121543,0.837088917,0.010560531,0.593258211,0.728887192,0.620710287,0.796002441,0.247140792,0.803100366,0.189925239,0.099176835,0.153181803,0.346733698,0.984220346,0.861598759,0.749645048,0.238802259,220 \ No newline at end of file +acure_aircraft,u-bs714_ens,1,0.409227521,0.468954547,0.423125894,1,0.062721093,0.489329108,0.683279961,0.005891246,0.552428136,0.133137421,1,0.4722196,0.284587406,0.117914625,0.68068933,0.530490239,0.058870405,1,0.444773736,0.043036874,0.767396497,0.938332703,0.222275099,0.781485508,0.119328921,0.957459864,0.391272393,0.167829166,1,0.505909211,0.021355555,0.493602899,0.45472629,0.951406771,0.596775603,0.051837809,0.752683635,0.924466545,0.01559588,0.676990166,0.544259482,0.342932513,0.780121543,0.837088917,0.010560531,0.593258211,0.728887192,0.620710287,0.796002441,0.247140792,0.803100366,0.189925239,0.099176835,0.153181803,0.346733698,0.984220346,0.861598759,0.749645048,0.238802259,220 
+acure_aircraft,u-bs714_ens,1,0.409227521,0.468954547,0.423125894,1,0.062721093,0.489329108,0.683279961,0.005891246,0.552428136,0.133137421,1,0.4722196,0.284587406,0.117914625,0.68068933,0.530490239,0.058870405,1,0.444773736,0.043036874,0.767396497,0.938332703,0.222275099,0.781485508,0.119328921,0.957459864,0.391272393,0.167829166,1,0.505909211,0.021355555,0.493602899,0.45472629,0.951406771,0.596775603,0.051837809,0.752683635,0.924466545,0.01559588,0.676990166,0.544259482,0.342932513,0.780121543,0.837088917,0.010560531,0.593258211,0.728887192,0.620710287,0.796002441,0.247140792,0.803100366,0.189925239,0.099176835,0.153181803,0.346733698,0.984220346,0.861598759,0.749645048,0.238802259,221 \ No newline at end of file diff --git a/training/gordon-group/src/entity/Simulations/SimulationSample.js b/training/gordon-group/src/entity/Simulations/SimulationSample.js index e5ca011d..3046cd02 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationSample.js +++ b/training/gordon-group/src/entity/Simulations/SimulationSample.js @@ -41,7 +41,7 @@ function afterCreate(objs) { // MONTHLY-MEAN CONTAINER var pathToAllFiles = "azure://monthly-mean-simulations/"; - var sampleFiles2 = FileSystem.inst().listFiles(pathToAllFiles.files); + var sampleFiles2 = FileSystem.inst().listFiles(pathToAllFiles).files; // remove non netcdf stuff for (var i = 0; i < sampleFiles2.length; i++) { var sf = sampleFiles2[i]; From 8184c726ab670c6dc47ff0135a55e1afea4c8870 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 15 Mar 2022 15:34:03 -0500 Subject: [PATCH 123/188] fixed a bunch of bugs --- .../src/entity/Simulations/SimulationSample.js | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationSample.js b/training/gordon-group/src/entity/Simulations/SimulationSample.js index 3046cd02..fbc50b1b 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationSample.js +++ 
b/training/gordon-group/src/entity/Simulations/SimulationSample.js @@ -51,6 +51,10 @@ function afterCreate(objs) { else if (sf.url.slice(-6,-3) !== padStart(String(obj.simulationNumber), 3, '0')) { sampleFiles2.splice(i,1); } + else if (sf.url.slice(33,36) === "aug") { + sampleFiles2.splice(i,1); + } + } // put two containers together @@ -64,9 +68,9 @@ function afterCreate(objs) { function createSimOutFiles(file) { if (file.url.slice(0,32) === "azure://monthly-mean-simulations") { - var year = file.url.slice(42,46); - var month = file.url.slice(46,48); - var day = file.slice(48,50); + var year = file.url.slice(-18,-14); + var month = file.url.slice(-14,-12); + var day = file.url.slice(-12,-10); var date_str = year + "-" + month + "-" + day; var container = "monthly-mean"; return SimulationOutputFile.make({ From d5088c12f23ab3c0fc5046dfed44828512f04973 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 16 Mar 2022 09:15:31 -0500 Subject: [PATCH 124/188] this seems to be working now --- .../entity/Simulations/SimulationSample.js | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationSample.js b/training/gordon-group/src/entity/Simulations/SimulationSample.js index fbc50b1b..eadb416e 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationSample.js +++ b/training/gordon-group/src/entity/Simulations/SimulationSample.js @@ -30,31 +30,29 @@ function afterCreate(objs) { var ensemblePath = FileSystem.inst().rootUrl() + 'gordon-group/' + ensemble.name + '/'; var prePathToAllFiles = ensemblePath + ensemble.prePathToFiles; var pathToSample = prePathToAllFiles + padStart(String(obj.simulationNumber),3,'0'); - var sampleFiles = FileSystem.inst().listFiles(pathToSample).files; + var allAAFiles = FileSystem.inst().listFiles(pathToSample).files; + var sampleFiles = new Array(); + // Remove non-NetCDF files from list - for (var i = 0; i < sampleFiles.length; i++) { - var sf = 
sampleFiles[i]; - if (sf.url.slice(-3) !== ".nc") { - sampleFiles.splice(i,1); + for (var i = 0; i < allAAFiles.length; i++) { + var sf = allAAFiles[i]; + if (sf.url.slice(-3) === ".nc") { + sampleFiles.push(sf); } } // MONTHLY-MEAN CONTAINER var pathToAllFiles = "azure://monthly-mean-simulations/"; - var sampleFiles2 = FileSystem.inst().listFiles(pathToAllFiles).files; - // remove non netcdf stuff - for (var i = 0; i < sampleFiles2.length; i++) { - var sf = sampleFiles2[i]; - if (sf.url.slice(-3) !== ".nc") { - sampleFiles2.splice(i,1); - } - else if (sf.url.slice(-6,-3) !== padStart(String(obj.simulationNumber), 3, '0')) { - sampleFiles2.splice(i,1); - } - else if (sf.url.slice(33,36) === "aug") { - sampleFiles2.splice(i,1); - } + var allMMFiles = FileSystem.inst().listFiles(pathToAllFiles).files; + var simString = padStart(String(obj.simulationNumber), 3, '0'); + var sampleFiles2 = new Array(); + // find correct simNumber, rm non-netcdf stuff, ommit "aug" folder + for (var i = 0; i < allMMFiles.length; i++) { + var sf = allMMFiles[i]; + if (sf.url.slice(-6,-3) === simString && sf.url.slice(-3) === ".nc" && sf.url.slice(33,36) !== "aug") { + sampleFiles2.push(sf); + } } // put two containers together From 92894cdf3dc20291dc7d244bde2c7c1b7829b686 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 16 Mar 2022 09:19:06 -0500 Subject: [PATCH 125/188] retrigger prov? --- .../gordon-group/src/entity/Simulations/SimulationSample.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationSample.js b/training/gordon-group/src/entity/Simulations/SimulationSample.js index eadb416e..f2df00bc 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationSample.js +++ b/training/gordon-group/src/entity/Simulations/SimulationSample.js @@ -41,7 +41,7 @@ function afterCreate(objs) { } } - // MONTHLY-MEAN CONTAINER + // MONTHLY-MEAN CONTAINER... 
var pathToAllFiles = "azure://monthly-mean-simulations/"; var allMMFiles = FileSystem.inst().listFiles(pathToAllFiles).files; var simString = padStart(String(obj.simulationNumber), 3, '0'); From e820a4f9a62d5192ecc4ce0ddf375519c58ca4f2 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 16 Mar 2022 10:17:11 -0500 Subject: [PATCH 126/188] adding python method for monthly mean upsert --- .../Simulations/SimulationOutputFile.py | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index f28b57f2..5acca095 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -77,7 +77,7 @@ def upsertAcureAircraftData(this): -def upsertAcureAircraftData(this): +def upsertMonthlyMeanData(this): """ Function to Open files in the SimulationOutputFile table with monthly-mean container and then populate SimulationMonthlyMeanOutput data. @@ -89,6 +89,7 @@ def upsertAcureAircraftData(this): """ from datetime import datetime, timedelta import pandas as pd + import numpy as np # verify file container if(this.container == 'monthly-mean'): @@ -104,7 +105,35 @@ def upsertAcureAircraftData(this): #open file sample = c3.NetCDFUtil.openFile(this.file.url) df = pd.DataFrame() - + + # this is to take care of variables that need to be flattened + for var in variable_names: + netcdf_name = variable_names[var] + tensor = sample[netcdf_name][:][2,:,:,:] + tensor = np.array(tensor).flatten() + df[var] = tensor + + # now latitude, longitude and time + lat = sample["latitude"][:] + lon = [x*(x < 180) + (x - 360)*(x >= 180) for x in sample["longitude"][:]] + # this file times + ts = this.dateTag + # take a look at thsis: is 24 = 0? 
+ times = [ts.replace(hour=3), ts.replace(hour=6), ts.replace(hour=9), + ts.replace(hour=12), ts.replace(hour=15), ts.replace(hour=18), + ts.replace(hour=21), ts.replace(hour=0)] + + df["time"] = [t for t in times for n in range(0, len(lat)*len(lon))] + df["latitude"] = [l for l in lat for n in range(0, len(lon))]*len(times) + df["longitude"] = [l for l in lon]*len(times)*len(lat) + + + # cast everything into dict and upsert + output_records = df.to_dict(orient="records") + c3.SimulationMonthlyMeanOutput.upsertBatch(objs=output_records) + + this.processed = True + c3.SimulationOutputFile.merge(this) return True From 8c90ad21e680e297d05a1d1a693c5490f55d3ae3 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 16 Mar 2022 11:14:34 -0500 Subject: [PATCH 127/188] trying that again with loop over dict items --- .../SimulationModelParametersMap.csv | 3 +-- .../src/entity/Simulations/SimulationOutputFile.py | 7 +++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/training/gordon-group/seed/SourceSimulationModelParametersMap/SimulationModelParametersMap.csv b/training/gordon-group/seed/SourceSimulationModelParametersMap/SimulationModelParametersMap.csv index 4dbc0618..4a8f8eb9 100644 --- a/training/gordon-group/seed/SourceSimulationModelParametersMap/SimulationModelParametersMap.csv +++ b/training/gordon-group/seed/SourceSimulationModelParametersMap/SimulationModelParametersMap.csv @@ -219,5 +219,4 @@ acure_aircraft,u-bs714_ens,1,0.816662113,0.515664036,0.802905339,1,0.566193943,0 
acure_aircraft,u-bs714_ens,1,0.028482619,0.137399641,0.238849591,1,0.738906482,0.524010548,0.356609009,0.347079855,0.029771429,0.06404691,1,0.816968681,0.556520504,0.160632002,0.215303267,0.142351462,0.794423186,1,0.271949816,0.753744672,0.803044036,0.061267157,0.413326175,0.297453394,0.660060274,0.016539084,0.660054881,0.768132105,1,0.800717575,0.338075733,0.694526155,0.129864225,0.211885827,0.192071445,0.545154878,0.041307182,0.906801862,0.615796531,0.460932958,0.180696425,0.954293994,0.793580884,0.373458357,0.077677953,0.424398769,0.069870254,0.736601998,0.947263922,0.086535573,0.132397374,0.583853625,0.721476611,0.2252514,0.394430227,0.949027894,0.700238416,0.223646322,0.657565067,217 acure_aircraft,u-bs714_ens,1,0.679847981,0.425611308,0.751152201,1,0.26866151,0.513832928,0.008653191,0.023502415,0.758724712,0.252429671,1,0.637311818,0.039921547,0.693588059,0.349736295,0.386816947,0.674885205,1,0.900985779,0.872612572,0.103753959,0.957860156,0.847441972,0.610742231,0.076840579,0.490167141,0.881933776,0.159700502,1,0.76825085,0.911707795,0.852736028,0.702110856,0.91108847,0.169301419,0.528540991,0.139189548,0.545399941,0.304925806,0.109389704,0.832041758,0.835475012,0.970205061,0.722445638,0.806407436,0.552126049,0.672105362,0.031637358,0.43583705,0.701781117,0.070954223,0.16445825,0.795320202,0.077112709,0.884923596,0.280834955,0.904690337,0.561142135,0.690033045,218 
acure_aircraft,u-bs714_ens,1,0.813528842,0.762109612,0.246023,1,0.742926152,0.218259693,0.983677742,0.824760216,0.640673252,0.303962022,1,0.720878421,0.174521123,0.830359276,0.628510572,0.936381843,0.089146495,1,0.569404831,0.224871143,0.789870718,0.148935719,0.549052101,0.485166421,0.771852798,0.575853325,0.062264454,0.935830161,1,0.977375296,0.341833413,0.654948022,0.601238888,0.166125822,0.182086945,0.500895199,0.635050851,0.899083437,0.890126522,0.375119328,0.15582524,0.98782505,0.369302012,0.361060331,0.336216908,0.835517902,0.931005758,0.678850127,0.969081579,0.056672805,0.378428875,0.002837684,0.84206923,0.409880444,0.994296143,0.467971339,0.970648714,0.607856052,0.835527409,219 -acure_aircraft,u-bs714_ens,1,0.409227521,0.468954547,0.423125894,1,0.062721093,0.489329108,0.683279961,0.005891246,0.552428136,0.133137421,1,0.4722196,0.284587406,0.117914625,0.68068933,0.530490239,0.058870405,1,0.444773736,0.043036874,0.767396497,0.938332703,0.222275099,0.781485508,0.119328921,0.957459864,0.391272393,0.167829166,1,0.505909211,0.021355555,0.493602899,0.45472629,0.951406771,0.596775603,0.051837809,0.752683635,0.924466545,0.01559588,0.676990166,0.544259482,0.342932513,0.780121543,0.837088917,0.010560531,0.593258211,0.728887192,0.620710287,0.796002441,0.247140792,0.803100366,0.189925239,0.099176835,0.153181803,0.346733698,0.984220346,0.861598759,0.749645048,0.238802259,220 
-acure_aircraft,u-bs714_ens,1,0.409227521,0.468954547,0.423125894,1,0.062721093,0.489329108,0.683279961,0.005891246,0.552428136,0.133137421,1,0.4722196,0.284587406,0.117914625,0.68068933,0.530490239,0.058870405,1,0.444773736,0.043036874,0.767396497,0.938332703,0.222275099,0.781485508,0.119328921,0.957459864,0.391272393,0.167829166,1,0.505909211,0.021355555,0.493602899,0.45472629,0.951406771,0.596775603,0.051837809,0.752683635,0.924466545,0.01559588,0.676990166,0.544259482,0.342932513,0.780121543,0.837088917,0.010560531,0.593258211,0.728887192,0.620710287,0.796002441,0.247140792,0.803100366,0.189925239,0.099176835,0.153181803,0.346733698,0.984220346,0.861598759,0.749645048,0.238802259,221 \ No newline at end of file +acure_aircraft,u-bs714_ens,1,0.409227521,0.468954547,0.423125894,1,0.062721093,0.489329108,0.683279961,0.005891246,0.552428136,0.133137421,1,0.4722196,0.284587406,0.117914625,0.68068933,0.530490239,0.058870405,1,0.444773736,0.043036874,0.767396497,0.938332703,0.222275099,0.781485508,0.119328921,0.957459864,0.391272393,0.167829166,1,0.505909211,0.021355555,0.493602899,0.45472629,0.951406771,0.596775603,0.051837809,0.752683635,0.924466545,0.01559588,0.676990166,0.544259482,0.342932513,0.780121543,0.837088917,0.010560531,0.593258211,0.728887192,0.620710287,0.796002441,0.247140792,0.803100366,0.189925239,0.099176835,0.153181803,0.346733698,0.984220346,0.861598759,0.749645048,0.238802259,220 \ No newline at end of file diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index 5acca095..10e1b6cd 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -107,11 +107,10 @@ def upsertMonthlyMeanData(this): df = pd.DataFrame() # this is to take care of variables that need to be flattened - for var in variable_names: - netcdf_name = variable_names[var] - tensor = 
sample[netcdf_name][:][2,:,:,:] + for var in variable_names.items(): + tensor = sample[var[1]][:][2,:,:,:] tensor = np.array(tensor).flatten() - df[var] = tensor + df[var[0]] = tensor # now latitude, longitude and time lat = sample["latitude"][:] From 3934a63a0d73a350d2ed63bc5f10279f036e7ecd Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 16 Mar 2022 11:35:43 -0500 Subject: [PATCH 128/188] the tiniest of fixes... --- .../gordon-group/src/entity/Simulations/SimulationOutputFile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index 10e1b6cd..4051d89d 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -99,7 +99,7 @@ def upsertMonthlyMeanData(this): "solubleAccumulationMode" : "atmosphere_optical_thickness_due_to_soluble_accumulation_mode_ambient_aerosol", "solubleCoarseMode" : "atmosphere_optical_thickness_due_to_soluble_coarse_mode_ambient_aerosol", "insolubleAitkenMode" : "atmosphere_optical_thickness_due_to_insoluble_aitken_mode_ambient_aerosol", - "insolubleAccumulationMode" : "atmosphereOpticalThickness_due_to_insoluble_accumulation_mode_ambient_aerosol", + "insolubleAccumulationMode" : "atmosphere_optical_thickness_due_to_insoluble_accumulation_mode_ambient_aerosol", "insolubleCoarseMode" : "atmosphere_optical_thickness_due_to_insoluble_coarse_mode_ambient_aerosol" } #open file From bb535736318869d1b2e46611b8786edce7395e4e Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 16 Mar 2022 13:03:27 -0500 Subject: [PATCH 129/188] add SimSam field --- .../src/entity/Simulations/SimulationOutputFile.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py 
index 4051d89d..94acd79a 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -126,6 +126,9 @@ def upsertMonthlyMeanData(this): df["latitude"] = [l for l in lat for n in range(0, len(lon))]*len(times) df["longitude"] = [l for l in lon]*len(times)*len(lat) + # now the SimulationSample field + df["simulationSample"] = this.simulationSample + # cast everything into dict and upsert output_records = df.to_dict(orient="records") From bd8eef4b3b4782597929a7499ca106a1a2b6d49c Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 17 Mar 2022 09:27:19 -0500 Subject: [PATCH 130/188] add months to container search --- .../entity/Simulations/SimulationSample.js | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationSample.js b/training/gordon-group/src/entity/Simulations/SimulationSample.js index f2df00bc..24c898ee 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationSample.js +++ b/training/gordon-group/src/entity/Simulations/SimulationSample.js @@ -42,16 +42,21 @@ function afterCreate(objs) { } // MONTHLY-MEAN CONTAINER... 
- var pathToAllFiles = "azure://monthly-mean-simulations/"; - var allMMFiles = FileSystem.inst().listFiles(pathToAllFiles).files; var simString = padStart(String(obj.simulationNumber), 3, '0'); - var sampleFiles2 = new Array(); - - // find correct simNumber, rm non-netcdf stuff, ommit "aug" folder - for (var i = 0; i < allMMFiles.length; i++) { - var sf = allMMFiles[i]; - if (sf.url.slice(-6,-3) === simString && sf.url.slice(-3) === ".nc" && sf.url.slice(33,36) !== "aug") { - sampleFiles2.push(sf); + var sampleFiles2 = new Array(); + + const months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', + 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']; + var containerRoot = "azure://monthly-mean-simulations/"; + + for (var month in months) { + var pathToFiles = containerRoot + month + "/"; + var fileStream = FileSystem.inst().listFilesStream(pathToFiles); + while (fileStream.hasNext()) { + var file = fileStream.next(); + if (file.url.slice(-6,-3) === simString && file.url.slice(-3) === ".nc" && file.url.slice(37,42) !== 'ACURE') { + sampleFiles2.push(file); + } } } From dd1686db0ac1bd873b8dd7439386860c296caf5a Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 17 Mar 2022 09:43:46 -0500 Subject: [PATCH 131/188] fixing my js stupidity --- .../gordon-group/src/entity/Simulations/SimulationSample.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationSample.js b/training/gordon-group/src/entity/Simulations/SimulationSample.js index 24c898ee..6c7d02cf 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationSample.js +++ b/training/gordon-group/src/entity/Simulations/SimulationSample.js @@ -49,7 +49,7 @@ function afterCreate(objs) { 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']; var containerRoot = "azure://monthly-mean-simulations/"; - for (var month in months) { + for (const month of months) { var pathToFiles = containerRoot + month + "/"; var fileStream = FileSystem.inst().listFilesStream(pathToFiles); 
while (fileStream.hasNext()) { From dd9a3e561d58768aaad4fd92a7f388b3b05f21f5 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 17 Mar 2022 09:48:44 -0500 Subject: [PATCH 132/188] another js bug --- .../gordon-group/src/entity/Simulations/SimulationSample.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationSample.js b/training/gordon-group/src/entity/Simulations/SimulationSample.js index 6c7d02cf..3c553fa7 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationSample.js +++ b/training/gordon-group/src/entity/Simulations/SimulationSample.js @@ -49,7 +49,7 @@ function afterCreate(objs) { 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']; var containerRoot = "azure://monthly-mean-simulations/"; - for (const month of months) { + for (var month of months) { var pathToFiles = containerRoot + month + "/"; var fileStream = FileSystem.inst().listFilesStream(pathToFiles); while (fileStream.hasNext()) { From 915a00c807122164fbb41b537e0035d3da6ffff9 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 17 Mar 2022 09:52:02 -0500 Subject: [PATCH 133/188] it's complaining about a SEMICOLON........ 
--- .../gordon-group/src/entity/Simulations/SimulationSample.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationSample.js b/training/gordon-group/src/entity/Simulations/SimulationSample.js index 3c553fa7..927f4d30 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationSample.js +++ b/training/gordon-group/src/entity/Simulations/SimulationSample.js @@ -56,9 +56,9 @@ function afterCreate(objs) { var file = fileStream.next(); if (file.url.slice(-6,-3) === simString && file.url.slice(-3) === ".nc" && file.url.slice(37,42) !== 'ACURE') { sampleFiles2.push(file); - } - } - } + }; + }; + }; // put two containers together sampleFiles = sampleFiles.concat(sampleFiles2); From c62e2f674cfe5049ba1860dfa3c3160031e000fd Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 17 Mar 2022 09:55:26 -0500 Subject: [PATCH 134/188] ...... desperate commits again --- .../gordon-group/src/entity/Simulations/SimulationSample.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationSample.js b/training/gordon-group/src/entity/Simulations/SimulationSample.js index 927f4d30..55e9c0c7 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationSample.js +++ b/training/gordon-group/src/entity/Simulations/SimulationSample.js @@ -45,8 +45,7 @@ function afterCreate(objs) { var simString = padStart(String(obj.simulationNumber), 3, '0'); var sampleFiles2 = new Array(); - const months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', - 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']; + var months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']; var containerRoot = "azure://monthly-mean-simulations/"; for (var month of months) { From f9199cfd50b0f2f5f978c80ba47d94d679aab3e4 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 17 Mar 2022 09:58:30 -0500 Subject: [PATCH 135/188] c3 does not like interpreting 
loops... --- .../src/entity/Simulations/SimulationSample.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationSample.js b/training/gordon-group/src/entity/Simulations/SimulationSample.js index 55e9c0c7..d0806529 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationSample.js +++ b/training/gordon-group/src/entity/Simulations/SimulationSample.js @@ -38,8 +38,8 @@ function afterCreate(objs) { var sf = allAAFiles[i]; if (sf.url.slice(-3) === ".nc") { sampleFiles.push(sf); - } - } + }; + }; // MONTHLY-MEAN CONTAINER... var simString = padStart(String(obj.simulationNumber), 3, '0'); @@ -48,7 +48,8 @@ function afterCreate(objs) { var months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']; var containerRoot = "azure://monthly-mean-simulations/"; - for (var month of months) { + for (var i = 0; i < months.length; i++) { + var month = months[i]; var pathToFiles = containerRoot + month + "/"; var fileStream = FileSystem.inst().listFilesStream(pathToFiles); while (fileStream.hasNext()) { From adb3897dedbfc774c3b5eca00cf0b42e6a56d73d Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 17 Mar 2022 10:24:06 -0500 Subject: [PATCH 136/188] rename type, get rid of 0-ed columns --- .../Simulation3HourlyAODOutput.c3typ | 25 +++++++++++++++++++ .../SimulationMonthlyMeanOutput.c3typ | 10 +++----- .../Simulations/SimulationOutputFile.py | 7 ++---- 3 files changed, 30 insertions(+), 12 deletions(-) create mode 100644 training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutput.c3typ diff --git a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutput.c3typ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutput.c3typ new file mode 100644 index 00000000..821ac1e8 --- /dev/null +++ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutput.c3typ @@ -0,0 +1,25 @@ +/** +* 
Simulation3HourlyAODOutput.c3typ +* A single monthly-mean output taken from a single [SimulationSample](type:SimulationSample) +*/ + +entity type Simulation3HourlyAODOutput schema name 'SMLTN_3HAOD_TPT' { + // The {@link SimulationSample} this output belongs to + simulationSample: !SimulationSample + // dust + dust: !double + // soluble aitken mode + solubleAitkenMode: !double + // soluble accumulation mode + solubleAccumulationMode: !double + // soluble coarse mode + solubleCoarseMode: !double + // insoluble aitken mode + insolubleAitkenMode: !double + // latitude + latitude: !double + // longitude + longitude: !double + // time stamp + time: !datetime +} \ No newline at end of file diff --git a/training/gordon-group/src/entity/Simulations/SimulationMonthlyMeanOutput.c3typ b/training/gordon-group/src/entity/Simulations/SimulationMonthlyMeanOutput.c3typ index 3a7f6e1d..df7b4d3a 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationMonthlyMeanOutput.c3typ +++ b/training/gordon-group/src/entity/Simulations/SimulationMonthlyMeanOutput.c3typ @@ -1,9 +1,9 @@ /** -* SimulationMonthlyMeanOutput.c3typ -* A single monthly-mean output taken from a single [SimulationSample](type:SimulationSample) +* Simulation3HourlyAODOutput.c3typ +* A single 3H AOD output taken from a single [SimulationSample](type:SimulationSample) */ -entity type SimulationMonthlyMeanOutput schema name 'SMLTN_MNTHLY_MN_TPT' { +entity type Simulation3HourlyAODOutput schema name 'SMLTN_3HAOD_TPT' { // The {@link SimulationSample} this output belongs to simulationSample: !SimulationSample // dust @@ -16,10 +16,6 @@ entity type SimulationMonthlyMeanOutput schema name 'SMLTN_MNTHLY_MN_TPT' { solubleCoarseMode: !double // insoluble aitken mode insolubleAitkenMode: !double - // insoluble accumulation mode - insolubleAccumulationMode: !double - // insoluble coarse mode - insolubleCoarseMode: !double // latitude latitude: !double // longitude diff --git 
a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index 94acd79a..6655d01f 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -87,7 +87,6 @@ def upsertMonthlyMeanData(this): - Returns: -bool: True if file was processed, false if file has already been processed or if container type does not match. """ - from datetime import datetime, timedelta import pandas as pd import numpy as np @@ -98,9 +97,7 @@ def upsertMonthlyMeanData(this): "solubleAitkenMode" : "atmosphere_optical_thickness_due_to_soluble_aitken_mode_ambient_aerosol", "solubleAccumulationMode" : "atmosphere_optical_thickness_due_to_soluble_accumulation_mode_ambient_aerosol", "solubleCoarseMode" : "atmosphere_optical_thickness_due_to_soluble_coarse_mode_ambient_aerosol", - "insolubleAitkenMode" : "atmosphere_optical_thickness_due_to_insoluble_aitken_mode_ambient_aerosol", - "insolubleAccumulationMode" : "atmosphere_optical_thickness_due_to_insoluble_accumulation_mode_ambient_aerosol", - "insolubleCoarseMode" : "atmosphere_optical_thickness_due_to_insoluble_coarse_mode_ambient_aerosol" + "insolubleAitkenMode" : "atmosphere_optical_thickness_due_to_insoluble_aitken_mode_ambient_aerosol" } #open file sample = c3.NetCDFUtil.openFile(this.file.url) @@ -132,7 +129,7 @@ def upsertMonthlyMeanData(this): # cast everything into dict and upsert output_records = df.to_dict(orient="records") - c3.SimulationMonthlyMeanOutput.upsertBatch(objs=output_records) + c3.Simulation3HourlyAODOutput.upsertBatch(objs=output_records) this.processed = True c3.SimulationOutputFile.merge(this) From af0c94a55a83c117bcbb6c8c6179229b232c1221 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 17 Mar 2022 10:25:22 -0500 Subject: [PATCH 137/188] rm old type --- .../SimulationMonthlyMeanOutput.c3typ | 25 ------------------- 1 file changed, 25 
deletions(-) delete mode 100644 training/gordon-group/src/entity/Simulations/SimulationMonthlyMeanOutput.c3typ diff --git a/training/gordon-group/src/entity/Simulations/SimulationMonthlyMeanOutput.c3typ b/training/gordon-group/src/entity/Simulations/SimulationMonthlyMeanOutput.c3typ deleted file mode 100644 index df7b4d3a..00000000 --- a/training/gordon-group/src/entity/Simulations/SimulationMonthlyMeanOutput.c3typ +++ /dev/null @@ -1,25 +0,0 @@ -/** -* Simulation3HourlyAODOutput.c3typ -* A single 3H AOD output taken from a single [SimulationSample](type:SimulationSample) -*/ - -entity type Simulation3HourlyAODOutput schema name 'SMLTN_3HAOD_TPT' { - // The {@link SimulationSample} this output belongs to - simulationSample: !SimulationSample - // dust - dust: !double - // soluble aitken mode - solubleAitkenMode: !double - // soluble accumulation mode - solubleAccumulationMode: !double - // soluble coarse mode - solubleCoarseMode: !double - // insoluble aitken mode - insolubleAitkenMode: !double - // latitude - latitude: !double - // longitude - longitude: !double - // time stamp - time: !datetime -} \ No newline at end of file From 9d6ef32c69945b1bab31e32ab687c6eea9e00253 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 17 Mar 2022 10:29:48 -0500 Subject: [PATCH 138/188] update methods names --- .../src/entity/Simulations/SimulationOutputFile.c3typ | 2 +- .../src/entity/Simulations/SimulationOutputFile.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ index 096a1354..ba168c6d 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ @@ -18,7 +18,7 @@ entity type SimulationOutputFile schema name 'SMLTN_OTPT_FL' { upsertAcureAircraftData: member function(): boolean // Load data from this 
file into {@link SimulationMonthlyMeanOutput} @py(env='gordon_1_0_0') - upsertMonthlyMeanData: member function(): boolean + upsert3HourlyAODData: member function(): boolean // Load data from all containers @py(env='gordon_1_0_0') upsertData: member function(): boolean diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index 6655d01f..80f80c75 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -77,9 +77,9 @@ def upsertAcureAircraftData(this): -def upsertMonthlyMeanData(this): +def upsert3HourlyAODData(this): """ - Function to Open files in the SimulationOutputFile table with monthly-mean container and then populate SimulationMonthlyMeanOutput data. + Function to Open files in the SimulationOutputFile table with monthly-mean container and then populate Simulation3HourlyAODOutput data. 
- Arguments: -this: an instance of SimulationOutputFile @@ -151,7 +151,7 @@ def upsertData(this): -bool: True if file was processed, false if file has already been processed """ if(this.container == 'monthly-mean'): - return this.upsertMonthlyMeanData() + return this.upsert3HourlyAODData() elif(this.container == 'acure-aircraft'): return this.upsertAcureAircraftData() else: From 0674b3ba6e25f4ddccb6d4b1801c7ba703aa98fa Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 17 Mar 2022 12:23:11 -0500 Subject: [PATCH 139/188] starting to prototype coords --- .../gordon-group/src/entity/Coordinates/Latitude.c3typ | 8 ++++++++ .../src/entity/Simulations/SimulationSample.js | 2 ++ 2 files changed, 10 insertions(+) create mode 100644 training/gordon-group/src/entity/Coordinates/Latitude.c3typ diff --git a/training/gordon-group/src/entity/Coordinates/Latitude.c3typ b/training/gordon-group/src/entity/Coordinates/Latitude.c3typ new file mode 100644 index 00000000..5ec64cee --- /dev/null +++ b/training/gordon-group/src/entity/Coordinates/Latitude.c3typ @@ -0,0 +1,8 @@ +/** +* Latitude.c3typ +* Type to support geospatial coordinates +*/ +entity type Latitude schema name 'LTTD' { + // the latitude of this entry + value: !double +} \ No newline at end of file diff --git a/training/gordon-group/src/entity/Simulations/SimulationSample.js b/training/gordon-group/src/entity/Simulations/SimulationSample.js index d0806529..7b5ce5a1 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationSample.js +++ b/training/gordon-group/src/entity/Simulations/SimulationSample.js @@ -60,6 +60,8 @@ function afterCreate(objs) { }; }; + + // put two containers together sampleFiles = sampleFiles.concat(sampleFiles2); From 7ced1fa7c3e50cc0947edc7265ed8e87c4d18a0a Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 17 Mar 2022 13:26:16 -0500 Subject: [PATCH 140/188] double to float --- .../Simulation3HourlyAODOutput.c3typ | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 
deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutput.c3typ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutput.c3typ index 821ac1e8..56884751 100644 --- a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutput.c3typ +++ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutput.c3typ @@ -2,24 +2,29 @@ * Simulation3HourlyAODOutput.c3typ * A single monthly-mean output taken from a single [SimulationSample](type:SimulationSample) */ - +@db(datastore='cassandra', + partitionKeyField='parent', + persistenceOrder='start, dataVersion', + persistDuplicates=false, + compactType=true, + unique=['parent, start']) entity type Simulation3HourlyAODOutput schema name 'SMLTN_3HAOD_TPT' { // The {@link SimulationSample} this output belongs to simulationSample: !SimulationSample // dust - dust: !double + dust: !float // soluble aitken mode - solubleAitkenMode: !double + solubleAitkenMode: !float // soluble accumulation mode - solubleAccumulationMode: !double + solubleAccumulationMode: !float // soluble coarse mode - solubleCoarseMode: !double + solubleCoarseMode: !float // insoluble aitken mode - insolubleAitkenMode: !double + insolubleAitkenMode: !float // latitude - latitude: !double + latitude: !float // longitude - longitude: !double + longitude: !float // time stamp time: !datetime } \ No newline at end of file From 9515e20911907d224b7feea4f3729381584d77b3 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 17 Mar 2022 13:26:24 -0500 Subject: [PATCH 141/188] new upsert method --- .../Simulations/SimulationOutputFile.py | 54 +++++++++++++------ 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index 80f80c75..68f14232 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ 
b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -101,32 +101,52 @@ def upsert3HourlyAODData(this): } #open file sample = c3.NetCDFUtil.openFile(this.file.url) - df = pd.DataFrame() +# df = pd.DataFrame() +# +# # this is to take care of variables that need to be flattened +# for var in variable_names.items(): +# tensor = sample[var[1]][:][2,:,:,:] +# tensor = np.array(tensor).flatten() +# df[var[0]] = tensor +# +# # now latitude, longitude and time +# lat = sample["latitude"][:] +# lon = [x*(x < 180) + (x - 360)*(x >= 180) for x in sample["longitude"][:]] +# # this file times +# ts = this.dateTag +# # take a look at thsis: is 24 = 0? +# times = [ts.replace(hour=3), ts.replace(hour=6), ts.replace(hour=9), +# ts.replace(hour=12), ts.replace(hour=15), ts.replace(hour=18), +# ts.replace(hour=21), ts.replace(hour=0)] + +# df["time"] = [t for t in times for n in range(0, len(lat)*len(lon))] +# df["latitude"] = [l for l in lat for n in range(0, len(lon))]*len(times) +# df["longitude"] = [l for l in lon]*len(times)*len(lat) - # this is to take care of variables that need to be flattened - for var in variable_names.items(): - tensor = sample[var[1]][:][2,:,:,:] - tensor = np.array(tensor).flatten() - df[var[0]] = tensor - - # now latitude, longitude and time - lat = sample["latitude"][:] - lon = [x*(x < 180) + (x - 360)*(x >= 180) for x in sample["longitude"][:]] - # this file times - ts = this.dateTag - # take a look at thsis: is 24 = 0? 
times = [ts.replace(hour=3), ts.replace(hour=6), ts.replace(hour=9), ts.replace(hour=12), ts.replace(hour=15), ts.replace(hour=18), ts.replace(hour=21), ts.replace(hour=0)] + lats = sample["latitude"][:] + lons = [x*(x < 180) + (x - 360)*(x >= 180) for x in sample["longitude"][:]] + + df = pd.DataFrame() + row = pd.Series({'time':times[0], 'latitude':lats[0], 'longitude':lons[0]}) + for var in variable_names.items(): + row[var[0]] = 0.0 + for i,time in enumerate(times): + row['time'] = time + for j,lat in enumerate(lats): + row['latitude'] = lat + for k,lon in enumerate(lons): + row['longitude'] = lon + for var in variable_names.items(): + row[var[0]] = sample[var[1]][:][2,i,j,k] + df.append(row, ignore_index=True) - df["time"] = [t for t in times for n in range(0, len(lat)*len(lon))] - df["latitude"] = [l for l in lat for n in range(0, len(lon))]*len(times) - df["longitude"] = [l for l in lon]*len(times)*len(lat) # now the SimulationSample field df["simulationSample"] = this.simulationSample - # cast everything into dict and upsert output_records = df.to_dict(orient="records") c3.Simulation3HourlyAODOutput.upsertBatch(objs=output_records) From 7e282f88985f7958930b0049270e22baa939142a Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 17 Mar 2022 13:28:39 -0500 Subject: [PATCH 142/188] rm db ann --- .../entity/Simulations/Simulation3HourlyAODOutput.c3typ | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutput.c3typ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutput.c3typ index 56884751..8dc30ebf 100644 --- a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutput.c3typ +++ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutput.c3typ @@ -2,12 +2,7 @@ * Simulation3HourlyAODOutput.c3typ * A single monthly-mean output taken from a single [SimulationSample](type:SimulationSample) */ -@db(datastore='cassandra', - 
partitionKeyField='parent', - persistenceOrder='start, dataVersion', - persistDuplicates=false, - compactType=true, - unique=['parent, start']) + entity type Simulation3HourlyAODOutput schema name 'SMLTN_3HAOD_TPT' { // The {@link SimulationSample} this output belongs to simulationSample: !SimulationSample From aec1261697f924cd8e2284d2d4ae53d8ed00db83 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 17 Mar 2022 13:40:14 -0500 Subject: [PATCH 143/188] lil fix as always --- .../gordon-group/src/entity/Simulations/SimulationOutputFile.py | 1 + 1 file changed, 1 insertion(+) diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index 68f14232..13e2df6c 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -123,6 +123,7 @@ def upsert3HourlyAODData(this): # df["latitude"] = [l for l in lat for n in range(0, len(lon))]*len(times) # df["longitude"] = [l for l in lon]*len(times)*len(lat) + ts = this.dateTag times = [ts.replace(hour=3), ts.replace(hour=6), ts.replace(hour=9), ts.replace(hour=12), ts.replace(hour=15), ts.replace(hour=18), ts.replace(hour=21), ts.replace(hour=0)] From b13ce9dbd033b0bed11b771a6eb9841a05bd9210 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 17 Mar 2022 14:45:38 -0500 Subject: [PATCH 144/188] the previous trial takes prohibitively long... 
--- .../Simulations/SimulationOutputFile.py | 54 ++++++------------- 1 file changed, 17 insertions(+), 37 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index 13e2df6c..a3ba5528 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -101,48 +101,28 @@ def upsert3HourlyAODData(this): } #open file sample = c3.NetCDFUtil.openFile(this.file.url) -# df = pd.DataFrame() -# -# # this is to take care of variables that need to be flattened -# for var in variable_names.items(): -# tensor = sample[var[1]][:][2,:,:,:] -# tensor = np.array(tensor).flatten() -# df[var[0]] = tensor -# -# # now latitude, longitude and time -# lat = sample["latitude"][:] -# lon = [x*(x < 180) + (x - 360)*(x >= 180) for x in sample["longitude"][:]] -# # this file times -# ts = this.dateTag -# # take a look at thsis: is 24 = 0? -# times = [ts.replace(hour=3), ts.replace(hour=6), ts.replace(hour=9), -# ts.replace(hour=12), ts.replace(hour=15), ts.replace(hour=18), -# ts.replace(hour=21), ts.replace(hour=0)] - -# df["time"] = [t for t in times for n in range(0, len(lat)*len(lon))] -# df["latitude"] = [l for l in lat for n in range(0, len(lon))]*len(times) -# df["longitude"] = [l for l in lon]*len(times)*len(lat) + df = pd.DataFrame() + # this is to take care of variables that need to be flattened + for var in variable_names.items(): + tensor = sample[var[1]][:][2,:,:,:] + tensor = np.array(tensor).flatten() + df[var[0]] = tensor + + # now latitude, longitude and time + lat = sample["latitude"][:] + lon = [x*(x < 180) + (x - 360)*(x >= 180) for x in sample["longitude"][:]] + # this file times ts = this.dateTag + # take a look at thsis: is 24 = 0? 
times = [ts.replace(hour=3), ts.replace(hour=6), ts.replace(hour=9), ts.replace(hour=12), ts.replace(hour=15), ts.replace(hour=18), ts.replace(hour=21), ts.replace(hour=0)] - lats = sample["latitude"][:] - lons = [x*(x < 180) + (x - 360)*(x >= 180) for x in sample["longitude"][:]] - - df = pd.DataFrame() - row = pd.Series({'time':times[0], 'latitude':lats[0], 'longitude':lons[0]}) - for var in variable_names.items(): - row[var[0]] = 0.0 - for i,time in enumerate(times): - row['time'] = time - for j,lat in enumerate(lats): - row['latitude'] = lat - for k,lon in enumerate(lons): - row['longitude'] = lon - for var in variable_names.items(): - row[var[0]] = sample[var[1]][:][2,i,j,k] - df.append(row, ignore_index=True) + df["time"] = [t for t in times for n in range(0, len(lat)*len(lon))] + df["latitude"] = [l for l in lat for n in range(0, len(lon))]*len(times) + df["longitude"] = [l for l in lon]*len(times)*len(lat) + + # now the SimulationSample field From bb542e7b9dea3c1513f10767d115d25949e104a2 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Thu, 17 Mar 2022 15:11:30 -0500 Subject: [PATCH 145/188] latitude as a reference field now --- .../src/entity/Simulations/Simulation3HourlyAODOutput.c3typ | 2 +- .../src/entity/Simulations/SimulationOutputFile.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutput.c3typ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutput.c3typ index 8dc30ebf..f06c9521 100644 --- a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutput.c3typ +++ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutput.c3typ @@ -17,7 +17,7 @@ entity type Simulation3HourlyAODOutput schema name 'SMLTN_3HAOD_TPT' { // insoluble aitken mode insolubleAitkenMode: !float // latitude - latitude: !float + latitude: !Latitude // longitude longitude: !float // time stamp diff --git 
a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index a3ba5528..f66f64f6 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -111,6 +111,10 @@ def upsert3HourlyAODData(this): # now latitude, longitude and time lat = sample["latitude"][:] + lats = [] + for l in lat: + obj = c3.Latitude.fetch({'filter': c3.Filter().eq("value", float(l))}).objs[0] + lats.append(obj) lon = [x*(x < 180) + (x - 360)*(x >= 180) for x in sample["longitude"][:]] # this file times ts = this.dateTag @@ -119,7 +123,7 @@ def upsert3HourlyAODData(this): ts.replace(hour=12), ts.replace(hour=15), ts.replace(hour=18), ts.replace(hour=21), ts.replace(hour=0)] df["time"] = [t for t in times for n in range(0, len(lat)*len(lon))] - df["latitude"] = [l for l in lat for n in range(0, len(lon))]*len(times) + df["latitude"] = [l for l in lats for n in range(0, len(lon))]*len(times) df["longitude"] = [l for l in lon]*len(times)*len(lat) From deabfd6de023290d14558399ada3b575b5ccbcf3 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 18 Mar 2022 07:54:49 -0500 Subject: [PATCH 146/188] adding type to compare performance --- .../Simulation3HourlyAODOutputPlain.c3typ | 25 +++++++ .../Simulations/SimulationOutputFile.c3typ | 3 + .../Simulations/SimulationOutputFile.py | 68 +++++++++++++++++++ 3 files changed, 96 insertions(+) create mode 100644 training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputPlain.c3typ diff --git a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputPlain.c3typ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputPlain.c3typ new file mode 100644 index 00000000..b68d0152 --- /dev/null +++ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputPlain.c3typ @@ -0,0 +1,25 @@ +/** +* Simulation3HourlyAODOutputPlain.c3typ 
+* A single monthly-mean output taken from a single [SimulationSample](type:SimulationSample) without references +*/ + +entity type Simulation3HourlyAODOutput schema name 'SMLTN_3HAOD_TPT_PLN' { + // The {@link SimulationSample} this output belongs to + simulationSample: !SimulationSample + // dust + dust: !float + // soluble aitken mode + solubleAitkenMode: !float + // soluble accumulation mode + solubleAccumulationMode: !float + // soluble coarse mode + solubleCoarseMode: !float + // insoluble aitken mode + insolubleAitkenMode: !float + // latitude + latitude: !float + // longitude + longitude: !float + // time stamp + time: !datetime +} \ No newline at end of file diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ index ba168c6d..073b8a82 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ @@ -19,6 +19,9 @@ entity type SimulationOutputFile schema name 'SMLTN_OTPT_FL' { // Load data from this file into {@link SimulationMonthlyMeanOutput} @py(env='gordon_1_0_0') upsert3HourlyAODData: member function(): boolean + // Load data and upsert to plain type + @py(env='gordon_1_0_0') + upsert3HourlyAODPlainData: member function(): boolean // Load data from all containers @py(env='gordon_1_0_0') upsertData: member function(): boolean diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index f66f64f6..7259d7b9 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -129,6 +129,74 @@ def upsert3HourlyAODData(this): + # now the SimulationSample field + df["simulationSample"] = this.simulationSample + + # cast everything into dict and upsert + output_records = 
df.to_dict(orient="records") + c3.Simulation3HourlyAODOutput.upsertBatch(objs=output_records) + + this.processed = True + c3.SimulationOutputFile.merge(this) + + return True + + else: + return False + + +def upsert3HourlyAODPlainData(this): + """ + Function to Open files in the SimulationOutputFile table with monthly-mean container and then populate Simulation3HourlyAODOutput data. + + - Arguments: + -this: an instance of SimulationOutputFile + + - Returns: + -bool: True if file was processed, false if file has already been processed or if container type does not match. + """ + import pandas as pd + import numpy as np + + # verify file container + if(this.container == 'monthly-mean'): + variable_names = { + "dust" : "atmosphere_optical_thickness_due_to_dust_ambient_aerosol", + "solubleAitkenMode" : "atmosphere_optical_thickness_due_to_soluble_aitken_mode_ambient_aerosol", + "solubleAccumulationMode" : "atmosphere_optical_thickness_due_to_soluble_accumulation_mode_ambient_aerosol", + "solubleCoarseMode" : "atmosphere_optical_thickness_due_to_soluble_coarse_mode_ambient_aerosol", + "insolubleAitkenMode" : "atmosphere_optical_thickness_due_to_insoluble_aitken_mode_ambient_aerosol" + } + #open file + sample = c3.NetCDFUtil.openFile(this.file.url) + df = pd.DataFrame() + + # this is to take care of variables that need to be flattened + for var in variable_names.items(): + tensor = sample[var[1]][:][2,:,:,:] + tensor = np.array(tensor).flatten() + df[var[0]] = tensor + + # now latitude, longitude and time + lat = sample["latitude"][:] + #lats = [] + #for l in lat: + # obj = c3.Latitude.fetch({'filter': c3.Filter().eq("value", float#(l))}).objs[0] + # lats.append(obj) + lon = [x*(x < 180) + (x - 360)*(x >= 180) for x in sample["longitude"][:]] + # this file times + ts = this.dateTag + # take a look at thsis: is 24 = 0? 
+ times = [ts.replace(hour=3), ts.replace(hour=6), ts.replace(hour=9), + ts.replace(hour=12), ts.replace(hour=15), ts.replace(hour=18), + ts.replace(hour=21), ts.replace(hour=0)] + df["time"] = [t for t in times for n in range(0, len(lat)*len(lon))] + df["latitude"] = [l for l in lat for n in range(0, len(lon))]*len(times) + df["longitude"] = [l for l in lon]*len(times)*len(lat) + + + + # now the SimulationSample field df["simulationSample"] = this.simulationSample From ca48ff5c60e8f0b91f9e7c56c9f086cc6800ddcb Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 18 Mar 2022 08:04:22 -0500 Subject: [PATCH 147/188] there's always a tiny fix that should be highlighted before provisioning... --- .../entity/Simulations/Simulation3HourlyAODOutputPlain.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputPlain.c3typ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputPlain.c3typ index b68d0152..c0e1655f 100644 --- a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputPlain.c3typ +++ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputPlain.c3typ @@ -3,7 +3,7 @@ * A single monthly-mean output taken from a single [SimulationSample](type:SimulationSample) without references */ -entity type Simulation3HourlyAODOutput schema name 'SMLTN_3HAOD_TPT_PLN' { +entity type Simulation3HourlyAODOutputPlain schema name 'SMLTN_3HAOD_TPT_PLN' { // The {@link SimulationSample} this output belongs to simulationSample: !SimulationSample // dust From c07954d5d8b5eef081fc4ff93efc54752070c53b Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 18 Mar 2022 08:56:34 -0500 Subject: [PATCH 148/188] yeah... guess what... 
another tiny fix --- .../gordon-group/src/entity/Simulations/SimulationOutputFile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index 7259d7b9..de530acf 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -202,7 +202,7 @@ def upsert3HourlyAODPlainData(this): # cast everything into dict and upsert output_records = df.to_dict(orient="records") - c3.Simulation3HourlyAODOutput.upsertBatch(objs=output_records) + c3.Simulation3HourlyAODOutputPlain.upsertBatch(objs=output_records) this.processed = True c3.SimulationOutputFile.merge(this) From 3c23ad2bbbd89647e849ee9b8b3bee5462ca4dce Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 18 Mar 2022 12:26:37 -0500 Subject: [PATCH 149/188] add lat lon time tuple type --- .../src/entity/Coordinates/GeoSurfaceTime.c3typ | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ diff --git a/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ new file mode 100644 index 00000000..c176d86c --- /dev/null +++ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ @@ -0,0 +1,12 @@ +/** +* GeoSurfaceTime.c3typ +* A space-time point where space is the Earth's surface. 
+*/ +entity type GeoSurfaceTime schema name 'GSRFC_TM' { + // the latitude + latide: !float + // the longitude + longitude: !float + // the timestamp + time: !datetime +} \ No newline at end of file From 7dc11d785d7e12c037c62feef11c5211247a6e55 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 18 Mar 2022 12:29:00 -0500 Subject: [PATCH 150/188] type to host data with references to geosurftime --- .../Simulation3HourlyAODOutputAllRef.c3typ | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ diff --git a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ new file mode 100644 index 00000000..f3d099c1 --- /dev/null +++ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ @@ -0,0 +1,21 @@ +/** +* Simulation3HourlyAODOutputPlain.c3typ +* A single monthly-mean output taken from a single [SimulationSample](type:SimulationSample) with references to {@link GeoSurfaceTime} +*/ + +entity type Simulation3HourlyAODOutputPlain schema name 'SMLTN_3HAOD_TPT_PLN' { + // The {@link SimulationSample} this output belongs to + simulationSample: !SimulationSample + // dust + dust: !float + // soluble aitken mode + solubleAitkenMode: !float + // soluble accumulation mode + solubleAccumulationMode: !float + // soluble coarse mode + solubleCoarseMode: !float + // insoluble aitken mode + insolubleAitkenMode: !float + // the space-time point for this measurement + spaceTime: !GeoSurfaceTime +} \ No newline at end of file From 8c0b318c7181dd849252e5b05f73cc466a5fb75f Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 18 Mar 2022 13:04:10 -0500 Subject: [PATCH 151/188] adjust type name --- .../entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ index f3d099c1..c5a1c1f3 100644 --- a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ +++ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ @@ -1,9 +1,9 @@ /** -* Simulation3HourlyAODOutputPlain.c3typ +* Simulation3HourlyAODOutputAllRef.c3typ * A single monthly-mean output taken from a single [SimulationSample](type:SimulationSample) with references to {@link GeoSurfaceTime} */ -entity type Simulation3HourlyAODOutputPlain schema name 'SMLTN_3HAOD_TPT_PLN' { +entity type Simulation3HourlyAODOutputAllRef schema name 'SMLTN_3HAOD_TPT_LLRF' { // The {@link SimulationSample} this output belongs to simulationSample: !SimulationSample // dust From 428a281d8e80dd41481aa74ea860cad344b48614 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 18 Mar 2022 13:14:49 -0500 Subject: [PATCH 152/188] add method to upsert all ref --- .../Simulation3HourlyAODOutputAllRef.c3typ | 2 +- .../Simulations/SimulationOutputFile.c3typ | 3 + .../Simulations/SimulationOutputFile.py | 76 +++++++++++++++++++ 3 files changed, 80 insertions(+), 1 deletion(-) diff --git a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ index c5a1c1f3..f8627815 100644 --- a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ +++ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ @@ -17,5 +17,5 @@ entity type Simulation3HourlyAODOutputAllRef schema name 'SMLTN_3HAOD_TPT_LLRF' // insoluble aitken mode insolubleAitkenMode: !float // the space-time point for this measurement - spaceTime: !GeoSurfaceTime + geoSurfaceTimePoint: !GeoSurfaceTime } \ No newline at end of file diff --git 
a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ index 073b8a82..35cedf83 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ @@ -22,6 +22,9 @@ entity type SimulationOutputFile schema name 'SMLTN_OTPT_FL' { // Load data and upsert to plain type @py(env='gordon_1_0_0') upsert3HourlyAODPlainData: member function(): boolean + // Load data and upsert to AllRef type + @py(env='gordon_1_0_0') + upsert3HourlyAODAllRefData: member function(): boolean // Load data from all containers @py(env='gordon_1_0_0') upsertData: member function(): boolean diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index de530acf..c701d747 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -212,6 +212,82 @@ def upsert3HourlyAODPlainData(this): else: return False +def upsert3HourlyAODAllRefData(this): + """ + Function to Open files in the SimulationOutputFile table with monthly-mean container and then populate Simulation3HourlyAODOutput data. + + - Arguments: + -this: an instance of SimulationOutputFile + + - Returns: + -bool: True if file was processed, false if file has already been processed or if container type does not match. 
+ """ + import pandas as pd + import numpy as np + + # verify file container + if(this.container == 'monthly-mean'): + variable_names = { + "dust" : "atmosphere_optical_thickness_due_to_dust_ambient_aerosol", + "solubleAitkenMode" : "atmosphere_optical_thickness_due_to_soluble_aitken_mode_ambient_aerosol", + "solubleAccumulationMode" : "atmosphere_optical_thickness_due_to_soluble_accumulation_mode_ambient_aerosol", + "solubleCoarseMode" : "atmosphere_optical_thickness_due_to_soluble_coarse_mode_ambient_aerosol", + "insolubleAitkenMode" : "atmosphere_optical_thickness_due_to_insoluble_aitken_mode_ambient_aerosol" + } + #open file + sample = c3.NetCDFUtil.openFile(this.file.url) + df = pd.DataFrame() + + # this is to take care of variables that need to be flattened + for var in variable_names.items(): + tensor = sample[var[1]][:][2,:,:,:] + tensor = np.array(tensor).flatten() + df[var[0]] = tensor + + + df_st = pd.DataFrame() + # now latitude, longitude and time + lat = sample["latitude"][:] + #lats = [] + #for l in lat: + # obj = c3.Latitude.fetch({'filter': c3.Filter().eq("value", float#(l))}).objs[0] + # lats.append(obj) + lon = [x*(x < 180) + (x - 360)*(x >= 180) for x in sample["longitude"][:]] + # this file times + ts = this.dateTag + # take a look at thsis: is 24 = 0? 
+ times = [ts.replace(hour=3), ts.replace(hour=6), ts.replace(hour=9), + ts.replace(hour=12), ts.replace(hour=15), ts.replace(hour=18), + ts.replace(hour=21), ts.replace(hour=0)] + df_st["time"] = [t for t in times for n in range(0, len(lat)*len(lon))] + df_st["latitude"] = [l for l in lat for n in range(0, len(lon))]*len(times) + df_st["longitude"] = [l for l in lon]*len(times)*len(lat) + + for i in range(len(df_st)): + la = df_st["latitude"].iloc(i) + lo = df_st["longitude"].iloc(i) + ti = df_st["time"].iloc(i) + filt = c3.Filter().eq("latitude", la).and_().eq("longitude", lo).and_().eq("time", ti) + geosp_obj = c3.GeoSurfaceTime.fetch(spec={"filter": filt}).objs[0] + df_st["geosurftime"] = geosp_obj + + df["geoSurfaceTimePoint"] = df_st["geosurftime"] + + # now the SimulationSample field + df["simulationSample"] = this.simulationSample + + # cast everything into dict and upsert + output_records = df.to_dict(orient="records") + c3.Simulation3HourlyAODOutputAllRef.upsertBatch(objs=output_records) + + this.processed = True + c3.SimulationOutputFile.merge(this) + + return True + + else: + return False + def upsertData(this): """ From bf42d0f5660d12053fa173bf8984a6e77c7fdf0e Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 18 Mar 2022 13:17:49 -0500 Subject: [PATCH 153/188] too long of a schema name --- .../entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ index f8627815..fa4b93d1 100644 --- a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ +++ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ @@ -3,7 +3,7 @@ * A single monthly-mean output taken from a single [SimulationSample](type:SimulationSample) with references to {@link GeoSurfaceTime} */ -entity 
type Simulation3HourlyAODOutputAllRef schema name 'SMLTN_3HAOD_TPT_LLRF' { +entity type Simulation3HourlyAODOutputAllRef schema name 'SM_3HAOD_TPT_LLRF' { // The {@link SimulationSample} this output belongs to simulationSample: !SimulationSample // dust From e00c439923451a525628bc9f923e1d7647c8cb56 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 18 Mar 2022 13:28:59 -0500 Subject: [PATCH 154/188] fix name --- .../gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ index c176d86c..1781ba47 100644 --- a/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ +++ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ @@ -4,7 +4,7 @@ */ entity type GeoSurfaceTime schema name 'GSRFC_TM' { // the latitude - latide: !float + latitude: !float // the longitude longitude: !float // the timestamp From 73450d809eeda80026f5c3d39042722031d09c48 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 18 Mar 2022 13:40:20 -0500 Subject: [PATCH 155/188] python stupidity fix --- .../src/entity/Simulations/SimulationOutputFile.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index c701d747..bb6bb9df 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -264,12 +264,12 @@ def upsert3HourlyAODAllRefData(this): df_st["longitude"] = [l for l in lon]*len(times)*len(lat) for i in range(len(df_st)): - la = df_st["latitude"].iloc(i) - lo = df_st["longitude"].iloc(i) - ti = df_st["time"].iloc(i) + la = df_st["latitude"].iloc[i] + lo = df_st["longitude"].iloc[i] + ti = df_st["time"].iloc[i] filt = 
c3.Filter().eq("latitude", la).and_().eq("longitude", lo).and_().eq("time", ti) geosp_obj = c3.GeoSurfaceTime.fetch(spec={"filter": filt}).objs[0] - df_st["geosurftime"] = geosp_obj + df_st["geosurftime"].iloc[i] = geosp_obj df["geoSurfaceTimePoint"] = df_st["geosurftime"] From f67482595617a66c1a08759eaa3ef3a7cbd4fbe3 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 18 Mar 2022 14:07:56 -0500 Subject: [PATCH 156/188] bending to c3's will --- .../src/entity/Simulations/SimulationOutputFile.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index bb6bb9df..7ec058cb 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -263,15 +263,16 @@ def upsert3HourlyAODAllRefData(this): df_st["latitude"] = [l for l in lat for n in range(0, len(lon))]*len(times) df_st["longitude"] = [l for l in lon]*len(times)*len(lat) + gst = [] for i in range(len(df_st)): - la = df_st["latitude"].iloc[i] - lo = df_st["longitude"].iloc[i] - ti = df_st["time"].iloc[i] + la = float(df_st["latitude"].iloc[i]) + lo = float(df_st["longitude"].iloc[i]) + ti = str(df_st["time"].iloc[i]) filt = c3.Filter().eq("latitude", la).and_().eq("longitude", lo).and_().eq("time", ti) geosp_obj = c3.GeoSurfaceTime.fetch(spec={"filter": filt}).objs[0] - df_st["geosurftime"].iloc[i] = geosp_obj + gst.append(geosp_obj) - df["geoSurfaceTimePoint"] = df_st["geosurftime"] + df["geoSurfaceTimePoint"] = gst # now the SimulationSample field df["simulationSample"] = this.simulationSample From 94bc8cd8c4a156ab4f0e1eb0352e01cee4199c8a Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 18 Mar 2022 14:17:42 -0500 Subject: [PATCH 157/188] making objs instead of fetching --- .../src/entity/Simulations/SimulationOutputFile.py | 3 +-- 1 file changed, 1 
insertion(+), 2 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index 7ec058cb..fe1b1422 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -268,8 +268,7 @@ def upsert3HourlyAODAllRefData(this): la = float(df_st["latitude"].iloc[i]) lo = float(df_st["longitude"].iloc[i]) ti = str(df_st["time"].iloc[i]) - filt = c3.Filter().eq("latitude", la).and_().eq("longitude", lo).and_().eq("time", ti) - geosp_obj = c3.GeoSurfaceTime.fetch(spec={"filter": filt}).objs[0] + geosp_obj = c3.GeoSurfaceTime.makeObj({"latitude": la, "longitude":lo, "time": ti}) gst.append(geosp_obj) df["geoSurfaceTimePoint"] = gst From 2e8b987cae73789c8b74645124ab5afba64278e4 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 18 Mar 2022 14:40:21 -0500 Subject: [PATCH 158/188] eyt another approach --- .../src/entity/Simulations/SimulationOutputFile.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index fe1b1422..b85d6f40 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -263,14 +263,24 @@ def upsert3HourlyAODAllRefData(this): df_st["latitude"] = [l for l in lat for n in range(0, len(lon))]*len(times) df_st["longitude"] = [l for l in lon]*len(times)*len(lat) + + # perhaps this is best gst = [] for i in range(len(df_st)): la = float(df_st["latitude"].iloc[i]) lo = float(df_st["longitude"].iloc[i]) ti = str(df_st["time"].iloc[i]) - geosp_obj = c3.GeoSurfaceTime.makeObj({"latitude": la, "longitude":lo, "time": ti}) + filt = c3.Filter().eq("latitude", la).and_().eq("longitude", lo).and_().eq("time", ti) 
+ fetch_obj = c3.GeoSurfaceTime.fetch(spec={"filter": filt, "limit":-1}) + if(fetch_obj.count == 0): + geosp_obj = c3.GeoSurfaceTime.makeObj({"latitude": la, "longitude":lo, "time": ti}) + geosp_obj.upsert() + else: + geosp_obj = fetch_obj.objs[0] gst.append(geosp_obj) + + df["geoSurfaceTimePoint"] = gst # now the SimulationSample field From 09c17594c08da650350876d8a8f6ce4c6e3f828d Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Mon, 21 Mar 2022 14:06:18 -0500 Subject: [PATCH 159/188] @db on GeoSurfaceTime --- .../gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ | 3 +++ 1 file changed, 3 insertions(+) diff --git a/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ index 1781ba47..1d177dec 100644 --- a/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ +++ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ @@ -2,6 +2,9 @@ * GeoSurfaceTime.c3typ * A space-time point where space is the Earth's surface. */ +@db(persistDuplicates=false, + compactType=true, + unique=['latitude, longitude, time']) entity type GeoSurfaceTime schema name 'GSRFC_TM' { // the latitude latitude: !float From 8196ef5e007a05a33bd4f26e2d0c4719bf9718fc Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 30 Mar 2022 15:36:37 -0500 Subject: [PATCH 160/188] excluding unique db ann --- .../gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ index 1d177dec..5b99b667 100644 --- a/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ +++ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ @@ -3,8 +3,7 @@ * A space-time point where space is the Earth's surface. 
*/ @db(persistDuplicates=false, - compactType=true, - unique=['latitude, longitude, time']) + compactType=true) entity type GeoSurfaceTime schema name 'GSRFC_TM' { // the latitude latitude: !float From bb31036c9e6ab3817675c570c937402de67525dd Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 1 Apr 2022 10:24:19 -0500 Subject: [PATCH 161/188] python method for batch upsert --- .../Simulations/SimulationOutputFile.py | 49 ++++++------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index b85d6f40..712e7cc4 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -236,58 +236,41 @@ def upsert3HourlyAODAllRefData(this): } #open file sample = c3.NetCDFUtil.openFile(this.file.url) - df = pd.DataFrame() + df_var = pd.DataFrame() # this is to take care of variables that need to be flattened for var in variable_names.items(): tensor = sample[var[1]][:][2,:,:,:] tensor = np.array(tensor).flatten() - df[var[0]] = tensor + df_var[var[0]] = tensor + # include simulation sample + df_var["simulationSample"] = this.simulationSample + # now do spacetime coordinates df_st = pd.DataFrame() - # now latitude, longitude and time + lat = sample["latitude"][:] - #lats = [] - #for l in lat: - # obj = c3.Latitude.fetch({'filter': c3.Filter().eq("value", float#(l))}).objs[0] - # lats.append(obj) lon = [x*(x < 180) + (x - 360)*(x >= 180) for x in sample["longitude"][:]] - # this file times ts = this.dateTag - # take a look at thsis: is 24 = 0? 
times = [ts.replace(hour=3), ts.replace(hour=6), ts.replace(hour=9), - ts.replace(hour=12), ts.replace(hour=15), ts.replace(hour=18), - ts.replace(hour=21), ts.replace(hour=0)] + ts.replace(hour=12), ts.replace(hour=15), ts.replace(hour=18), + ts.replace(hour=21), ts.replace(hour=0)] + df_st["time"] = [t for t in times for n in range(0, len(lat)*len(lon))] df_st["latitude"] = [l for l in lat for n in range(0, len(lon))]*len(times) df_st["longitude"] = [l for l in lon]*len(times)*len(lat) + df_st["id"] = df_st["latitude"].astype(str) + "_" + df_st["longitude"].astype(str) + "_" + df_st["time"].astype(str).apply(lambda x: x.replace(" ", 'T')) - # perhaps this is best - gst = [] - for i in range(len(df_st)): - la = float(df_st["latitude"].iloc[i]) - lo = float(df_st["longitude"].iloc[i]) - ti = str(df_st["time"].iloc[i]) - filt = c3.Filter().eq("latitude", la).and_().eq("longitude", lo).and_().eq("time", ti) - fetch_obj = c3.GeoSurfaceTime.fetch(spec={"filter": filt, "limit":-1}) - if(fetch_obj.count == 0): - geosp_obj = c3.GeoSurfaceTime.makeObj({"latitude": la, "longitude":lo, "time": ti}) - geosp_obj.upsert() - else: - geosp_obj = fetch_obj.objs[0] - gst.append(geosp_obj) - - + # now upsert this + output_records = df_st.to_dict(orient="records") + gst = c3.GeoSurfaceTime.upsertBatch(objs=output_records) - df["geoSurfaceTimePoint"] = gst + df_batch = pd.DataFrame(df_var) + df_batch["geoSurfaceTimePoint"] = gst.objs - # now the SimulationSample field - df["simulationSample"] = this.simulationSample - - # cast everything into dict and upsert - output_records = df.to_dict(orient="records") + output_records = df_batch.to_dict(orient="records") c3.Simulation3HourlyAODOutputAllRef.upsertBatch(objs=output_records) this.processed = True From 001159a8da78298820d80c6116648fab8b7bf6ae Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 1 Apr 2022 13:03:08 -0500 Subject: [PATCH 162/188] create batch job for AOD data --- .../UpsertAODData/UpsertAODData.c3typ | 10 +++++ 
.../BatchJobs/UpsertAODData/UpsertAODData.js | 38 +++++++++++++++++++ .../UpsertAODData/UpsertAODDataBatch.c3typ | 7 ++++ .../UpsertAODData/UpsertAODDataOptions.c3typ | 7 ++++ .../{ => UpsertData}/UpsertData.c3typ | 0 .../BatchJobs/{ => UpsertData}/UpsertData.js | 0 .../{ => UpsertData}/UpsertDataBatch.c3typ | 0 .../{ => UpsertData}/UpsertDataOptions.c3typ | 0 .../{ => UpsertObsData}/UpsertObsData.c3typ | 0 .../{ => UpsertObsData}/UpsertObsData.js | 0 .../UpsertObsDataBatch.c3typ | 0 11 files changed, 62 insertions(+) create mode 100644 training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.c3typ create mode 100644 training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js create mode 100644 training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataBatch.c3typ create mode 100644 training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ rename training/gordon-group/src/BatchJobs/{ => UpsertData}/UpsertData.c3typ (100%) rename training/gordon-group/src/BatchJobs/{ => UpsertData}/UpsertData.js (100%) rename training/gordon-group/src/BatchJobs/{ => UpsertData}/UpsertDataBatch.c3typ (100%) rename training/gordon-group/src/BatchJobs/{ => UpsertData}/UpsertDataOptions.c3typ (100%) rename training/gordon-group/src/BatchJobs/{ => UpsertObsData}/UpsertObsData.c3typ (100%) rename training/gordon-group/src/BatchJobs/{ => UpsertObsData}/UpsertObsData.js (100%) rename training/gordon-group/src/BatchJobs/{ => UpsertObsData}/UpsertObsDataBatch.c3typ (100%) diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.c3typ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.c3typ new file mode 100644 index 00000000..c6132cad --- /dev/null +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.c3typ @@ -0,0 +1,10 @@ +/** +* UpsertAODData.c3typ +* Batch job to upsert data from files in {@link SimulationOutputFile}with container=monthly-mean in parallel +*/ +type UpsertAODData extends BatchJob type 
key 'PSRT_MNTHMNDT' { + + doStart: ~ js server + processBatch: ~ js server + // allComplete: ~ js server +} \ No newline at end of file diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js new file mode 100644 index 00000000..aa19d576 --- /dev/null +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js @@ -0,0 +1,38 @@ +/** + * UpsertAODData.js + * Implementation of UpsertAODData.c3typ + * @param {UpsertAODData} job + * @param {UpsertAODDataOptions} options + */ + function doStart(job, options) { + var batch = []; + + var dataset = SimulationOutputFile.fetchObjStream({ + filter: "container == 'monthly-mean'", + limit: -1 + }); + + while(dataset.hasNext()) { + batch.push(dataset.next()); + + if (batch.length >= options.batchSize || !dataset.hasNext()) { + var batchSpec = UpsertDataBatch.make({values: batch}); + job.scheduleBatch(batchSpec); + + batch = []; + } + } +} + + + +/** + * @param {UpsertAODDataBatch} batch + * @param {UpsertAODData} job + * @param {UpsertAODDataOptions} options + */ +function processBatch(batch, job, options) { + batch.values.forEach(function(file) { + file.upsert3HourlyAODAllRefData(); + }); +} \ No newline at end of file diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataBatch.c3typ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataBatch.c3typ new file mode 100644 index 00000000..ed095d64 --- /dev/null +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataBatch.c3typ @@ -0,0 +1,7 @@ +/** +* UpsertAODDataBatch.c3typ +* Represents a unit of work (batch) in a {@link UpsertAODData} batch job. 
+*/ +type UpsertAODDataBatch { + values: [SimulationOutputFile] +} \ No newline at end of file diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ new file mode 100644 index 00000000..21114d27 --- /dev/null +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ @@ -0,0 +1,7 @@ +/** +* UpsertAODDataOptions.c3typ +* Represents customization options for a {@link UpsertAODData} batch job +*/ +type UpsertAODDataOptions { + batchSize: int = 1 +} \ No newline at end of file diff --git a/training/gordon-group/src/BatchJobs/UpsertData.c3typ b/training/gordon-group/src/BatchJobs/UpsertData/UpsertData.c3typ similarity index 100% rename from training/gordon-group/src/BatchJobs/UpsertData.c3typ rename to training/gordon-group/src/BatchJobs/UpsertData/UpsertData.c3typ diff --git a/training/gordon-group/src/BatchJobs/UpsertData.js b/training/gordon-group/src/BatchJobs/UpsertData/UpsertData.js similarity index 100% rename from training/gordon-group/src/BatchJobs/UpsertData.js rename to training/gordon-group/src/BatchJobs/UpsertData/UpsertData.js diff --git a/training/gordon-group/src/BatchJobs/UpsertDataBatch.c3typ b/training/gordon-group/src/BatchJobs/UpsertData/UpsertDataBatch.c3typ similarity index 100% rename from training/gordon-group/src/BatchJobs/UpsertDataBatch.c3typ rename to training/gordon-group/src/BatchJobs/UpsertData/UpsertDataBatch.c3typ diff --git a/training/gordon-group/src/BatchJobs/UpsertDataOptions.c3typ b/training/gordon-group/src/BatchJobs/UpsertData/UpsertDataOptions.c3typ similarity index 100% rename from training/gordon-group/src/BatchJobs/UpsertDataOptions.c3typ rename to training/gordon-group/src/BatchJobs/UpsertData/UpsertDataOptions.c3typ diff --git a/training/gordon-group/src/BatchJobs/UpsertObsData.c3typ b/training/gordon-group/src/BatchJobs/UpsertObsData/UpsertObsData.c3typ similarity index 100% rename from 
training/gordon-group/src/BatchJobs/UpsertObsData.c3typ rename to training/gordon-group/src/BatchJobs/UpsertObsData/UpsertObsData.c3typ diff --git a/training/gordon-group/src/BatchJobs/UpsertObsData.js b/training/gordon-group/src/BatchJobs/UpsertObsData/UpsertObsData.js similarity index 100% rename from training/gordon-group/src/BatchJobs/UpsertObsData.js rename to training/gordon-group/src/BatchJobs/UpsertObsData/UpsertObsData.js diff --git a/training/gordon-group/src/BatchJobs/UpsertObsDataBatch.c3typ b/training/gordon-group/src/BatchJobs/UpsertObsData/UpsertObsDataBatch.c3typ similarity index 100% rename from training/gordon-group/src/BatchJobs/UpsertObsDataBatch.c3typ rename to training/gordon-group/src/BatchJobs/UpsertObsData/UpsertObsDataBatch.c3typ From ea97e027a60695dab4ed97af47c5954620afd709 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 1 Apr 2022 13:26:26 -0500 Subject: [PATCH 163/188] add limit to batch job --- .../gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js | 2 +- .../src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js index aa19d576..7ce70468 100644 --- a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js @@ -9,7 +9,7 @@ var dataset = SimulationOutputFile.fetchObjStream({ filter: "container == 'monthly-mean'", - limit: -1 + limit: options.limit }); while(dataset.hasNext()) { diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ index 21114d27..15df46b5 100644 --- a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ @@ 
-4,4 +4,5 @@ */ type UpsertAODDataOptions { batchSize: int = 1 + limit: int=-1 } \ No newline at end of file From 42c46dae33b366758ce5514afe9374a951d5b476 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 1 Apr 2022 13:34:25 -0500 Subject: [PATCH 164/188] lil fix --- .../gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js index 7ce70468..360dca25 100644 --- a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js @@ -16,7 +16,7 @@ batch.push(dataset.next()); if (batch.length >= options.batchSize || !dataset.hasNext()) { - var batchSpec = UpsertDataBatch.make({values: batch}); + var batchSpec = UpsertAODDataBatch.make({values: batch}); job.scheduleBatch(batchSpec); batch = []; From 8840a9924226d74cd5454bff2b59985428c7bc31 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 1 Apr 2022 13:53:04 -0500 Subject: [PATCH 165/188] fix on merging file type --- .../src/entity/Simulations/SimulationOutputFile.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index 712e7cc4..4be3f060 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -273,8 +273,9 @@ def upsert3HourlyAODAllRefData(this): output_records = df_batch.to_dict(orient="records") c3.Simulation3HourlyAODOutputAllRef.upsertBatch(objs=output_records) - this.processed = True - c3.SimulationOutputFile.merge(this) + #this.processed = True + c3.SimulationOutputFile({"id": this.id, "processed"=True}).merge() + #c3.SimulationOutputFile.merge(this) return True From 
14b5d45b25dd2753676b878a8280e5764b5e6083 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 1 Apr 2022 14:01:28 -0500 Subject: [PATCH 166/188] stupid fix --- .../gordon-group/src/entity/Simulations/SimulationOutputFile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index 4be3f060..af776b45 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -274,7 +274,7 @@ def upsert3HourlyAODAllRefData(this): c3.Simulation3HourlyAODOutputAllRef.upsertBatch(objs=output_records) #this.processed = True - c3.SimulationOutputFile({"id": this.id, "processed"=True}).merge() + c3.SimulationOutputFile({"id": this.id, "processed": True}).merge() #c3.SimulationOutputFile.merge(this) return True From 74ffb1c3fc973257464578ec9709e49ef4a1798a Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Fri, 1 Apr 2022 14:14:02 -0500 Subject: [PATCH 167/188] another syntax fix --- .../gordon-group/src/entity/Simulations/SimulationOutputFile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index af776b45..5474bbeb 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -274,7 +274,7 @@ def upsert3HourlyAODAllRefData(this): c3.Simulation3HourlyAODOutputAllRef.upsertBatch(objs=output_records) #this.processed = True - c3.SimulationOutputFile({"id": this.id, "processed": True}).merge() + c3.SimulationOutputFile(id=this.id, processed=True).merge() #c3.SimulationOutputFile.merge(this) return True From 1ed3eae477825335c7c3f3b7c5bfa5bbdb39909b Mon Sep 17 00:00:00 2001 From: babreu-ncsa 
Date: Sat, 2 Apr 2022 10:54:26 -0500 Subject: [PATCH 168/188] batch job to clean table --- .../UpsertAODData/RemoveAODData.c3typ | 10 ++++++ .../BatchJobs/UpsertAODData/RemoveAODData.js | 35 +++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.c3typ create mode 100644 training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.c3typ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.c3typ new file mode 100644 index 00000000..1fc366d2 --- /dev/null +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.c3typ @@ -0,0 +1,10 @@ +/** +* RemoveAODData.c3typ +* Batch job to delete data from {@link Simulation3HourlyAODOutputAllRef} +*/ +type RemoveAODData extends BatchJob type key 'PSRT_MNTHMNDT' { + + doStart: ~ js server + processBatch: ~ js server + // allComplete: ~ js server +} \ No newline at end of file diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js new file mode 100644 index 00000000..b6052a13 --- /dev/null +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js @@ -0,0 +1,35 @@ +/** + * RemoveAODData.js + * Implementation of RemoveAODData.c3typ + * @param {RemoveAODData} job + * @param {UpsertAODDataOptions} options + */ + function doStart(job, options) { + var batch = []; + + var dataset = Simulation3HourlyAODOutputAllRef.fetchObjStream({ + limit: options.limit + }); + + while(dataset.hasNext()) { + batch.push(dataset.next()); + + if (batch.length >= options.batchSize || !dataset.hasNext()) { + var batchSpec = RemoveAODDataBatch.make({values: batch}); + job.scheduleBatch(batchSpec); + + batch = []; + } + } +} + + + +/** + * @param {UpsertAODDataBatch} batch + * @param {UpsertAODData} job + * @param {UpsertAODDataOptions} options + */ +function 
processBatch(batch, job, options) { + Simulation3HourlyAODOutputAllRef.removeBatch(objs=batch) +} \ No newline at end of file From 13743a249610bc4542e44c33be4401f8131fb166 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Sat, 2 Apr 2022 10:57:07 -0500 Subject: [PATCH 169/188] change type key --- .../src/BatchJobs/UpsertAODData/RemoveAODData.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.c3typ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.c3typ index 1fc366d2..7fc4686f 100644 --- a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.c3typ +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.c3typ @@ -2,7 +2,7 @@ * RemoveAODData.c3typ * Batch job to delete data from {@link Simulation3HourlyAODOutputAllRef} */ -type RemoveAODData extends BatchJob type key 'PSRT_MNTHMNDT' { +type RemoveAODData extends BatchJob type key 'RM_MNTHMNDT' { doStart: ~ js server processBatch: ~ js server From 26a63f969d5746b968475bbf42742ff125c6c11c Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Sat, 2 Apr 2022 11:00:11 -0500 Subject: [PATCH 170/188] other batch types for rm --- .../src/BatchJobs/UpsertAODData/RemoveAODData.c3typ | 2 +- .../src/BatchJobs/UpsertAODData/RemoveAODDataBatch.c3typ | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDataBatch.c3typ diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.c3typ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.c3typ index 7fc4686f..eb0eaf71 100644 --- a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.c3typ +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.c3typ @@ -2,7 +2,7 @@ * RemoveAODData.c3typ * Batch job to delete data from {@link Simulation3HourlyAODOutputAllRef} */ -type RemoveAODData extends BatchJob type key 'RM_MNTHMNDT' { +type 
RemoveAODData extends BatchJob type key 'RM_MNTHMNDT' { doStart: ~ js server processBatch: ~ js server diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDataBatch.c3typ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDataBatch.c3typ new file mode 100644 index 00000000..cd713bac --- /dev/null +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDataBatch.c3typ @@ -0,0 +1,7 @@ +/** +* RemoveAODDataBatch.c3typ +* Represents a unit of work (batch) in a {@link RemoveAODData} batch job. +*/ +type RemoveAODDataBatch { + values: [Simulation3HourlyAODOutputAllRef] +} \ No newline at end of file From a51b7f9b9ab90833dcaaac9f4cae05b47614851d Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Sat, 2 Apr 2022 11:11:30 -0500 Subject: [PATCH 171/188] trying a little fix --- .../gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js index b6052a13..491c102b 100644 --- a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js @@ -31,5 +31,5 @@ * @param {UpsertAODDataOptions} options */ function processBatch(batch, job, options) { - Simulation3HourlyAODOutputAllRef.removeBatch(objs=batch) + Simulation3HourlyAODOutputAllRef.removeBatch(batch) } \ No newline at end of file From bd35be40181f064c4a8bd8c1b44c49248e26ba0a Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Sat, 2 Apr 2022 17:10:05 -0500 Subject: [PATCH 172/188] change how batches are built --- .../BatchJobs/UpsertAODData/RemoveAODData.js | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js index 491c102b..c10f83ee 100644 --- 
a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js @@ -7,20 +7,17 @@ function doStart(job, options) { var batch = []; - var dataset = Simulation3HourlyAODOutputAllRef.fetchObjStream({ - limit: options.limit - }); + while(Simulation3HourlyAODOutputAllRef.exists()) { + var fetch_batch = Simulation3HourlyAODOutputAllRef.fetch({ + limit: options.limit + }) + batch = fetch_batch.objs; + var batchSpec = RemoveAODDataBatch.make({values: batch}); + job.scheduleBatch(batchSpec); - while(dataset.hasNext()) { - batch.push(dataset.next()); - - if (batch.length >= options.batchSize || !dataset.hasNext()) { - var batchSpec = RemoveAODDataBatch.make({values: batch}); - job.scheduleBatch(batchSpec); - - batch = []; - } + batch = []; } + } From e8ced85c6826d341e3dc58f821c67d070fd04111 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Sat, 2 Apr 2022 17:21:55 -0500 Subject: [PATCH 173/188] another trial --- .../gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js index c10f83ee..0f34e72f 100644 --- a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js @@ -28,5 +28,5 @@ * @param {UpsertAODDataOptions} options */ function processBatch(batch, job, options) { - Simulation3HourlyAODOutputAllRef.removeBatch(batch) + Simulation3HourlyAODOutputAllRef.removeBatch(batch.values) } \ No newline at end of file From f95df3dcdef85e5c4109d07373c6d33360769dc9 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Sun, 3 Apr 2022 08:42:33 -0500 Subject: [PATCH 174/188] trying different script to scale up rm --- .../BatchJobs/UpsertAODData/RemoveAODData.js | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) 
diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js index 0f34e72f..14e0d552 100644 --- a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js @@ -5,18 +5,23 @@ * @param {UpsertAODDataOptions} options */ function doStart(job, options) { - var batch = []; + var batches = []; + var offset = 0; + var total = Simulation3HourlyAODOutputAllRef.fetchCount(); - while(Simulation3HourlyAODOutputAllRef.exists()) { + while ((offset + options.batchSize) < total) { var fetch_batch = Simulation3HourlyAODOutputAllRef.fetch({ - limit: options.limit - }) - batch = fetch_batch.objs; - var batchSpec = RemoveAODDataBatch.make({values: batch}); - job.scheduleBatch(batchSpec); + limit: options.batchSize, + offset: offset + }).objs; + var batchSpec = RemoveAODDataBatch.make({values: fetch_batch}); + batches.push(batchSpec); + offset += batch_size; + }; - batch = []; - } + for (var i = 0; i < batches.length; i++) { + job.scheduleBatch(batches[i]); + }; } From ab8462e6c16347ee47fc9d03a8b8212397d2cbdf Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Sun, 3 Apr 2022 08:48:16 -0500 Subject: [PATCH 175/188] lil fix --- .../gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js index 14e0d552..02f1ea0e 100644 --- a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js @@ -16,7 +16,7 @@ }).objs; var batchSpec = RemoveAODDataBatch.make({values: fetch_batch}); batches.push(batchSpec); - offset += batch_size; + offset += options.batchSize; }; for (var i = 0; i < batches.length; i++) { From 857917cd9b8a8e79f1508a09126a6e093c06358a Mon 
Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 5 Apr 2022 10:28:53 -0500 Subject: [PATCH 176/188] upsert -> create in AOD data --- .../UpsertAODData/RemoveAODDAtaOptions.c3typ | 10 +++++ .../BatchJobs/UpsertAODData/RemoveAODData.js | 42 +++++++++++++------ .../Simulations/SimulationOutputFile.py | 2 +- 3 files changed, 40 insertions(+), 14 deletions(-) create mode 100644 training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDAtaOptions.c3typ diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDAtaOptions.c3typ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDAtaOptions.c3typ new file mode 100644 index 00000000..e74bed20 --- /dev/null +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDAtaOptions.c3typ @@ -0,0 +1,10 @@ +/** +* RemoveAODDataOptions.c3typ +* Represents customization options for a {@link RemoveAODData} batch job +*/ +type UpsertAODDataOptions { + limit: int=-1 + initialDate: datetime + finalDate: datetime + timeGranularity: string enum('HOUR', 'DAY') +} \ No newline at end of file diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js index 02f1ea0e..eb112ad2 100644 --- a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js @@ -5,20 +5,36 @@ * @param {UpsertAODDataOptions} options */ function doStart(job, options) { - var batches = []; - var offset = 0; - var total = Simulation3HourlyAODOutputAllRef.fetchCount(); - while ((offset + options.batchSize) < total) { - var fetch_batch = Simulation3HourlyAODOutputAllRef.fetch({ - limit: options.batchSize, - offset: offset - }).objs; - var batchSpec = RemoveAODDataBatch.make({values: fetch_batch}); - batches.push(batchSpec); - offset += options.batchSize; + var deltaTime = Math.abs(options.finalDate - options.initialDate); + var nBatches; + var batches = []; + var hour = 
1000*60*60; + var day = hour*24; + if (options.timeGranularity == 'HOUR') { + nBatches = Math.ceil(deltaTime / hour); + for (var i = 0; i < nBatches; i++) { + var date = options.initialDate + i*hour; + var filter = Filter.ge("geoSurfaceTimePoint.time", date).and().lt("geoSurfaceTimePoint.time", date + hour); + var spec = FetchSpec.make({include: "[id]", limit: options.limit, + filter: filter + }); + batches.push(spec); + } + } + else if (options.timeGranularity == 'DAY') { + nBatches = Math.ceil(deltaTime / day); + for (var i = 0; i < nBatches; i++) { + var date = options.initialDate + i*day; + var filter = Filter.ge("geoSurfaceTimePoint.time", date).and().lt("geoSurfaceTimePoint.time", date + day).toString(); + var spec = FetchSpec.make({include: "[id]", limit: options.limit, + filter: filter + }); + batches.push(spec); + } }; + for (var i = 0; i < batches.length; i++) { job.scheduleBatch(batches[i]); }; @@ -26,12 +42,12 @@ } - /** * @param {UpsertAODDataBatch} batch * @param {UpsertAODData} job * @param {UpsertAODDataOptions} options */ function processBatch(batch, job, options) { - Simulation3HourlyAODOutputAllRef.removeBatch(batch.values) + var objects = Simulation3HourlyAODOutputAllRef(spec=batch); + Simulation3HourlyAODOutputAllRef.removeBatch(objects); } \ No newline at end of file diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index 5474bbeb..5f542df1 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -271,7 +271,7 @@ def upsert3HourlyAODAllRefData(this): df_batch["geoSurfaceTimePoint"] = gst.objs output_records = df_batch.to_dict(orient="records") - c3.Simulation3HourlyAODOutputAllRef.upsertBatch(objs=output_records) + c3.Simulation3HourlyAODOutputAllRef.createBatch(objs=output_records) #this.processed = True c3.SimulationOutputFile(id=this.id, 
processed=True).merge() From 34a8da748ea4b915cdfe33710cc3c4b8625d1a46 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 5 Apr 2022 11:53:11 -0500 Subject: [PATCH 177/188] lil fix --- .../src/BatchJobs/UpsertAODData/RemoveAODDAtaOptions.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDAtaOptions.c3typ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDAtaOptions.c3typ index e74bed20..5ed5b449 100644 --- a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDAtaOptions.c3typ +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDAtaOptions.c3typ @@ -2,7 +2,7 @@ * RemoveAODDataOptions.c3typ * Represents customization options for a {@link RemoveAODData} batch job */ -type UpsertAODDataOptions { +type RemoveAODDataOptions { limit: int=-1 initialDate: datetime finalDate: datetime From 06c01912a05be703386827071ace343bbb3e0eb2 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 5 Apr 2022 13:50:42 -0500 Subject: [PATCH 178/188] trying db annotations --- .../gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ | 3 ++- .../entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ index 5b99b667..cc0ad8a5 100644 --- a/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ +++ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ @@ -3,7 +3,8 @@ * A space-time point where space is the Earth's surface. 
*/ @db(persistDuplicates=false, - compactType=true) + compactType=true, + shortId=true) entity type GeoSurfaceTime schema name 'GSRFC_TM' { // the latitude latitude: !float diff --git a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ index fa4b93d1..361a668a 100644 --- a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ +++ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ @@ -2,7 +2,9 @@ * Simulation3HourlyAODOutputAllRef.c3typ * A single monthly-mean output taken from a single [SimulationSample](type:SimulationSample) with references to {@link GeoSurfaceTime} */ - +@db(index=["geoSurfaceTimePoint", + compactType=true, + shortId=true]) entity type Simulation3HourlyAODOutputAllRef schema name 'SM_3HAOD_TPT_LLRF' { // The {@link SimulationSample} this output belongs to simulationSample: !SimulationSample From 9cbb6fd4a43e831f4ad3600893c81ba89f4501b9 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 5 Apr 2022 14:33:14 -0500 Subject: [PATCH 179/188] lil fix... 
--- .../entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ index 361a668a..260fb97b 100644 --- a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ +++ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ @@ -2,9 +2,9 @@ * Simulation3HourlyAODOutputAllRef.c3typ * A single monthly-mean output taken from a single [SimulationSample](type:SimulationSample) with references to {@link GeoSurfaceTime} */ -@db(index=["geoSurfaceTimePoint", +@db(index=["geoSurfaceTimePoint"], compactType=true, - shortId=true]) + shortId=true) entity type Simulation3HourlyAODOutputAllRef schema name 'SM_3HAOD_TPT_LLRF' { // The {@link SimulationSample} this output belongs to simulationSample: !SimulationSample From 0aa6569383b7fd026aef6cc0cc53bb089d79e636 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 5 Apr 2022 14:46:36 -0500 Subject: [PATCH 180/188] bye remove files --- .../UpsertAODData/RemoveAODData.c3typ | 10 ---- .../BatchJobs/UpsertAODData/RemoveAODData.js | 53 ------------------- .../UpsertAODData/RemoveAODDataBatch.c3typ | 7 --- 3 files changed, 70 deletions(-) delete mode 100644 training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.c3typ delete mode 100644 training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js delete mode 100644 training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDataBatch.c3typ diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.c3typ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.c3typ deleted file mode 100644 index eb0eaf71..00000000 --- a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.c3typ +++ /dev/null @@ -1,10 +0,0 @@ -/** -* RemoveAODData.c3typ -* Batch 
job to delete data from {@link Simulation3HourlyAODOutputAllRef} -*/ -type RemoveAODData extends BatchJob type key 'RM_MNTHMNDT' { - - doStart: ~ js server - processBatch: ~ js server - // allComplete: ~ js server -} \ No newline at end of file diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js deleted file mode 100644 index eb112ad2..00000000 --- a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODData.js +++ /dev/null @@ -1,53 +0,0 @@ -/** - * RemoveAODData.js - * Implementation of RemoveAODData.c3typ - * @param {RemoveAODData} job - * @param {UpsertAODDataOptions} options - */ - function doStart(job, options) { - - var deltaTime = Math.abs(options.finalDate - options.initialDate); - var nBatches; - var batches = []; - var hour = 1000*60*60; - var day = hour*24; - if (options.timeGranularity == 'HOUR') { - nBatches = Math.ceil(deltaTime / hour); - for (var i = 0; i < nBatches; i++) { - var date = options.initialDate + i*hour; - var filter = Filter.ge("geoSurfaceTimePoint.time", date).and().lt("geoSurfaceTimePoint.time", date + hour); - var spec = FetchSpec.make({include: "[id]", limit: options.limit, - filter: filter - }); - batches.push(spec); - } - } - else if (options.timeGranularity == 'DAY') { - nBatches = Math.ceil(deltaTime / day); - for (var i = 0; i < nBatches; i++) { - var date = options.initialDate + i*day; - var filter = Filter.ge("geoSurfaceTimePoint.time", date).and().lt("geoSurfaceTimePoint.time", date + day).toString(); - var spec = FetchSpec.make({include: "[id]", limit: options.limit, - filter: filter - }); - batches.push(spec); - } - }; - - - for (var i = 0; i < batches.length; i++) { - job.scheduleBatch(batches[i]); - }; - -} - - -/** - * @param {UpsertAODDataBatch} batch - * @param {UpsertAODData} job - * @param {UpsertAODDataOptions} options - */ -function processBatch(batch, job, options) { - var objects = 
Simulation3HourlyAODOutputAllRef(spec=batch); - Simulation3HourlyAODOutputAllRef.removeBatch(objects); -} \ No newline at end of file diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDataBatch.c3typ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDataBatch.c3typ deleted file mode 100644 index cd713bac..00000000 --- a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDataBatch.c3typ +++ /dev/null @@ -1,7 +0,0 @@ -/** -* RemoveAODDataBatch.c3typ -* Represents a unit of work (batch) in a {@link RemoveAODData} batch job. -*/ -type RemoveAODDataBatch { - values: [Simulation3HourlyAODOutputAllRef] -} \ No newline at end of file From 0fedc43f1e85b99a69da7302c0b86c490bbfaee6 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 5 Apr 2022 14:49:47 -0500 Subject: [PATCH 181/188] rm another one --- .../BatchJobs/UpsertAODData/RemoveAODDAtaOptions.c3typ | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDAtaOptions.c3typ diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDAtaOptions.c3typ b/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDAtaOptions.c3typ deleted file mode 100644 index 5ed5b449..00000000 --- a/training/gordon-group/src/BatchJobs/UpsertAODData/RemoveAODDAtaOptions.c3typ +++ /dev/null @@ -1,10 +0,0 @@ -/** -* RemoveAODDataOptions.c3typ -* Represents customization options for a {@link RemoveAODData} batch job -*/ -type RemoveAODDataOptions { - limit: int=-1 - initialDate: datetime - finalDate: datetime - timeGranularity: string enum('HOUR', 'DAY') -} \ No newline at end of file From 9b63cba0d27c69ab957e9da4e7bed123baf1c86e Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 5 Apr 2022 17:31:28 -0500 Subject: [PATCH 182/188] moving to cassandra --- .../Simulations/Simulation3HourlyAODOutputAllRef.c3typ | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git 
a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ index 260fb97b..f7712d79 100644 --- a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ +++ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ @@ -2,9 +2,11 @@ * Simulation3HourlyAODOutputAllRef.c3typ * A single monthly-mean output taken from a single [SimulationSample](type:SimulationSample) with references to {@link GeoSurfaceTime} */ -@db(index=["geoSurfaceTimePoint"], - compactType=true, - shortId=true) +@db(datastore='cassandra', + partitionKeyField='geoSurfaceTimePoint', + persistenceOrder='simulationSample', + persistDuplicates=false, + compactType=true) entity type Simulation3HourlyAODOutputAllRef schema name 'SM_3HAOD_TPT_LLRF' { // The {@link SimulationSample} this output belongs to simulationSample: !SimulationSample From 4ba0b383a10e31009343018a4a289bc0a4b6b34a Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Tue, 5 Apr 2022 20:33:46 -0500 Subject: [PATCH 183/188] rm shortId annotation --- .../gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ index cc0ad8a5..5b99b667 100644 --- a/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ +++ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ @@ -3,8 +3,7 @@ * A space-time point where space is the Earth's surface. 
*/ @db(persistDuplicates=false, - compactType=true, - shortId=true) + compactType=true) entity type GeoSurfaceTime schema name 'GSRFC_TM' { // the latitude latitude: !float From aae2dcbbfffaba18d0185c381f4c9c7194888c3e Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 6 Apr 2022 07:01:27 -0500 Subject: [PATCH 184/188] add offset, fix db annotation --- .../gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js | 3 ++- .../src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ | 3 ++- .../gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ | 3 +-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js index 360dca25..f520d4fa 100644 --- a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js @@ -9,7 +9,8 @@ var dataset = SimulationOutputFile.fetchObjStream({ filter: "container == 'monthly-mean'", - limit: options.limit + limit: options.limit, + offset: options.offset }); while(dataset.hasNext()) { diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ index 15df46b5..f0864db7 100644 --- a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ @@ -4,5 +4,6 @@ */ type UpsertAODDataOptions { batchSize: int = 1 - limit: int=-1 + limit: int = -1 + offset: int = 0 } \ No newline at end of file diff --git a/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ index 5b99b667..9e368d1a 100644 --- a/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ +++ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ @@ -2,8 
+2,7 @@ * GeoSurfaceTime.c3typ * A space-time point where space is the Earth's surface. */ -@db(persistDuplicates=false, - compactType=true) +@db(compactType=true) entity type GeoSurfaceTime schema name 'GSRFC_TM' { // the latitude latitude: !float From a232b47ab10e0bc0d7235d354e9f8688fd192713 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 6 Apr 2022 07:34:34 -0500 Subject: [PATCH 185/188] comapctType does not work well --- .../gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ index 9e368d1a..45b2790e 100644 --- a/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ +++ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ @@ -2,7 +2,7 @@ * GeoSurfaceTime.c3typ * A space-time point where space is the Earth's surface. */ -@db(compactType=true) + entity type GeoSurfaceTime schema name 'GSRFC_TM' { // the latitude latitude: !float From 47beba44605662b1508e4df09f8e108c93425938 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 6 Apr 2022 08:11:04 -0500 Subject: [PATCH 186/188] adding point to the name --- .../{GeoSurfaceTime.c3typ => GeoSurfaceTimePoint.c3typ} | 2 +- .../entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ | 2 +- .../gordon-group/src/entity/Simulations/SimulationOutputFile.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename training/gordon-group/src/entity/Coordinates/{GeoSurfaceTime.c3typ => GeoSurfaceTimePoint.c3typ} (78%) diff --git a/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTimePoint.c3typ similarity index 78% rename from training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ rename to training/gordon-group/src/entity/Coordinates/GeoSurfaceTimePoint.c3typ index 45b2790e..cde82eef 100644 --- 
a/training/gordon-group/src/entity/Coordinates/GeoSurfaceTime.c3typ +++ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTimePoint.c3typ @@ -3,7 +3,7 @@ * A space-time point where space is the Earth's surface. */ -entity type GeoSurfaceTime schema name 'GSRFC_TM' { +entity type GeoSurfaceTimePoint schema name 'GSRFCTMPT' { // the latitude latitude: !float // the longitude diff --git a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ index f7712d79..227d6f0e 100644 --- a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ +++ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ @@ -21,5 +21,5 @@ entity type Simulation3HourlyAODOutputAllRef schema name 'SM_3HAOD_TPT_LLRF' { // insoluble aitken mode insolubleAitkenMode: !float // the space-time point for this measurement - geoSurfaceTimePoint: !GeoSurfaceTime + geoSurfaceTimePoint: !GeoSurfaceTimePoint } \ No newline at end of file diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index 5f542df1..e62c46df 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -265,7 +265,7 @@ def upsert3HourlyAODAllRefData(this): # now upsert this output_records = df_st.to_dict(orient="records") - gst = c3.GeoSurfaceTime.upsertBatch(objs=output_records) + gst = c3.GeoSurfaceTimePoint.upsertBatch(objs=output_records) df_batch = pd.DataFrame(df_var) df_batch["geoSurfaceTimePoint"] = gst.objs From ea8cbac950e6b973cb2135f2312f5fc88c57f1c8 Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 6 Apr 2022 10:05:52 -0500 Subject: [PATCH 187/188] add workaround to keep track of files that are failing --- .../src/NetCDF/MetaFileProcessing.c3typ | 10 
++++++++++ .../entity/Simulations/SimulationOutputFile.c3typ | 2 ++ .../src/entity/Simulations/SimulationOutputFile.py | 13 +++++++++++-- 3 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 training/gordon-group/src/NetCDF/MetaFileProcessing.c3typ diff --git a/training/gordon-group/src/NetCDF/MetaFileProcessing.c3typ b/training/gordon-group/src/NetCDF/MetaFileProcessing.c3typ new file mode 100644 index 00000000..9ced659f --- /dev/null +++ b/training/gordon-group/src/NetCDF/MetaFileProcessing.c3typ @@ -0,0 +1,10 @@ +/** +* Contains meta data about attempts to process files. +*/ + +type MetaFileProcessing { + // the timestamp for the last attempt to process the file + lastProcessAttempt: datetime + // did the attempt succeed? + lastAttemptFailed: boolean +} \ No newline at end of file diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ index 35cedf83..7618783c 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.c3typ @@ -12,6 +12,8 @@ entity type SimulationOutputFile schema name 'SMLTN_OTPT_FL' { container: !string enum('acure-aircraft', 'monthly-mean') // processed processed: boolean post default "false" + // processing metadata + processMeta: MetaFileProcessing // Load data from this file into {@link SimulationModelOutput} @py(env='gordon_1_0_0') diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index e62c46df..dbdb5555 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -224,6 +224,7 @@ def upsert3HourlyAODAllRefData(this): """ import pandas as pd import numpy as np + from datetime import datetime as dt # verify file container 
if(this.container == 'monthly-mean'): @@ -265,7 +266,13 @@ def upsert3HourlyAODAllRefData(this): # now upsert this output_records = df_st.to_dict(orient="records") - gst = c3.GeoSurfaceTimePoint.upsertBatch(objs=output_records) + try: + gst = c3.GeoSurfaceTimePoint.upsertBatch(objs=output_records) + except: + meta = c3.MetaFileProcessing(lastProcessAttempt=dt.now(), + lastAttemptFailed=True) + c3.SimulationOutputFile(id=this.id, processMeta=meta).merge() + return False df_batch = pd.DataFrame(df_var) df_batch["geoSurfaceTimePoint"] = gst.objs @@ -274,7 +281,9 @@ def upsert3HourlyAODAllRefData(this): c3.Simulation3HourlyAODOutputAllRef.createBatch(objs=output_records) #this.processed = True - c3.SimulationOutputFile(id=this.id, processed=True).merge() + meta = c3.MetaFileProcessing(lastProcessAttempt=dt.now(), + lastAttemptFailed=False) + c3.SimulationOutputFile(id=this.id, processed=True, processMeta=meta).merge() #c3.SimulationOutputFile.merge(this) return True From 26ca04257bb797d0cf90ea47e5740634dcbbe23a Mon Sep 17 00:00:00 2001 From: babreu-ncsa Date: Wed, 6 Apr 2022 13:21:03 -0500 Subject: [PATCH 188/188] add filters to the batch job --- .../src/BatchJobs/UpsertAODData/UpsertAODData.js | 4 +++- .../UpsertAODData/UpsertAODDataOptions.c3typ | 5 +++++ .../src/entity/Simulations/SimulationOutputFile.py | 12 ++++++++---- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js index f520d4fa..ff4d4e14 100644 --- a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js @@ -7,8 +7,10 @@ function doStart(job, options) { var batch = []; + var finalFilter = options.filter.and().eq("container", "monthly-mean"); + var dataset = SimulationOutputFile.fetchObjStream({ - filter: "container == 'monthly-mean'", + filter: finalFilter, limit: options.limit, 
offset: options.offset }); diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ index f0864db7..113c8d78 100644 --- a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ @@ -3,7 +3,12 @@ * Represents customization options for a {@link UpsertAODData} batch job */ type UpsertAODDataOptions { + // the number of files that will be processed in each batch batchSize: int = 1 + // a limit to the number of files that will be processed overall limit: int = -1 + // offset in the fetch to SimulationOutputFile offset: int = 0 + // any additional filters + filter: Filter } \ No newline at end of file diff --git a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py index dbdb5555..5967fb39 100644 --- a/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py +++ b/training/gordon-group/src/entity/Simulations/SimulationOutputFile.py @@ -276,15 +276,19 @@ def upsert3HourlyAODAllRefData(this): df_batch = pd.DataFrame(df_var) df_batch["geoSurfaceTimePoint"] = gst.objs - output_records = df_batch.to_dict(orient="records") - c3.Simulation3HourlyAODOutputAllRef.createBatch(objs=output_records) + try: + c3.Simulation3HourlyAODOutputAllRef.createBatch(objs=output_records) + except: + meta = c3.MetaFileProcessing(lastProcessAttempt=dt.now(), + lastAttemptFailed=True) + c3.SimulationOutputFile(id=this.id, processMeta=meta).merge() + return False - #this.processed = True + # if we get here, it worked meta = c3.MetaFileProcessing(lastProcessAttempt=dt.now(), lastAttemptFailed=False) c3.SimulationOutputFile(id=this.id, processed=True, processMeta=meta).merge() - #c3.SimulationOutputFile.merge(this) return True