diff --git a/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json b/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json new file mode 100644 index 00000000..80f96da7 --- /dev/null +++ b/training/gordon-group/seed/ActionRuntime/py-gordon-ML_1_0_0.json @@ -0,0 +1,18 @@ +{ + "language": "Python", + "runtimeVersion": "3.6", + "modules": { + "conda.scikit-learn":"=0.23.1", + "conda.pandas":"=1.0.1", + "conda.numpy":"=1.18.1", + "conda.scipy":"=1.4.1", + "conda.dill":"=0.2.8.2" + }, + "repositories": [ + "https://repo.continuum.io/pkgs/main", + "conda-forge" + ], + "runtime": "CPython", + "name": "py-gordon-ML_1_0_0", + "id": "py-gordon-ML_1_0_0" +} \ No newline at end of file diff --git a/training/gordon-group/seed/ActionRuntime/py-gordon-nc4-_1_0_0.json b/training/gordon-group/seed/ActionRuntime/py-gordon-nc4-_1_0_0.json new file mode 100644 index 00000000..5a91647f --- /dev/null +++ b/training/gordon-group/seed/ActionRuntime/py-gordon-nc4-_1_0_0.json @@ -0,0 +1,15 @@ +{ + "language": "Python", + "runtimeVersion": "3.6", + "modules": { + "conda.netcdf4": "=1.5.7", + "conda.dill":"=0.2.8.2" + }, + "repositories": [ + "https://repo.continuum.io/pkgs/main", + "conda-forge" + ], + "runtime": "CPython", + "name": "py-gordon-nc4_1_0_0", + "id": "py-gordon-nc4_1_0_0" +} \ No newline at end of file diff --git a/training/gordon-group/seed/ActionRuntime/py-gordon_1_0_0.json b/training/gordon-group/seed/ActionRuntime/py-gordon_1_0_0.json index e6318f0c..2cb29fbd 100644 --- a/training/gordon-group/seed/ActionRuntime/py-gordon_1_0_0.json +++ b/training/gordon-group/seed/ActionRuntime/py-gordon_1_0_0.json @@ -6,7 +6,9 @@ "conda.netcdf4": "=1.5.7", "conda.numpy": "=1.17.0", "conda.pandas": "=1.1.5", - "conda.cartopy":"=0.18.0" + "conda.cartopy":"=0.18.0", + "conda.basemap":"=1.2.2", + "conda.iris":"=3.1.0" }, "repositories": [ "https://repo.continuum.io/pkgs/main", diff --git a/training/gordon-group/seed/ActionRuntime/py-gordon_2_0_0.json 
b/training/gordon-group/seed/ActionRuntime/py-gordon_2_0_0.json index b7c22bd7..e77d6e4a 100644 --- a/training/gordon-group/seed/ActionRuntime/py-gordon_2_0_0.json +++ b/training/gordon-group/seed/ActionRuntime/py-gordon_2_0_0.json @@ -2,8 +2,15 @@ "language": "Python", "runtimeVersion": "3.6", "modules": { - "conda.cartopy":"=0.19.0", - "conda.pandas": "=1.0.1" + "conda.pip": "=21.2.2", + "conda.netcdf4": "=1.5.7", + "conda.numpy": "=1.17.0", + "conda.pandas": "=1.1.5", + "conda.cartopy":"=0.18.0", + "conda.basemap":"=1.2.2", + "conda.iris":"=3.1.0", + "conda.scikit-learn":"=0.23.1", + "conda.dill":"=0.2.8.2" }, "repositories": [ "https://repo.continuum.io/pkgs/main", diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.c3typ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.c3typ new file mode 100644 index 00000000..c6132cad --- /dev/null +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.c3typ @@ -0,0 +1,10 @@ +/** +* UpsertAODData.c3typ +* Batch job to upsert data from files in {@link SimulationOutputFile} with container=monthly-mean in parallel +*/ +type UpsertAODData extends BatchJob type key 'PSRT_MNTHMNDT' { + + doStart: ~ js server + processBatch: ~ js server + // allComplete: ~ js server +} \ No newline at end of file diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js new file mode 100644 index 00000000..ff4d4e14 --- /dev/null +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODData.js @@ -0,0 +1,41 @@ +/** + * UpsertAODData.js + * Implementation of UpsertAODData.c3typ + * @param {UpsertAODData} job - the job on which each batch is scheduled + * @param {UpsertAODDataOptions} options - supplies filter, limit, offset and batchSize + */ + function doStart(job, options) { + var batch = []; + + var finalFilter = options.filter.and().eq("container", "monthly-mean"); + + var dataset = SimulationOutputFile.fetchObjStream({ + filter: finalFilter, + limit: options.limit, + offset: 
options.offset + }); + + while(dataset.hasNext()) { + batch.push(dataset.next()); + + if (batch.length >= options.batchSize || !dataset.hasNext()) { + var batchSpec = UpsertAODDataBatch.make({values: batch}); + job.scheduleBatch(batchSpec); + + batch = []; + } + } +} + + + +/** + * @param {UpsertAODDataBatch} batch + * @param {UpsertAODData} job + * @param {UpsertAODDataOptions} options + */ +function processBatch(batch, job, options) { + batch.values.forEach(function(file) { + file.upsert3HourlyAODAllRefData(); + }); +} \ No newline at end of file diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataBatch.c3typ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataBatch.c3typ new file mode 100644 index 00000000..ed095d64 --- /dev/null +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataBatch.c3typ @@ -0,0 +1,7 @@ +/** +* UpsertAODDataBatch.c3typ +* Represents a unit of work (batch) in a {@link UpsertAODData} batch job. +*/ +type UpsertAODDataBatch { + values: [SimulationOutputFile] +} \ No newline at end of file diff --git a/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ new file mode 100644 index 00000000..113c8d78 --- /dev/null +++ b/training/gordon-group/src/BatchJobs/UpsertAODData/UpsertAODDataOptions.c3typ @@ -0,0 +1,14 @@ +/** +* UpsertAODDataOptions.c3typ +* Represents customization options for a {@link UpsertAODData} batch job +*/ +type UpsertAODDataOptions { + // the number of files that will be processed in each batch + batchSize: int = 1 + // a limit to the number of files that will be processed overall + limit: int = -1 + // offset in the fetch to SimulationOutputFile + offset: int = 0 + // any additional filters + filter: Filter +} \ No newline at end of file diff --git a/training/gordon-group/src/UpsertData.c3typ b/training/gordon-group/src/BatchJobs/UpsertData/UpsertData.c3typ similarity 
index 100% rename from training/gordon-group/src/UpsertData.c3typ rename to training/gordon-group/src/BatchJobs/UpsertData/UpsertData.c3typ diff --git a/training/gordon-group/src/UpsertData.js b/training/gordon-group/src/BatchJobs/UpsertData/UpsertData.js similarity index 91% rename from training/gordon-group/src/UpsertData.js rename to training/gordon-group/src/BatchJobs/UpsertData/UpsertData.js index 189bf716..193dd992 100644 --- a/training/gordon-group/src/UpsertData.js +++ b/training/gordon-group/src/BatchJobs/UpsertData/UpsertData.js @@ -15,7 +15,7 @@ function doStart(job, options) { while(dataset.hasNext()) { batch.push(dataset.next()); - if (dataset.length >= options.batchSize || !dataset.hasNext()) { + if (batch.length >= options.batchSize || !dataset.hasNext()) { var batchSpec = UpsertDataBatch.make({values: batch}); job.scheduleBatch(batchSpec); diff --git a/training/gordon-group/src/UpsertDataBatch.c3typ b/training/gordon-group/src/BatchJobs/UpsertData/UpsertDataBatch.c3typ similarity index 100% rename from training/gordon-group/src/UpsertDataBatch.c3typ rename to training/gordon-group/src/BatchJobs/UpsertData/UpsertDataBatch.c3typ diff --git a/training/gordon-group/src/UpsertDataOptions.c3typ b/training/gordon-group/src/BatchJobs/UpsertData/UpsertDataOptions.c3typ similarity index 100% rename from training/gordon-group/src/UpsertDataOptions.c3typ rename to training/gordon-group/src/BatchJobs/UpsertData/UpsertDataOptions.c3typ diff --git a/training/gordon-group/src/UpsertObsData.c3typ b/training/gordon-group/src/BatchJobs/UpsertObsData/UpsertObsData.c3typ similarity index 69% rename from training/gordon-group/src/UpsertObsData.c3typ rename to training/gordon-group/src/BatchJobs/UpsertObsData/UpsertObsData.c3typ index fbc99d93..3d6ba12d 100644 --- a/training/gordon-group/src/UpsertObsData.c3typ +++ b/training/gordon-group/src/BatchJobs/UpsertObsData/UpsertObsData.c3typ @@ -1,6 +1,6 @@ /** * UpsertObsData.c3typ -* Batch job to upsert data from every {@link 
SimulationSample} in a {@link SimulationEnsemble} in parallel +* Batch job to upsert data from every {@link ObservationSet} in parallel */ type UpsertObsData extends BatchJob type key 'UPSERTOBSDATA' { diff --git a/training/gordon-group/src/UpsertObsData.js b/training/gordon-group/src/BatchJobs/UpsertObsData/UpsertObsData.js similarity index 91% rename from training/gordon-group/src/UpsertObsData.js rename to training/gordon-group/src/BatchJobs/UpsertObsData/UpsertObsData.js index 72c33d23..62a6c646 100644 --- a/training/gordon-group/src/UpsertObsData.js +++ b/training/gordon-group/src/BatchJobs/UpsertObsData/UpsertObsData.js @@ -15,7 +15,7 @@ while(dataset.hasNext()) { batch.push(dataset.next()); - if (dataset.length >= options.batchSize || !dataset.hasNext()) { + if (batch.length >= options.batchSize || !dataset.hasNext()) { var batchSpec = UpsertObsDataBatch.make({values: batch}); job.scheduleBatch(batchSpec); diff --git a/training/gordon-group/src/UpsertObsDataBatch.c3typ b/training/gordon-group/src/BatchJobs/UpsertObsData/UpsertObsDataBatch.c3typ similarity index 100% rename from training/gordon-group/src/UpsertObsDataBatch.c3typ rename to training/gordon-group/src/BatchJobs/UpsertObsData/UpsertObsDataBatch.c3typ diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ new file mode 100644 index 00000000..de2d4439 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.c3typ @@ -0,0 +1,19 @@ +/** +* GaussianProcessRegressionPipe.c3typ +* Performs Scikit-Learn's GP Regression for a set of inputs-outputs. 
+*/ +entity type GaussianProcessRegressionPipe extends MLLeafPipe type key 'GPREG' { + // the technique for this regression + @db(persistAllFields=true) + technique: !GaussianProcessRegressionTechnique + + // train the model + @py(env='gordon-ML_1_0_0') + train: ~ + // process data, ie predictions + @py(env='gordon-ML_1_0_0') + process: ~ + // guarantee that process() is only allowed after train() + @py(env='gordon-ML_1_0_0') + isProcessable: ~ +} diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.py new file mode 100644 index 00000000..764abb82 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionPipe.py @@ -0,0 +1,44 @@ +def train(this, input, targetOutput, spec): + """ + Fits a Scikit-Learn GaussianProcessRegressor on input/targetOutput and stores the pickled model in this.trainedModel. + https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcessRegressor.html + """ + from sklearn.gaussian_process import GaussianProcessRegressor + + # convert the c3 Datasets to numpy; the target is flattened to 1-D + X = c3.Dataset.toNumpy(dataset=input) + y = c3.Dataset.toNumpy(dataset=targetOutput).flatten() + + # get kernel object from c3, make it python again + kernel = c3.PythonSerialization.deserialize(serialized=this.technique.kernel.pickledKernel) + + # build and train GPR; forward the technique's randomState hyperparameter (None keeps sklearn's default) + gp = GaussianProcessRegressor(kernel=kernel, random_state=this.technique.randomState) + gp.fit(X, y) + + # pickle model + this.trainedModel = c3.MLTrainedModelArtifact(model=c3.PythonSerialization.serialize(obj=gp)) + + return this + + +def process(this, input, spec): + """ + Performs Scikit-Learn's GaussianProcessRegressor's predict(). 
+ https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcessRegressor.html + """ + # unpickle the model + gp = c3.PythonSerialization.deserialize(serialized=this.trainedModel.model) + + # format data + X = c3.Dataset.toNumpy(dataset=input) + + return c3.Dataset.fromPython(pythonData=gp.predict(X)) + + +def isProcessable(this): + """ + Returns this.isTrained(), so that process() can only be called after train() + """ + + return this.isTrained() diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ new file mode 100644 index 00000000..50485926 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/GaussianProcessRegressionTechnique.c3typ @@ -0,0 +1,12 @@ +/** +* GaussianProcessRegressionTechnique.c3typ +* Defines hyper parameters for GP Regression. +*/ +type GaussianProcessRegressionTechnique mixes MLTechnique { + // random seed + @ML(hyperParameter=true) + randomState: integer + // the kernel object + @ML(hyperParameter=true) + kernel: SklearnGPRKernel +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ new file mode 100644 index 00000000..bce84311 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernel.c3typ @@ -0,0 +1,13 @@ +/** +* SklearnGPRKernel.c3typ +* Stepping stone for Scikit-Learn Gaussian Process kernels. 
+* See: https://scikit-learn.org/stable/modules/classes.html#module-sklearn.gaussian_process +*/ +type SklearnGPRKernel { + // the name of the kernel + name: string enum('Constant', 'DotProduct', 'ExpSineSquared', 'Matern', 'RBF', 'RationalQuadratic', 'White') + // the kernel hyper parameters + hyperParameters: map + // the pickled kernel + pickledKernel: string +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ new file mode 100644 index 00000000..e50f0452 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.c3typ @@ -0,0 +1,15 @@ +/** +* SklearnGPRKernelConstant.c3typ +* Scikit-Learn Gaussian Process Constant Kernel. +* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.ConstantKernel.html#sklearn.gaussian_process.kernels.ConstantKernel +*/ +type SklearnGPRKernelConstant { + // Constant value that defines the kernel value + constantValue: !double + // the SklearnGPRKernel for this object + kernel: SklearnGPRKernel + + // constructs the SklearnGPRKernel internal object + @py(env='gordon-ML_1_0_0') + build: member function(): SklearnGPRKernelConstant +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py new file mode 100644 index 00000000..0bb23aba --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelConstant.py @@ -0,0 +1,21 @@ +def build(this): + """ + This effectively constructs the type instance, by creating the object to be placed in the kernel field. 
Ideally this should be replaced by a callback function (beforeMake or afterMake). + """ + from sklearn.gaussian_process.kernels import ConstantKernel + + sklKernel = ConstantKernel(this.constantValue) + + kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) + kernel_name = 'Constant' + kernel_hyperParameters = c3.c3Make( + "map", {"constantValue": this.constantValue} + ) + + this.kernel = c3.SklearnGPRKernel( + name=kernel_name, + hyperParameters=kernel_hyperParameters, + pickledKernel=kernel_pickled + ) + + return this \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ new file mode 100644 index 00000000..65f4fa85 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.c3typ @@ -0,0 +1,15 @@ +/** +* SklearnGPRKernelDotProduct.c3typ +* Scikit-Learn Gaussian Process Dot Product Kernel. 
+* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.DotProduct.html#sklearn.gaussian_process.kernels.DotProduct +*/ +type SklearnGPRKernelDotProduct { + // Constant that defines the kernel inhomogeneity (passed to sklearn as sigma_0) + sigmaZero: !double + // the SklearnGPRKernel for this object + kernel: SklearnGPRKernel + + // constructs the SklearnGPRKernel internal object + @py(env='gordon-ML_1_0_0') + build: member function(): SklearnGPRKernelDotProduct +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.py new file mode 100644 index 00000000..7613d20b --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelDotProduct.py @@ -0,0 +1,21 @@ +def build(this): + """ + This effectively constructs the type instance, by creating the object to be placed in the kernel field. Ideally this should be replaced by a callback function (beforeMake or afterMake). 
+ """ + from sklearn.gaussian_process.kernels import DotProduct + + sklKernel = DotProduct(sigma_0=this.sigmaZero) + + kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) + kernel_name = 'DotProduct' + kernel_hyperParameters = c3.c3Make( + "map", {"sigmaZero": this.sigmaZero} + ) + + this.kernel = c3.SklearnGPRKernel( + name=kernel_name, + hyperParameters=kernel_hyperParameters, + pickledKernel=kernel_pickled + ) + + return this \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.c3typ new file mode 100644 index 00000000..e0b23e25 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.c3typ @@ -0,0 +1,17 @@ +/** +* SklearnGPRKernelExpSineSquared.c3typ +* Scikit-Learn Gaussian Process Exponential Sine Squared Kernel. 
+* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.ExpSineSquared.html#sklearn.gaussian_process.kernels.ExpSineSquared +*/ +type SklearnGPRKernelExpSineSquared { + // Constant that defines the kernel length scale + lengthScale: !double + // Constant that defines the kernel periodicity + periodicity: !double + // the SklearnGPRKernel for this object + kernel: SklearnGPRKernel + + // constructs the SklearnGPRKernel internal object + @py(env='gordon-ML_1_0_0') + build: member function(): SklearnGPRKernelExpSineSquared +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.py new file mode 100644 index 00000000..544e10db --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelExpSineSquared.py @@ -0,0 +1,23 @@ +def build(this): + """ + This effectively constructs the type instance, by creating the object to be placed in the kernel field. Ideally this should be replaced by a callback function (beforeMake or afterMake). 
+ """ + from sklearn.gaussian_process.kernels import ExpSineSquared + + sklKernel = ExpSineSquared(length_scale=this.lengthScale, periodicity=this.periodicity) + + kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) + kernel_name = 'ExpSineSquared' + kernel_hyperParameters = c3.c3Make( + "map", {"lengthScale": this.lengthScale, + "periodicity": this.periodicity + } + ) + + this.kernel = c3.SklearnGPRKernel( + name=kernel_name, + hyperParameters=kernel_hyperParameters, + pickledKernel=kernel_pickled + ) + + return this \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.c3typ new file mode 100644 index 00000000..143d8798 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.c3typ @@ -0,0 +1,17 @@ +/** +* SklearnGPRKernelMatern.c3typ +* Scikit-Learn Gaussian Process Matern Kernel. 
+* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.Matern.html#sklearn.gaussian_process.kernels.Matern +*/ +type SklearnGPRKernelMatern { + // Constant that defines the kernel length scale + lengthScale: !double + // Gamma and modified Bessel function orders + nu: !double + // the SklearnGPRKernel for this object + kernel: SklearnGPRKernel + + // constructs the SklearnGPRKernel internal object + @py(env='gordon-ML_1_0_0') + build: member function(): SklearnGPRKernelMatern +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.py new file mode 100644 index 00000000..550df242 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelMatern.py @@ -0,0 +1,23 @@ +def build(this): + """ + This effectively constructs the type instance, by creating the object to be placed in the kernel field. Ideally this should be replaced by a callback function (beforeMake or afterMake). 
+ """ + from sklearn.gaussian_process.kernels import Matern + + sklKernel = Matern(length_scale=this.lengthScale, nu=this.nu) + + kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) + kernel_name = 'Matern' + kernel_hyperParameters = c3.c3Make( + "map", {"lengthScale": this.lengthScale, + "nu": this.nu + } + ) + + this.kernel = c3.SklearnGPRKernel( + name=kernel_name, + hyperParameters=kernel_hyperParameters, + pickledKernel=kernel_pickled + ) + + return this \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.c3typ new file mode 100644 index 00000000..a29354c9 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.c3typ @@ -0,0 +1,15 @@ +/** +* SklearnGPRKernelRBF.c3typ +* Scikit-Learn Gaussian Process RBF Kernel. +* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.RBF.html#sklearn.gaussian_process.kernels.RBF +*/ +type SklearnGPRKernelRBF type key "SKLRN_GPR_KRNL_RBF" { + // Constant that defines the kernel length scale + lengthScale: !double + // the SklearnGPRKernel for this object + kernel: SklearnGPRKernel + + // constructs the SklearnGPRKernel internal object + @py(env='gordon-ML_1_0_0') + build: member function(): SklearnGPRKernelRBF +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.py new file mode 100644 index 00000000..d8dced50 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRBF.py @@ -0,0 +1,23 @@ +def build(this): + """ + This effectively constructs the type instance, by creating 
the object to be placed in the kernel field. Ideally this should be replaced by a callback function (beforeMake or afterMake). + """ + from sklearn.gaussian_process.kernels import RBF + + sklKernel = RBF(length_scale=this.lengthScale) + + kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) + kernel_name = 'RBF' + kernel_hyperParameters = c3.c3Make( + "map", {"lengthScale": this.lengthScale + } + ) + + + this.kernel = c3.SklearnGPRKernel( + name=kernel_name, + hyperParameters=kernel_hyperParameters, + pickledKernel=kernel_pickled + ) + + return this \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.c3typ new file mode 100644 index 00000000..43906a44 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.c3typ @@ -0,0 +1,17 @@ +/** +* SklearnGPRKernelRationalQuadratic.c3typ +* Scikit-Learn Gaussian Process Rational Quadratic Kernel. 
+* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.RationalQuadratic.html#sklearn.gaussian_process.kernels.RationalQuadratic +*/ +type SklearnGPRKernelRationalQuadratic { + // Constant that defines the kernel length scale + lengthScale: !double + // Scale mixture parameter > 0 + alpha: !double + // the SklearnGPRKernel for this object + kernel: SklearnGPRKernel + + // constructs the SklearnGPRKernel internal object + @py(env='gordon-ML_1_0_0') + build: member function(): SklearnGPRKernelRationalQuadratic +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.py new file mode 100644 index 00000000..2e6cc5f6 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelRationalQuadratic.py @@ -0,0 +1,23 @@ +def build(this): + """ + This effectively constructs the type instance, by creating the object to be placed in the kernel field. Ideally this should be replaced by a callback function (beforeMake or afterMake). 
+ """ + from sklearn.gaussian_process.kernels import RationalQuadratic + + sklKernel = RationalQuadratic(length_scale=this.lengthScale, alpha=this.alpha) + + kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) + kernel_name = 'RationalQuadratic' + kernel_hyperParameters = c3.c3Make( + "map", {"lengthScale": this.lengthScale, + "alpha": this.alpha + } + ) + + this.kernel = c3.SklearnGPRKernel( + name=kernel_name, + hyperParameters=kernel_hyperParameters, + pickledKernel=kernel_pickled + ) + + return this \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.c3typ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.c3typ new file mode 100644 index 00000000..374bb41d --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.c3typ @@ -0,0 +1,15 @@ +/** +* SklearnGPRKernelWhite.c3typ +* Scikit-Learn Gaussian Process White Kernel. 
+* See: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.WhiteKernel.html#sklearn.gaussian_process.kernels.WhiteKernel +*/ +type SklearnGPRKernelWhite { + // Controls the noise level (variance) of kernel + noiseLevel: !double + // the SklearnGPRKernel for this object + kernel: SklearnGPRKernel + + // constructs the SklearnGPRKernel internal object + @py(env='gordon-ML_1_0_0') + build: member function(): SklearnGPRKernelWhite +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.py b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.py new file mode 100644 index 00000000..1f73da08 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/machineLearning/GPRegression/kernels/SklearnGPRKernelWhite.py @@ -0,0 +1,22 @@ +def build(this): + """ + This effectively constructs the type instance, by creating the object to be placed in the kernel field. Ideally this should be replaced by a callback function (beforeMake or afterMake). 
+ """ + from sklearn.gaussian_process.kernels import WhiteKernel + + sklKernel = WhiteKernel(noise_level=this.noiseLevel) + + kernel_pickled = c3.PythonSerialization.serialize(obj=sklKernel) + kernel_name = 'White' + kernel_hyperParameters = c3.c3Make( + "map", {"noiseLevel": this.noiseLevel + } + ) + + this.kernel = c3.SklearnGPRKernel( + name=kernel_name, + hyperParameters=kernel_hyperParameters, + pickledKernel=kernel_pickled + ) + + return this \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PCA/PrincipalComponentAnalysisPipe.c3typ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PCA/PrincipalComponentAnalysisPipe.c3typ new file mode 100644 index 00000000..72ea55bb --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PCA/PrincipalComponentAnalysisPipe.c3typ @@ -0,0 +1,22 @@ +/** +* PrincipalComponentAnalysisPipe.c3typ +* Performs Scikit-Learn's PCA for a set of features. +*/ +entity type PrincipalComponentAnalysisPipe extends MLLeafPipe type key 'PCA' { + // ATTRIBUTES + // the technique used for this pipe + technique: !PrincipalComponentAnalysisTechnique + + // METHODS + // train method (pre compute 'fillers' for usage when process() is called) + // this calls the fit() method of sklearn PCA + @py(env='gordon-ML_1_0_0') + train: ~ + // process method to actually transform data + // this calls the transform() method of sklearn PCA + @py(env='gordon-ML_1_0_0') + process: ~ + // this is to make sure process is only callable after train + @py(env='gordon-ML_1_0_0') + isProcessable: ~ +} \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PCA/PrincipalComponentAnalysisPipe.py b/training/gordon-group/src/CustomMLPipeline/preProcessing/PCA/PrincipalComponentAnalysisPipe.py new file mode 100644 index 00000000..011151f4 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PCA/PrincipalComponentAnalysisPipe.py @@ -0,0 
+1,43 @@ +def train(this, input, targetOutput, spec): + """ + Performs Scikit-Learn's PCA fit() and stores the pickled PCA in this.trainedModel. + https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html + """ + from sklearn.decomposition import PCA + + # get number of PCA components + nComps = this.technique.nComponents + + # cast features into pandas df + data = c3.Dataset.toPandas(dataset=input) + + # fit the PCA with the requested number of components + pca = PCA(n_components=nComps) + # fit only; the transform itself is applied later in process() + pca.fit(data) + + # serialize this training + this.trainedModel = c3.MLTrainedModelArtifact(model=c3.PythonSerialization.serialize(obj=pca)) + + return this + + +def process(this, input, spec): + """ + Performs Scikit-Learn's PCA transform() using the PCA unpickled from this.trainedModel. + https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html + """ + + data = c3.Dataset.toPandas(dataset=input) + pca = c3.PythonSerialization.deserialize(serialized=this.trainedModel.model) + data = pca.transform(data) + + return c3.Dataset.fromPython(pythonData=data) + + +def isProcessable(this): + """ + Returns this.isTrained(), so that process() can only be called after train() + """ + + return this.isTrained() \ No newline at end of file diff --git a/training/gordon-group/src/CustomMLPipeline/preProcessing/PCA/PrincipalComponentAnalysisTechnique.c3typ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PCA/PrincipalComponentAnalysisTechnique.c3typ new file mode 100644 index 00000000..2b572734 --- /dev/null +++ b/training/gordon-group/src/CustomMLPipeline/preProcessing/PCA/PrincipalComponentAnalysisTechnique.c3typ @@ -0,0 +1,8 @@ +/** +* PrincipalComponentAnalysisTechnique.c3typ +* Defines hyper parameters for PCA. 
+*/ +type PrincipalComponentAnalysisTechnique mixes MLTechnique { + // the number of PCA components + nComponents: integer +} \ No newline at end of file diff --git a/training/gordon-group/src/NetCDF/MetaFileProcessing.c3typ b/training/gordon-group/src/NetCDF/MetaFileProcessing.c3typ new file mode 100644 index 00000000..9ced659f --- /dev/null +++ b/training/gordon-group/src/NetCDF/MetaFileProcessing.c3typ @@ -0,0 +1,10 @@ +/** +* Contains metadata about attempts to process files. +*/ + +type MetaFileProcessing { + // the timestamp for the last attempt to process the file + lastProcessAttempt: datetime + // true if the last attempt to process the file failed + lastAttemptFailed: boolean +} \ No newline at end of file diff --git a/training/gordon-group/src/NetCDFUtil.c3typ b/training/gordon-group/src/NetCDF/NetCDFUtil.c3typ similarity index 100% rename from training/gordon-group/src/NetCDFUtil.c3typ rename to training/gordon-group/src/NetCDF/NetCDFUtil.c3typ diff --git a/training/gordon-group/src/NetCDFUtil.py b/training/gordon-group/src/NetCDF/NetCDFUtil.py similarity index 100% rename from training/gordon-group/src/NetCDFUtil.py rename to training/gordon-group/src/NetCDF/NetCDFUtil.py diff --git a/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.c3typ b/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.c3typ new file mode 100644 index 00000000..514972e3 --- /dev/null +++ b/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.c3typ @@ -0,0 +1,12 @@ +/** +* NetCDFUtilSerialized.c3typ +* Utility box to deal with NetCDF files. Methods return serialized python objects and can be run on the server as opposed to local env.
+**/ +type NetCDFUtilSerialized { + // Open NetCDF file from external source + @py(env='gordon-nc4_1_0_0') + openFile: function(url: !string, localPath: string = '/tmp'): any + // Close NetCDF file + @py(env='gordon-nc4_1_0_0') + closeFile: function(ds: !any, url: !string, localPath: string = '/tmp'): integer +} \ No newline at end of file diff --git a/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.py b/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.py new file mode 100644 index 00000000..a13efc76 --- /dev/null +++ b/training/gordon-group/src/NetCDF/NetCDFUtilSerialized.py @@ -0,0 +1,38 @@ +def openFile(url, localPath='/tmp'): + """ + Opens a NetCDF file from an external storage path (e.g. Azure blob) + + Arguments: + -url (str): URL to NetCDF file + -localPath (str): Local directory the file is copied into + Returns: + -any: The serialized (pickled) netCDF4 Dataset object + """ + import netCDF4 as nc + import os + filename = os.path.basename(url) + tmp_path = localPath + '/' + filename + c3.Client.copyFilesToLocalClient(url, localPath) + pickledNC = c3.PythonSerialization.serialize(obj=nc.Dataset(tmp_path)) + + return pickledNC + + +def closeFile(ds, url, localPath='/tmp'): + """ + Closes a NetCDF file. + + Arguments: + -ds (netCDF4.Dataset): A netCDF4 Dataset object + -url (str): URL to a NetCDF file + -localPath (str): Local directory holding the copied file + Returns: + -integer: always 1 + """ + import os + ds.close() + filename = os.path.basename(url) + tmp_path = localPath + '/' + filename + os.remove(tmp_path) + + return 1 \ No newline at end of file diff --git a/training/gordon-group/src/entity/Coordinates/GeoSurfaceTimePoint.c3typ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTimePoint.c3typ new file mode 100644 index 00000000..cde82eef --- /dev/null +++ b/training/gordon-group/src/entity/Coordinates/GeoSurfaceTimePoint.c3typ @@ -0,0 +1,13 @@ +/** +* GeoSurfaceTimePoint.c3typ +* A space-time point where space is the Earth's surface.
+*/ + +entity type GeoSurfaceTimePoint schema name 'GSRFCTMPT' { + // the latitude + latitude: !float + // the longitude + longitude: !float + // the timestamp + time: !datetime +} \ No newline at end of file diff --git a/training/gordon-group/src/entity/Coordinates/Latitude.c3typ b/training/gordon-group/src/entity/Coordinates/Latitude.c3typ new file mode 100644 index 00000000..5ec64cee --- /dev/null +++ b/training/gordon-group/src/entity/Coordinates/Latitude.c3typ @@ -0,0 +1,8 @@ +/** +* Latitude.c3typ +* Type to support geospatial coordinates +*/ +entity type Latitude schema name 'LTTD' { + // the latitude of this entry + value: !double +} \ No newline at end of file diff --git a/training/gordon-group/src/entity/ObservationOutput.c3typ b/training/gordon-group/src/entity/ObservationOutput.c3typ deleted file mode 100644 index c23a5b3e..00000000 --- a/training/gordon-group/src/entity/ObservationOutput.c3typ +++ /dev/null @@ -1,49 +0,0 @@ -/** -* ObservationOutput.c3typ -* A single output taken from a single [Observation](type:Observation) -*/ -@db(datastore='cassandra', - partitionKeyField='parent', - persistenceOrder='start, dataVersion', - persistDuplicates=false, - compactType=true, - unique=['parent, start']) -entity type ObservationOutput mixes TimedDataPoint schema name 'OBS_TPT' { - // The Latitude of this output - @ts(treatment='avg') - latitude: double - // The Longitude of this output - @ts(treatment='avg') - longitude: double - @ts(treatment='avg') - altitude: double - @ts(treatment='avg') - total_BC: double - @ts(treatment='avg') - temperature: double - @ts(treatment='avg') - pressure: double - @ts(treatment='avg') - dewpoint: double - @ts(treatment='avg') - SSA_front: double - @ts(treatment='avg') - SSA_rear: double - @ts(treatment='avg') - scat530: double - @ts(treatment='avg') - NO3: double - @ts(treatment='avg') - total_SO4: double - @ts(treatment='avg') - total_ORG: double - @ts(treatment='avg') - CNgt10: double - @ts(treatment='avg') - total_Cl: 
double - @ts(treatment='avg') - UHSASdNdlogd_bin0: double - @ts(treatment='avg') - UHSASdNdlogd_bin1: double - -} \ No newline at end of file diff --git a/training/gordon-group/src/entity/Observations/ObservationOutput.c3typ b/training/gordon-group/src/entity/Observations/ObservationOutput.c3typ new file mode 100644 index 00000000..94134894 --- /dev/null +++ b/training/gordon-group/src/entity/Observations/ObservationOutput.c3typ @@ -0,0 +1,245 @@ +/** +* ObservationOutput.c3typ +* A single output taken from a single [Observation](type:Observation) +*/ +@db(datastore='cassandra', + partitionKeyField='parent', + persistenceOrder='start, dataVersion', + persistDuplicates=false, + compactType=true, + unique=['parent, start']) +entity type ObservationOutput mixes TimedDataPoint schema name 'OBS_TPT' { + // The Latitude of this output + @ts(treatment='avg') + latitude: double + // The Longitude of this output + @ts(treatment='avg') + longitude: double + @ts(treatment='avg') + altitude: double + @ts(treatment='avg') + total_BC: double + @ts(treatment='avg') + temperature: double + @ts(treatment='avg') + pressure: double + @ts(treatment='avg') + dewpoint: double + @ts(treatment='avg') + SSA_front: double + @ts(treatment='avg') + SSA_rear: double + @ts(treatment='avg') + scat530: double + @ts(treatment='avg') + NO3: double + @ts(treatment='avg') + total_SO4: double + @ts(treatment='avg') + total_ORG: double + @ts(treatment='avg') + CNgt10: double + @ts(treatment='avg') + total_Cl: double + + // UHSASdNdlogd variables: 99 bins + @ts(treatment='avg') + UHSASdNdlogd_bin0: double + @ts(treatment='avg') + UHSASdNdlogd_bin1: double + @ts(treatment='avg') + UHSASdNdlogd_bin2: double + @ts(treatment='avg') + UHSASdNdlogd_bin3: double + @ts(treatment='avg') + UHSASdNdlogd_bin4: double + @ts(treatment='avg') + UHSASdNdlogd_bin5: double + @ts(treatment='avg') + UHSASdNdlogd_bin6: double + @ts(treatment='avg') + UHSASdNdlogd_bin7: double + @ts(treatment='avg') + UHSASdNdlogd_bin8: 
double + @ts(treatment='avg') + UHSASdNdlogd_bin9: double + @ts(treatment='avg') + UHSASdNdlogd_bin10: double + @ts(treatment='avg') + UHSASdNdlogd_bin11: double + @ts(treatment='avg') + UHSASdNdlogd_bin12: double + @ts(treatment='avg') + UHSASdNdlogd_bin13: double + @ts(treatment='avg') + UHSASdNdlogd_bin14: double + @ts(treatment='avg') + UHSASdNdlogd_bin15: double + @ts(treatment='avg') + UHSASdNdlogd_bin16: double + @ts(treatment='avg') + UHSASdNdlogd_bin17: double + @ts(treatment='avg') + UHSASdNdlogd_bin18: double + @ts(treatment='avg') + UHSASdNdlogd_bin19: double + @ts(treatment='avg') + UHSASdNdlogd_bin20: double + @ts(treatment='avg') + UHSASdNdlogd_bin21: double + @ts(treatment='avg') + UHSASdNdlogd_bin22: double + @ts(treatment='avg') + UHSASdNdlogd_bin23: double + @ts(treatment='avg') + UHSASdNdlogd_bin24: double + @ts(treatment='avg') + UHSASdNdlogd_bin25: double + @ts(treatment='avg') + UHSASdNdlogd_bin26: double + @ts(treatment='avg') + UHSASdNdlogd_bin27: double + @ts(treatment='avg') + UHSASdNdlogd_bin28: double + @ts(treatment='avg') + UHSASdNdlogd_bin29: double + @ts(treatment='avg') + UHSASdNdlogd_bin30: double + @ts(treatment='avg') + UHSASdNdlogd_bin31: double + @ts(treatment='avg') + UHSASdNdlogd_bin32: double + @ts(treatment='avg') + UHSASdNdlogd_bin33: double + @ts(treatment='avg') + UHSASdNdlogd_bin34: double + @ts(treatment='avg') + UHSASdNdlogd_bin35: double + @ts(treatment='avg') + UHSASdNdlogd_bin36: double + @ts(treatment='avg') + UHSASdNdlogd_bin37: double + @ts(treatment='avg') + UHSASdNdlogd_bin38: double + @ts(treatment='avg') + UHSASdNdlogd_bin39: double + @ts(treatment='avg') + UHSASdNdlogd_bin40: double + @ts(treatment='avg') + UHSASdNdlogd_bin41: double + @ts(treatment='avg') + UHSASdNdlogd_bin42: double + @ts(treatment='avg') + UHSASdNdlogd_bin43: double + @ts(treatment='avg') + UHSASdNdlogd_bin44: double + @ts(treatment='avg') + UHSASdNdlogd_bin45: double + @ts(treatment='avg') + UHSASdNdlogd_bin46: double + 
@ts(treatment='avg') + UHSASdNdlogd_bin47: double + @ts(treatment='avg') + UHSASdNdlogd_bin48: double + @ts(treatment='avg') + UHSASdNdlogd_bin49: double + @ts(treatment='avg') + UHSASdNdlogd_bin50: double + @ts(treatment='avg') + UHSASdNdlogd_bin51: double + @ts(treatment='avg') + UHSASdNdlogd_bin52: double + @ts(treatment='avg') + UHSASdNdlogd_bin53: double + @ts(treatment='avg') + UHSASdNdlogd_bin54: double + @ts(treatment='avg') + UHSASdNdlogd_bin55: double + @ts(treatment='avg') + UHSASdNdlogd_bin56: double + @ts(treatment='avg') + UHSASdNdlogd_bin57: double + @ts(treatment='avg') + UHSASdNdlogd_bin58: double + @ts(treatment='avg') + UHSASdNdlogd_bin59: double + @ts(treatment='avg') + UHSASdNdlogd_bin60: double + @ts(treatment='avg') + UHSASdNdlogd_bin61: double + @ts(treatment='avg') + UHSASdNdlogd_bin62: double + @ts(treatment='avg') + UHSASdNdlogd_bin63: double + @ts(treatment='avg') + UHSASdNdlogd_bin64: double + @ts(treatment='avg') + UHSASdNdlogd_bin65: double + @ts(treatment='avg') + UHSASdNdlogd_bin66: double + @ts(treatment='avg') + UHSASdNdlogd_bin67: double + @ts(treatment='avg') + UHSASdNdlogd_bin68: double + @ts(treatment='avg') + UHSASdNdlogd_bin69: double + @ts(treatment='avg') + UHSASdNdlogd_bin70: double + @ts(treatment='avg') + UHSASdNdlogd_bin71: double + @ts(treatment='avg') + UHSASdNdlogd_bin72: double + @ts(treatment='avg') + UHSASdNdlogd_bin73: double + @ts(treatment='avg') + UHSASdNdlogd_bin74: double + @ts(treatment='avg') + UHSASdNdlogd_bin75: double + @ts(treatment='avg') + UHSASdNdlogd_bin76: double + @ts(treatment='avg') + UHSASdNdlogd_bin77: double + @ts(treatment='avg') + UHSASdNdlogd_bin78: double + @ts(treatment='avg') + UHSASdNdlogd_bin79: double + @ts(treatment='avg') + UHSASdNdlogd_bin80: double + @ts(treatment='avg') + UHSASdNdlogd_bin81: double + @ts(treatment='avg') + UHSASdNdlogd_bin82: double + @ts(treatment='avg') + UHSASdNdlogd_bin83: double + @ts(treatment='avg') + UHSASdNdlogd_bin84: double + @ts(treatment='avg') + 
UHSASdNdlogd_bin85: double + @ts(treatment='avg') + UHSASdNdlogd_bin86: double + @ts(treatment='avg') + UHSASdNdlogd_bin87: double + @ts(treatment='avg') + UHSASdNdlogd_bin88: double + @ts(treatment='avg') + UHSASdNdlogd_bin89: double + @ts(treatment='avg') + UHSASdNdlogd_bin90: double + @ts(treatment='avg') + UHSASdNdlogd_bin91: double + @ts(treatment='avg') + UHSASdNdlogd_bin92: double + @ts(treatment='avg') + UHSASdNdlogd_bin93: double + @ts(treatment='avg') + UHSASdNdlogd_bin94: double + @ts(treatment='avg') + UHSASdNdlogd_bin95: double + @ts(treatment='avg') + UHSASdNdlogd_bin96: double + @ts(treatment='avg') + UHSASdNdlogd_bin97: double + @ts(treatment='avg') + UHSASdNdlogd_bin98: double + +} \ No newline at end of file diff --git a/training/gordon-group/src/entity/ObservationOutputFile.c3typ b/training/gordon-group/src/entity/Observations/ObservationOutputFile.c3typ similarity index 100% rename from training/gordon-group/src/entity/ObservationOutputFile.c3typ rename to training/gordon-group/src/entity/Observations/ObservationOutputFile.c3typ diff --git a/training/gordon-group/src/entity/ObservationOutputFile.py b/training/gordon-group/src/entity/Observations/ObservationOutputFile.py similarity index 98% rename from training/gordon-group/src/entity/ObservationOutputFile.py rename to training/gordon-group/src/entity/Observations/ObservationOutputFile.py index 54ba0053..1310a083 100644 --- a/training/gordon-group/src/entity/ObservationOutputFile.py +++ b/training/gordon-group/src/entity/Observations/ObservationOutputFile.py @@ -48,7 +48,7 @@ def get_df_from_c3_file(c3file): df[c3_var] = source.variables[nc_var][:] df[c3_var] = pd.to_datetime(df[c3_var],unit='s') elif nc_var == 'UHSASdNdlogd': - for i in range(0,1): + for i in range(0,99): name = c3_var + "_bin" + str(i) try: df[name] = source.variables[nc_var][:,i] diff --git a/training/gordon-group/src/entity/ObservationOutputSeries.c3typ 
b/training/gordon-group/src/entity/Observations/ObservationOutputSeries.c3typ similarity index 100% rename from training/gordon-group/src/entity/ObservationOutputSeries.c3typ rename to training/gordon-group/src/entity/Observations/ObservationOutputSeries.c3typ diff --git a/training/gordon-group/src/entity/ObservationSet.c3typ b/training/gordon-group/src/entity/Observations/ObservationSet.c3typ similarity index 93% rename from training/gordon-group/src/entity/ObservationSet.c3typ rename to training/gordon-group/src/entity/Observations/ObservationSet.c3typ index 3be791ed..3183f979 100644 --- a/training/gordon-group/src/entity/ObservationSet.c3typ +++ b/training/gordon-group/src/entity/Observations/ObservationSet.c3typ @@ -21,4 +21,6 @@ entity type ObservationSet mixes MetricEvaluatable schema name "OBS_ST" { // Upsert data into {@link ObservationOutput} for all {@link ObservationOutputFile}s corresponding to this {@link ObservationSet} @dependency(include = "this, outputFiles") upsertObservationData: member function(): int js server + // Cleans all seeded data + removeAllSeededData: function() : int js server } \ No newline at end of file diff --git a/training/gordon-group/src/entity/ObservationSet.js b/training/gordon-group/src/entity/Observations/ObservationSet.js similarity index 90% rename from training/gordon-group/src/entity/ObservationSet.js rename to training/gordon-group/src/entity/Observations/ObservationSet.js index 2766642d..289903ea 100644 --- a/training/gordon-group/src/entity/ObservationSet.js +++ b/training/gordon-group/src/entity/Observations/ObservationSet.js @@ -82,4 +82,18 @@ function upsertObservationData() { return total; }; - \ No newline at end of file + + + +/** + * removeAllSeededData() + * Removes all seeded data for this project. 
+ */ + function removeAllSeededData() { + ObservationSet.removeAll(); + ObservationOutput.removeAll(); + ObservationOutputFile.removeAll(); + ObservationOutputSeries.removeAll(); + + return 0; +} \ No newline at end of file diff --git a/training/gordon-group/src/entity/SimulationOutputFile.c3typ b/training/gordon-group/src/entity/SimulationOutputFile.c3typ deleted file mode 100644 index 62b437b4..00000000 --- a/training/gordon-group/src/entity/SimulationOutputFile.c3typ +++ /dev/null @@ -1,18 +0,0 @@ -/** -SimulationOutputFile.c3typ -*/ -entity type SimulationOutputFile schema name 'SMLTN_OTPT_FL' { - // The {@link SimulationSample} that theses files belong to. - simulationSample: !SimulationSample - // The date for this file (comes with the file name) - dateTag: datetime - // The simulation output file - file: !File - // processed - processed: boolean post default "false" - - // METHODS - // Load data from this file into {@link SimulationModelOutput} - @py(env='gordon_1_0_0') - upsertData: member function(): boolean -} diff --git a/training/gordon-group/src/entity/SimulationOutputFile.py b/training/gordon-group/src/entity/SimulationOutputFile.py deleted file mode 100644 index 412cede4..00000000 --- a/training/gordon-group/src/entity/SimulationOutputFile.py +++ /dev/null @@ -1,73 +0,0 @@ -def upsertData(this): - """ - Function to Open files in the SimulationOutputFile table and then populate SimulationModelOutput and SimulationModelOutputSeries data. 
- - - Arguments: - -this: an instance of SimulationOutputFile - - - Returns: - -bool: True if file was processed, false if file has already been processed - """ - from datetime import datetime, timedelta - import pandas as pd - - # open file - sample = c3.NetCDFUtil.openFile(this.file.url) - - # cast it to dataframe - df = pd.DataFrame() - df['time'] = sample.variables['time'][:] - df['longitude'] = sample.variables['longitude'][:] - df['latitude'] = sample.variables['latitude'][:] - df['altitude'] = sample.variables['altitude'][:] - df['model_level_number'] = sample.variables['model_level_number'][:] - df['air_potential_temperature']= sample.variables['air_potential_temperature'][:] - df['air_pressure'] = sample.variables['air_pressure'][:] - df['cloud_flag'] = sample.variables['m01s38i478'][:] - df['cdnc_x_cloud_flag'] = sample.variables['m01s38i479'][:] - df['ambient_extinction_550'] = sample.variables['m01s02i530_550nm'][:] - df['ambient_scattering_550'] = sample.variables['m01s02i532_550nm'][:] - df['num_nuc'] = sample.variables['number_of_particles_per_air_molecule_of_soluble_nucleation_mode_aerosol_in_air'][:] - df['num_Ait'] = sample.variables['number_of_particles_per_air_molecule_of_soluble_aitken_mode_aerosol_in_air'][:] - df['num_acc'] = sample.variables['number_of_particles_per_air_molecule_of_soluble_accumulation_mode_aerosol_in_air'][:] - df['num_cor'] = sample.variables['number_of_particles_per_air_molecule_of_soluble_coarse_mode_aerosol_in_air'][:] - df['num_Aitins'] = sample.variables['number_of_particles_per_air_molecule_of_insoluble_aitken_mode_aerosol_in_air'][:] - df['mass_SU_Ait'] = sample.variables['mass_fraction_of_sulfuric_acid_in_soluble_aitken_mode_dry_aerosol_in_air'][:] - df['mass_SU_acc'] = sample.variables['mass_fraction_of_sulfuric_acid_in_soluble_accumulation_mode_dry_aerosol_in_air'][:] - df['mass_SU_cor'] = sample.variables['mass_fraction_of_sulfuric_acid_in_soluble_coarse_mode_dry_aerosol_in_air'][:] - df['mass_BC_Ait'] = 
sample.variables['mass_fraction_of_black_carbon_in_soluble_aitken_mode_dry_aerosol_in_air'][:] - df['mass_BC_acc'] = sample.variables['mass_fraction_of_black_carbon_in_soluble_accumulation_mode_dry_aerosol_in_air'][:] - df['mass_BC_cor'] = sample.variables['mass_fraction_of_black_carbon_in_soluble_coarse_mode_dry_aerosol_in_air'][:] - df['mass_BC_Aitins'] = sample.variables['mass_fraction_of_black_carbon_in_insoluble_aitken_mode_dry_aerosol_in_air'][:] - df['mass_OC_Ait'] = sample.variables['mass_fraction_of_particulate_organic_matter_in_soluble_aitken_mode_dry_aerosol_in_air'][:] - df['mass_OC_acc'] = sample.variables['mass_fraction_of_particulate_organic_matter_in_soluble_accumulation_mode_dry_aerosol_in_air'][:] - df['mass_OC_cor'] = sample.variables['mass_fraction_of_particulate_organic_matter_in_soluble_coarse_mode_dry_aerosol_in_air'][:] - df['mass_OC_Aitins'] = sample.variables['mass_fraction_of_particulate_organic_matter_in_insoluble_aitken_mode_dry_aerosol_in_air'][:] - df['mass_SS_acc'] = sample.variables['mass_fraction_of_seasalt_in_soluble_accumulation_mode_dry_aerosol_in_air'][:] - df['mass_SS_cor'] = sample.variables['mass_fraction_of_seasalt_in_soluble_coarse_mode_dry_aerosol_in_air'][:] - # a little gymnastic to get Datetime objs - zero_time = datetime(1970,1,1,0,0) - transformed_times = [] - for time in df['time']: - target_time = zero_time + timedelta(hours=time) - transformed_times.append(target_time) - df['start'] = transformed_times - df.drop(columns=['time'], inplace=True) - - parent_id = "SMOS_" + this.simulationSample.id - df['parent'] = parent_id - - now_time = datetime.now() - diff_time = (now_time - zero_time) - versionTag= -1 * diff_time.total_seconds() - df['dataVersion'] = versionTag - - output_records = df.to_dict(orient="records") - - # upsert this batch - c3.SimulationModelOutput.upsertBatch(objs=output_records) - - this.processed = True - c3.SimulationOutputFile.merge(this) - return True - \ No newline at end of file diff --git 
a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutput.c3typ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutput.c3typ new file mode 100644 index 00000000..f06c9521 --- /dev/null +++ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutput.c3typ @@ -0,0 +1,25 @@ +/** +* Simulation3HourlyAODOutput.c3typ +* A single monthly-mean output taken from a single [SimulationSample](type:SimulationSample) +*/ + +entity type Simulation3HourlyAODOutput schema name 'SMLTN_3HAOD_TPT' { + // The {@link SimulationSample} this output belongs to + simulationSample: !SimulationSample + // dust + dust: !float + // soluble aitken mode + solubleAitkenMode: !float + // soluble accumulation mode + solubleAccumulationMode: !float + // soluble coarse mode + solubleCoarseMode: !float + // insoluble aitken mode + insolubleAitkenMode: !float + // latitude (reference to a {@link Latitude} entity) + latitude: !Latitude + // longitude + longitude: !float + // time stamp + time: !datetime +} \ No newline at end of file diff --git a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ new file mode 100644 index 00000000..227d6f0e --- /dev/null +++ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputAllRef.c3typ @@ -0,0 +1,25 @@ +/** +* Simulation3HourlyAODOutputAllRef.c3typ +* A single monthly-mean output taken from a single [SimulationSample](type:SimulationSample) with references to {@link GeoSurfaceTimePoint} +*/ +@db(datastore='cassandra', + partitionKeyField='geoSurfaceTimePoint', + persistenceOrder='simulationSample', + persistDuplicates=false, + compactType=true) +entity type Simulation3HourlyAODOutputAllRef schema name 'SM_3HAOD_TPT_LLRF' { + // The {@link SimulationSample} this output belongs to + simulationSample: !SimulationSample + // dust + dust: !float + // soluble aitken mode + solubleAitkenMode: !float + // soluble accumulation mode +
solubleAccumulationMode: !float + // soluble coarse mode + solubleCoarseMode: !float + // insoluble aitken mode + insolubleAitkenMode: !float + // the space-time point for this measurement + geoSurfaceTimePoint: !GeoSurfaceTimePoint +} \ No newline at end of file diff --git a/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputPlain.c3typ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputPlain.c3typ new file mode 100644 index 00000000..c0e1655f --- /dev/null +++ b/training/gordon-group/src/entity/Simulations/Simulation3HourlyAODOutputPlain.c3typ @@ -0,0 +1,25 @@ +/** +* Simulation3HourlyAODOutputPlain.c3typ +* A single monthly-mean output taken from a single [SimulationSample](type:SimulationSample) without references +*/ + +entity type Simulation3HourlyAODOutputPlain schema name 'SMLTN_3HAOD_TPT_PLN' { + // The {@link SimulationSample} this output belongs to + simulationSample: !SimulationSample + // dust + dust: !float + // soluble aitken mode + solubleAitkenMode: !float + // soluble accumulation mode + solubleAccumulationMode: !float + // soluble coarse mode + solubleCoarseMode: !float + // insoluble aitken mode + insolubleAitkenMode: !float + // latitude + latitude: !float + // longitude + longitude: !float + // time stamp + time: !datetime +} \ No newline at end of file diff --git a/training/gordon-group/src/entity/SimulationEnsemble.c3typ b/training/gordon-group/src/entity/Simulations/SimulationEnsemble.c3typ similarity index 100% rename from training/gordon-group/src/entity/SimulationEnsemble.c3typ rename to training/gordon-group/src/entity/Simulations/SimulationEnsemble.c3typ diff --git a/training/gordon-group/src/entity/SimulationEnsemble.js b/training/gordon-group/src/entity/Simulations/SimulationEnsemble.js similarity index 100% rename from training/gordon-group/src/entity/SimulationEnsemble.js rename to training/gordon-group/src/entity/Simulations/SimulationEnsemble.js diff --git 
a/training/gordon-group/src/entity/SimulationModelOutput.c3typ b/training/gordon-group/src/entity/Simulations/SimulationModelOutput.c3typ similarity index 93% rename from training/gordon-group/src/entity/SimulationModelOutput.c3typ rename to training/gordon-group/src/entity/Simulations/SimulationModelOutput.c3typ index 051814f4..b70db2c0 100644 --- a/training/gordon-group/src/entity/SimulationModelOutput.c3typ +++ b/training/gordon-group/src/entity/Simulations/SimulationModelOutput.c3typ @@ -20,8 +20,8 @@ entity type SimulationModelOutput mixes TimedDataPoint= 180) for x in sample["longitude"][:]] + # this file times + ts = this.dateTag + # TODO(review): the last slot is hour=0 of the SAME day; confirm it should not be 24:00 (next day) + times = [ts.replace(hour=3), ts.replace(hour=6), ts.replace(hour=9), + ts.replace(hour=12), ts.replace(hour=15), ts.replace(hour=18), + ts.replace(hour=21), ts.replace(hour=0)] + df["time"] = [t for t in times for n in range(0, len(lat)*len(lon))] + df["latitude"] = [l for l in lats for n in range(0, len(lon))]*len(times) + df["longitude"] = [l for l in lon]*len(times)*len(lat) + + + + + # now the SimulationSample field + df["simulationSample"] = this.simulationSample + + # cast everything into dict and upsert + output_records = df.to_dict(orient="records") + c3.Simulation3HourlyAODOutput.upsertBatch(objs=output_records) + + this.processed = True + c3.SimulationOutputFile.merge(this) + + return True + + else: + return False + + +def upsert3HourlyAODPlainData(this): + """ + Function to open files in the SimulationOutputFile table with monthly-mean container and then populate Simulation3HourlyAODOutputPlain data. + + - Arguments: + -this: an instance of SimulationOutputFile + + - Returns: + -bool: True if the file was processed, False if the container type does not match.
+ """ + import pandas as pd + import numpy as np + + # verify file container + if(this.container == 'monthly-mean'): + variable_names = { + "dust" : "atmosphere_optical_thickness_due_to_dust_ambient_aerosol", + "solubleAitkenMode" : "atmosphere_optical_thickness_due_to_soluble_aitken_mode_ambient_aerosol", + "solubleAccumulationMode" : "atmosphere_optical_thickness_due_to_soluble_accumulation_mode_ambient_aerosol", + "solubleCoarseMode" : "atmosphere_optical_thickness_due_to_soluble_coarse_mode_ambient_aerosol", + "insolubleAitkenMode" : "atmosphere_optical_thickness_due_to_insoluble_aitken_mode_ambient_aerosol" + } + #open file + sample = c3.NetCDFUtil.openFile(this.file.url) + df = pd.DataFrame() + + # this is to take care of variables that need to be flattened + for var in variable_names.items(): + tensor = sample[var[1]][:][2,:,:,:] + tensor = np.array(tensor).flatten() + df[var[0]] = tensor + + # now latitude, longitude and time + lat = sample["latitude"][:] + #lats = [] + #for l in lat: + # obj = c3.Latitude.fetch({'filter': c3.Filter().eq("value", float#(l))}).objs[0] + # lats.append(obj) + lon = [x*(x < 180) + (x - 360)*(x >= 180) for x in sample["longitude"][:]] + # this file times + ts = this.dateTag + # TODO(review): the last slot is hour=0 of the SAME day; confirm it should not be 24:00 (next day)
+ times = [ts.replace(hour=3), ts.replace(hour=6), ts.replace(hour=9), + ts.replace(hour=12), ts.replace(hour=15), ts.replace(hour=18), + ts.replace(hour=21), ts.replace(hour=0)] + df["time"] = [t for t in times for n in range(0, len(lat)*len(lon))] + df["latitude"] = [l for l in lat for n in range(0, len(lon))]*len(times) + df["longitude"] = [l for l in lon]*len(times)*len(lat) + + + + + # now the SimulationSample field + df["simulationSample"] = this.simulationSample + + # cast everything into dict and upsert + output_records = df.to_dict(orient="records") + c3.Simulation3HourlyAODOutputPlain.upsertBatch(objs=output_records) + + this.processed = True + c3.SimulationOutputFile.merge(this) + + return True + + else: + return False + +def upsert3HourlyAODAllRefData(this): + """ + Function to open files in the SimulationOutputFile table with monthly-mean container and then populate GeoSurfaceTimePoint and Simulation3HourlyAODOutputAllRef data. + + - Arguments: + -this: an instance of SimulationOutputFile + + - Returns: + -bool: True if the file was processed, False if the container type does not match or if a batch write fails.
+ """ + import pandas as pd + import numpy as np + from datetime import datetime as dt + + # verify file container + if(this.container == 'monthly-mean'): + variable_names = { + "dust" : "atmosphere_optical_thickness_due_to_dust_ambient_aerosol", + "solubleAitkenMode" : "atmosphere_optical_thickness_due_to_soluble_aitken_mode_ambient_aerosol", + "solubleAccumulationMode" : "atmosphere_optical_thickness_due_to_soluble_accumulation_mode_ambient_aerosol", + "solubleCoarseMode" : "atmosphere_optical_thickness_due_to_soluble_coarse_mode_ambient_aerosol", + "insolubleAitkenMode" : "atmosphere_optical_thickness_due_to_insoluble_aitken_mode_ambient_aerosol" + } + #open file + sample = c3.NetCDFUtil.openFile(this.file.url) + df_var = pd.DataFrame() + + # this is to take care of variables that need to be flattened + for var in variable_names.items(): + tensor = sample[var[1]][:][2,:,:,:] + tensor = np.array(tensor).flatten() + df_var[var[0]] = tensor + + # include simulation sample + df_var["simulationSample"] = this.simulationSample + + # now do spacetime coordinates + df_st = pd.DataFrame() + + lat = sample["latitude"][:] + lon = [x*(x < 180) + (x - 360)*(x >= 180) for x in sample["longitude"][:]] + ts = this.dateTag + times = [ts.replace(hour=3), ts.replace(hour=6), ts.replace(hour=9), + ts.replace(hour=12), ts.replace(hour=15), ts.replace(hour=18), + ts.replace(hour=21), ts.replace(hour=0)] + + df_st["time"] = [t for t in times for n in range(0, len(lat)*len(lon))] + df_st["latitude"] = [l for l in lat for n in range(0, len(lon))]*len(times) + df_st["longitude"] = [l for l in lon]*len(times)*len(lat) + + df_st["id"] = df_st["latitude"].astype(str) + "_" + df_st["longitude"].astype(str) + "_" + df_st["time"].astype(str).apply(lambda x: x.replace(" ", 'T')) + + # now upsert this + output_records = df_st.to_dict(orient="records") + try: + gst = c3.GeoSurfaceTimePoint.upsertBatch(objs=output_records) + except Exception: + meta = c3.MetaFileProcessing(lastProcessAttempt=dt.now(), +
lastAttemptFailed=True) + c3.SimulationOutputFile(id=this.id, processMeta=meta).merge() + return False + + df_batch = pd.DataFrame(df_var) + df_batch["geoSurfaceTimePoint"] = gst.objs + output_records = df_batch.to_dict(orient="records") + try: + c3.Simulation3HourlyAODOutputAllRef.createBatch(objs=output_records) + except Exception: + meta = c3.MetaFileProcessing(lastProcessAttempt=dt.now(), + lastAttemptFailed=True) + c3.SimulationOutputFile(id=this.id, processMeta=meta).merge() + return False + + # if we get here, it worked + meta = c3.MetaFileProcessing(lastProcessAttempt=dt.now(), + lastAttemptFailed=False) + c3.SimulationOutputFile(id=this.id, processed=True, processMeta=meta).merge() + + return True + + else: + return False + + +def upsertData(this): + """ + Function to open files in the SimulationOutputFile table and dispatch to the container-specific upsert that populates Simulation***Output data. + + - Arguments: + -this: an instance of SimulationOutputFile + + - Returns: + -bool: the result of the container-specific upsert, False if the container is not recognized + """ + if(this.container == 'monthly-mean'): + return this.upsert3HourlyAODData() + elif(this.container == 'acure-aircraft'): + return this.upsertAcureAircraftData() + else: + return False \ No newline at end of file diff --git a/training/gordon-group/src/entity/SimulationSample.c3typ b/training/gordon-group/src/entity/Simulations/SimulationSample.c3typ similarity index 91% rename from training/gordon-group/src/entity/SimulationSample.c3typ rename to training/gordon-group/src/entity/Simulations/SimulationSample.c3typ index 68567ce9..7d0769db 100644 --- a/training/gordon-group/src/entity/SimulationSample.c3typ +++ b/training/gordon-group/src/entity/Simulations/SimulationSample.c3typ @@ -3,6 +3,7 @@ * One single simulation taken from the entire [SimulationEnsemble](type:SimulationEnsemble).
*/ entity type SimulationSample mixes MetricEvaluatable schema name "SMLTN_SMPL" { + // ATTRIBUTES // The number of this simulation within its {@link SimulationEnsemble} simulationNumber: !int // The {@link SimulationEnsemble} that this sample belongs to @@ -14,6 +15,11 @@ entity type SimulationSample mixes MetricEvaluatable schema name "SMLTN_SMPL" { // the collection of {@link SimulationOutputFile}s for this simulation sample outputFiles: [SimulationOutputFile](simulationSample, id) + // Temperature unit of measure + temperatureUOM: Unit post default "{id: 'degrees_celsius'}" + + + // METHODS // Callback to create the {@link SimulationOutputFile}s whenever a {@link SimulationSample} is upserted. @dependency(include = "this, ensemble.name") diff --git a/training/gordon-group/src/entity/SimulationSample.js b/training/gordon-group/src/entity/Simulations/SimulationSample.js similarity index 51% rename from training/gordon-group/src/entity/SimulationSample.js rename to training/gordon-group/src/entity/Simulations/SimulationSample.js index bee941ed..7b5ce5a1 100644 --- a/training/gordon-group/src/entity/SimulationSample.js +++ b/training/gordon-group/src/entity/Simulations/SimulationSample.js @@ -25,19 +25,46 @@ function afterCreate(objs) { var ensemble = SimulationEnsemble.fetch({ filter: Filter.eq("id",obj.ensemble.id) }).objs[0] - // AZURE DIRECTORY PATH HERE: change 'gordon-group; to whatever you need + + // ACURE-AIRCRAFT CONTAINER var ensemblePath = FileSystem.inst().rootUrl() + 'gordon-group/' + ensemble.name + '/'; var prePathToAllFiles = ensemblePath + ensemble.prePathToFiles; var pathToSample = prePathToAllFiles + padStart(String(obj.simulationNumber),3,'0'); + var allAAFiles = FileSystem.inst().listFiles(pathToSample).files; + var sampleFiles = new Array(); - var sampleFiles = FileSystem.inst().listFiles(pathToSample).files; // Remove non-NetCDF files from list - for (var i = 0; i < sampleFiles.length; i++) { - var sf = sampleFiles[i]; - if (sf.url.slice(-3) 
/**
 * Build the SimulationOutputFile record for one listed file.
 *
 * The container label and the position of the date stamp inside the URL both
 * depend on whether the URL starts with the monthly-mean container root:
 *   - monthly-mean: the 8 date characters start 18 characters from the end;
 *   - otherwise (acure-aircraft): they start 11 characters from the end.
 * The 8 characters are read as year (4), month (2), day (2) and joined with
 * "-" to form the value handed to DateTime.make.
 * NOTE(review): the offsets assume a fixed file-name layout; confirm against
 * the actual naming scheme in each container.
 *
 * Reads `obj` (the sample currently being processed) from the enclosing scope.
 *
 * @param {Object} file - File listing entry; only `file.url` is read.
 * @returns {SimulationOutputFile} Result of SimulationOutputFile.make with
 *          simulationSample, file, dateTag and container populated.
 */
function createSimOutFiles(file) {
    var MONTHLY_MEAN_ROOT = "azure://monthly-mean-simulations";
    var url = file.url;

    // Prefix comparison; the length is derived from the prefix rather than hard-coded.
    var isMonthlyMean = url.slice(0, MONTHLY_MEAN_ROOT.length) === MONTHLY_MEAN_ROOT;

    // Negative offset (from the end of the URL) at which the date stamp begins.
    var dateStart = isMonthlyMean ? -18 : -11;
    var year = url.slice(dateStart, dateStart + 4);
    var month = url.slice(dateStart + 4, dateStart + 6);
    var day = url.slice(dateStart + 6, dateStart + 8);
    var dateStr = year + "-" + month + "-" + day;

    return SimulationOutputFile.make({
        "simulationSample": obj,
        "file": File.make({
            "url": url
        }),
        "dateTag": DateTime.make({
            "value": dateStr
        }),
        "container": isMonthlyMean ? "monthly-mean" : "acure-aircraft"
    });
}