Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
b53f675
Merge pull request #20 from c3aidti/smoke-dev
babreu-ncsa Nov 29, 2022
4eca59a
Merge pull request #21 from c3aidti/smoke-dev
babreu-ncsa Jan 23, 2023
fb71c22
Merge pull request #22 from c3aidti/smoke-dev
babreu-ncsa Feb 7, 2023
8d65c62
Merge pull request #23 from c3aidti/smoke-dev
babreu-ncsa Mar 22, 2023
63fb807
more modifications to smokePPEGaussianML folder
vasanchez16 Apr 10, 2023
642a4ec
capitals
babreu-ncsa Apr 11, 2023
c55be2e
Rename smokePPEGaussianMLTrainingJob.c3typ to SmokePPEGaussianMLTrain…
babreu-ncsa Apr 11, 2023
601a8d5
Rename smokePPEGaussianMLTrainingJob.js to SmokePPEGaussianMLTraining…
babreu-ncsa Apr 11, 2023
c647d4d
Rename smokePPEGaussianMLTrainingJobBatch.c3typ to SmokePPEGaussianML…
babreu-ncsa Apr 11, 2023
cf7da7d
Rename smokePPEGaussianMLTrainingJobOptions.c3typ to SmokePPEGaussian…
babreu-ncsa Apr 11, 2023
031718c
update smoke ppe parameters
babreu-ncsa Apr 12, 2023
38f7078
rm csv from git repo
babreu-ncsa Apr 17, 2023
f12f9a2
add csvs outside app package
babreu-ncsa Apr 17, 2023
b47771d
workaround for nones
babreu-ncsa Apr 17, 2023
8b088b8
train only if sizes are > 0
babreu-ncsa Apr 17, 2023
59e3793
Merge pull request #29 from c3aidti/smoke-dev
babreu-ncsa Apr 17, 2023
6822c77
drop integer targets
babreu-ncsa Apr 19, 2023
402200c
grab entire technique
babreu-ncsa May 2, 2023
d73756f
add full kernel
babreu-ncsa May 2, 2023
bf19532
staging type
babreu-ncsa May 17, 2023
a4a9b57
staging methods
babreu-ncsa May 17, 2023
4033693
send objs
babreu-ncsa May 17, 2023
0239314
pushing PPE predictor
babreu-ncsa May 24, 2023
e283984
fix to get full kernel
babreu-ncsa May 24, 2023
5e0d0f3
desperate attempt
babreu-ncsa May 26, 2023
8034687
extractLearnedParametersJob additions for smokeppe
vasanchez16 Jun 6, 2023
bc38a10
fix to first approach for extract parameters
vasanchez16 Jun 8, 2023
338fe59
second approach to extract learned params job
vasanchez16 Jun 8, 2023
90c5d01
including staged training
babreu-ncsa Jun 14, 2023
e759104
fixed dostart
babreu-ncsa Jun 14, 2023
48fda78
undo first approach to extractParamsJob
vasanchez16 Jun 15, 2023
6519c4c
limit argument test
vasanchez16 Jun 28, 2023
8eabaee
undo limit arg, made no difference
vasanchez16 Jun 29, 2023
a979c23
small fix
babreu-ncsa Jun 29, 2023
ddb681c
Merge branch 'smoke-dev' of https://github.com/c3aidti/smoke into smo…
babreu-ncsa Jun 29, 2023
3a9bfc6
add method to count pipes
babreu-ncsa Jul 12, 2023
180fb09
30min to test ONE line
babreu-ncsa Jul 12, 2023
d3967ea
another line
babreu-ncsa Jul 12, 2023
0bd3787
filter simulation hours of interest
vasanchez16 Jul 15, 2023
a3ae280
Merge branch 'smoke-dev' of https://github.com/c3aidti/smoke into smo…
vasanchez16 Jul 15, 2023
8eb4d26
limit arg test
vasanchez16 Jul 15, 2023
8810dbb
undo sim hours filter, change made outside github
vasanchez16 Jul 15, 2023
021796c
undo second limit arg try
vasanchez16 Jul 15, 2023
5855dc7
include model in predjob output
vasanchez16 Jul 17, 2023
74da7ea
try include gstpId in pred output
vasanchez16 Jul 17, 2023
b9af8e1
add sklearn version
babreu-ncsa Jul 17, 2023
575752d
get all results
babreu-ncsa Sep 7, 2023
de738af
get all results, for old ppe func
vasanchez16 Sep 18, 2023
bc5d2ec
new type with coarse graining
babreu-ncsa Oct 17, 2023
96d418d
Merge pull request #31 from c3aidti/smoke-dev
dadamsncsa Nov 7, 2023
802bb9e
Merge pull request #33 from c3aidti/dev/smokeApp/rc1
dadamsncsa Nov 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 122 additions & 0 deletions nonAppFiles/smokePPE/smoke_PPE_Design_original_scale_w_bc_ri_index.csv

Large diffs are not rendered by default.

122 changes: 122 additions & 0 deletions nonAppFiles/smokePPE/smoke_PPE_Unit_Design.csv

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion smoke/smokeApp/seed/ActionRuntime/py-gordon_1_0_0.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
"conda.basemap":"=1.2.2",
"conda.nbformat":"=5.1.3",
"conda.iris":"=3.1.0",
"conda.fsspec":""
"conda.fsspec":"",
"conda.scikit-learn":"=0.23.1"
},
"repositories": [
"https://repo.continuum.io/pkgs/main",
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/**
* Copyright (c) 2022, C3 AI DTI, Development Operations Team
* All rights reserved. License: https://github.com/c3aidti/.github
**/
/**
* Training job for AOD data {@link Simulation3HourlyAODOutput}
* and {@link GaussianProcessRegressionPipe}s with coarse graining
*
* Job options are a {@link SmokePPECoarseGrainedGaussianMLTrainingJobOptions} and each
* unit of work is a {@link SmokePPECoarseGrainedGaussianMLTrainingJobBatch}.
*
* NOTE(review): the AOD wording above looks carried over from the AOD training job;
* confirm which output type this smoke PPE job actually targets.
* NOTE(review): both methods are declared `js server`, but a Python `doStart` for the
* coarse-grained batches also appears in this change set; confirm the intended runtime.
*/
type SmokePPECoarseGrainedGaussianMLTrainingJob extends BatchJob<SmokePPECoarseGrainedGaussianMLTrainingJob, SmokePPECoarseGrainedGaussianMLTrainingJobOptions, SmokePPECoarseGrainedGaussianMLTrainingJobBatch> type key 'SMKPPECRSMLJB' {
// entry point that splits the work into batches and schedules them
doStart: ~ js server
// handler run for each scheduled batch
processBatch: ~ js server
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,27 @@
* All rights reserved. License: https://github.com/c3aidti/.github
**/
/**
* Implementation of AODGaussianMLTrainingJob
* @param {AODGaussianMLTrainingJob} job
* @param {AODGaussianMLTrainingJobOptions} options
* Implementation of SmokePPECoarseGrainedGaussianMLTrainingJob
* @param {SmokePPECoarseGrainedGaussianMLTrainingJob} job
* @param {SmokePPECoarseGrainedGaussianMLTrainingJobOptions} options
*/
function doStart(job, options) {
job.setHardwareProfile(options.hardwareProfileId);
var batch = [];


var gstpFilter = Filter.ge("latitude", options.minLat).and().lt("latitude", options.maxLat).and().ge("longitude", options.minLon).and().lt("longitude", options.maxLon).and().ge("time", options.minTime).and().lt("time", options.maxTime);

var gstps = GeoSurfaceTimePoint.fetchObjStream({
filter: options.gstpFilter,
filter: gstpFilter,
limit: -1
});

while(gstps.hasNext()) {
batch.push(gstps.next());

if (batch.length >= options.batchSize || !gstps.hasNext()) {
var batchSpec = AODGaussianMLTrainingJobBatch.make({values: batch});
var batchSpec = SmokePPEGaussianMLTrainingJobBatch.make({values: batch});
job.scheduleBatch(batchSpec);

batch = [];
Expand All @@ -31,9 +34,9 @@ function doStart(job, options) {

/**
* Implementation of what to do in each batch
* @param {AODGaussianMLTrainingJobBatch} batch
* @param {AODGaussianMLTrainingJob} job
* @param {AODGaussianMLTrainingJobOptions} options
* @param {SmokePPEGaussianMLTrainingJobBatch} batch
* @param {SmokePPEGaussianMLTrainingJob} job
* @param {SmokePPEGaussianMLTrainingJobOptions} options
*/
function processBatch(batch, job, options) {
batch.values.forEach(function(gstp) {
Expand Down Expand Up @@ -105,9 +108,10 @@ function processBatch(batch, job, options) {
var X = GPR_pipe.getFeatures();
var y = GPR_pipe.getTarget();

// train and save
var GPR_pipe_trained = GPR_pipe.train(X, y);
GPR_pipe_trained.upsert();

if (X.size() > 0 && y.size() > 0) {
// train and save
var GPR_pipe_trained = GPR_pipe.train(X, y);
GPR_pipe_trained.upsert();
};
});
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
def doStart(self, job, options):
    """
    Schedule the coarse-grained training batches for this job.

    Fetches every GeoSurfaceTimePoint inside the latitude / longitude / time
    window described by ``options`` and, for each distinct time stamp, walks a
    grid of ``latStep`` x ``lonStep`` boxes over that window. For every
    non-empty box, the SmokePPESimulationOutput fetch results of the points in
    the box are collected into one list; those lists are scheduled in groups
    of ``options.batchSize``, including a final, possibly smaller, group.

    :param job: the batch job; ``setHardwareProfile`` and ``scheduleBatch``
        are called on it.
    :param options: job options; reads ``hardwareProfileId``, ``minLat``,
        ``maxLat``, ``latStep``, ``minLon``, ``maxLon``, ``lonStep``,
        ``minTime``, ``maxTime`` and ``batchSize``.
    :raises ValueError: if ``latStep`` or ``lonStep`` is not positive.
    """
    import math

    if options.latStep <= 0 or options.lonStep <= 0:
        raise ValueError("latStep and lonStep must be positive")

    def _n_steps(lower, upper, step):
        # range() needs an int; the small tolerance keeps floating-point noise
        # in the division from producing a spurious extra box
        return max(0, int(math.ceil((upper - lower) / step - 1e-9)))

    def _schedule(values):
        batchSpec = c3.SmokePPECoarseGrainedGaussianMLTrainingJobBatch.make({"values": values})
        job.scheduleBatch(batchSpec)

    # set hardware profile
    job.setHardwareProfile(options.hardwareProfileId)

    # grab all gstps inside the requested window
    gstpFilter = (
        c3.Filter()
        .ge("latitude", options.minLat).and_().lt("latitude", options.maxLat)
        .and_().ge("longitude", options.minLon).and_().lt("longitude", options.maxLon)
        .and_().ge("time", options.minTime).and_().lt("time", options.maxTime)
    )
    allGstps = c3.GeoSurfaceTimePoint.fetch({
        "filter": gstpFilter,
        "limit": -1
    }).toPandas()

    # nothing matched the window: there is nothing to schedule
    if allGstps is None or len(allGstps) == 0:
        return

    # the grid is the same for every time stamp, so size it once
    n_lat_steps = _n_steps(options.minLat, options.maxLat, options.latStep)
    n_lon_steps = _n_steps(options.minLon, options.maxLon, options.lonStep)

    batch = []
    # loop over each unique time stamp
    for timestamp in allGstps["time"].unique():
        # all lat-lon points for that time stamp
        gstpsForTime = allGstps[allGstps["time"] == timestamp]
        # loop between minLat, maxLat with latStep
        for i in range(n_lat_steps):
            lat_down = options.minLat + i * options.latStep
            lat_up = lat_down + options.latStep
            # pandas masks must be combined element-wise with `&`, not `and`
            gstpsInBand = gstpsForTime[
                (gstpsForTime["latitude"] >= lat_down) & (gstpsForTime["latitude"] < lat_up)
            ]
            # loop between minLon, maxLon with lonStep
            for j in range(n_lon_steps):
                lon_left = options.minLon + j * options.lonStep
                lon_right = lon_left + options.lonStep
                # all gstps in that lat-lon box
                gstpsInBox = gstpsInBand[
                    (gstpsInBand["longitude"] >= lon_left) & (gstpsInBand["longitude"] < lon_right)
                ]
                if len(gstpsInBox) == 0:
                    # an empty box has no targets to train on
                    continue

                # iterate the id column: iterating the frame itself yields column labels
                targets = []
                for gstpId in gstpsInBox["id"]:
                    targetFilter = c3.Filter().eq("geoSurfaceTimePoint.id", gstpId)
                    target = c3.SmokePPESimulationOutput.fetch({
                        "filter": targetFilter,
                        "limit": -1
                    })
                    targets.append(target)

                # TODO(review): the original intended to average over the list of targets
                # here but never implemented it (it appended an undefined name). The raw
                # list is appended for now; confirm the intended payload, since the batch
                # type declares `values` as [GeoSurfaceTimePoint].
                batch.append(targets)
                if len(batch) >= options.batchSize:
                    _schedule(batch)
                    batch = []

    # flush the trailing partial batch so the last boxes are not dropped
    if batch:
        _schedule(batch)


Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/**
* Copyright (c) 2022, C3 AI DTI, Development Operations Team
* All rights reserved. License: https://github.com/c3aidti/.github
**/
/**
* Represents a unit of work (batch) in a {@link SmokePPECoarseGrainedGaussianMLTrainingJob}
*/
type SmokePPECoarseGrainedGaussianMLTrainingJobBatch {
// The {@link GeoSurfaceTimePoint}s that define the targets for the models in this batch
values: [GeoSurfaceTimePoint]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/**
* Copyright (c) 2022, C3 AI DTI, Development Operations Team
* All rights reserved. License: https://github.com/c3aidti/.github
**/
/**
* Represents customization options for {@link SmokePPECoarseGrainedGaussianMLTrainingJob}
*
* The latitude / longitude / time bounds select the {@link GeoSurfaceTimePoint}s to process
* (lower bound inclusive, upper bound exclusive, as used by the job's doStart filter);
* latStep / lonStep set the size of the coarse-graining boxes.
*/
type SmokePPECoarseGrainedGaussianMLTrainingJobOptions {
  // How many models will be trained in each batch
  batchSize: int = 10
  // the min latitude
  minLat: !float
  // the max latitude
  maxLat: !float
  // the min longitude
  minLon: !float
  // the max longitude
  maxLon: !float
  // the latitude step
  latStep: !float
  // the longitude step
  lonStep: !float
  // the min time; doStart reads options.minTime when building its filter
  // NOTE(review): datetime is assumed to match GeoSurfaceTimePoint.time; confirm
  minTime: !datetime
  // the max time; doStart reads options.maxTime when building its filter
  // NOTE(review): datetime is assumed to match GeoSurfaceTimePoint.time; confirm
  maxTime: !datetime
  // the name of the variable to collect from {@link Simulation3HourlyAODData}
  // NOTE(review): this link looks inherited from the AOD job options; confirm the target type
  targetName: !string
  // the features to exclude in {@link SimulationModelParameters}
  excludeFeatures: [string]
  // the {@link GaussianProcessRegressionTechnique} to train the models
  gprTechnique: !GaussianProcessRegressionTechnique
  // hardware profile ID to run the batches
  hardwareProfileId: string = "appc8m642-w"
  // flag for staged GSTPs training ({@link StagedGSTP})
  stagedGSTP: boolean = false
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* Training job for AOD data {@link Simulation3HourlyAODOutput}
* and {@link GaussianProcessRegressionPipe}s
*/
type AODGaussianMLTrainingJob extends BatchJob<AODGaussianMLTrainingJob, AODGaussianMLTrainingJobOptions, AODGaussianMLTrainingJobBatch> type key 'AODGMLJB' {
type SmokePPEGaussianMLTrainingJob extends BatchJob<SmokePPEGaussianMLTrainingJob, SmokePPEGaussianMLTrainingJobOptions, SmokePPEGaussianMLTrainingJobBatch> type key 'SMKPPEMLJB' {
doStart: ~ js server
processBatch: ~ js server
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
/**
* Copyright (c) 2022, C3 AI DTI, Development Operations Team
* All rights reserved. License: https://github.com/c3aidti/.github
**/
/**
 * Implementation of SmokePPEGaussianMLTrainingJob.doStart.
 *
 * Sets the job's hardware profile, then streams the points to train on and
 * schedules them in batches of at most `options.batchSize`; the last batch is
 * scheduled as soon as its stream is exhausted, even if it is not full.
 * When `options.stagedGSTP` is set the points come from {@link StagedGSTP}
 * (each resolved to its {@link GeoSurfaceTimePoint}); otherwise they come from
 * GeoSurfaceTimePoint filtered by `options.gstpFilter`.
 *
 * @param {SmokePPEGaussianMLTrainingJob} job
 * @param {SmokePPEGaussianMLTrainingJobOptions} options
 */
function doStart(job, options) {
  job.setHardwareProfile(options.hardwareProfileId);

  // Drains `stream`, converting each item with `toGstp`, and schedules the results in batches.
  // The end-of-stream test is made on the stream being drained: the staged branch used to test
  // the non-staged stream variable, which is undefined in that branch and threw a TypeError.
  function scheduleAll(stream, toGstp) {
    var batch = [];

    while (stream.hasNext()) {
      batch.push(toGstp(stream.next()));

      if (batch.length >= options.batchSize || !stream.hasNext()) {
        var batchSpec = SmokePPEGaussianMLTrainingJobBatch.make({values: batch});
        job.scheduleBatch(batchSpec);

        batch = [];
      }
    }
  }

  if (options.stagedGSTP) {
    var staged_gstps = StagedGSTP.fetchObjStream({
      limit: -1
    });

    // a staged record only references its point, so load the full point by id
    scheduleAll(staged_gstps, function(staged) {
      return GeoSurfaceTimePoint.get(staged.geoSurfaceTimePoint.id);
    });
  } else {
    var gstps = GeoSurfaceTimePoint.fetchObjStream({
      filter: options.gstpFilter,
      limit: -1
    });

    scheduleAll(gstps, function(gstp) {
      return gstp;
    });
  }
}


/**
 * Implementation of what to do in each batch.
 *
 * For every {@link GeoSurfaceTimePoint} in `batch.values`:
 *  1. fetch the SmokePPESimulationOutput records of that point to learn which
 *     simulation samples have data there;
 *  2. exclude every SmokePPESimulationModelParameters id without such a record
 *     from the features;
 *  3. upsert a GPRDataSourceSpec, build a GaussianProcessRegressionPipe on it,
 *     and train + upsert the pipe only when both features and target are non-empty.
 *
 * @param {SmokePPEGaussianMLTrainingJobBatch} batch
 * @param {SmokePPEGaussianMLTrainingJob} job
 * @param {SmokePPEGaussianMLTrainingJobOptions} options
 */
function processBatch(batch, job, options) {
  // These do not depend on the point being processed, so build/fetch them once per batch
  // instead of once per point.
  var targetType = TypeRef.make({"typeName": "SmokePPESimulationOutput"});
  var featuresType = TypeRef.make({"typeName": "SmokePPESimulationModelParameters"});

  // `|| []`: guard against a fetch result that carries no `objs` at all
  var allSamples = featuresType.toType().fetch({
    "limit": -1,
    "order": "id",
    "include": "id"
  }).objs || [];
  var allSimIds = [];
  for (var a = 0; a < allSamples.length; a++) {
    allSimIds.push(allSamples[a].id);
  }

  batch.values.forEach(function(gstp) {

    // define target
    var targetFilter = Filter.eq("geoSurfaceTimePoint.id", gstp.id);
    var targetSpec = FetchSpec.make({
      "limit": -1,
      "order": "simulationSample.id",
      "filter": targetFilter.toString()
    });

    // find the simulations that have output at this point
    var simulationsSpec = FetchSpec.make({
      "limit": -1,
      "order": "simulationSample.id",
      "filter": targetFilter.toString(),
      "include": "simulationSample"
    });
    // same guard as above: a point without any output must not crash the whole batch
    var samples = targetType.toType().fetch(simulationsSpec).objs || [];
    var simIds = [];
    for (var i = 0; i < samples.length; i++) {
      simIds.push(samples[i].simulationSample.id);
    }

    // every known sample id that has no output at this point is excluded from the features
    var excludeIds = [];
    for (var k = 0; k < allSimIds.length; k++) {
      if (simIds.indexOf(allSimIds[k]) === -1) {
        excludeIds.push(allSimIds[k]);
      }
    }

    // define the features
    var featuresFilter = Filter.not().intersects("id", excludeIds);
    var featuresSpec = FetchSpec.make({
      "limit": -1,
      "order": "id",
      "filter": featuresFilter
    });

    // define the data source spec
    var sourceSpec = GPRDataSourceSpec.make({
      "featuresType": featuresType,
      "featuresSpec": featuresSpec,
      "excludeFeatures": options.excludeFeatures,
      "targetType": targetType,
      "targetSpec": targetSpec,
      "targetName": options.targetName
    }).upsert();

    // create the pipe
    var GPR_pipe = GaussianProcessRegressionPipe.make({
      "technique": options.gprTechnique,
      "dataSourceSpec": sourceSpec
    });

    // get target and features
    var X = GPR_pipe.getFeatures();
    var y = GPR_pipe.getTarget();

    // train and save only when there is something to train on
    if (X.size() > 0 && y.size() > 0) {
      var GPR_pipe_trained = GPR_pipe.train(X, y);
      GPR_pipe_trained.upsert();
    }
  });
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
* All rights reserved. License: https://github.com/c3aidti/.github
**/
/**
* Represents a unit of work (batch) in a {@link AODGaussianMLTraningJob}
* Represents a unit of work (batch) in a {@link SmokePPEGaussianMLTrainingJob}
*/
type AODGaussianMLTrainingJobBatch {
type SmokePPEGaussianMLTrainingJobBatch {
// The {@link GeoSurfaceTimePoint}s that define the targets for the models in this batch
values: [GeoSurfaceTimePoint]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
* All rights reserved. License: https://github.com/c3aidti/.github
**/
/**
* Represents customization options for {@link AODGaussianMLTrainingJob}
* Represents customization options for {@link SmokePPEGaussianMLTrainingJob}
*/
type AODGaussianMLTrainingJobOptions {
type SmokePPEGaussianMLTrainingJobOptions {
// How many models will be trained in each batch
batchSize: int = 10
// {@link GeoSurfaceTimePoint}s filter that defines the total number of models
Expand All @@ -18,4 +18,6 @@ type AODGaussianMLTrainingJobOptions {
gprTechnique: !GaussianProcessRegressionTechnique
// hardware profile ID to run the batches
hardwareProfileId: string = "appc8m642-w"
}
// flag for staged GSTPs training ({@link StagedGSTP})
stagedGSTP: boolean = false
}
18 changes: 18 additions & 0 deletions smoke/smokeApp/src/entity/coordinates/StagedGSTP.c3typ
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/**
* Copyright (c) 2022, C3 AI DTI, Development Operations Team
* All rights reserved. License: https://github.com/c3aidti/.github
**/
/**
* Staged {@link GeoSurfaceTimePoint} for faster processing by ML pipes.
*
* At most one staged record exists per point (unique on geoSurfaceTimePoint).
*/
@db(unique=['geoSurfaceTimePoint'])
entity type StagedGSTP schema name 'STGD_GSTP' {
// the {@link GeoSurfaceTimePoint}
geoSurfaceTimePoint: !GeoSurfaceTimePoint
// stage based on a region filter
// NOTE(review): the int result is presumably the number of records staged; confirm
@py(env='gordon-ML_1_0_0')
directStage: function(gstpFilter: any): int
// unstage based on a region filter
// NOTE(review): the int result is presumably the number of records unstaged; confirm
@py(env='gordon-ML_1_0_0')
unstage: function(gstpFilter: any): int
}
Loading