From 3ed97b144df758a1058083082d0bb863ccb17a58 Mon Sep 17 00:00:00 2001 From: Tyler Benster Date: Thu, 28 Dec 2017 19:42:08 -0800 Subject: [PATCH 1/4] fixed test/validation nomenclature --- glia/machine_learning.py | 28 +++++----- glia_scripts/classify/svc.py | 102 +++++++++++++++++------------------ glia_scripts/convert.py | 76 +++++++++++++------------- glia_scripts/solid.py | 2 +- 4 files changed, 104 insertions(+), 104 deletions(-) diff --git a/glia/machine_learning.py b/glia/machine_learning.py index a2de3f8..2fce7c6 100644 --- a/glia/machine_learning.py +++ b/glia/machine_learning.py @@ -11,7 +11,7 @@ logger = logging.getLogger('glia') -TVT = namedtuple("TVT", ['training', "validation", "test"]) +TVT = namedtuple("TVT", ['training', "test", "validation"]) def tvt_by_percentage(n, training=60, validation=20,testing=20): summed = training+validation+testing @@ -19,7 +19,7 @@ def tvt_by_percentage(n, training=60, validation=20,testing=20): train = int(np.floor(n*training/100)) valid = int(np.ceil(n*validation/100)) test = n - valid - train - return TVT(train, valid, test) + return TVT(train, test, valid) def f_split_dict(tvt): """Subset dict into training, validation, & test.""" @@ -31,14 +31,14 @@ def anonymous(dictionary): v = dictionary[k] if i < tvt.training: split.training[k] = v - elif i < tvt.validation + tvt.training: - split.validation[k] = v - elif i < tvt.test + tvt.validation + tvt.training: + elif i < tvt.test + tvt.training: split.test[k] = v + elif i < tvt.validation + tvt.test + tvt.training: + split.validation[k] = v else: - raise(ValueError, 'bad training, validation & test split.') + raise(ValueError, 'bad training, test & validation split.') i += 1 - assert i == tvt.training+tvt.validation+tvt.test + assert i == tvt.training+tvt.test+tvt.validation return split return anonymous @@ -62,7 +62,7 @@ def anonymous(dictionary): ) def tvt_map(tvt, f): - return TVT(f(tvt.training), f(tvt.validation), f(tvt.test)) + return TVT(f(tvt.training), f(tvt.test), f(tvt.validation)) def f_split_list(tvt, get_list=lambda x: x): """Subset list into training, validation, & test.""" @@ -72,16 +72,16 @@ def anonymous(x): for i,v in enumerate(my_list): if i < tvt.training: split.training.append(v) - elif i < tvt.validation + tvt.training: - split.validation.append(v) - elif i < tvt.test + tvt.validation + tvt.training: + elif i < tvt.test + tvt.training: split.test.append(v) + elif i < tvt.validation + tvt.test + tvt.training: + split.validation.append(v) else: - raise(ValueError, 'bad training, validation & test split.') + raise(ValueError, 'bad training, test & validation split.') try: - assert len(my_list) == tvt.training+tvt.validation+tvt.test + assert len(my_list) == tvt.training+tvt.test+tvt.validation except Exception as e: - print(len(my_list), tvt.training+tvt.validation+tvt.test) + print(len(my_list), tvt.training+tvt.test+tvt.validation) raise e return split diff --git a/glia_scripts/classify/svc.py b/glia_scripts/classify/svc.py index 70093e8..e6bcb16 100644 --- a/glia_scripts/classify/svc.py +++ b/glia_scripts/classify/svc.py @@ -37,24 +37,24 @@ def get_checkerboard_contrasts(stimulus_list): assert len(contrasts)>0 return contrasts -def svm_helper(training_data, training_target, validation_data, validation_target): +def svm_helper(training_data, training_target, test_data, test_target): # Create a classifier: a support vector classifier classifier = svm.SVC() classifier.fit(training_data, training_target) - predicted = classifier.predict(validation_data) - expected = validation_target + predicted = classifier.predict(test_data) + expected = test_target return metrics.accuracy_score(expected, predicted) -def classifier_helper(classifier, training, validation): +def classifier_helper(classifier, training, test): training_data, training_target = training - validation_data, validation_target = validation + test_data, test_target = test classifier.fit(training_data, training_target) - predicted = classifier.predict(validation_data) - expected = validation_target + predicted = classifier.predict(test_data) + expected = test_target report = metrics.classification_report(expected, predicted) confusion = confusion_matrix(expected, predicted) @@ -79,15 +79,15 @@ def error_bars(data, target, ndraws=20): for i in range(ndraws): np.random.shuffle(indices) training_ind = indices[0:ntrain] - validation_ind = indices[ntrain:] + test_ind = indices[ntrain:] training_data = data[training_ind] training_target = target[training_ind] - validation_data = data[validation_ind] - validation_target = target[validation_ind] + test_data = data[test_ind] + test_target = target[test_ind] accuracy[i] = svm_helper(training_data, training_target, - validation_data, validation_target) + test_data, test_target) std = np.std(accuracy) return (np.mean(accuracy),std,std) @@ -109,10 +109,10 @@ def bin_sum(data): (nconditions, nsizes, n_training, n_x*n_y*n_units)) def plot_acuity(logmar, accuracy, yerror, - n_validation, name, conditions, condition_name, plot_directory): + n_test, name, conditions, condition_name, plot_directory): print(f"plotting {name} {condition_name} classification accuracy.") - sig5 = np.repeat(binom.ppf(0.95, n_validation, 0.5)/n_validation, len(logmar)) - sig1 = np.repeat(binom.ppf(0.99, n_validation, 0.5)/n_validation, len(logmar)) + sig5 = np.repeat(binom.ppf(0.95, n_test, 0.5)/n_test, len(logmar)) + sig1 = np.repeat(binom.ppf(0.99, n_test, 0.5)/n_test, len(logmar)) fig, ax = plt.subplots() nconditions = len(conditions) @@ -147,20 +147,20 @@ def plot_acuity(logmar, accuracy, yerror, ax.set_title(f'{name} classification by {condition_name}') fig.savefig(os.path.join(plot_directory, f"{name}-{condition_name}_acuity.png")) -def acuity(training_data, training_target, validation_data, validation_target, +def acuity(training_data, training_target, test_data, test_target, stimulus_list, plot_directory, name, sizes, conditions, condition_name): print(f"training classifiers.") # polymorphic over ndarray or list for conditions nconditions = len(training_data) assert nconditions==len(training_target) - assert nconditions==len(validation_data) - assert nconditions==len(validation_target) + assert nconditions==len(test_data) + assert nconditions==len(test_target) nsizes = training_data[0].shape[0] assert nsizes==training_target[0].shape[0] - assert nsizes==validation_data[0].shape[0] - assert nsizes==validation_target[0].shape[0] - n_validation = validation_data[0].shape[1] + assert nsizes==test_data[0].shape[0] + assert nsizes==test_target[0].shape[0] + n_test = test_data[0].shape[1] nclasses = 2 accuracy_100 = np.full((nconditions, nsizes), 0, dtype=np.float) @@ -168,16 +168,16 @@ def acuity(training_data, training_target, validation_data, validation_target, for condition in range(nconditions): for size in range(nsizes): data = np.concatenate( - [training_data[condition][size],validation_data[condition][size]]) + [training_data[condition][size],test_data[condition][size]]) target = np.concatenate( - [training_target[condition][size],validation_target[condition][size]]) + [training_target[condition][size],test_target[condition][size]]) (mean,below,above) = error_bars(data,target) accuracy_100[condition, size] = mean yerror[condition, :, size] = [below,above] logmar = list(map(px_to_logmar,sizes)) - plot_acuity(logmar, accuracy_100, yerror, n_validation, + plot_acuity(logmar, accuracy_100, yerror, n_test, name, conditions, condition_name, plot_directory) @@ -187,17 +187,17 @@ def checkerboard_svc(data, metadata, stimulus_list, lab_notebook, plot_directory name = metadata["name"] if name=='checkerboard-contrast': training_data = bin_100ms(data["training_data"]) - validation_data = bin_100ms(data["validation_data"]) + test_data = bin_100ms(data["test_data"]) training_target = data["training_target"] - validation_target = data["validation_target"] + test_target = data["test_target"] conditions = get_checkerboard_contrasts(stimulus_list) condition_name = "contrast" elif name=="checkerboard-durations": training_data = bin_100ms(data["training_data"]) - validation_data = bin_100ms(data["validation_data"]) + test_data = bin_100ms(data["test_data"]) training_target = data["training_target"] - validation_target = data["validation_target"] + test_target = data["test_target"] conditions = get_stimulus_parameters(stimulus_list, "CHECKERBOARD", 'lifespan') @@ -206,13 +206,13 @@ def checkerboard_svc(data, metadata, stimulus_list, lab_notebook, plot_directory training_100ms = bin_100ms(data["training_data"])[0] training_sum = bin_sum(data["training_data"])[0] training_data = [training_100ms, training_sum] - validation_100ms = bin_100ms(data["validation_data"])[0] - validation_sum = bin_sum(data["validation_data"])[0] - validation_data = [validation_100ms, validation_sum] + test_100ms = bin_100ms(data["test_data"])[0] + test_sum = bin_sum(data["test_data"])[0] + test_data = [test_100ms, test_sum] tt = data["training_target"][0] training_target = [tt,tt] - vt = data["validation_target"][0] - validation_target = [vt,vt] + vt = data["test_target"][0] + test_target = [vt,vt] conditions = ['100ms bins', 'spike count'] condition_name = None @@ -222,7 +222,7 @@ def checkerboard_svc(data, metadata, stimulus_list, lab_notebook, plot_directory plot_diff_nsamples(data, stimulus_list, plot_directory, "checkerboard", sizes, conditions, condition_name) else: - acuity(training_data, training_target, validation_data, validation_target, + acuity(training_data, training_target, test_data, test_target, stimulus_list, plot_directory, "checkerboard", sizes, conditions, condition_name) @@ -232,25 +232,25 @@ def grating_svc(data, metadata, stimulus_list, lab_notebook, plot_directory, sizes = get_stimulus_parameters(stimulus_list, "GRATING", "width") if metadata["name"]=='grating-contrast': training_data = bin_100ms(data["training_data"]) - validation_data = bin_100ms(data["validation_data"]) + test_data = bin_100ms(data["test_data"]) training_target = data["training_target"] - validation_target = data["validation_target"] + test_target = data["test_target"] conditions = get_grating_contrasts(stimulus_list) condition_name = "contrast" elif metadata["name"]=="grating-durations": training_data = bin_100ms(data["training_data"]) - validation_data = bin_100ms(data["validation_data"]) + test_data = bin_100ms(data["test_data"]) training_target = data["training_target"] - validation_target = data["validation_target"] + test_target = data["test_target"] conditions = get_stimulus_parameters(stimulus_list, "GRATING", 'lifespan') condition_name = "durations" elif metadata["name"]=="grating-speeds": training_data = bin_100ms(data["training_data"]) - validation_data = bin_100ms(data["validation_data"]) + test_data = bin_100ms(data["test_data"]) training_target = data["training_target"] - validation_target = data["validation_target"] + test_target = data["test_target"] conditions = get_stimulus_parameters(stimulus_list, "GRATING", 'speed') condition_name = "speeds" @@ -258,13 +258,13 @@ def grating_svc(data, metadata, stimulus_list, lab_notebook, plot_directory, training_100ms = bin_100ms(data["training_data"])[0] training_sum = bin_sum(data["training_data"])[0] training_data = [training_100ms, training_sum] - validation_100ms = bin_100ms(data["validation_data"])[0] - validation_sum = bin_sum(data["validation_data"])[0] - validation_data = [validation_100ms, validation_sum] + test_100ms = bin_100ms(data["test_data"])[0] + test_sum = bin_sum(data["test_data"])[0] + test_data = [test_100ms, test_sum] tt = data["training_target"][0] training_target = [tt,tt] - vt = data["validation_target"][0] - validation_target = [vt,vt] + vt = data["test_target"][0] + test_target = [vt,vt] conditions = ['100ms bins', 'spike count'] condition_name = None @@ -274,7 +274,7 @@ def grating_svc(data, metadata, stimulus_list, lab_notebook, plot_directory, plot_diff_nsamples(data, stimulus_list, plot_directory, "grating", sizes, conditions, condition_name) else: - acuity(training_data, training_target, validation_data, validation_target, + acuity(training_data, training_target, test_data, test_target, stimulus_list, plot_directory, "grating", sizes, conditions, condition_name) @@ -289,14 +289,14 @@ def letter_svc(data, metadata, stimulus_list, lab_notebook, plot_directory, logger.debug(data["training_data"].shape) # add nconditions dim training_100ms = bin_100ms(np.expand_dims(data["training_data"],0)) - validation_100ms = bin_100ms(np.expand_dims(data["validation_data"],0)) + test_100ms = bin_100ms(np.expand_dims(data["test_data"],0)) logger.debug(f'training_100ms shape {training_100ms.shape}') logger.debug(f'sizes {sizes}') for i, size in enumerate(sizes): print(f'SVC for size {size}') # note: no expand dims, hardcoded 1 ncondition training_target = data["training_target"][i] - validation_target = data["validation_target"][i] + test_target = data["test_target"][i] logger.debug(np.size(training_target)) svr = svm.SVC() parameters = {'C': [1, 10, 100, 1000], @@ -304,7 +304,7 @@ def letter_svc(data, metadata, stimulus_list, lab_notebook, plot_directory, clf = GridSearchCV(svr, parameters, n_jobs=12) report, confusion = classifier_helper(clf, (training_100ms[0,i], training_target), - (validation_100ms[0,i], validation_target)) + (test_100ms[0,i], test_target)) with open(f"{plot_directory}/letter-{size}.txt", "w") as f: f.write(report+'\n') f.write(str(confusion)) @@ -321,14 +321,14 @@ def tiled_letter_svc(data, metadata, stimulus_list, lab_notebook, plot_directory logger.debug(data["training_data"].shape) # add nconditions dim training_100ms = bin_100ms(np.expand_dims(data["training_data"],0)) - validation_100ms = bin_100ms(np.expand_dims(data["validation_data"],0)) + test_100ms = bin_100ms(np.expand_dims(data["test_data"],0)) logger.debug(f'training_100ms shape {training_100ms.shape}') logger.debug(f'sizes {sizes}') for i, size in enumerate(sizes): print(f'SVC for size {size}') # note: no expand dims, hardcoded 1 ncondition training_target = data["training_target"][i] - validation_target = data["validation_target"][i] + test_target = data["test_target"][i] logger.debug(np.size(training_target)) svr = svm.SVC() parameters = {'C': [1, 10, 100, 1000], @@ -336,7 +336,7 @@ def tiled_letter_svc(data, metadata, stimulus_list, lab_notebook, plot_directory clf = GridSearchCV(svr, parameters, n_jobs=12) report, confusion = classifier_helper(clf, (training_100ms[0,i], training_target), - (validation_100ms[0,i], validation_target)) + (test_100ms[0,i], test_target)) with open(f"{plot_directory}/letter-{size}.txt", "w") as f: f.write(report+'\n') f.write(str(confusion)) diff --git a/glia_scripts/convert.py b/glia_scripts/convert.py index aca1f86..a652ddb 100644 --- a/glia_scripts/convert.py +++ b/glia_scripts/convert.py @@ -148,8 +148,8 @@ def save_eyechart_npz(units, stimulus_list, name): test_target[size_index] = tt np.savez(name, training_data=training_data, training_target=training_target, - validation_data=validation_data, validation_target=validation_target) - # test_data=test_data, test_target=test_target) + test_data=test_data, test_target=test_target) + # validation_data=validation_data, validation_target=validation_target) def save_letter_npz(units, stimulus_list, name): print("Saving letter NPZ file.") @@ -190,10 +190,10 @@ def save_letter_npz(units, stimulus_list, name): tvt.training*experiments_per_cohort,d,8,8,10),0,dtype='int8') training_target = np.full((nsizes, tvt.training*experiments_per_cohort),0,dtype='int8') - validation_data = np.full((nsizes, - tvt.validation*experiments_per_cohort,d,8,8,10),0,dtype='int8') - validation_target = np.full((nsizes, - tvt.validation*experiments_per_cohort),0,dtype='int8') + test_data = np.full((nsizes, + tvt.test*experiments_per_cohort,d,8,8,10),0,dtype='int8') + test_target = np.full((nsizes, + tvt.test*experiments_per_cohort),0,dtype='int8') size_map = {s: i for i,s in enumerate(sizes)} for size, cohorts in letters.items(): @@ -210,17 +210,17 @@ def save_letter_npz(units, stimulus_list, name): training_data[size_index] = pad_td training_target[size_index] = tt - td, tt = glia.experiments_to_ndarrays(glia.validation_cohorts(X), + td, tt = glia.experiments_to_ndarrays(glia.test_cohorts(X), letter_class) pad_td = np.pad(td, ((0,0),(0,missing_duration),(0,0),(0,0),(0,0)), mode='constant') - validation_data[size_index] = pad_td - validation_target[size_index] = tt + test_data[size_index] = pad_td + test_target[size_index] = tt np.savez(name, training_data=training_data, training_target=training_target, - validation_data=validation_data, validation_target=validation_target) - # test_data=test_data, test_target=test_target) + test_data=test_data, test_target=test_target) + # validation_data=validation_data, validation_target=validation_target) def save_letters_tiled_npz(units, stimulus_list, name): print("Saving letters-tiled NPZ file.") @@ -270,10 +270,10 @@ def save_letters_tiled_npz(units, stimulus_list, name): tvt.training*experiments_per_cohort,d,8,8,10),0,dtype='int8') training_target = np.full((nsizes, tvt.training*experiments_per_cohort),0,dtype='int8') - validation_data = np.full((nsizes, - tvt.validation*experiments_per_cohort,d,8,8,10),0,dtype='int8') - validation_target = np.full((nsizes, - tvt.validation*experiments_per_cohort),0,dtype='int8') + test_data = np.full((nsizes, + tvt.test*experiments_per_cohort,d,8,8,10),0,dtype='int8') + test_target = np.full((nsizes, + tvt.test*experiments_per_cohort),0,dtype='int8') size_map = {s: i for i,s in enumerate(sizes)} for size, cohorts in letters.items(): @@ -290,17 +290,17 @@ def save_letters_tiled_npz(units, stimulus_list, name): training_data[size_index] = pad_td training_target[size_index] = tt - td, tt = glia.experiments_to_ndarrays(glia.validation_cohorts(X), + td, tt = glia.experiments_to_ndarrays(glia.test_cohorts(X), letter_class) pad_td = np.pad(td, ((0,0),(0,missing_duration),(0,0),(0,0),(0,0)), mode='constant') - validation_data[size_index] = pad_td - validation_target[size_index] = tt + test_data[size_index] = pad_td + test_target[size_index] = tt np.savez(name, training_data=training_data, training_target=training_target, - validation_data=validation_data, validation_target=validation_target) - # test_data=test_data, test_target=test_target) + test_data=test_data, test_target=test_target) + # validation_data=validation_data, validation_target=validation_target) @@ -355,10 +355,10 @@ def save_checkerboard_npz(units, stimulus_list, name, group_by): tvt.training*4,d,8,8,10),0,dtype='int8') training_target = np.full((nconditions,nsizes, tvt.training*4),0,dtype='int8') - validation_data = np.full((nconditions,nsizes, - tvt.validation*4,d,8,8,10),0,dtype='int8') - validation_target = np.full((nconditions,nsizes, - tvt.validation*4),0,dtype='int8') + test_data = np.full((nconditions,nsizes, + tvt.test*4,d,8,8,10),0,dtype='int8') + test_target = np.full((nconditions,nsizes, + tvt.test*4),0,dtype='int8') # test_data = np.full((nsizes,tvt.test,d,nunits),0,dtype='int8') # test_target = np.full((nsizes,tvt.test),0,dtype='int8') @@ -380,18 +380,18 @@ def save_checkerboard_npz(units, stimulus_list, name, group_by): training_data[condition_index, size_index] = pad_td training_target[condition_index, size_index] = tt - td, tt = glia.experiments_to_ndarrays(glia.validation_cohorts(X), + td, tt = glia.experiments_to_ndarrays(glia.test_cohorts(X), checker_discrimination_class) pad_td = np.pad(td, ((0,0),(0,missing_duration),(0,0),(0,0),(0,0)), mode='constant') - validation_data[condition_index, size_index] = pad_td - validation_target[condition_index, size_index] = tt + test_data[condition_index, size_index] = pad_td + test_target[condition_index, size_index] = tt print('saving to ',name) np.savez(name, training_data=training_data, training_target=training_target, - validation_data=validation_data, validation_target=validation_target) - # test_data=test_data, test_target=test_target) + test_data=test_data, test_target=test_target) + # validation_data=validation_data, validation_target=validation_target) @@ -436,10 +436,10 @@ def save_grating_npz(units, stimulus_list, name, group_by): tvt.training*2,d,8,8,10),0,dtype='int8') training_target = np.full((nconditions,nsizes, tvt.training*2),0,dtype='int8') - validation_data = np.full((nconditions,nsizes, - tvt.validation*2,d,8,8,10),0,dtype='int8') - validation_target = np.full((nconditions,nsizes, - tvt.validation*2),0,dtype='int8') + test_data = np.full((nconditions,nsizes, + tvt.test*2,d,8,8,10),0,dtype='int8') + test_target = np.full((nconditions,nsizes, + tvt.test*2),0,dtype='int8') condition_map = {c: i for i,c in enumerate(conditions)} size_map = {s: i for i,s in enumerate(sizes)} @@ -458,15 +458,15 @@ def save_grating_npz(units, stimulus_list, name, group_by): training_data[condition_index, size_index] = pad_td training_target[condition_index, size_index] = tt - td, tt = glia.experiments_to_ndarrays(glia.validation_cohorts(X), + td, tt = glia.experiments_to_ndarrays(glia.test_cohorts(X), grating_class) pad_td = np.pad(td, ((0,0),(0,missing_duration),(0,0),(0,0),(0,0)), mode='constant') - validation_data[condition_index, size_index] = pad_td - validation_target[condition_index, size_index] = tt + test_data[condition_index, size_index] = pad_td + test_target[condition_index, size_index] = tt print('saving to ',name) np.savez(name, training_data=training_data, training_target=training_target, - validation_data=validation_data, validation_target=validation_target) - # test_data=test_data, test_target=test_target) + test_data=test_data, test_target=test_target) + # validation_data=validation_data, validation_target=validation_target) diff --git a/glia_scripts/solid.py b/glia_scripts/solid.py index 77c459f..0e731c8 100644 --- a/glia_scripts/solid.py +++ b/glia_scripts/solid.py @@ -239,7 +239,7 @@ def unit_classification_accuracy(tvt): for dark vs on and dark vs off""" dark_training, on_training, off_training = integrity_spike_counts(tvt.training) - dark_test, on_test, off_test = integrity_spike_counts(tvt.validation) + dark_test, on_test, off_test = integrity_spike_counts(tvt.test) X_on_train = np.array(dark_training + on_training).reshape((-1,1)) Y_on_train = np.hstack([np.full(len(dark_training), 0,dtype='int8'), From cde01e29fb940b5286464daba591f2c14d57a607 Mon Sep 17 00:00:00 2001 From: Tyler Benster Date: Thu, 28 Dec 2017 20:20:58 -0800 Subject: [PATCH 2/4] merged --- glia_scripts/convert.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/glia_scripts/convert.py b/glia_scripts/convert.py index 0bc2968..0a1f9b7 100644 --- a/glia_scripts/convert.py +++ b/glia_scripts/convert.py @@ -376,10 +376,10 @@ def save_image_npz(units, stimulus_list, name): tvt.training*experiments_per_cohort,d,8,8,10),0,dtype='int8') training_target = np.full((nsizes, tvt.training*experiments_per_cohort),0,dtype='int8') - validation_data = np.full((nsizes, - tvt.validation*experiments_per_cohort,d,8,8,10),0,dtype='int8') - validation_target = np.full((nsizes, - tvt.validation*experiments_per_cohort),0,dtype='int8') + test_data = np.full((nsizes, + tvt.test*experiments_per_cohort,d,8,8,10),0,dtype='int8') + test_target = np.full((nsizes, + tvt.test*experiments_per_cohort),0,dtype='int8') size_map = {s: i for i,s in enumerate(sizes)} for size, cohorts in letters.items(): @@ -396,17 +396,17 @@ def save_image_npz(units, stimulus_list, name): training_data[size_index] = pad_td training_target[size_index] = tt - td, tt = glia.experiments_to_ndarrays(glia.validation_cohorts(X), + td, tt = glia.experiments_to_ndarrays(glia.test_cohorts(X), image_class) pad_td = np.pad(td, ((0,0),(0,missing_duration),(0,0),(0,0),(0,0)), mode='constant') - validation_data[size_index] = pad_td - validation_target[size_index] = tt + test_data[size_index] = pad_td + test_target[size_index] = tt np.savez(name, training_data=training_data, training_target=training_target, - validation_data=validation_data, validation_target=validation_target) - # test_data=test_data, test_target=test_target) + test_data=test_data, test_target=test_target) + # validation_data=validation_data, validation_target=validation_target) From e69a656d010deb705198490cf94c1b551cc9bb3b Mon Sep 17 00:00:00 2001 From: Tyler Benster Date: Thu, 28 Dec 2017 20:30:11 -0800 Subject: [PATCH 3/4] fixed more names --- glia/machine_learning.py | 14 +++++++------- glia/plot.py | 10 +++++----- glia_scripts/classify/svc.py | 6 +++--- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/glia/machine_learning.py b/glia/machine_learning.py index 177f7d4..66a0851 100644 --- a/glia/machine_learning.py +++ b/glia/machine_learning.py @@ -202,13 +202,13 @@ def bin_sum(data): sorted(list(letter_map.items()), key=lambda x: x[1]))) -def classifier_helper(classifier, training, validation, classes=letter_classes): +def classifier_helper(classifier, training, test, classes=letter_classes): training_data, training_target = training - validation_data, validation_target = validation + test_data, test_target = test classifier.fit(training_data, training_target) - predicted = classifier.predict(validation_data) - expected = validation_target + predicted = classifier.predict(test_data) + expected = test_target report = metrics.classification_report(expected, predicted) confusion = confusion_matrix(expected, predicted, classes) @@ -259,12 +259,12 @@ def get_checkerboard_contrasts(stimulus_list): assert len(contrasts)>0 return contrasts -def svm_helper(training_data, training_target, validation_data, validation_target): +def svm_helper(training_data, training_target, test_data, test_target): # Create a classifier: a support vector classifier classifier = svm.SVC() classifier.fit(training_data, training_target) - predicted = classifier.predict(validation_data) - expected = validation_target + predicted = classifier.predict(test_data) + expected = test_target return metrics.accuracy_score(expected, predicted) diff --git a/glia/plot.py b/glia/plot.py index e0dd3b1..cf45bba 100755 --- a/glia/plot.py +++ b/glia/plot.py @@ -498,19 +498,19 @@ def raster_group(fig, axis_gen, data): def error_bars(data, target, ndraws=20): n = data.shape[0] accuracy = np.full((ndraws,), 0) - (ntrain, nvalid, _) = tvt_by_percentage(n,60,40,0) + (ntrain, ntest, _) = tvt_by_percentage(n,60,40,0) indices = np.arange(n) for i in range(ndraws): np.random.shuffle(indices) training_ind = indices[0:ntrain] - validation_ind = indices[ntrain:] + test_ind = indices[ntrain:] training_data = data[training_ind] training_target = target[training_ind] - validation_data = data[validation_ind] - validation_target = target[validation_ind] + test_data = data[test_ind] + test_target = target[test_ind] accuracy[i] = svm_helper(training_data, training_target, - validation_data, validation_target) + test_data, test_target) std = np.std(accuracy) return (np.mean(accuracy),std,std) diff --git a/glia_scripts/classify/svc.py b/glia_scripts/classify/svc.py index caafa36..66c7a47 100644 --- a/glia_scripts/classify/svc.py +++ b/glia_scripts/classify/svc.py @@ -225,14 +225,14 @@ def tiled_letter_svc(data, metadata, stimulus_list, lab_notebook, plot_directory logger.debug(data["training_data"].shape) # add nconditions dim training_100ms = glia.glia.bin_100ms(np.expand_dims(data["training_data"],0)) - validation_100ms = glia.glia.bin_100ms(np.expand_dims(data["validation_data"],0)) + test_100ms = glia.glia.bin_100ms(np.expand_dims(data["test_data"],0)) logger.debug(f'training_100ms shape {training_100ms.shape}') logger.debug(f'sizes {sizes}') for i, size in enumerate(sizes): print(f'SVC for size {size}') # note: no expand dims, hardcoded 1 ncondition training_target = data["training_target"][i] - validation_target = data["validation_target"][i] + test_target = data["test_target"][i] logger.debug(np.size(training_target)) svr = svm.SVC() parameters = {'C': [1, 10, 100, 1000], @@ -240,7 +240,7 @@ def tiled_letter_svc(data, metadata, stimulus_list, lab_notebook, plot_directory clf = GridSearchCV(svr, parameters, n_jobs=12) report, confusion = glia.classifier_helper(clf, (training_100ms[0,i], training_target), - (validation_100ms[0,i], validation_target)) + (test_100ms[0,i], test_target)) with open(f"{plot_directory}/letter-{size}.txt", "w") as f: f.write(report+'\n') f.write(str(confusion)) From 6f976c180d34a19f9ef4461f85221ffefa58449a Mon Sep 17 00:00:00 2001 From: Tyler Benster Date: Thu, 28 Dec 2017 22:40:15 -0800 Subject: [PATCH 4/4] fix --- glia/machine_learning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/glia/machine_learning.py b/glia/machine_learning.py index 66a0851..e1ffb61 100644 --- a/glia/machine_learning.py +++ b/glia/machine_learning.py @@ -16,7 +16,7 @@ TVT = namedtuple("TVT", ['training', "test", "validation"]) -def tvt_by_percentage(n, training=60, validation=20,testing=20): +def tvt_by_percentage(n, training=60,testing=20, validation=20): summed = training+validation+testing assert summed==100 train = int(np.floor(n*training/100))